C#调用ICTCLAS30
时间:2010-09-01 来源:让大家开心的人
SharpICTCLAS虽说是开源的,但07年以后就没有人再进行维护,跑搜狗的语料问题不少,就连C#版本的作者也承认有不少问题。
想得到更为准确的分词结果,还是得研究ICTCLAS 3.0,也就是ICTCLAS 2009版。它的dll文件不是用C#开发的,所以要通过DllImport引入。
先自己写了个类
代码
using System;
using System.Collections.Generic;
using System.Text;
using System.Runtime.InteropServices;
namespace test
{
/// <summary>
/// Managed mirror of one native ICTCLAS30 result record, used as the element
/// type of the array handed to <c>ICTCLAS30.ParagraphProcessAW</c>.
/// Eight 4-byte integers pinned at consecutive offsets: 32 bytes in total.
/// </summary>
/// <remarks>
/// NOTE(review): the per-field meanings are not visible in this file; confirm
/// them against the native ICTCLAS30 header before relying on any field.
/// </remarks>
[StructLayout(LayoutKind.Explicit)]
public struct result_t
{
    [FieldOffset(0)]  public int start;      // bytes 0-3
    [FieldOffset(4)]  public int length;     // bytes 4-7
    [FieldOffset(8)]  public int sPos;       // bytes 8-11
    [FieldOffset(12)] public int sPosLow;    // bytes 12-15
    [FieldOffset(16)] public int POS_id;     // bytes 16-19
    [FieldOffset(20)] public int word_ID;    // bytes 20-23
    [FieldOffset(24)] public int word_type;  // bytes 24-27
    [FieldOffset(28)] public int weight;     // bytes 28-31
}
/// <summary>
/// P/Invoke bindings for the native <c>ICTCLAS30.dll</c> word-segmentation library.
/// All members are static; strings are marshalled as ANSI (<see cref="CharSet.Ansi"/>).
/// </summary>
/// <remarks>
/// NOTE(review): no <c>CallingConvention</c> is specified, so the platform default is used.
/// If the DLL exports plain <c>extern "C"</c> (cdecl) functions, 32-bit processes may need
/// <c>CallingConvention = CallingConvention.Cdecl</c> -- confirm against the native header.
/// </remarks>
class ICTCLAS30
{
    // Resolved through the normal DLL search order (e.g. next to the executable).
    const string path = @"ICTCLAS30.dll";

    /// <summary>Calls native <c>ICTCLAS_Init</c>.</summary>
    /// <param name="sInitDirPath">Initialisation directory handed to the native library; may be null.</param>
    /// <returns>The native success flag.</returns>
    // I1: only the low byte of the return register is read. Assumes the native function
    // returns a 1-byte C++ bool (TODO confirm); a 0/1 Win32 BOOL is also read correctly.
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_Init")]
    [return: MarshalAs(UnmanagedType.I1)]
    public static extern bool Init(String sInitDirPath);

    // Raw binding: returns the native char* untouched. Declaring the return type as String
    // would make the marshaller release the buffer with CoTaskMemFree after copying it,
    // which is wrong for memory the runtime did not allocate.
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_ParagraphProcess")]
    private static extern IntPtr ParagraphProcessNative(String sParagraph, int bPOStagged);

    /// <summary>Calls native <c>ICTCLAS_ParagraphProcess</c> and copies its result into a managed string.</summary>
    /// <param name="sParagraph">Text to process.</param>
    /// <param name="bPOStagged">Flag passed straight through to the native call.</param>
    /// <returns>A managed copy of the native result, or null if the native call returned a null pointer.</returns>
    public static String ParagraphProcess(String sParagraph, int bPOStagged)
    {
        // presumably the result buffer is owned by the DLL, so it is copied and never freed here.
        IntPtr nativeResult = ParagraphProcessNative(sParagraph, bPOStagged);
        return Marshal.PtrToStringAnsi(nativeResult);
    }

    /// <summary>Calls native <c>ICTCLAS_Exit</c>.</summary>
    /// <returns>The native success flag.</returns>
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_Exit")]
    [return: MarshalAs(UnmanagedType.I1)]
    public static extern bool Exit();

    /// <summary>Calls native <c>ICTCLAS_ImportUserDict</c>.</summary>
    /// <param name="sFilename">File name handed to the native library.</param>
    /// <returns>The integer returned by the native call (meaning not visible in this file).</returns>
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_ImportUserDict")]
    public static extern int ImportUserDict(String sFilename);

    /// <summary>Calls native <c>ICTCLAS_FileProcess</c>.</summary>
    /// <param name="sSrcFilename">Source file name.</param>
    /// <param name="sDestFilename">Destination file name.</param>
    /// <param name="bPOStagged">Flag passed straight through to the native call.</param>
    /// <returns>The native success flag.</returns>
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_FileProcess")]
    [return: MarshalAs(UnmanagedType.I1)]
    public static extern bool FileProcess(String sSrcFilename, String sDestFilename, int bPOStagged);

    /// <summary>Calls native <c>ICTCLAS_FileProcessEx</c>.</summary>
    /// <param name="sSrcFilename">Source file name.</param>
    /// <param name="sDestFilename">Destination file name.</param>
    /// <returns>The native success flag.</returns>
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_FileProcessEx")]
    [return: MarshalAs(UnmanagedType.I1)]
    public static extern bool FileProcessEx(String sSrcFilename, String sDestFilename);

    /// <summary>Calls native <c>ICTCLAS_GetParagraphProcessAWordCount</c>.</summary>
    /// <param name="sParagraph">Text to process.</param>
    /// <returns>The integer returned by the native call; presumably the element count to
    /// allocate before calling <see cref="ParagraphProcessAW"/> -- confirm against the header.</returns>
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_GetParagraphProcessAWordCount")]
    public static extern int GetParagraphProcessAWordCount(String sParagraph);

    /// <summary>Calls native <c>ICTCLAS_ParagraphProcessAW</c>, which fills a caller-allocated array.</summary>
    /// <param name="nCount">Count passed to the native call.</param>
    /// <param name="result">Caller-allocated array marshalled as an out-only native array.</param>
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_ParagraphProcessAW")]
    public static extern void ParagraphProcessAW(int nCount, [Out, MarshalAs(UnmanagedType.LPArray)] result_t[] result);

    /// <summary>Calls native <c>ICTCLAS_AddUserWord</c>.</summary>
    /// <param name="sWord">Word handed to the native library.</param>
    /// <returns>The integer returned by the native call (meaning not visible in this file).</returns>
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_AddUserWord")]
    public static extern int AddUserWord(String sWord);

    /// <summary>Calls native <c>ICTCLAS_SaveTheUsrDic</c>.</summary>
    /// <returns>The integer returned by the native call (meaning not visible in this file).</returns>
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_SaveTheUsrDic")]
    public static extern int SaveTheUsrDic();

    /// <summary>Calls native <c>ICTCLAS_DelUsrWord</c>.</summary>
    /// <param name="sWord">Word handed to the native library.</param>
    /// <returns>The integer returned by the native call (meaning not visible in this file).</returns>
    // Made public: without a modifier it was private and unreachable, unlike every sibling binding.
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_DelUsrWord")]
    public static extern int DelUsrWord(String sWord);

    /// <summary>Kept so existing <c>new ICTCLAS30()</c> call sites still compile; the class holds no instance state.</summary>
    public ICTCLAS30()
    {
    }
}
}
调用:
代码
// Initialise the native library; null is passed as the init directory.
if (!ICTCLAS30.Init(null))
{
    System.Console.WriteLine("Init ICTCLAS failed!");
    return;
}
System.Console.WriteLine("Init ICTCLAS Success!");
try
{
    // Second argument 1 is the bPOStagged flag of ParagraphProcess.
    String pResult = ICTCLAS30.ParagraphProcess("点击下载超女纪敏佳深受观众喜爱。禽流感爆发在非典之后。", 1);
    System.Console.WriteLine(pResult);
}
finally
{
    // Run Exit even when segmentation or printing throws, so a successful Init is always paired with it.
    ICTCLAS30.Exit();
}
注:
使用的时候把ICTCLAS30.dll,Configure.xml和Data文件夹copy到程序exe运行的位置,否则需要指定它们的位置。
如何把pResult搞成昨天博文里的wordResult格式还是个问题,还需要好好研究。
附上:官方网站的C#调用示例
代码
using System;
using System.IO;
using System.Runtime.InteropServices;
namespace win_csharp
{
/// <summary>
/// Managed mirror of a native ICTCLAS result record as declared in the
/// vendor's C# sample: four 4-byte integers at consecutive offsets, 16 bytes in total.
/// </summary>
/// <remarks>
/// NOTE(review): the per-field meanings are not visible in this file; confirm
/// them (and the record size) against the native header of the DLL in use.
/// </remarks>
[StructLayout(LayoutKind.Explicit)]
public struct result_t
{
    [FieldOffset(0)]  public int start;    // bytes 0-3
    [FieldOffset(4)]  public int length;   // bytes 4-7
    [FieldOffset(8)]  public int POS_id;   // bytes 8-11
    [FieldOffset(12)] public int word_ID;  // bytes 12-15
}
/// <summary>
/// Console sample: initialises the native ICTCLAS30 library, processes one
/// hard-coded paragraph, prints the result and shuts the library down.
/// </summary>
/// <remarks>
/// NOTE(review): no <c>CallingConvention</c> is specified, so the platform default is used.
/// If the DLL exports plain <c>extern "C"</c> (cdecl) functions, 32-bit processes may need
/// <c>CallingConvention = CallingConvention.Cdecl</c> -- confirm against the native header.
/// </remarks>
class Class1
{
    // Resolved through the normal DLL search order (e.g. next to the executable).
    const string path = @"ICTCLAS30.dll";

    /// <summary>Calls native <c>ICTCLAS_Init</c>.</summary>
    /// <param name="sInitDirPath">Initialisation directory handed to the native library; may be null.</param>
    /// <returns>The native success flag.</returns>
    // I1: only the low byte of the return register is read. Assumes the native function
    // returns a 1-byte C++ bool (TODO confirm); a 0/1 Win32 BOOL is also read correctly.
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_Init")]
    [return: MarshalAs(UnmanagedType.I1)]
    public static extern bool ICTCLAS_Init(String sInitDirPath);

    // Raw binding: returns the native char* untouched. Declaring the return type as String
    // would make the marshaller release the buffer with CoTaskMemFree after copying it,
    // which is wrong for memory the runtime did not allocate.
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_ParagraphProcess")]
    private static extern IntPtr ParagraphProcessNative(String sParagraph, int bPOStagged);

    /// <summary>Calls native <c>ICTCLAS_ParagraphProcess</c> and copies its result into a managed string.</summary>
    /// <param name="sParagraph">Text to process.</param>
    /// <param name="bPOStagged">Flag passed straight through to the native call.</param>
    /// <returns>A managed copy of the native result, or null if the native call returned a null pointer.</returns>
    public static String ICTCLAS_ParagraphProcess(String sParagraph, int bPOStagged)
    {
        // presumably the result buffer is owned by the DLL, so it is copied and never freed here.
        IntPtr nativeResult = ParagraphProcessNative(sParagraph, bPOStagged);
        return Marshal.PtrToStringAnsi(nativeResult);
    }

    /// <summary>Calls native <c>ICTCLAS_Exit</c>.</summary>
    /// <returns>The native success flag.</returns>
    [DllImport(path, CharSet = CharSet.Ansi, EntryPoint = "ICTCLAS_Exit")]
    [return: MarshalAs(UnmanagedType.I1)]
    public static extern bool ICTCLAS_Exit();

    /// <summary>
    /// Main entry point of the application.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    [STAThread]
    static void Main(string[] args)
    {
        if (!ICTCLAS_Init(null))
        {
            System.Console.WriteLine("Init ICTCLAS failed!");
            return;
        }
        System.Console.WriteLine("Init ICTCLAS Success!");
        try
        {
            // Second argument 1 is the bPOStagged flag of ICTCLAS_ParagraphProcess.
            String pResult = ICTCLAS_ParagraphProcess("点击下载超女纪敏佳深受观众喜爱。禽流感爆发在非典之后。", 1);
            System.Console.WriteLine(pResult);
        }
        finally
        {
            // Run Exit even when segmentation or printing throws, so a successful Init is always paired with it.
            ICTCLAS_Exit();
        }
    }
}
}
参考:1. format Blog http://www.cnblogs.com/format
2.ICTCLAS 官方文档: http://ictclas.org/zy_2.html