MeCab
|
/*
  MeCab -- Yet Another Part-of-Speech and Morphological Analyzer

  Copyright(C) 2001-2011 Taku Kudo <taku@chasen.org>
  Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
*/
#ifndef MECAB_MECAB_H_
#define MECAB_MECAB_H_

/* C/C++ common data structures */

/* Forward declarations: the three structures below refer to one another
   through pointers only. */
struct mecab_dictionary_info_t;
struct mecab_path_t;
struct mecab_node_t;

/**
 * Description of one dictionary.
 * Entries chain to each other through `next`, forming a singly linked list.
 */
struct mecab_dictionary_info_t {
  const char *filename;
  const char *charset;
  unsigned int size;
  /* presumably one of MECAB_SYS_DIC / MECAB_USR_DIC / MECAB_UNK_DIC
     (declared below) -- TODO confirm against the implementation */
  int type;
  unsigned int lsize;
  unsigned int rsize;
  unsigned short version;
  struct mecab_dictionary_info_t *next;
};

/**
 * Edge between two nodes (`lnode` and `rnode`).
 * `lnext` / `rnext` link this edge to further mecab_path_t objects.
 */
struct mecab_path_t {
  struct mecab_node_t *rnode;
  struct mecab_path_t *rnext;
  struct mecab_node_t *lnode;
  struct mecab_path_t *lnext;
  int cost;
  float prob;
};

/**
 * Node structure.
 * Field order and types are part of the binary interface; do not reorder.
 */
struct mecab_node_t {
  struct mecab_node_t *prev;
  struct mecab_node_t *next;
  struct mecab_node_t *enext;
  struct mecab_node_t *bnext;
  struct mecab_path_t *rpath;
  struct mecab_path_t *lpath;
  /* NOTE(review): `length` is stored separately, so `surface` may not be
     NUL-terminated at the end of this node -- confirm before using it as a
     C string. */
  const char *surface;
  const char *feature;
  unsigned int id;
  unsigned short length;
  unsigned short rlength;
  unsigned short rcAttr;
  unsigned short lcAttr;
  unsigned short posid;
  unsigned char char_type;
  /* presumably one of the MECAB_*_NODE constants declared below
     -- TODO confirm */
  unsigned char stat;
  unsigned char isbest;
  float alpha;
  float beta;
  float prob;
  short wcost;
  long cost;
};

/** Node kind constants (MECAB_*_NODE). */
enum {
  MECAB_NOR_NODE = 0,
  MECAB_UNK_NODE = 1,
  MECAB_BOS_NODE = 2,
  MECAB_EOS_NODE = 3,
  MECAB_EON_NODE = 4
};

/** Dictionary kind constants (MECAB_*_DIC). */
enum {
  MECAB_SYS_DIC = 0,
  MECAB_USR_DIC = 1,
  MECAB_UNK_DIC = 2
};

/**
 * Request type constants.
 * Every value is a distinct power of two, so several may be OR-ed together
 * into the `int request_type` arguments used by the lattice interface.
 */
enum {
  MECAB_ONE_BEST          = 1,
  MECAB_NBEST             = 2,
  MECAB_PARTIAL           = 4,
  MECAB_MARGINAL_PROB     = 8,
  MECAB_ALTERNATIVE       = 16,
  MECAB_ALL_MORPHS        = 32,
  MECAB_ALLOCATE_SENTENCE = 64
};

/* C interface */
#ifdef __cplusplus
#include <cstdio>
#else
#include <stdio.h>
/* mecab_model_swap() below is declared with a `bool` return type; plain C
   needs <stdbool.h> for that name. */
#include <stdbool.h>
#endif

/* Platform headers and export macros are set up before the extern "C"
   region so that no system header is pulled in with C linkage forced on it. */
#ifdef _WIN32
#include <windows.h>
#  ifdef DLL_EXPORT
#    define MECAB_DLL_EXTERN  __declspec(dllexport)
#    define MECAB_DLL_CLASS_EXTERN  __declspec(dllexport)
#  else
     /* MECAB_DLL_CLASS_EXTERN is intentionally not defined on this branch;
        it falls through to the empty default below. */
#    define MECAB_DLL_EXTERN  __declspec(dllimport)
#  endif
#endif

#ifndef MECAB_DLL_EXTERN
#  define MECAB_DLL_EXTERN extern
#endif

#ifndef MECAB_DLL_CLASS_EXTERN
#  define MECAB_DLL_CLASS_EXTERN
#endif

#ifdef __cplusplus
extern "C" {
#endif

  /* Opaque handles (mecab_t, mecab_model_t, mecab_lattice_t) and short names
     for the data structures declared above. */
  typedef struct mecab_t                 mecab_t;
  typedef struct mecab_model_t           mecab_model_t;
  typedef struct mecab_lattice_t         mecab_lattice_t;
  typedef struct mecab_dictionary_info_t mecab_dictionary_info_t;
  typedef struct mecab_node_t            mecab_node_t;
  typedef struct mecab_path_t            mecab_path_t;

#ifndef SWIG
  /* C interface */

  /* old mecab interface */
  MECAB_DLL_EXTERN mecab_t *mecab_new(int argc, char **argv);
  MECAB_DLL_EXTERN mecab_t *mecab_new2(const char *arg);

  /* `(void)` rather than `()`: in C an empty list leaves the arguments
     unspecified instead of declaring that there are none. */
  MECAB_DLL_EXTERN const char *mecab_version(void);

  MECAB_DLL_EXTERN const char *mecab_strerror(mecab_t *mecab);
  MECAB_DLL_EXTERN void mecab_destroy(mecab_t *mecab);

  MECAB_DLL_EXTERN int mecab_get_partial(mecab_t *mecab);
  MECAB_DLL_EXTERN void mecab_set_partial(mecab_t *mecab, int partial);

  MECAB_DLL_EXTERN float mecab_get_theta(mecab_t *mecab);
  MECAB_DLL_EXTERN void mecab_set_theta(mecab_t *mecab, float theta);

  MECAB_DLL_EXTERN int mecab_get_lattice_level(mecab_t *mecab);
  MECAB_DLL_EXTERN void mecab_set_lattice_level(mecab_t *mecab, int level);

  MECAB_DLL_EXTERN int mecab_get_all_morphs(mecab_t *mecab);
  MECAB_DLL_EXTERN void mecab_set_all_morphs(mecab_t *mecab, int all_morphs);

  MECAB_DLL_EXTERN int mecab_parse_lattice(mecab_t *mecab,
                                           mecab_lattice_t *lattice);

  /* The `...2` variants take an explicit input length; the `...3` variants
     additionally take a caller-supplied output buffer (ostr, olen). */
  MECAB_DLL_EXTERN const char *mecab_sparse_tostr(mecab_t *mecab,
                                                  const char *str);
  MECAB_DLL_EXTERN const char *mecab_sparse_tostr2(mecab_t *mecab,
                                                   const char *str,
                                                   size_t len);
  MECAB_DLL_EXTERN char *mecab_sparse_tostr3(mecab_t *mecab,
                                             const char *str, size_t len,
                                             char *ostr, size_t olen);

  MECAB_DLL_EXTERN const mecab_node_t *mecab_sparse_tonode(mecab_t *mecab,
                                                           const char *str);
  MECAB_DLL_EXTERN const mecab_node_t *mecab_sparse_tonode2(mecab_t *mecab,
                                                            const char *str,
                                                            size_t len);

  MECAB_DLL_EXTERN const char *mecab_nbest_sparse_tostr(mecab_t *mecab,
                                                        size_t N,
                                                        const char *str);
  MECAB_DLL_EXTERN const char *mecab_nbest_sparse_tostr2(mecab_t *mecab,
                                                         size_t N,
                                                         const char *str,
                                                         size_t len);
  MECAB_DLL_EXTERN char *mecab_nbest_sparse_tostr3(mecab_t *mecab, size_t N,
                                                   const char *str, size_t len,
                                                   char *ostr, size_t olen);

  MECAB_DLL_EXTERN int mecab_nbest_init(mecab_t *mecab, const char *str);
  MECAB_DLL_EXTERN int mecab_nbest_init2(mecab_t *mecab,
                                         const char *str, size_t len);
  MECAB_DLL_EXTERN const char *mecab_nbest_next_tostr(mecab_t *mecab);
  MECAB_DLL_EXTERN char *mecab_nbest_next_tostr2(mecab_t *mecab,
                                                 char *ostr, size_t olen);
  MECAB_DLL_EXTERN const mecab_node_t *mecab_nbest_next_tonode(mecab_t *mecab);

  MECAB_DLL_EXTERN const char *mecab_format_node(mecab_t *mecab,
                                                 const mecab_node_t *node);

  MECAB_DLL_EXTERN const mecab_dictionary_info_t *mecab_dictionary_info(
      mecab_t *mecab);

  /* lattice interface */
  MECAB_DLL_EXTERN mecab_lattice_t *mecab_lattice_new(void);
  MECAB_DLL_EXTERN void mecab_lattice_destroy(mecab_lattice_t *lattice);
  MECAB_DLL_EXTERN void mecab_lattice_clear(mecab_lattice_t *lattice);
  MECAB_DLL_EXTERN int mecab_lattice_is_available(mecab_lattice_t *lattice);

  MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_get_bos_node(
      mecab_lattice_t *lattice);
  MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_get_eos_node(
      mecab_lattice_t *lattice);

  MECAB_DLL_EXTERN mecab_node_t **mecab_lattice_get_all_begin_nodes(
      mecab_lattice_t *lattice);
  MECAB_DLL_EXTERN mecab_node_t **mecab_lattice_get_all_end_nodes(
      mecab_lattice_t *lattice);
  MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_get_begin_nodes(
      mecab_lattice_t *lattice, size_t pos);
  MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_get_end_nodes(
      mecab_lattice_t *lattice, size_t pos);

  MECAB_DLL_EXTERN const char *mecab_lattice_get_sentence(
      mecab_lattice_t *lattice);
  MECAB_DLL_EXTERN void mecab_lattice_set_sentence(mecab_lattice_t *lattice,
                                                   const char *sentence);
  MECAB_DLL_EXTERN void mecab_lattice_set_sentence2(mecab_lattice_t *lattice,
                                                    const char *sentence,
                                                    size_t len);

  MECAB_DLL_EXTERN size_t mecab_lattice_get_size(mecab_lattice_t *lattice);

  MECAB_DLL_EXTERN double mecab_lattice_get_z(mecab_lattice_t *lattice);
  MECAB_DLL_EXTERN void mecab_lattice_set_z(mecab_lattice_t *lattice,
                                            double Z);

  MECAB_DLL_EXTERN double mecab_lattice_get_theta(mecab_lattice_t *lattice);
  MECAB_DLL_EXTERN void mecab_lattice_set_theta(mecab_lattice_t *lattice,
                                                double theta);

  MECAB_DLL_EXTERN int mecab_lattice_next(mecab_lattice_t *lattice);

  /* `request_type` arguments take the MECAB_ONE_BEST ... bit values
     declared above. */
  MECAB_DLL_EXTERN int mecab_lattice_get_request_type(
      mecab_lattice_t *lattice);
  MECAB_DLL_EXTERN int mecab_lattice_has_request_type(
      mecab_lattice_t *lattice, int request_type);
  MECAB_DLL_EXTERN void mecab_lattice_set_request_type(
      mecab_lattice_t *lattice, int request_type);
  MECAB_DLL_EXTERN void mecab_lattice_add_request_type(
      mecab_lattice_t *lattice, int request_type);
  MECAB_DLL_EXTERN void mecab_lattice_remove_request_type(
      mecab_lattice_t *lattice, int request_type);

  MECAB_DLL_EXTERN const char *mecab_lattice_tostr(mecab_lattice_t *lattice);
  MECAB_DLL_EXTERN const char *mecab_lattice_tostr2(mecab_lattice_t *lattice,
                                                    char *buf, size_t size);
  MECAB_DLL_EXTERN const char *mecab_lattice_nbest_tostr(
      mecab_lattice_t *lattice, size_t N);
  MECAB_DLL_EXTERN const char *mecab_lattice_nbest_tostr2(
      mecab_lattice_t *lattice, size_t N, char *buf, size_t size);

  MECAB_DLL_EXTERN const char *mecab_lattice_strerror(
      mecab_lattice_t *lattice);

  /* model interface */
  MECAB_DLL_EXTERN mecab_model_t *mecab_model_new(int argc, char **argv);
  MECAB_DLL_EXTERN mecab_model_t *mecab_model_new2(const char *arg);
  MECAB_DLL_EXTERN void mecab_model_destroy(mecab_model_t *model);
  MECAB_DLL_EXTERN mecab_t *mecab_model_new_tagger(mecab_model_t *model);
  MECAB_DLL_EXTERN mecab_lattice_t *mecab_model_new_lattice(
      mecab_model_t *model);

  /* Returns `bool` (from <stdbool.h> when compiled as C), unlike the `int`
     status returns used by the rest of the C interface. */
  MECAB_DLL_EXTERN bool mecab_model_swap(mecab_model_t *model,
                                         mecab_model_t *new_model);

  MECAB_DLL_EXTERN const mecab_dictionary_info_t *mecab_model_dictionary_info(
      mecab_model_t *model);

  /* static functions */
  MECAB_DLL_EXTERN int mecab_do(int argc, char **argv);
  MECAB_DLL_EXTERN int mecab_dict_index(int argc, char **argv);
  MECAB_DLL_EXTERN int mecab_dict_gen(int argc, char **argv);
  MECAB_DLL_EXTERN int mecab_cost_train(int argc, char **argv);
  MECAB_DLL_EXTERN int mecab_system_eval(int argc, char **argv);
  MECAB_DLL_EXTERN int mecab_test_gen(int argc, char **argv);
#endif

#ifdef __cplusplus
}
#endif

/* C++ interface */
#ifdef __cplusplus

namespace MeCab {
typedef struct mecab_dictionary_info_t DictionaryInfo;
typedef struct mecab_path_t            Path;
typedef struct mecab_node_t            Node;

template <typename N, typename P> class Allocator;
class Tagger;

/**
 * Abstract lattice interface.
 * All operations are pure virtual; instances are obtained through
 * Lattice::create() or Model::createLattice().
 * The order of the virtual members fixes the vtable layout -- append only.
 */
class MECAB_DLL_CLASS_EXTERN Lattice {
 public:
  virtual void clear() = 0;

  virtual bool is_available() const = 0;

  virtual Node *bos_node() const = 0;

  virtual Node *eos_node() const = 0;

#ifndef SWIG
  /* Array-returning accessors, hidden from SWIG. */
  virtual Node **begin_nodes() const = 0;

  virtual Node **end_nodes() const = 0;
#endif

  virtual Node *end_nodes(size_t pos) const = 0;

  virtual Node *begin_nodes(size_t pos) const = 0;

  virtual const char *sentence() const = 0;

  virtual void set_sentence(const char *sentence) = 0;

#ifndef SWIG
  virtual void set_sentence(const char *sentence, size_t len) = 0;
#endif

  virtual size_t size() const = 0;

  virtual void set_Z(double Z) = 0;

  virtual double Z() const = 0;

  virtual void set_theta(float theta) = 0;

  virtual float theta() const = 0;

  virtual bool next() = 0;

  /* `request_type` values are the MECAB_ONE_BEST ... bit constants. */
  virtual int request_type() const = 0;

  virtual bool has_request_type(int request_type) const = 0;

  virtual void set_request_type(int request_type) = 0;

  virtual void add_request_type(int request_type) = 0;

  virtual void remove_request_type(int request_type) = 0;

#ifndef SWIG
  virtual Allocator<Node, Path> *allocator() const = 0;
#endif

  virtual const char *toString() = 0;

  virtual const char *toString(const Node *node) = 0;

  virtual const char *enumNBestAsString(size_t N) = 0;

#ifndef SWIG
  /* Variants taking a caller-supplied output buffer (buf, size). */
  virtual const char *toString(char *buf, size_t size) = 0;

  virtual const char *toString(const Node *node,
                               char *buf, size_t size) = 0;

  virtual const char *enumNBestAsString(size_t N,
                                        char *buf, size_t size) = 0;
#endif

  virtual const char *what() const = 0;

  virtual void set_what(const char *str) = 0;

#ifndef SWIG
  static Lattice *create();
#endif

  virtual ~Lattice() {}
};

/**
 * Abstract model interface: a factory for Tagger and Lattice objects that
 * also exposes dictionary information.
 * The order of the virtual members fixes the vtable layout -- append only.
 */
class MECAB_DLL_CLASS_EXTERN Model {
 public:
  virtual const DictionaryInfo *dictionary_info() const = 0;

  virtual Tagger *createTagger() const = 0;

  virtual Lattice *createLattice() const = 0;

  virtual bool swap(Model *model) = 0;

  static const char *version();

  virtual ~Model() {}

  /* NOTE(review): `SIWG` looks like a misspelling of `SWIG` (every other
     guard in this header tests `SWIG`). As written, the guard is always
     true, so SWIG also sees these factories. Correcting the spelling would
     remove them from generated bindings; confirm nothing relies on that
     before changing it. */
#ifndef SIWG
  static Model *create(int argc, char **argv);

  static Model *create(const char *arg);
#endif
};

/**
 * Abstract tagger interface.
 * The order of the virtual members fixes the vtable layout -- append only.
 */
class MECAB_DLL_CLASS_EXTERN Tagger {
 public:
  static bool parse(const Model &model, Lattice *lattice);

  virtual bool parse(Lattice *lattice) const = 0;

  virtual const char *parse(const char *str) = 0;

  virtual const Node *parseToNode(const char *str) = 0;

  virtual const char *parseNBest(size_t N, const char *str) = 0;

  virtual bool parseNBestInit(const char *str) = 0;

  virtual const Node *nextNode() = 0;

  virtual const char *next() = 0;

  virtual const char *formatNode(const Node *node) = 0;

#ifndef SWIG
  /* Overloads with explicit input length and/or caller-supplied output
     buffer (ostr, olen), hidden from SWIG. */
  virtual const char *parse(const char *str, size_t len,
                            char *ostr, size_t olen) = 0;

  virtual const char *parse(const char *str, size_t len) = 0;

  virtual const Node *parseToNode(const char *str, size_t len) = 0;

  virtual const char *parseNBest(size_t N, const char *str, size_t len) = 0;

  virtual bool parseNBestInit(const char *str, size_t len) = 0;

  virtual const char *next(char *ostr, size_t olen) = 0;

  virtual const char *parseNBest(size_t N, const char *str,
                                 size_t len, char *ostr, size_t olen) = 0;

  virtual const char *formatNode(const Node *node,
                                 char *ostr, size_t olen) = 0;
#endif

  virtual void set_request_type(int request_type) = 0;

  virtual int request_type() const = 0;

  virtual bool partial() const = 0;

  virtual void set_partial(bool partial) = 0;

  virtual int lattice_level() const = 0;

  virtual void set_lattice_level(int level) = 0;

  virtual bool all_morphs() const = 0;

  virtual void set_all_morphs(bool all_morphs) = 0;

  virtual void set_theta(float theta) = 0;

  virtual float theta() const = 0;

  virtual const DictionaryInfo *dictionary_info() const = 0;

  virtual const char *what() const = 0;

  virtual ~Tagger() {}

  /* NOTE(review): `SIWG` looks like a misspelling of `SWIG`; see the
     compatibility concern before correcting it -- hiding these factories
     from SWIG changes what generated bindings expose. */
#ifndef SIWG
  static Tagger *create(int argc, char **argv);

  static Tagger *create(const char *arg);
#endif

  static const char *version();
};

#ifndef SWIG
/* Free-function factories and their matching delete functions. */
MECAB_DLL_EXTERN Lattice *createLattice();

MECAB_DLL_EXTERN Model *createModel(int argc, char **argv);

MECAB_DLL_EXTERN Model *createModel(const char *arg);

MECAB_DLL_EXTERN Tagger *createTagger(int argc, char **argv);

MECAB_DLL_EXTERN Tagger *createTagger(const char *arg);

MECAB_DLL_EXTERN void deleteLattice(Lattice *lattice);

MECAB_DLL_EXTERN void deleteModel(Model *model);

MECAB_DLL_EXTERN void deleteTagger(Tagger *tagger);

MECAB_DLL_EXTERN const char *getLastError();

MECAB_DLL_EXTERN const char *getTaggerError();
#endif
}
#endif
#endif  /* MECAB_MECAB_H_ */