144 #include <hash_table.h> 154 #define LM_DICTWID_BADMAP -16000 155 #define LM_CLASSID_BASE 0x01000000 158 #define LM_LEGACY_CONSTANT BAD_S3LMWID 163 #define LM_SPHINX_CONSTANT BAD_S3LMWID32 172 #define LM_CLASSID_TO_CLASS(m,i) ((m)->lmclass[(i)-LM_CLASSID_BASE]) 174 #define MIN_PROB_F -99.0 182 #define LM_ALLOC_BLOCK 16 191 #define LM_NOT_FOUND -1 193 #define LM_OFFSET_TOO_LARGE -2 199 #define LM_NO_DATA_MARK -3 202 #define LM_UNKNOWN_NG -4 204 #define LM_BAD_LM_COUNT -5 206 #define LM_UNKNOWN_WORDS -6 209 #define LM_BAD_BIGRAM -7 215 #define LM_BAD_TRIGRAM -8 221 #define LM_BAD_QUADGRAM -9 228 #define LM_BAD_QUINGRAM -10 239 #define LM_BAD_NGRAM -11 245 #define LM_TOO_MANY_NGRAM -12 249 #define LM_NO_MINUS_1GRAM -13 252 #define LM_FILE_NOT_FOUND -14 254 #define LM_CANNOT_ALLOCATE -15 258 #define LMDMP_VERSIONNULL 0 264 #define LMDMP_VERSION_TG_16BIT -1 268 #define LMDMP_VERSION_TG_16BIT_V2 -2 271 #define LMDMP_VERSION_TG_32BIT -3 277 #define LMTXT_VERSION 1000 278 #define LMFST_VERSION 1001 279 #define LMFORCED_TXT32VERSION 1002 548 #define LOG2_BG_SEG_SZ 9 549 #define BG_SEG_SZ (1 << (LOG2_BG_SEG_SZ)) 550 #define LM_TGCACHE_SIZE 100003 559 typedef struct lm_s {
571 uint32 log_bg_seg_sz;
642 int32 isLM_IN_MEMORY;
653 int32 *inclass_ugscore;
667 int32 max_sorted_entries;
688 #define lm_lmwid2dictwid(lm,u) ((lm)->ug[u].dictwid) 689 #define lm_n_ug(lm) ((lm)->n_ug) 690 #define lm_n_bg(lm) ((lm)->n_bg) 691 #define lm_n_tg(lm) ((lm)->n_tg) 692 #define lm_wordstr(lm,u) ((lm)->wordstr[u]) 693 #define lm_startwid(lm) ((lm)->startlwid) 694 #define lm_finishwid(lm) ((lm)->finishlwid) 695 #define lm_access_type(lm) ((lm)->access_type) 763 const char* lmctlfile,
766 const char* lmdumpdir,
789 const char *lmdumpdir,
802 const char* lmdumpdir,
920 int32 lm_bg_wordprob(
lm_t *lm,
1052 logmath_t *logmath);
1136 const char *outputfile,
1137 const char *filename,
1173 const char *outputfile,
1174 const char *filename,
1176 const char* inputenc,
1206 const char* filename
1351 #define LM_TGPROB(lm,tgptr) ((lm)->tgprob[(tgptr)->probid].l) 1352 #define LM_BGPROB(lm,bgptr) ((lm)->bgprob[(bgptr)->probid].l) 1353 #define LM_UGPROB(lm,ugptr) ((ugptr)->prob.l) 1354 #define LM_RAWSCORE(lm,score) ((score - (lm)->wip) / ((lm)->lw)) 1355 #define LM_DICTWID(lm,lmwid) ((lm)->ug[(lmwid)].dictwid) lm_t * lmset_get_lm_wname(lmset_t *lms, const char *lmname)
int32 lm_bglist(lm_t *lmp, s3lmwid32_t w, bg_t **bg, int32 *bowt)
lm_t * lmset_get_lm_widx(lmset_t *lms, int32 lmidx)
S3DECODER_EXPORT lmset_t * lmset_init(const char *lmfile, const char *lmctlfile, const char *ctl_lm, const char *lmname, const char *lmdumpdir, float32 lw, float32 wip, float32 uw, dict_t *dict, logmath_t *logmath)
A unigram structure.
Definition: lm.h:446
struct sorted_entry_s sorted_entry_t
S3DECODER_EXPORT void lm_cache_reset(lm_t *lmp)
A bigram structure.
Definition: lm.h:460
int32 lm_tg_exists(lm_t *lm, s3lmwid32_t lw1, s3lmwid32_t lw2, s3lmwid32_t lw3)
uint16 s3lmwid_t
Definition: s3types.h:142
Generic structure that could be used at any n-gram level.
Definition: lm.h:783
int32 s3wid_t
Definition: s3types.h:136
int32 lm_is32bits(lm_t *model)
lmset_t * lmset_read_ctl(const char *ctlfile, dict_t *dict, float64 lw, float64 wip, float64 uw, const char *lmdumpdir, logmath_t *logmath)
Structure for multiple LMs; provides operations for addition, deletion, and reading.
void copy_tg32_to_tg(lm_t *lm)
void ug_write(FILE *fp, ug_t *ug)
int32 find_tg(tg_t *tg, int32 n, s3lmwid32_t w)
void lm_null_struct(lm_t *lm)
void tg32_write(FILE *fp, tg32_t *tg)
S3DECODER_EXPORT int32 lm_write(lm_t *model, const char *outputfile, const char *filename, const char *fmt)
S3DECODER_EXPORT void lmset_free(lmset_t *lms)
S3DECODER_EXPORT void lm_cache_stats_dump(lm_t *lmp)
int32 lmset_name_to_idx(lmset_t *lms, const char *lmname)
S3DECODER_EXPORT void lm_free(lm_t *lm)
void bg_write(FILE *fp, bg_t *bg)
Operations on dictionary.
void lmset_set_curlm_widx(lmset_t *lms, int32 lmidx)
S3DECODER_EXPORT void lmset_set_curlm_wname(lmset_t *lms, const char *lmname)
void lmset_add_lm(lmset_t *lms, lm_t *lm, const char *lmname)
void swap_tg32(tg32_t *tg)
int32 lm_ug_score(lm_t *lmp, s3lmwid32_t lwid, s3wid_t wid)
ug_t * NewUnigramTable(int32 n_ug)
char * lmset_idx_to_name(lmset_t *lms, int32 lmidx)
int32 lm_ug_exists(lm_t *lm, s3lmwid32_t lwid)
A trigram structure.
Definition: lm.h:483
void swap_bg32(bg32_t *bg)
void bg32_write(FILE *fp, bg32_t *bg)
Size definitions of semantic units. Common to both the s3 and s3.X decoders.
int32 find_bg32(bg32_t *bg, int32 n, s3lmwid32_t w)
The sorted list used in LM reading. The list is a (64K-long) array. The first entry is the root of the tree ...
Definition: lm.h:437
#define S3DECODER_EXPORT
Definition: sphinx3_export.h:15
void copy_bg32_to_bg(lm_t *lm)
int32 find_bg(bg_t *bg, int32 n, s3lmwid32_t w)
struct tginfo_s * next
Definition: lm.h:537
A single entry used in the linked-list structure for LM reading.
uint32 lower
Definition: lm.h:425
int32 lm_bg_score(lm_t *lmp, s3lmwid32_t lw1, s3lmwid32_t lw2, s3wid_t w2)
void lmset_delete_lm(lmset_t *lms, const char *lmname)
A structure for a dictionary.
Definition: dict.h:146
int32 lm_tg_score(lm_t *lmp, s3lmwid32_t lw1, s3lmwid32_t lw2, s3lmwid32_t lw3, s3wid_t w3)
Trigram cache that speeds up locating trigrams for a given bigram (w_1, w_2).
int32 lm_get_classid(lm_t *model, const char *name)
int32 lm_bg32list(lm_t *lmp, s3lmwid32_t w, bg32_t **bg, int32 *bowt)
struct tginfo32_s tginfo32_t
Log quantities represented in either floating or integer format.
Definition: lm.h:412
void copy_bg_to_bg32(lm_t *lm)
S3DECODER_EXPORT lm_t * lm_read_advance2(const char *file, const char *lmname, float64 lw, float64 wip, float64 uw, int32 ndict, const char *fmt, int32 applyweight, int lminmemory, logmath_t *logmath)
int32 lm_write_advance(lm_t *model, const char *outputfile, const char *filename, const char *fmt, const char *inputenc, char *outputenc)
void copy_tg_to_tg32(lm_t *lm)
int32 lm_tglist(lm_t *lmp, s3lmwid32_t w1, s3lmwid32_t w2, tg_t **tg, int32 *bowt)
s3lmwid32_t lm_wid(lm_t *lm, const char *wd)
A 32-bit version of tg_t.
Definition: lm.h:493
int32 lm_bg_exists(lm_t *lm, s3lmwid32_t lw1, s3lmwid32_t lw2)
int32 lm_ug_wordprob(lm_t *lm, dict_t *dict, int32 th, wordprob_t *wp)
int32 lm_tg32list(lm_t *lmp, s3lmwid32_t w1, s3lmwid32_t w2, tg32_t **tg, int32 *bowt)
Management of in-memory bigrams. Not used if all bigrams are in memory.
Definition: lm.h:502
The language model. All unigrams are read into memory on initialization. Bigrams and trigrams read in...
void lm_convert_structure(lm_t *model, int32 is32bits)
struct tginfo32_s * next
Definition: lm.h:552
uint32 s3lmwid32_t
Definition: s3types.h:149
int32 lm_add_word_to_ug(lm_t *lm, dict_t *dict, const char *newword)
int32 lm_uglist(lm_t *lmp, ug_t **ug)
lm_t * lm_read(const char *file, const char *lmname, cmd_ln_t *config, logmath_t *logmath)
A bigram structure which has 32 bits.
Definition: lm.h:471
int32 lm_add_wordlist(lm_t *lm, dict_t *dict, const char *filename)
Language model class modules. This module maintains classes of words and associated probabilities (P(...
void tg_write(FILE *fp, tg_t *tg)
#define dict_size(d)
Definition: dict.h:225
void lm_set_param(lm_t *lm, float64 lw, float64 wip)
S3DECODER_EXPORT int32 lm_rawscore(lm_t *lm, int32 score)
lm_t * lm_read_advance(const char *file, const char *lmname, float64 lw, float64 wip, float64 uw, int32 ndict, const char *fmt, int32 applyweight, logmath_t *logmath)
lmset_t * lmset_read_lm(const char *lmfile, dict_t *dict, const char *lmname, float64 lw, float64 wip, float64 uw, const char *lmdumpdir, logmath_t *logmath)
int32 find_tg32(tg32_t *tg, int32 n, s3lmwid32_t w)
A 32-bit version of membg_t.
Definition: lm.h:512
lmlog_t val
Definition: lm.h:424
uint32 higher
Definition: lm.h:428