53 static const arg_t defn[] = {
57 "Shows the usage of the tool"},
62 "Base in which all log-likelihoods calculated" },
67 "Language model file"},
72 "Probability definition file for classes in LM"},
77 "Control file listing a set of language models"},
82 "Name of language model in -lmctlfn to use for all utterances" },
87 "Transcription file to evaluate"},
91 "Text string to evaluate"},
96 "Use memory-mapped I/O for reading binary LM files"},
101 "Language model weight" },
106 "Word insertion probability" },
111 "Unigram probability weight (interpolated with uniform distribution)"},
116 "Print details of perplexity calculation" },
119 { NULL, 0, NULL, NULL }
126 int32 *out_n_ccs, int32 *out_n_oovs, int32 *out_lm_score)
130 int32 i, ch, nccs, noovs, unk;
139 for (i = 0; i < n; ++i)
146 ch = noovs = nccs = 0;
147 for (i = 0; i < n; ++i) {
152 if (wids[i] == startwid) {
163 wids[i], wids + i + 1,
174 printf(
") = %d\n", prob);
179 if (out_n_ccs) *out_n_ccs = nccs;
180 if (out_n_oovs) *out_n_oovs = noovs;
196 int32 nccs, noovs, nwords, lscr = 0;
197 float64 ch, log_to_log2;;
199 if ((fh = fopen(lsnfn,
"r")) == NULL)
205 nccs = noovs = nwords = 0;
209 int32 n, tmp_ch, tmp_noovs, tmp_nccs, tmp_lscr;
213 E_FATAL(
"str2words(line, NULL, 0) = %d, should not happen\n", n);
220 if (words[n-1][0] ==
'('
221 && words[n-1][strlen(words[n-1])-1] ==
')')
224 tmp_ch = calc_entropy(lm, words, n, &tmp_nccs,
225 &tmp_noovs, &tmp_lscr);
227 ch += (float64) tmp_ch * (n - tmp_nccs - tmp_noovs) * log_to_log2;
236 ch /= (nwords - nccs - noovs);
237 printf(
"cross-entropy: %f bits\n", ch);
240 printf(
"perplexity: %f\n", pow(2.0, ch));
241 printf(
"lm score: %d\n", lscr);
244 printf(
"%d words evaluated\n", nwords);
245 printf(
"%d OOVs (%.2f%%), %d context cues removed\n",
246 noovs, (
double)noovs / nwords * 100, nccs);
254 int32 n, ch, noovs, nccs, lscr;
260 E_FATAL(
"str2words(textfoo, NULL, 0) = %d, should not happen\n", n);
266 ch = calc_entropy(lm, words, n, &nccs, &noovs, &lscr);
268 printf(
"input: %s\n", text);
269 printf(
"cross-entropy: %f bits\n",
273 printf(
"perplexity: %f\n",
logmath_exp(lmath, ch));
274 printf(
"lm score: %d\n", lscr);
277 printf(
"%d words evaluated\n", n);
278 printf(
"%d OOVs, %d context cues removed\n",
286 main(
int argc,
char *argv[])
291 const char *lmfn, *probdefn, *lsnfn, *text;
293 if ((config =
cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL)
299 if ((lmath = logmath_init
300 (cmd_ln_float64_r(config,
"-logbase"), 0, 0)) == NULL) {
301 E_FATAL(
"Failed to initialize log math\n");
309 E_FATAL(
"Failed to load language model from %s\n",
312 if ((probdefn =
cmd_ln_str_r(config,
"-probdef")) != NULL)
315 cmd_ln_float32_r(config,
"-lw"),
316 cmd_ln_float32_r(config,
"-wip"),
317 cmd_ln_float32_r(config,
"-uw"));
323 evaluate_file(lm, lmath, lsnfn);
326 evaluate_string(lm, lmath, text);