PocketSphinx  0.6
ngram_search_fwdflat.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
42 /* System headers. */
43 #include <string.h>
44 #include <assert.h>
45 
46 /* SphinxBase headers. */
47 #include <sphinxbase/ckd_alloc.h>
48 #include <sphinxbase/listelem_alloc.h>
49 #include <sphinxbase/err.h>
50 
51 /* Local headers. */
52 #include "ngram_search.h"
53 #include "ps_lattice_internal.h"
54 
/* Debugging aid: set __CHAN_DUMP__ to non-zero to route every channel
 * evaluation through the dumping evaluator, which writes to stderr. */
#define __CHAN_DUMP__ 0
#if __CHAN_DUMP__
#define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr)
#else
#define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm)
#endif
62 
63 static void
64 ngram_fwdflat_expand_all(ngram_search_t *ngs)
65 {
66  int n_words, i;
67 
68  /* For all "real words" (not fillers or <s>/</s>) in the dictionary,
69  *
70  * 1) Add the ones which are in the LM to the fwdflat wordlist
71  * 2) And to the expansion list (since we are expanding all)
72  */
73  ngs->n_expand_words = 0;
74  n_words = ps_search_n_words(ngs);
75  bitvec_clear_all(ngs->expand_word_flag, ps_search_n_words(ngs));
76  for (i = 0; i < n_words; ++i) {
77  if (!ngram_model_set_known_wid(ngs->lmset,
78  dict_basewid(ps_search_dict(ngs),i)))
79  continue;
80  ngs->fwdflat_wordlist[ngs->n_expand_words] = i;
81  ngs->expand_word_list[ngs->n_expand_words] = i;
82  bitvec_set(ngs->expand_word_flag, i);
83  ngs->n_expand_words++;
84  }
85  E_INFO("Utterance vocabulary contains %d words\n", ngs->n_expand_words);
86  ngs->expand_word_list[ngs->n_expand_words] = -1;
87  ngs->fwdflat_wordlist[ngs->n_expand_words] = -1;
88 }
89 
90 static void
91 ngram_fwdflat_allocate_1ph(ngram_search_t *ngs)
92 {
93  dict_t *dict = ps_search_dict(ngs);
94  int n_words = ps_search_n_words(ngs);
95  int i, w;
96 
97  /* Allocate single-phone words, since they won't have
98  * been allocated for us by fwdtree initialization. */
99  ngs->n_1ph_words = 0;
100  for (w = 0; w < n_words; w++) {
101  if (dict_is_single_phone(dict, w))
102  ++ngs->n_1ph_words;
103  }
104  ngs->single_phone_wid = ckd_calloc(ngs->n_1ph_words,
105  sizeof(*ngs->single_phone_wid));
106  ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph));
107  i = 0;
108  for (w = 0; w < n_words; w++) {
109  if (!dict_is_single_phone(dict, w))
110  continue;
111 
112  /* DICT2PID location */
113  ngs->rhmm_1ph[i].ciphone = dict_first_phone(dict, w);
114  ngs->rhmm_1ph[i].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef);
115  hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, TRUE,
116  /* ssid */ bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef,
117  ngs->rhmm_1ph[i].ciphone),
118  /* tmatid */ bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef,
119  ngs->rhmm_1ph[i].ciphone));
120  ngs->rhmm_1ph[i].next = NULL;
121  ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]);
122  ngs->single_phone_wid[i] = w;
123  i++;
124  }
125 }
126 
127 static void
128 ngram_fwdflat_free_1ph(ngram_search_t *ngs)
129 {
130  int i, w;
131  int n_words = ps_search_n_words(ngs);
132 
133  for (i = w = 0; w < n_words; ++w) {
134  if (!dict_is_single_phone(ps_search_dict(ngs), w))
135  continue;
136  hmm_deinit(&ngs->rhmm_1ph[i].hmm);
137  ++i;
138  }
139  ckd_free(ngs->rhmm_1ph);
140  ngs->rhmm_1ph = NULL;
141  ckd_free(ngs->single_phone_wid);
142 }
143 
144 void
146 {
147  int n_words;
148 
149  n_words = ps_search_n_words(ngs);
150  ngs->fwdflat_wordlist = ckd_calloc(n_words + 1, sizeof(*ngs->fwdflat_wordlist));
151  ngs->expand_word_flag = bitvec_alloc(n_words);
152  ngs->expand_word_list = ckd_calloc(n_words + 1, sizeof(*ngs->expand_word_list));
153  ngs->frm_wordlist = ckd_calloc(ngs->n_frame_alloc, sizeof(*ngs->frm_wordlist));
154  ngs->min_ef_width = cmd_ln_int32_r(ps_search_config(ngs), "-fwdflatefwid");
155  ngs->max_sf_win = cmd_ln_int32_r(ps_search_config(ngs), "-fwdflatsfwin");
156  E_INFO("fwdflat: min_ef_width = %d, max_sf_win = %d\n",
157  ngs->min_ef_width, ngs->max_sf_win);
158 
159  /* No tree-search; pre-build the expansion list, including all LM words. */
160  if (!ngs->fwdtree) {
161  /* Build full expansion list from LM words. */
162  ngram_fwdflat_expand_all(ngs);
163  /* Allocate single phone words. */
164  ngram_fwdflat_allocate_1ph(ngs);
165  }
166 }
167 
168 void
170 {
171  double n_speech = (double)ngs->n_tot_frame
172  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
173 
174  E_INFO("TOTAL fwdflat %.2f CPU %.3f xRT\n",
175  ngs->fwdflat_perf.t_tot_cpu,
176  ngs->fwdflat_perf.t_tot_cpu / n_speech);
177  E_INFO("TOTAL fwdflat %.2f wall %.3f xRT\n",
178  ngs->fwdflat_perf.t_tot_elapsed,
179  ngs->fwdflat_perf.t_tot_elapsed / n_speech);
180 
181  /* Free single-phone words if we allocated them. */
182  if (!ngs->fwdtree) {
183  ngram_fwdflat_free_1ph(ngs);
184  }
185  ckd_free(ngs->fwdflat_wordlist);
186  bitvec_free(ngs->expand_word_flag);
187  ckd_free(ngs->expand_word_list);
188  ckd_free(ngs->frm_wordlist);
189 }
190 
191 int
193 {
194  /* Reallocate things that depend on the number of words. */
195  int n_words;
196 
197  ckd_free(ngs->fwdflat_wordlist);
198  ckd_free(ngs->expand_word_list);
199  bitvec_free(ngs->expand_word_flag);
200  n_words = ps_search_n_words(ngs);
201  ngs->fwdflat_wordlist = ckd_calloc(n_words + 1, sizeof(*ngs->fwdflat_wordlist));
202  ngs->expand_word_flag = bitvec_alloc(n_words);
203  ngs->expand_word_list = ckd_calloc(n_words + 1, sizeof(*ngs->expand_word_list));
204 
205  /* No tree-search; take care of the expansion list and single phone words. */
206  if (!ngs->fwdtree) {
207  /* Free single-phone words. */
208  ngram_fwdflat_free_1ph(ngs);
209  /* Reallocate word_chan. */
210  ckd_free(ngs->word_chan);
211  ngs->word_chan = ckd_calloc(dict_size(ps_search_dict(ngs)),
212  sizeof(*ngs->word_chan));
213  /* Rebuild full expansion list from LM words. */
214  ngram_fwdflat_expand_all(ngs);
215  /* Allocate single phone words. */
216  ngram_fwdflat_allocate_1ph(ngs);
217  }
218  /* Otherwise there is nothing to do since the wordlist is
219  * generated anew every utterance. */
220  return 0;
221 }
222 
226 static void
227 build_fwdflat_wordlist(ngram_search_t *ngs)
228 {
229  int32 i, f, sf, ef, wid, nwd;
230  dict_t *dict;
231  bptbl_t *bp;
232  ps_latnode_t *node, *prevnode, *nextnode;
233 
234  /* No tree-search, use statically allocated wordlist. */
235  if (!ngs->fwdtree)
236  return;
237 
238  dict = ps_search_dict(ngs);
239 
240  memset(ngs->frm_wordlist, 0, ngs->n_frame_alloc * sizeof(*ngs->frm_wordlist));
241 
242  /* Scan the backpointer table for all active words and record
243  * their exit frames. */
244  for (i = 0, bp = ngs->bp_table; i < ngs->bpidx; i++, bp++) {
245  sf = (bp->bp < 0) ? 0 : ngs->bp_table[bp->bp].frame + 1;
246  ef = bp->frame;
247  wid = bp->wid;
248 
249  /* Anything that can be transitioned to in the LM can go in
250  * the word list. */
251  if (!ngram_model_set_known_wid(ngs->lmset,
252  dict_basewid(ps_search_dict(ngs), wid)))
253  continue;
254 
255  /* Look for it in the wordlist. */
256  for (node = ngs->frm_wordlist[sf]; node && (node->wid != wid);
257  node = node->next);
258 
259  /* Update last end frame. */
260  if (node)
261  node->lef = ef;
262  else {
263  /* New node; link to head of list */
264  node = listelem_malloc(ngs->latnode_alloc);
265  node->wid = wid;
266  node->fef = node->lef = ef;
267 
268  node->next = ngs->frm_wordlist[sf];
269  ngs->frm_wordlist[sf] = node;
270  }
271  }
272 
273  /* Eliminate "unlikely" words, for which there are too few end points */
274  for (f = 0; f < ngs->n_frame; f++) {
275  prevnode = NULL;
276  for (node = ngs->frm_wordlist[f]; node; node = nextnode) {
277  nextnode = node->next;
278  /* Word has too few endpoints */
279  if ((node->lef - node->fef < ngs->min_ef_width) ||
280  /* Word is </s> and doesn't actually end in last frame */
281  ((node->wid == ps_search_finish_wid(ngs)) && (node->lef < ngs->n_frame - 1))) {
282  if (!prevnode)
283  ngs->frm_wordlist[f] = nextnode;
284  else
285  prevnode->next = nextnode;
286  listelem_free(ngs->latnode_alloc, node);
287  }
288  else
289  prevnode = node;
290  }
291  }
292 
293  /* Form overall wordlist for 2nd pass */
294  nwd = 0;
295  bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
296  for (f = 0; f < ngs->n_frame; f++) {
297  for (node = ngs->frm_wordlist[f]; node; node = node->next) {
298  if (!bitvec_is_set(ngs->word_active, node->wid)) {
299  bitvec_set(ngs->word_active, node->wid);
300  ngs->fwdflat_wordlist[nwd++] = node->wid;
301  }
302  }
303  }
304  ngs->fwdflat_wordlist[nwd] = -1;
305  E_INFO("Utterance vocabulary contains %d words\n", nwd);
306 }
307 
311 static void
312 build_fwdflat_chan(ngram_search_t *ngs)
313 {
314  int32 i, wid, p;
315  root_chan_t *rhmm;
316  chan_t *hmm, *prevhmm;
317  dict_t *dict;
318  dict2pid_t *d2p;
319 
320  dict = ps_search_dict(ngs);
321  d2p = ps_search_dict2pid(ngs);
322 
323  /* Build word HMMs for each word in the lattice. */
324  for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
325  wid = ngs->fwdflat_wordlist[i];
326 
327  /* Single-phone words are permanently allocated */
328  if (dict_is_single_phone(dict, wid))
329  continue;
330 
331  assert(ngs->word_chan[wid] == NULL);
332 
333  /* Multiplex root HMM for first phone (one root per word, flat
334  * lexicon). diphone is irrelevant here, for the time being,
335  * at least. */
336  rhmm = listelem_malloc(ngs->root_chan_alloc);
337  rhmm->ci2phone = dict_second_phone(dict, wid);
338  rhmm->ciphone = dict_first_phone(dict, wid);
339  rhmm->next = NULL;
340  hmm_init(ngs->hmmctx, &rhmm->hmm, TRUE,
341  bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, rhmm->ciphone),
342  bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, rhmm->ciphone));
343 
344  /* HMMs for word-internal phones */
345  prevhmm = NULL;
346  for (p = 1; p < dict_pronlen(dict, wid) - 1; p++) {
347  hmm = listelem_malloc(ngs->chan_alloc);
348  hmm->ciphone = dict_pron(dict, wid, p);
349  hmm->info.rc_id = (p == dict_pronlen(dict, wid) - 1) ? 0 : -1;
350  hmm->next = NULL;
351  hmm_init(ngs->hmmctx, &hmm->hmm, FALSE,
352  dict2pid_internal(d2p,wid,p),
353  bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, hmm->ciphone));
354 
355  if (prevhmm)
356  prevhmm->next = hmm;
357  else
358  rhmm->next = hmm;
359 
360  prevhmm = hmm;
361  }
362 
363  /* Right-context phones */
364  ngram_search_alloc_all_rc(ngs, wid);
365 
366  /* Link in just allocated right-context phones */
367  if (prevhmm)
368  prevhmm->next = ngs->word_chan[wid];
369  else
370  rhmm->next = ngs->word_chan[wid];
371  ngs->word_chan[wid] = (chan_t *) rhmm;
372  }
373 
374 }
375 
376 void
378 {
379  root_chan_t *rhmm;
380  int i;
381 
382  ptmr_reset(&ngs->fwdflat_perf);
383  ptmr_start(&ngs->fwdflat_perf);
384  build_fwdflat_wordlist(ngs);
385  build_fwdflat_chan(ngs);
386 
387  ngs->bpidx = 0;
388  ngs->bss_head = 0;
389 
390  for (i = 0; i < ps_search_n_words(ngs); i++)
391  ngs->word_lat_idx[i] = NO_BP;
392 
393  /* Reset the permanently allocated single-phone words, since they
394  * may have junk left over in them from previous searches. */
395  for (i = 0; i < ngs->n_1ph_words; i++) {
396  int32 w = ngs->single_phone_wid[i];
397  rhmm = (root_chan_t *) ngs->word_chan[w];
398  hmm_clear(&rhmm->hmm);
399  }
400 
401  /* Start search with <s>; word_chan[<s>] is permanently allocated */
402  rhmm = (root_chan_t *) ngs->word_chan[ps_search_start_wid(ngs)];
403  hmm_enter(&rhmm->hmm, 0, NO_BP, 0);
404  ngs->active_word_list[0][0] = ps_search_start_wid(ngs);
405  ngs->n_active_word[0] = 1;
406 
407  ngs->best_score = 0;
408  ngs->renormalized = FALSE;
409 
410  for (i = 0; i < ps_search_n_words(ngs); i++)
411  ngs->last_ltrans[i].sf = -1;
412 
413  if (!ngs->fwdtree)
414  ngs->n_frame = 0;
415 
416  ngs->st.n_fwdflat_chan = 0;
417  ngs->st.n_fwdflat_words = 0;
418  ngs->st.n_fwdflat_word_transition = 0;
419  ngs->st.n_senone_active_utt = 0;
420 }
421 
422 static void
423 compute_fwdflat_sen_active(ngram_search_t *ngs, int frame_idx)
424 {
425  int32 i, w;
426  int32 *awl;
427  root_chan_t *rhmm;
428  chan_t *hmm;
429 
430  acmod_clear_active(ps_search_acmod(ngs));
431 
432  i = ngs->n_active_word[frame_idx & 0x1];
433  awl = ngs->active_word_list[frame_idx & 0x1];
434 
435  for (w = *(awl++); i > 0; --i, w = *(awl++)) {
436  rhmm = (root_chan_t *)ngs->word_chan[w];
437  if (hmm_frame(&rhmm->hmm) == frame_idx) {
438  acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
439  }
440 
441  for (hmm = rhmm->next; hmm; hmm = hmm->next) {
442  if (hmm_frame(&hmm->hmm) == frame_idx) {
443  acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
444  }
445  }
446  }
447 }
448 
449 static void
450 fwdflat_eval_chan(ngram_search_t *ngs, int frame_idx)
451 {
452  int32 i, w, bestscore;
453  int32 *awl;
454  root_chan_t *rhmm;
455  chan_t *hmm;
456 
457  i = ngs->n_active_word[frame_idx & 0x1];
458  awl = ngs->active_word_list[frame_idx & 0x1];
459  bestscore = WORST_SCORE;
460 
461  ngs->st.n_fwdflat_words += i;
462 
463  /* Scan all active words. */
464  for (w = *(awl++); i > 0; --i, w = *(awl++)) {
465  rhmm = (root_chan_t *) ngs->word_chan[w];
466  if (hmm_frame(&rhmm->hmm) == frame_idx) {
467  int32 score = chan_v_eval(rhmm);
468  if ((score BETTER_THAN bestscore) && (w != ps_search_finish_wid(ngs)))
469  bestscore = score;
470  ngs->st.n_fwdflat_chan++;
471  }
472 
473  for (hmm = rhmm->next; hmm; hmm = hmm->next) {
474  if (hmm_frame(&hmm->hmm) == frame_idx) {
475  int32 score = chan_v_eval(hmm);
476  if (score BETTER_THAN bestscore)
477  bestscore = score;
478  ngs->st.n_fwdflat_chan++;
479  }
480  }
481  }
482 
483  ngs->best_score = bestscore;
484 }
485 
486 static void
487 fwdflat_prune_chan(ngram_search_t *ngs, int frame_idx)
488 {
489  int32 i, cf, nf, w, pip, newscore, thresh, wordthresh;
490  int32 *awl;
491  root_chan_t *rhmm;
492  chan_t *hmm, *nexthmm;
493 
494  cf = frame_idx;
495  nf = cf + 1;
496  i = ngs->n_active_word[cf & 0x1];
497  awl = ngs->active_word_list[cf & 0x1];
498  bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
499 
500  thresh = ngs->best_score + ngs->fwdflatbeam;
501  wordthresh = ngs->best_score + ngs->fwdflatwbeam;
502  pip = ngs->pip;
503  E_DEBUG(3,("frame %d thresh %d wordthresh %d\n", frame_idx, thresh, wordthresh));
504 
505  /* Scan all active words. */
506  for (w = *(awl++); i > 0; --i, w = *(awl++)) {
507  rhmm = (root_chan_t *) ngs->word_chan[w];
508  /* Propagate active root channels */
509  if (hmm_frame(&rhmm->hmm) == cf
510  && hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh) {
511  hmm_frame(&rhmm->hmm) = nf;
512  bitvec_set(ngs->word_active, w);
513 
514  /* Transitions out of root channel */
515  newscore = hmm_out_score(&rhmm->hmm);
516  if (rhmm->next) {
517  assert(!dict_is_single_phone(ps_search_dict(ngs), w));
518 
519  newscore += pip;
520  if (newscore BETTER_THAN thresh) {
521  hmm = rhmm->next;
522  /* Enter all right context phones */
523  if (hmm->info.rc_id >= 0) {
524  for (; hmm; hmm = hmm->next) {
525  if ((hmm_frame(&hmm->hmm) < cf)
526  || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) {
527  hmm_enter(&hmm->hmm, newscore,
528  hmm_out_history(&rhmm->hmm), nf);
529  }
530  }
531  }
532  /* Just a normal word internal phone */
533  else {
534  if ((hmm_frame(&hmm->hmm) < cf)
535  || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) {
536  hmm_enter(&hmm->hmm, newscore,
537  hmm_out_history(&rhmm->hmm), nf);
538  }
539  }
540  }
541  }
542  else {
543  assert(dict_is_single_phone(ps_search_dict(ngs), w));
544 
545  /* Word exit for single-phone words (where did their
546  * whmms come from?) (either from
547  * ngram_search_fwdtree, or from
548  * ngram_fwdflat_allocate_1ph(), that's where) */
549  if (newscore BETTER_THAN wordthresh) {
550  ngram_search_save_bp(ngs, cf, w, newscore,
551  hmm_out_history(&rhmm->hmm), 0);
552  }
553  }
554  }
555 
556  /* Transitions out of non-root channels. */
557  for (hmm = rhmm->next; hmm; hmm = hmm->next) {
558  if (hmm_frame(&hmm->hmm) >= cf) {
559  /* Propagate forward HMMs inside the beam. */
560  if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) {
561  hmm_frame(&hmm->hmm) = nf;
562  bitvec_set(ngs->word_active, w);
563 
564  newscore = hmm_out_score(&hmm->hmm);
565  /* Word-internal phones */
566  if (hmm->info.rc_id < 0) {
567  newscore += pip;
568  if (newscore BETTER_THAN thresh) {
569  nexthmm = hmm->next;
570  /* Enter all right-context phones. */
571  if (nexthmm->info.rc_id >= 0) {
572  for (; nexthmm; nexthmm = nexthmm->next) {
573  if ((hmm_frame(&nexthmm->hmm) < cf)
574  || (newscore BETTER_THAN
575  hmm_in_score(&nexthmm->hmm))) {
576  hmm_enter(&nexthmm->hmm,
577  newscore,
578  hmm_out_history(&hmm->hmm),
579  nf);
580  }
581  }
582  }
583  /* Enter single word-internal phone. */
584  else {
585  if ((hmm_frame(&nexthmm->hmm) < cf)
586  || (newscore BETTER_THAN
587  hmm_in_score(&nexthmm->hmm))) {
588  hmm_enter(&nexthmm->hmm, newscore,
589  hmm_out_history(&hmm->hmm), nf);
590  }
591  }
592  }
593  }
594  /* Right-context phones - apply word beam and exit. */
595  else {
596  if (newscore BETTER_THAN wordthresh) {
597  ngram_search_save_bp(ngs, cf, w, newscore,
598  hmm_out_history(&hmm->hmm),
599  hmm->info.rc_id);
600  }
601  }
602  }
603  /* Zero out inactive HMMs. */
604  else if (hmm_frame(&hmm->hmm) != nf) {
605  hmm_clear_scores(&hmm->hmm);
606  }
607  }
608  }
609  }
610 }
611 
612 static void
613 get_expand_wordlist(ngram_search_t *ngs, int32 frm, int32 win)
614 {
615  int32 f, sf, ef;
616  ps_latnode_t *node;
617 
618  if (!ngs->fwdtree) {
619  ngs->st.n_fwdflat_word_transition += ngs->n_expand_words;
620  return;
621  }
622 
623  sf = frm - win;
624  if (sf < 0)
625  sf = 0;
626  ef = frm + win;
627  if (ef > ngs->n_frame)
628  ef = ngs->n_frame;
629 
630  bitvec_clear_all(ngs->expand_word_flag, ps_search_n_words(ngs));
631  ngs->n_expand_words = 0;
632 
633  for (f = sf; f < ef; f++) {
634  for (node = ngs->frm_wordlist[f]; node; node = node->next) {
635  if (!bitvec_is_set(ngs->expand_word_flag, node->wid)) {
636  ngs->expand_word_list[ngs->n_expand_words++] = node->wid;
637  bitvec_set(ngs->expand_word_flag, node->wid);
638  }
639  }
640  }
641  ngs->expand_word_list[ngs->n_expand_words] = -1;
642  ngs->st.n_fwdflat_word_transition += ngs->n_expand_words;
643 }
644 
645 static void
646 fwdflat_word_transition(ngram_search_t *ngs, int frame_idx)
647 {
648  int32 cf, nf, b, thresh, pip, i, w, newscore;
649  int32 best_silrc_score = 0, best_silrc_bp = 0; /* FIXME: good defaults? */
650  bptbl_t *bp;
651  int32 *rcss;
652  root_chan_t *rhmm;
653  int32 *awl;
654  float32 lwf;
655  dict_t *dict = ps_search_dict(ngs);
656  dict2pid_t *d2p = ps_search_dict2pid(ngs);
657 
658  cf = frame_idx;
659  nf = cf + 1;
660  thresh = ngs->best_score + ngs->fwdflatbeam;
661  pip = ngs->pip;
662  best_silrc_score = WORST_SCORE;
663  lwf = ngs->fwdflat_fwdtree_lw_ratio;
664 
665  /* Search for all words starting within a window of this frame.
666  * These are the successors for words exiting now. */
667  get_expand_wordlist(ngs, cf, ngs->max_sf_win);
668 
669  /* Scan words exited in current frame */
670  for (b = ngs->bp_table_idx[cf]; b < ngs->bpidx; b++) {
671  xwdssid_t *rssid;
672  int32 silscore;
673 
674  bp = ngs->bp_table + b;
675  ngs->word_lat_idx[bp->wid] = NO_BP;
676 
677  if (bp->wid == ps_search_finish_wid(ngs))
678  continue;
679 
680  /* DICT2PID location */
681  /* Get the mapping from right context phone ID to index in the
682  * right context table and the bscore_stack. */
683  rcss = ngs->bscore_stack + bp->s_idx;
684  if (bp->last2_phone == -1)
685  rssid = NULL;
686  else
687  rssid = dict2pid_rssid(d2p, bp->last_phone, bp->last2_phone);
688 
689  /* Transition to all successor words. */
690  for (i = 0; ngs->expand_word_list[i] >= 0; i++) {
691  int32 n_used;
692 
693  w = ngs->expand_word_list[i];
694 
695  /* Get the exit score we recorded in save_bwd_ptr(), or
696  * something approximating it. */
697  if (rssid)
698  newscore = rcss[rssid->cimap[dict_first_phone(dict, w)]];
699  else
700  newscore = bp->score;
701  if (newscore == WORST_SCORE)
702  continue;
703  /* FIXME: Floating point... */
704  newscore += lwf
705  * (ngram_tg_score(ngs->lmset,
706  dict_basewid(dict, w),
707  bp->real_wid,
708  bp->prev_real_wid,
709  &n_used) >> SENSCR_SHIFT);
710  newscore += pip;
711 
712  /* Enter the next word */
713  if (newscore BETTER_THAN thresh) {
714  rhmm = (root_chan_t *) ngs->word_chan[w];
715  if ((hmm_frame(&rhmm->hmm) < cf)
716  || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
717  hmm_enter(&rhmm->hmm, newscore, b, nf);
718  /* DICT2PID: This is where mpx ssids get introduced. */
719  /* Look up the ssid to use when entering this mpx triphone. */
720  hmm_mpx_ssid(&rhmm->hmm, 0) =
721  dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone,
722  dict_last_phone(dict, bp->wid));
723  assert(IS_S3SSID(hmm_mpx_ssid(&rhmm->hmm, 0)));
724  E_DEBUG(6,("ssid %d(%d,%d) = %d\n",
725  rhmm->ciphone, dict_last_phone(dict, bp->wid), rhmm->ci2phone,
726  hmm_mpx_ssid(&rhmm->hmm, 0)));
727  bitvec_set(ngs->word_active, w);
728  }
729  }
730  }
731 
732  /* Get the best exit into silence. */
733  if (rssid)
734  silscore = rcss[rssid->cimap[ps_search_acmod(ngs)->mdef->sil]];
735  else
736  silscore = bp->score;
737  if (silscore BETTER_THAN best_silrc_score) {
738  best_silrc_score = silscore;
739  best_silrc_bp = b;
740  }
741  }
742 
743  /* Transition to <sil> */
744  newscore = best_silrc_score + ngs->silpen + pip;
745  if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) {
746  w = ps_search_silence_wid(ngs);
747  rhmm = (root_chan_t *) ngs->word_chan[w];
748  if ((hmm_frame(&rhmm->hmm) < cf)
749  || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
750  hmm_enter(&rhmm->hmm, newscore,
751  best_silrc_bp, nf);
752  bitvec_set(ngs->word_active, w);
753  }
754  }
755  /* Transition to noise words */
756  newscore = best_silrc_score + ngs->fillpen + pip;
757  if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) {
758  for (w = ps_search_silence_wid(ngs) + 1; w < ps_search_n_words(ngs); w++) {
759  rhmm = (root_chan_t *) ngs->word_chan[w];
760  /* Noise words that aren't a single phone will have NULL here. */
761  if (rhmm == NULL)
762  continue;
763  if ((hmm_frame(&rhmm->hmm) < cf)
764  || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
765  hmm_enter(&rhmm->hmm, newscore,
766  best_silrc_bp, nf);
767  bitvec_set(ngs->word_active, w);
768  }
769  }
770  }
771 
772  /* Reset initial channels of words that have become inactive even after word trans. */
773  i = ngs->n_active_word[cf & 0x1];
774  awl = ngs->active_word_list[cf & 0x1];
775  for (w = *(awl++); i > 0; --i, w = *(awl++)) {
776  rhmm = (root_chan_t *) ngs->word_chan[w];
777  if (hmm_frame(&rhmm->hmm) == cf) {
778  hmm_clear_scores(&rhmm->hmm);
779  }
780  }
781 }
782 
783 static void
784 fwdflat_renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm)
785 {
786  root_chan_t *rhmm;
787  chan_t *hmm;
788  int32 i, cf, w, *awl;
789 
790  cf = frame_idx;
791 
792  /* Renormalize individual word channels */
793  i = ngs->n_active_word[cf & 0x1];
794  awl = ngs->active_word_list[cf & 0x1];
795  for (w = *(awl++); i > 0; --i, w = *(awl++)) {
796  rhmm = (root_chan_t *) ngs->word_chan[w];
797  if (hmm_frame(&rhmm->hmm) == cf) {
798  hmm_normalize(&rhmm->hmm, norm);
799  }
800  for (hmm = rhmm->next; hmm; hmm = hmm->next) {
801  if (hmm_frame(&hmm->hmm) == cf) {
802  hmm_normalize(&hmm->hmm, norm);
803  }
804  }
805  }
806 
807  ngs->renormalized = TRUE;
808 }
809 
810 int
812 {
813  int16 const *senscr;
814  int32 nf, i, j;
815  int32 *nawl;
816 
817  /* Activate our HMMs for the current frame if need be. */
818  if (!ps_search_acmod(ngs)->compallsen)
819  compute_fwdflat_sen_active(ngs, frame_idx);
820 
821  /* Compute GMM scores for the current frame. */
822  senscr = acmod_score(ps_search_acmod(ngs), &frame_idx);
823  ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;
824 
825  /* Mark backpointer table for current frame. */
826  ngram_search_mark_bptable(ngs, frame_idx);
827 
828  /* If the best score is equal to or worse than WORST_SCORE,
829  * recognition has failed, don't bother to keep trying. */
830  if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE)
831  return 0;
832  /* Renormalize if necessary */
833  if (ngs->best_score + (2 * ngs->beam) WORSE_THAN WORST_SCORE) {
834  E_INFO("Renormalizing Scores at frame %d, best score %d\n",
835  frame_idx, ngs->best_score);
836  fwdflat_renormalize_scores(ngs, frame_idx, ngs->best_score);
837  }
838 
839  ngs->best_score = WORST_SCORE;
840  hmm_context_set_senscore(ngs->hmmctx, senscr);
841 
842  /* Evaluate HMMs */
843  fwdflat_eval_chan(ngs, frame_idx);
844  /* Prune HMMs and do phone transitions. */
845  fwdflat_prune_chan(ngs, frame_idx);
846  /* Do word transitions. */
847  fwdflat_word_transition(ngs, frame_idx);
848 
849  /* Create next active word list */
850  nf = frame_idx + 1;
851  nawl = ngs->active_word_list[nf & 0x1];
852  for (i = 0, j = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
853  if (bitvec_is_set(ngs->word_active, ngs->fwdflat_wordlist[i])) {
854  *(nawl++) = ngs->fwdflat_wordlist[i];
855  j++;
856  }
857  }
858  for (i = ps_search_start_wid(ngs); i < ps_search_n_words(ngs); i++) {
859  if (bitvec_is_set(ngs->word_active, i)) {
860  *(nawl++) = i;
861  j++;
862  }
863  }
864  if (!ngs->fwdtree)
865  ++ngs->n_frame;
866  ngs->n_active_word[nf & 0x1] = j;
867 
868  /* Return the number of frames processed. */
869  return 1;
870 }
871 
875 static void
876 destroy_fwdflat_wordlist(ngram_search_t *ngs)
877 {
878  ps_latnode_t *node, *tnode;
879  int32 f;
880 
881  if (!ngs->fwdtree)
882  return;
883 
884  for (f = 0; f < ngs->n_frame; f++) {
885  for (node = ngs->frm_wordlist[f]; node; node = tnode) {
886  tnode = node->next;
887  listelem_free(ngs->latnode_alloc, node);
888  }
889  }
890 }
891 
895 static void
896 destroy_fwdflat_chan(ngram_search_t *ngs)
897 {
898  int32 i, wid;
899 
900  for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
901  root_chan_t *rhmm;
902  chan_t *thmm;
903  wid = ngs->fwdflat_wordlist[i];
904  if (dict_is_single_phone(ps_search_dict(ngs),wid))
905  continue;
906  assert(ngs->word_chan[wid] != NULL);
907 
908  /* The first HMM in ngs->word_chan[wid] was allocated with
909  * ngs->root_chan_alloc, but this will attempt to free it
910  * using ngs->chan_alloc, which will not work. Therefore we
911  * free it manually and move the list forward before handing
912  * it off. */
913  rhmm = (root_chan_t *)ngs->word_chan[wid];
914  thmm = rhmm->next;
915  listelem_free(ngs->root_chan_alloc, rhmm);
916  ngs->word_chan[wid] = thmm;
917  ngram_search_free_all_rc(ngs, wid);
918  }
919 }
920 
921 void
923 {
924  int32 cf;
925 
926  destroy_fwdflat_chan(ngs);
927  destroy_fwdflat_wordlist(ngs);
928  bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
929 
930  /* This is the number of frames processed. */
931  cf = ps_search_acmod(ngs)->output_frame;
932  /* Add a mark in the backpointer table for one past the final frame. */
933  ngram_search_mark_bptable(ngs, cf);
934 
935  ptmr_stop(&ngs->fwdflat_perf);
936  /* Print out some statistics. */
937  if (cf > 0) {
938  double n_speech = (double)(cf + 1)
939  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
940  E_INFO("%8d words recognized (%d/fr)\n",
941  ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
942  E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt,
943  (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
944  E_INFO("%8d channels searched (%d/fr)\n",
945  ngs->st.n_fwdflat_chan, ngs->st.n_fwdflat_chan / (cf + 1));
946  E_INFO("%8d words searched (%d/fr)\n",
947  ngs->st.n_fwdflat_words, ngs->st.n_fwdflat_words / (cf + 1));
948  E_INFO("%8d word transitions (%d/fr)\n",
949  ngs->st.n_fwdflat_word_transition,
950  ngs->st.n_fwdflat_word_transition / (cf + 1));
951  E_INFO("fwdflat %.2f CPU %.3f xRT\n",
952  ngs->fwdflat_perf.t_cpu,
953  ngs->fwdflat_perf.t_cpu / n_speech);
954  E_INFO("fwdflat %.2f wall %.3f xRT\n",
955  ngs->fwdflat_perf.t_elapsed,
956  ngs->fwdflat_perf.t_elapsed / n_speech);
957  }
958 }