PocketSphinx  0.6
phone_loop_search.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
42 #include <sphinxbase/err.h>
43 
44 #include "phone_loop_search.h"
45 
46 static int phone_loop_search_start(ps_search_t *search);
47 static int phone_loop_search_step(ps_search_t *search, int frame_idx);
48 static int phone_loop_search_finish(ps_search_t *search);
49 static int phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p);
50 static void phone_loop_search_free(ps_search_t *search);
51 static char const *phone_loop_search_hyp(ps_search_t *search, int32 *out_score);
52 static int32 phone_loop_search_prob(ps_search_t *search);
53 static ps_seg_t *phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score);
54 
55 static ps_searchfuncs_t phone_loop_search_funcs = {
56  /* name: */ "phone_loop",
57  /* start: */ phone_loop_search_start,
58  /* step: */ phone_loop_search_step,
59  /* finish: */ phone_loop_search_finish,
60  /* reinit: */ phone_loop_search_reinit,
61  /* free: */ phone_loop_search_free,
62  /* lattice: */ NULL,
63  /* hyp: */ phone_loop_search_hyp,
64  /* prob: */ phone_loop_search_prob,
65  /* seg_iter: */ phone_loop_search_seg_iter,
66 };
67 
68 static int
69 phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
70 {
72  cmd_ln_t *config = ps_search_config(search);
73  acmod_t *acmod = ps_search_acmod(search);
74  int i;
75 
76  /* Free old dict2pid, dict, if necessary. */
77  ps_search_base_reinit(search, dict, d2p);
78 
79  /* Initialize HMM context. */
80  if (pls->hmmctx)
81  hmm_context_free(pls->hmmctx);
82  pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
83  acmod->tmat->tp, NULL, acmod->mdef->sseq);
84  if (pls->hmmctx == NULL)
85  return -1;
86 
87  /* Initialize phone HMMs. */
88  if (pls->phones) {
89  for (i = 0; i < pls->n_phones; ++i)
90  hmm_deinit((hmm_t *)&pls->phones[i]);
91  ckd_free(pls->phones);
92  }
93  pls->n_phones = bin_mdef_n_ciphone(acmod->mdef);
94  pls->phones = ckd_calloc(pls->n_phones, sizeof(*pls->phones));
95  for (i = 0; i < pls->n_phones; ++i) {
96  pls->phones[i].ciphone = i;
97  hmm_init(pls->hmmctx, (hmm_t *)&pls->phones[i],
98  FALSE,
99  bin_mdef_pid2ssid(acmod->mdef, i),
100  bin_mdef_pid2tmatid(acmod->mdef, i));
101  }
102  pls->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_beam"));
103  pls->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_pbeam"));
104  pls->pip = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pip"));
105  E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n",
106  pls->beam, pls->pbeam, pls->pip);
107 
108  return 0;
109 }
110 
111 ps_search_t *
112 phone_loop_search_init(cmd_ln_t *config,
113  acmod_t *acmod,
114  dict_t *dict)
115 {
116  phone_loop_search_t *pls;
117 
118  /* Allocate and initialize. */
119  pls = ckd_calloc(1, sizeof(*pls));
120  ps_search_init(ps_search_base(pls), &phone_loop_search_funcs,
121  config, acmod, dict, NULL);
122  phone_loop_search_reinit(ps_search_base(pls), ps_search_dict(pls),
123  ps_search_dict2pid(pls));
124 
125  return ps_search_base(pls);
126 }
127 
128 static void
129 phone_loop_search_free_renorm(phone_loop_search_t *pls)
130 {
131  gnode_t *gn;
132  for (gn = pls->renorm; gn; gn = gnode_next(gn))
133  ckd_free(gnode_ptr(gn));
134  glist_free(pls->renorm);
135  pls->renorm = NULL;
136 }
137 
138 static void
139 phone_loop_search_free(ps_search_t *search)
140 {
141  phone_loop_search_t *pls = (phone_loop_search_t *)search;
142  int i;
143 
144  ps_search_deinit(search);
145  for (i = 0; i < pls->n_phones; ++i)
146  hmm_deinit((hmm_t *)&pls->phones[i]);
147  phone_loop_search_free_renorm(pls);
148  ckd_free(pls->phones);
149  hmm_context_free(pls->hmmctx);
150  ckd_free(pls);
151 }
152 
153 static int
154 phone_loop_search_start(ps_search_t *search)
155 {
156  phone_loop_search_t *pls = (phone_loop_search_t *)search;
157  int i;
158 
159  /* Reset and enter all phone HMMs. */
160  for (i = 0; i < pls->n_phones; ++i) {
161  hmm_t *hmm = (hmm_t *)&pls->phones[i];
162  hmm_clear(hmm);
163  hmm_enter(hmm, 0, -1, 0);
164  }
165  phone_loop_search_free_renorm(pls);
166  pls->best_score = 0;
167 
168  return 0;
169 }
170 
171 static void
172 renormalize_hmms(phone_loop_search_t *pls, int frame_idx, int32 norm)
173 {
174  phone_loop_renorm_t *rn = ckd_calloc(1, sizeof(*rn));
175  int i;
176 
177  pls->renorm = glist_add_ptr(pls->renorm, rn);
178  rn->frame_idx = frame_idx;
179  rn->norm = norm;
180 
181  for (i = 0; i < pls->n_phones; ++i) {
182  hmm_normalize((hmm_t *)&pls->phones[i], norm);
183  }
184 }
185 
186 static int32
187 evaluate_hmms(phone_loop_search_t *pls, int16 const *senscr, int frame_idx)
188 {
189  int32 bs = WORST_SCORE;
190  int i, bi;
191 
192  hmm_context_set_senscore(pls->hmmctx, senscr);
193 
194  bi = 0;
195  for (i = 0; i < pls->n_phones; ++i) {
196  hmm_t *hmm = (hmm_t *)&pls->phones[i];
197  int32 score;
198 
199  if (hmm_frame(hmm) < frame_idx)
200  continue;
201  score = hmm_vit_eval(hmm);
202  if (score BETTER_THAN bs) {
203  bs = score;
204  bi = i;
205  }
206  }
207  pls->best_score = bs;
208  return bs;
209 }
210 
211 static void
212 prune_hmms(phone_loop_search_t *pls, int frame_idx)
213 {
214  int32 thresh = pls->best_score + pls->beam;
215  int nf = frame_idx + 1;
216  int i;
217 
218  /* Check all phones to see if they remain active in the next frame. */
219  for (i = 0; i < pls->n_phones; ++i) {
220  hmm_t *hmm = (hmm_t *)&pls->phones[i];
221 
222  if (hmm_frame(hmm) < frame_idx)
223  continue;
224  /* Retain if score better than threshold. */
225  if (hmm_bestscore(hmm) BETTER_THAN thresh) {
226  hmm_frame(hmm) = nf;
227  }
228  else
229  hmm_clear_scores(hmm);
230  }
231 }
232 
233 static void
234 phone_transition(phone_loop_search_t *pls, int frame_idx)
235 {
236  int32 thresh = pls->best_score + pls->pbeam;
237  int nf = frame_idx + 1;
238  int i;
239 
240  /* Now transition out of phones whose last states are inside the
241  * phone transition beam. */
242  for (i = 0; i < pls->n_phones; ++i) {
243  hmm_t *hmm = (hmm_t *)&pls->phones[i];
244  int32 newphone_score;
245  int j;
246 
247  if (hmm_frame(hmm) != nf)
248  continue;
249 
250  newphone_score = hmm_out_score(hmm) + pls->pip;
251  if (newphone_score BETTER_THAN thresh) {
252  /* Transition into all phones using the usual Viterbi rule. */
253  for (j = 0; j < pls->n_phones; ++j) {
254  hmm_t *nhmm = (hmm_t *)&pls->phones[j];
255 
256  if (hmm_frame(nhmm) < frame_idx
257  || newphone_score BETTER_THAN hmm_in_score(nhmm)) {
258  hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf);
259  }
260  }
261  }
262  }
263 }
264 
265 static int
266 phone_loop_search_step(ps_search_t *search, int frame_idx)
267 {
268  phone_loop_search_t *pls = (phone_loop_search_t *)search;
269  acmod_t *acmod = ps_search_acmod(search);
270  int16 const *senscr;
271  int i;
272 
273  /* All CI senones are active all the time. */
274  if (!ps_search_acmod(pls)->compallsen)
275  for (i = 0; i < pls->n_phones; ++i)
276  acmod_activate_hmm(acmod, (hmm_t *)&pls->phones[i]);
277 
278  /* Calculate senone scores for current frame. */
279  senscr = acmod_score(acmod, &frame_idx);
280 
281  /* Renormalize, if necessary. */
282  if (pls->best_score + (2 * pls->beam) WORSE_THAN WORST_SCORE) {
283  E_INFO("Renormalizing Scores at frame %d, best score %d\n",
284  frame_idx, pls->best_score);
285  renormalize_hmms(pls, frame_idx, pls->best_score);
286  }
287 
288  /* Evaluate phone HMMs for current frame. */
289  pls->best_score = evaluate_hmms(pls, senscr, frame_idx);
290 
291  /* Prune phone HMMs. */
292  prune_hmms(pls, frame_idx);
293 
294  /* Do phone transitions. */
295  phone_transition(pls, frame_idx);
296 
297  return 0;
298 }
299 
300 static int
301 phone_loop_search_finish(ps_search_t *search)
302 {
303  /* Actually nothing to do here really. */
304  return 0;
305 }
306 
307 static char const *
308 phone_loop_search_hyp(ps_search_t *search, int32 *out_score)
309 {
310  E_WARN("Hypotheses are not returned from phone loop search");
311  return NULL;
312 }
313 
314 static int32
315 phone_loop_search_prob(ps_search_t *search)
316 {
317  /* FIXME: Actually... they ought to be. */
318  E_WARN("Posterior probabilities are not returned from phone loop search");
319  return 0;
320 }
321 
322 static ps_seg_t *
323 phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score)
324 {
325  E_WARN("Hypotheses are not returned from phone loop search");
326  return NULL;
327 }