PocketSphinx  0.6
acmod.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 
44 /* System headers. */
45 #include <assert.h>
46 #include <string.h>
47 
48 /* SphinxBase headers. */
49 #include <sphinxbase/prim_type.h>
50 #include <sphinxbase/err.h>
51 #include <sphinxbase/cmd_ln.h>
52 #include <sphinxbase/strfuncs.h>
53 #include <sphinxbase/byteorder.h>
54 #include <sphinxbase/feat.h>
55 #include <sphinxbase/bio.h>
56 
57 /* Local headers. */
58 #include "cmdln_macro.h"
59 #include "acmod.h"
60 #include "s2_semi_mgau.h"
61 #include "ptm_mgau.h"
62 #include "ms_mgau.h"
63 
64 /* Feature and front-end parameters that may be in feat.params */
65 static const arg_t feat_defn[] = {
66  waveform_to_cepstral_command_line_macro(),
67  cepstral_to_feature_command_line_macro(),
68  CMDLN_EMPTY_OPTION
69 };
70 
/*
 * Fallback for builds that do not define WORDS_BIGENDIAN.  With the value
 * 1 the "!WORDS_BIGENDIAN" byte-swapping branches in this file are never
 * taken.
 */
#if !defined(WORDS_BIGENDIAN)
#define WORDS_BIGENDIAN 1
#endif
74 
75 static int32 acmod_process_mfcbuf(acmod_t *acmod);
76 
77 static int
78 acmod_init_am(acmod_t *acmod)
79 {
80  char const *mdeffn, *tmatfn, *mllrfn;
81 
82  /* Read model definition. */
83  if ((mdeffn = cmd_ln_str_r(acmod->config, "-mdef")) == NULL) {
84  E_ERROR("Acoustic model definition is not specified neither with -mdef option nor with -hmm\n");
85  return -1;
86  }
87 
88  if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) {
89  E_ERROR("Failed to read acoustic model definition from %s\n", mdeffn);
90  return -1;
91  }
92 
93  /* Read transition matrices. */
94  if ((tmatfn = cmd_ln_str_r(acmod->config, "-tmat")) == NULL) {
95  E_ERROR("No tmat file specified\n");
96  return -1;
97  }
98  acmod->tmat = tmat_init(tmatfn, acmod->lmath,
99  cmd_ln_float32_r(acmod->config, "-tmatfloor"),
100  TRUE);
101 
102  /* Read the acoustic models. */
103  if ((cmd_ln_str_r(acmod->config, "-mean") == NULL)
104  || (cmd_ln_str_r(acmod->config, "-var") == NULL)
105  || (cmd_ln_str_r(acmod->config, "-tmat") == NULL)) {
106  E_ERROR("No mean/var/tmat files specified\n");
107  return -1;
108  }
109 
110  if (cmd_ln_str_r(acmod->config, "-senmgau")) {
111  E_INFO("Using general multi-stream GMM computation\n");
112  acmod->mgau = ms_mgau_init(acmod->config, acmod->lmath, acmod->mdef);
113  if (acmod->mgau == NULL)
114  return -1;
115  }
116  else {
117  E_INFO("Attempting to use SCHMM computation module\n");
118  if ((acmod->mgau = s2_semi_mgau_init(acmod)) == NULL) {
119  E_INFO("Attempting to use PTHMM computation module\n");
120  if ((acmod->mgau = ptm_mgau_init(acmod, acmod->mdef)) == NULL) {
121  E_INFO("Falling back to general multi-stream GMM computation\n");
122  acmod->mgau = ms_mgau_init(acmod->config, acmod->lmath, acmod->mdef);
123  if (acmod->mgau == NULL)
124  return -1;
125  }
126  }
127  }
128 
129  /* If there is an MLLR transform, apply it. */
130  if ((mllrfn = cmd_ln_str_r(acmod->config, "-mllr"))) {
131  ps_mllr_t *mllr = ps_mllr_read(mllrfn);
132  if (mllr == NULL)
133  return -1;
134  acmod_update_mllr(acmod, mllr);
135  }
136 
137  return 0;
138 }
139 
140 static int
141 acmod_init_feat(acmod_t *acmod)
142 {
143  acmod->fcb =
144  feat_init(cmd_ln_str_r(acmod->config, "-feat"),
145  cmn_type_from_str(cmd_ln_str_r(acmod->config,"-cmn")),
146  cmd_ln_boolean_r(acmod->config, "-varnorm"),
147  agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")),
148  1, cmd_ln_int32_r(acmod->config, "-ceplen"));
149  if (acmod->fcb == NULL)
150  return -1;
151 
152  if (cmd_ln_str_r(acmod->config, "-lda")) {
153  E_INFO("Reading linear feature transformation from %s\n",
154  cmd_ln_str_r(acmod->config, "-lda"));
155  if (feat_read_lda(acmod->fcb,
156  cmd_ln_str_r(acmod->config, "-lda"),
157  cmd_ln_int32_r(acmod->config, "-ldadim")) < 0)
158  return -1;
159  }
160 
161  if (cmd_ln_str_r(acmod->config, "-svspec")) {
162  int32 **subvecs;
163  E_INFO("Using subvector specification %s\n",
164  cmd_ln_str_r(acmod->config, "-svspec"));
165  if ((subvecs = parse_subvecs(cmd_ln_str_r(acmod->config, "-svspec"))) == NULL)
166  return -1;
167  if ((feat_set_subvecs(acmod->fcb, subvecs)) < 0)
168  return -1;
169  }
170 
171  if (cmd_ln_exists_r(acmod->config, "-agcthresh")
172  && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) {
173  agc_set_threshold(acmod->fcb->agc_struct,
174  cmd_ln_float32_r(acmod->config, "-agcthresh"));
175  }
176 
177  if (acmod->fcb->cmn_struct
178  && cmd_ln_exists_r(acmod->config, "-cmninit")) {
179  char *c, *cc, *vallist;
180  int32 nvals;
181 
182  vallist = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit"));
183  c = vallist;
184  nvals = 0;
185  while (nvals < acmod->fcb->cmn_struct->veclen
186  && (cc = strchr(c, ',')) != NULL) {
187  *cc = '\0';
188  acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
189  c = cc + 1;
190  ++nvals;
191  }
192  if (nvals < acmod->fcb->cmn_struct->veclen && *c != '\0') {
193  acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
194  }
195  ckd_free(vallist);
196  }
197  return 0;
198 }
199 
200 int
201 acmod_fe_mismatch(acmod_t *acmod, fe_t *fe)
202 {
203  /* Output vector dimension needs to be the same. */
204  if (cmd_ln_int32_r(acmod->config, "-ceplen") != fe_get_output_size(fe)) {
205  E_ERROR("Configured feature length %d doesn't match feature extraction output size %d\n",
206  cmd_ln_int32_r(acmod->config, "-ceplen"),
207  fe_get_output_size(fe));
208  return TRUE;
209  }
210  /* Feature parameters need to be the same. */
211  /* ... */
212  return FALSE;
213 }
214 
215 int
216 acmod_feat_mismatch(acmod_t *acmod, feat_t *fcb)
217 {
218  /* Feature type needs to be the same. */
219  if (0 != strcmp(cmd_ln_str_r(acmod->config, "-feat"), feat_name(fcb)))
220  return TRUE;
221  /* Input vector dimension needs to be the same. */
222  if (cmd_ln_int32_r(acmod->config, "-ceplen") != feat_cepsize(fcb))
223  return TRUE;
224  /* FIXME: Need to check LDA and stuff too. */
225  return FALSE;
226 }
227 
228 acmod_t *
229 acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
230 {
231  acmod_t *acmod;
232  char const *featparams;
233 
234  acmod = ckd_calloc(1, sizeof(*acmod));
235  acmod->config = config;
236  acmod->lmath = lmath;
237  acmod->state = ACMOD_IDLE;
238 
239  /* Look for feat.params in acoustic model dir. */
240  if ((featparams = cmd_ln_str_r(acmod->config, "-featparams"))) {
241  if (cmd_ln_parse_file_r(acmod->config, feat_defn, featparams, FALSE) != NULL) {
242  E_INFO("Parsed model-specific feature parameters from %s\n", featparams);
243  }
244  }
245 
246  /* Initialize feature computation. */
247  if (fe) {
248  if (acmod_fe_mismatch(acmod, fe))
249  goto error_out;
250  fe_retain(fe);
251  acmod->fe = fe;
252  }
253  else {
254  /* Initialize a new front end. */
255  cmd_ln_retain(config);
256  acmod->fe = fe_init_auto_r(config);
257  if (acmod->fe == NULL)
258  goto error_out;
259  if (acmod_fe_mismatch(acmod, acmod->fe))
260  goto error_out;
261  }
262  if (fcb) {
263  if (acmod_feat_mismatch(acmod, fcb))
264  goto error_out;
265  feat_retain(fcb);
266  acmod->fcb = fcb;
267  }
268  else {
269  /* Initialize a new fcb. */
270  if (acmod_init_feat(acmod) < 0)
271  goto error_out;
272  }
273 
274  /* Load acoustic model parameters. */
275  if (acmod_init_am(acmod) < 0)
276  goto error_out;
277 
278 
279  /* The MFCC buffer needs to be at least as large as the dynamic
280  * feature window. */
281  acmod->n_mfc_alloc = acmod->fcb->window_size * 2 + 1;
282  acmod->mfc_buf = (mfcc_t **)
283  ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize,
284  sizeof(**acmod->mfc_buf));
285 
286  /* Feature buffer has to be at least as large as MFCC buffer. */
287  acmod->n_feat_alloc = acmod->n_mfc_alloc + cmd_ln_int32_r(config, "-pl_window");
288  acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc);
289  acmod->framepos = ckd_calloc(acmod->n_feat_alloc, sizeof(*acmod->framepos));
290 
291  /* Senone computation stuff. */
292  acmod->senone_scores = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
293  sizeof(*acmod->senone_scores));
294  acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef));
295  acmod->senone_active = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
296  sizeof(*acmod->senone_active));
297  acmod->log_zero = logmath_get_zero(acmod->lmath);
298  acmod->compallsen = cmd_ln_boolean_r(config, "-compallsen");
299  return acmod;
300 
301 error_out:
302  acmod_free(acmod);
303  return NULL;
304 }
305 
306 void
308 {
309  if (acmod == NULL)
310  return;
311 
312  feat_free(acmod->fcb);
313  fe_free(acmod->fe);
314 
315  if (acmod->mfc_buf)
316  ckd_free_2d((void **)acmod->mfc_buf);
317  if (acmod->feat_buf)
318  feat_array_free(acmod->feat_buf);
319 
320  if (acmod->mfcfh)
321  fclose(acmod->mfcfh);
322  if (acmod->rawfh)
323  fclose(acmod->rawfh);
324  if (acmod->senfh)
325  fclose(acmod->senfh);
326 
327  ckd_free(acmod->framepos);
328  ckd_free(acmod->senone_scores);
329  ckd_free(acmod->senone_active_vec);
330  ckd_free(acmod->senone_active);
331 
332  if (acmod->mdef)
333  bin_mdef_free(acmod->mdef);
334  if (acmod->tmat)
335  tmat_free(acmod->tmat);
336  if (acmod->mgau)
337  ps_mgau_free(acmod->mgau);
338  if (acmod->mllr)
339  ps_mllr_free(acmod->mllr);
340 
341  ckd_free(acmod);
342 }
343 
344 ps_mllr_t *
346 {
347  if (acmod->mllr)
348  ps_mllr_free(acmod->mllr);
349  acmod->mllr = mllr;
350  ps_mgau_transform(acmod->mgau, mllr);
351 
352  return mllr;
353 }
354 
355 int
356 acmod_write_senfh_header(acmod_t *acmod, FILE *logfh)
357 {
358  char nsenstr[64], logbasestr[64];
359 
360  sprintf(nsenstr, "%d", bin_mdef_n_sen(acmod->mdef));
361  sprintf(logbasestr, "%f", logmath_get_base(acmod->lmath));
362  return bio_writehdr(logfh,
363  "version", "0.1",
364  "mdef_file", cmd_ln_str_r(acmod->config, "-mdef"),
365  "n_sen", nsenstr,
366  "logbase", logbasestr, NULL);
367 }
368 
369 int
370 acmod_set_senfh(acmod_t *acmod, FILE *logfh)
371 {
372  if (acmod->senfh)
373  fclose(acmod->senfh);
374  acmod->senfh = logfh;
375  if (logfh == NULL)
376  return 0;
377  return acmod_write_senfh_header(acmod, logfh);
378 }
379 
380 int
381 acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
382 {
383  int rv = 0;
384 
385  if (acmod->mfcfh)
386  fclose(acmod->mfcfh);
387  acmod->mfcfh = logfh;
388  fwrite(&rv, 4, 1, acmod->mfcfh);
389  return rv;
390 }
391 
392 int
393 acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
394 {
395  if (acmod->rawfh)
396  fclose(acmod->rawfh);
397  acmod->rawfh = logfh;
398  return 0;
399 }
400 
401 void
402 acmod_grow_feat_buf(acmod_t *acmod, int nfr)
403 {
404  mfcc_t ***new_feat_buf;
405 
406  new_feat_buf = feat_array_alloc(acmod->fcb, nfr);
407  if (acmod->n_feat_frame || acmod->grow_feat) {
408  memcpy(new_feat_buf[0][0], acmod->feat_buf[0][0],
409  (acmod->n_feat_alloc
410  * feat_dimension(acmod->fcb)
411  * sizeof(***acmod->feat_buf)));
412  }
413  feat_array_free(acmod->feat_buf);
414  acmod->framepos = ckd_realloc(acmod->framepos,
415  nfr * sizeof(*acmod->framepos));
416  acmod->feat_buf = new_feat_buf;
417  acmod->n_feat_alloc = nfr;
418 }
419 
420 int
421 acmod_set_grow(acmod_t *acmod, int grow_feat)
422 {
423  int tmp = acmod->grow_feat;
424  acmod->grow_feat = grow_feat;
425 
426  /* Expand feat_buf to a reasonable size to start with. */
427  if (grow_feat && acmod->n_feat_alloc < 128)
428  acmod_grow_feat_buf(acmod, 128);
429 
430  return tmp;
431 }
432 
433 int
435 {
436  fe_start_utt(acmod->fe);
437  acmod->state = ACMOD_STARTED;
438  acmod->n_mfc_frame = 0;
439  acmod->n_feat_frame = 0;
440  acmod->mfc_outidx = 0;
441  acmod->feat_outidx = 0;
442  acmod->output_frame = 0;
443  acmod->senscr_frame = -1;
444  acmod->n_senone_active = 0;
445  acmod->mgau->frame_idx = 0;
446  return 0;
447 }
448 
449 int
451 {
452  int32 nfr = 0;
453 
454  acmod->state = ACMOD_ENDED;
455  if (acmod->n_mfc_frame < acmod->n_mfc_alloc) {
456  int inptr;
457  /* Where to start writing them (circular buffer) */
458  inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc;
459  /* nfr is always either zero or one. */
460  fe_end_utt(acmod->fe, acmod->mfc_buf[inptr], &nfr);
461  acmod->n_mfc_frame += nfr;
462  /* Process whatever's left, and any leadout. */
463  if (nfr)
464  nfr = acmod_process_mfcbuf(acmod);
465  }
466  if (acmod->mfcfh) {
467  int32 outlen, rv;
468  outlen = (ftell(acmod->mfcfh) - 4) / 4;
469  if (!WORDS_BIGENDIAN)
470  SWAP_INT32(&outlen);
471  /* Try to seek and write */
472  if ((rv = fseek(acmod->mfcfh, 0, SEEK_SET)) == 0) {
473  fwrite(&outlen, 4, 1, acmod->mfcfh);
474  }
475  fclose(acmod->mfcfh);
476  acmod->mfcfh = NULL;
477  }
478  if (acmod->rawfh) {
479  fclose(acmod->rawfh);
480  acmod->rawfh = NULL;
481  }
482 
483  if (acmod->senfh) {
484  fclose(acmod->senfh);
485  acmod->senfh = NULL;
486  }
487 
488  return nfr;
489 }
490 
491 static int
492 acmod_log_mfc(acmod_t *acmod,
493  mfcc_t **cep, int n_frames)
494 {
495  int i, n;
496  int32 *ptr = (int32 *)cep[0];
497 
498  n = n_frames * feat_cepsize(acmod->fcb);
499  /* Swap bytes. */
500  if (!WORDS_BIGENDIAN) {
501  for (i = 0; i < (n * sizeof(mfcc_t)); ++i) {
502  SWAP_INT32(ptr + i);
503  }
504  }
505  /* Write features. */
506  if (fwrite(cep[0], sizeof(mfcc_t), n, acmod->mfcfh) != n) {
507  E_ERROR_SYSTEM("Failed to write %d values to log file", n);
508  }
509 
510  /* Swap them back. */
511  if (!WORDS_BIGENDIAN) {
512  for (i = 0; i < (n * sizeof(mfcc_t)); ++i) {
513  SWAP_INT32(ptr + i);
514  }
515  }
516  return 0;
517 }
518 
519 static int
520 acmod_process_full_cep(acmod_t *acmod,
521  mfcc_t ***inout_cep,
522  int *inout_n_frames)
523 {
524  int32 nfr;
525 
526  /* Write to log file. */
527  if (acmod->mfcfh)
528  acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
529 
530  /* Resize feat_buf to fit. */
531  if (acmod->n_feat_alloc < *inout_n_frames) {
532  feat_array_free(acmod->feat_buf);
533  acmod->feat_buf = feat_array_alloc(acmod->fcb, *inout_n_frames);
534  acmod->n_feat_alloc = *inout_n_frames;
535  acmod->n_feat_frame = 0;
536  acmod->feat_outidx = 0;
537  }
538  /* Make dynamic features. */
539  nfr = feat_s2mfc2feat_live(acmod->fcb, *inout_cep, inout_n_frames,
540  TRUE, TRUE, acmod->feat_buf);
541  acmod->n_feat_frame = nfr;
542  assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
543  *inout_cep += *inout_n_frames;
544  *inout_n_frames = 0;
545  return nfr;
546 }
547 
548 static int
549 acmod_process_full_raw(acmod_t *acmod,
550  int16 const **inout_raw,
551  size_t *inout_n_samps)
552 {
553  int32 nfr, ntail;
554  mfcc_t **cepptr;
555 
556  /* Write to logging file if any. */
557  if (acmod->rawfh)
558  fwrite(*inout_raw, 2, *inout_n_samps, acmod->rawfh);
559  /* Resize mfc_buf to fit. */
560  if (fe_process_frames(acmod->fe, NULL, inout_n_samps, NULL, &nfr) < 0)
561  return -1;
562  if (acmod->n_mfc_alloc < nfr + 1) {
563  ckd_free_2d(acmod->mfc_buf);
564  acmod->mfc_buf = ckd_calloc_2d(nfr + 1, fe_get_output_size(acmod->fe),
565  sizeof(**acmod->mfc_buf));
566  acmod->n_mfc_alloc = nfr + 1;
567  }
568  acmod->n_mfc_frame = 0;
569  acmod->mfc_outidx = 0;
570  fe_start_utt(acmod->fe);
571  if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
572  acmod->mfc_buf, &nfr) < 0)
573  return -1;
574  fe_end_utt(acmod->fe, acmod->mfc_buf[nfr], &ntail);
575  nfr += ntail;
576 
577  cepptr = acmod->mfc_buf;
578  nfr = acmod_process_full_cep(acmod, &cepptr, &nfr);
579  acmod->n_mfc_frame = 0;
580  return nfr;
581 }
582 
586 static int32
587 acmod_process_mfcbuf(acmod_t *acmod)
588 {
589  mfcc_t **mfcptr;
590  int32 ncep;
591 
592  ncep = acmod->n_mfc_frame;
593  /* Also do this in two parts because of the circular mfc_buf. */
594  if (acmod->mfc_outidx + ncep > acmod->n_mfc_alloc) {
595  int32 ncep1 = acmod->n_mfc_alloc - acmod->mfc_outidx;
596  int saved_state = acmod->state;
597 
598  /* Make sure we don't end the utterance here. */
599  if (acmod->state == ACMOD_ENDED)
600  acmod->state = ACMOD_PROCESSING;
601  mfcptr = acmod->mfc_buf + acmod->mfc_outidx;
602  ncep1 = acmod_process_cep(acmod, &mfcptr, &ncep1, FALSE);
603  /* It's possible that not all available frames were filled. */
604  ncep -= ncep1;
605  acmod->n_mfc_frame -= ncep1;
606  acmod->mfc_outidx += ncep1;
607  acmod->mfc_outidx %= acmod->n_mfc_alloc;
608  /* Restore original state (could this really be the end) */
609  acmod->state = saved_state;
610  }
611  mfcptr = acmod->mfc_buf + acmod->mfc_outidx;
612  ncep = acmod_process_cep(acmod, &mfcptr, &ncep, FALSE);
613  acmod->n_mfc_frame -= ncep;
614  acmod->mfc_outidx += ncep;
615  acmod->mfc_outidx %= acmod->n_mfc_alloc;
616  return ncep;
617 }
618 
619 int
621  int16 const **inout_raw,
622  size_t *inout_n_samps,
623  int full_utt)
624 {
625  int32 ncep;
626 
627  /* If this is a full utterance, process it all at once. */
628  if (full_utt)
629  return acmod_process_full_raw(acmod, inout_raw, inout_n_samps);
630 
631  /* Append MFCCs to the end of any that are previously in there
632  * (in practice, there will probably be none) */
633  if (inout_n_samps && *inout_n_samps) {
634  int16 const *prev_audio_inptr = *inout_raw;
635  int inptr;
636 
637  /* Total number of frames available. */
638  ncep = acmod->n_mfc_alloc - acmod->n_mfc_frame;
639  /* Where to start writing them (circular buffer) */
640  inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc;
641 
642  /* Write them in two (or more) parts if there is wraparound. */
643  while (inptr + ncep > acmod->n_mfc_alloc) {
644  int32 ncep1 = acmod->n_mfc_alloc - inptr;
645  if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
646  acmod->mfc_buf + inptr, &ncep1) < 0)
647  return -1;
648  /* Write to logging file if any. */
649  if (acmod->rawfh) {
650  fwrite(prev_audio_inptr, 2,
651  *inout_raw - prev_audio_inptr,
652  acmod->rawfh);
653  prev_audio_inptr = *inout_raw;
654  }
655  /* ncep1 now contains the number of frames actually
656  * processed. This is a good thing, but it means we
657  * actually still might have some room left at the end of
658  * the buffer, hence the while loop. Unfortunately it
659  * also means that in the case where we are really
660  * actually done, we need to get out totally, hence the
661  * goto. */
662  acmod->n_mfc_frame += ncep1;
663  ncep -= ncep1;
664  inptr += ncep1;
665  inptr %= acmod->n_mfc_alloc;
666  if (ncep1 == 0)
667  goto alldone;
668  }
669  assert(inptr + ncep <= acmod->n_mfc_alloc);
670  if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
671  acmod->mfc_buf + inptr, &ncep) < 0)
672  return -1;
673  /* Write to logging file if any. */
674  if (acmod->rawfh) {
675  fwrite(prev_audio_inptr, 2,
676  *inout_raw - prev_audio_inptr, acmod->rawfh);
677  prev_audio_inptr = *inout_raw;
678  }
679  acmod->n_mfc_frame += ncep;
680  alldone:
681  ;
682  }
683 
684  /* Hand things off to acmod_process_cep. */
685  return acmod_process_mfcbuf(acmod);
686 }
687 
688 int
690  mfcc_t ***inout_cep,
691  int *inout_n_frames,
692  int full_utt)
693 {
694  int32 nfeat, ncep, inptr;
695  int orig_n_frames;
696 
697  /* If this is a full utterance, process it all at once. */
698  if (full_utt)
699  return acmod_process_full_cep(acmod, inout_cep, inout_n_frames);
700 
701  /* Write to log file. */
702  if (acmod->mfcfh)
703  acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
704 
705  /* Maximum number of frames we're going to generate. */
706  orig_n_frames = ncep = nfeat = *inout_n_frames;
707 
708  /* FIXME: This behaviour isn't guaranteed... */
709  if (acmod->state == ACMOD_ENDED)
710  nfeat += feat_window_size(acmod->fcb);
711  else if (acmod->state == ACMOD_STARTED)
712  nfeat -= feat_window_size(acmod->fcb);
713 
714  /* Clamp number of features to fit available space. */
715  if (nfeat > acmod->n_feat_alloc - acmod->n_feat_frame) {
716  /* Grow it as needed - we have to grow it at the end of an
717  * utterance because we can't return a short read there. */
718  if (acmod->grow_feat || acmod->state == ACMOD_ENDED)
719  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc + nfeat);
720  else
721  ncep -= (nfeat - (acmod->n_feat_alloc - acmod->n_feat_frame));
722  }
723 
724  /* Where to start writing in the feature buffer. */
725  if (acmod->grow_feat) {
726  /* Grow to avoid wraparound if grow_feat == TRUE. */
727  inptr = acmod->feat_outidx + acmod->n_feat_frame;
728  while (inptr + nfeat >= acmod->n_feat_alloc)
729  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
730  }
731  else {
732  inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
733  }
734 
735  /* Write them in two parts if there is wraparound. */
736  if (inptr + nfeat > acmod->n_feat_alloc) {
737  int32 ncep1 = acmod->n_feat_alloc - inptr;
738  int saved_state = acmod->state;
739 
740  /* Make sure we don't end the utterance here. */
741  if (acmod->state == ACMOD_ENDED)
742  acmod->state = ACMOD_PROCESSING;
743  nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
744  &ncep1,
745  (acmod->state == ACMOD_STARTED),
746  (acmod->state == ACMOD_ENDED),
747  acmod->feat_buf + inptr);
748  if (nfeat < 0)
749  return -1;
750  /* Move the output feature pointer forward. */
751  acmod->n_feat_frame += nfeat;
752  assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
753  inptr += nfeat;
754  inptr %= acmod->n_feat_alloc;
755  /* Move the input feature pointers forward. */
756  *inout_n_frames -= ncep1;
757  *inout_cep += ncep1;
758  ncep -= ncep1;
759  /* Restore original state (could this really be the end) */
760  acmod->state = saved_state;
761  }
762 
763  nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
764  &ncep,
765  (acmod->state == ACMOD_STARTED),
766  (acmod->state == ACMOD_ENDED),
767  acmod->feat_buf + inptr);
768  if (nfeat < 0)
769  return -1;
770  acmod->n_feat_frame += nfeat;
771  assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
772  /* Move the input feature pointers forward. */
773  *inout_n_frames -= ncep;
774  *inout_cep += ncep;
775  if (acmod->state == ACMOD_STARTED)
776  acmod->state = ACMOD_PROCESSING;
777  return orig_n_frames - *inout_n_frames;
778 }
779 
780 int
782  mfcc_t **feat)
783 {
784  int i, inptr;
785 
786  if (acmod->n_feat_frame == acmod->n_feat_alloc) {
787  if (acmod->grow_feat)
788  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
789  else
790  return 0;
791  }
792 
793  if (acmod->grow_feat) {
794  /* Grow to avoid wraparound if grow_feat == TRUE. */
795  inptr = acmod->feat_outidx + acmod->n_feat_frame;
796  while (inptr + 1 >= acmod->n_feat_alloc)
797  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
798  }
799  else {
800  inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
801  }
802  for (i = 0; i < feat_dimension1(acmod->fcb); ++i)
803  memcpy(acmod->feat_buf[inptr][i],
804  feat[i], feat_dimension2(acmod->fcb, i) * sizeof(**feat));
805  ++acmod->n_feat_frame;
806  assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
807 
808  return 1;
809 }
810 
811 static int
812 acmod_read_senfh_header(acmod_t *acmod)
813 {
814  char **name, **val;
815  int32 swap;
816  int i;
817 
818  if (bio_readhdr(acmod->insenfh, &name, &val, &swap) < 0)
819  goto error_out;
820  for (i = 0; name[i] != NULL; ++i) {
821  if (!strcmp(name[i], "n_sen")) {
822  if (atoi(val[i]) != bin_mdef_n_sen(acmod->mdef)) {
823  E_ERROR("Number of senones in senone file (%d) does not match mdef (%d)\n",
824  atoi(val[i]), bin_mdef_n_sen(acmod->mdef));
825  goto error_out;
826  }
827  }
828  if (!strcmp(name[i], "logbase")) {
829  if (abs(atof(val[i]) - logmath_get_base(acmod->lmath)) > 0.001) {
830  E_ERROR("Logbase in senone file (%f) does not match acmod (%f)\n",
831  atof(val[i]), logmath_get_base(acmod->lmath));
832  goto error_out;
833  }
834  }
835  }
836  acmod->insen_swap = swap;
837  bio_hdrarg_free(name, val);
838  return 0;
839 error_out:
840  bio_hdrarg_free(name, val);
841  return -1;
842 }
843 
844 int
845 acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
846 {
847  acmod->insenfh = senfh;
848  if (senfh == NULL) {
849  acmod->n_feat_frame = 0;
850  acmod->compallsen = cmd_ln_boolean_r(acmod->config, "-compallsen");
851  return 0;
852  }
853  acmod->compallsen = TRUE;
854  return acmod_read_senfh_header(acmod);
855 }
856 
857 int
859 {
860  /* If the feature buffer is circular, this is not possible. */
861  if (acmod->output_frame > acmod->n_feat_alloc) {
862  E_ERROR("Circular feature buffer cannot be rewound (output frame %d, alloc %d)\n",
863  acmod->output_frame, acmod->n_feat_alloc);
864  return -1;
865  }
866 
867  /* Frames consumed + frames available */
868  acmod->n_feat_frame = acmod->output_frame + acmod->n_feat_frame;
869 
870  /* Reset output pointers. */
871  acmod->feat_outidx = 0;
872  acmod->output_frame = 0;
873  acmod->senscr_frame = -1;
874  acmod->mgau->frame_idx = 0;
875 
876  return 0;
877 }
878 
879 int
881 {
882  /* Advance the output pointers. */
883  if (++acmod->feat_outidx == acmod->n_feat_alloc)
884  acmod->feat_outidx = 0;
885  --acmod->n_feat_frame;
886  ++acmod->mgau->frame_idx;
887 
888  return ++acmod->output_frame;
889 }
890 
891 int
892 acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active,
893  int16 const *senscr, FILE *senfh)
894 {
895  int16 n_active2;
896 
897  /* Uncompressed frame format:
898  *
899  * (2 bytes) n_active: Number of active senones
900  * If all senones active:
901  * (n_active * 2 bytes) scores of active senones
902  *
903  * Otherwise:
904  * (2 bytes) n_active: Number of active senones
905  * (n_active bytes) deltas to active senones
906  * (n_active * 2 bytes) scores of active senones
907  */
908  n_active2 = n_active;
909  if (fwrite(&n_active2, 2, 1, senfh) != 1)
910  goto error_out;
911  if (n_active == bin_mdef_n_sen(acmod->mdef)) {
912  if (fwrite(senscr, 2, n_active, senfh) != n_active)
913  goto error_out;
914  }
915  else {
916  int i, n;
917  if (fwrite(active, 1, n_active, senfh) != n_active)
918  goto error_out;
919  for (i = n = 0; i < n_active; ++i) {
920  n += active[i];
921  if (fwrite(senscr + n, 2, 1, senfh) != 1)
922  goto error_out;
923  }
924  }
925  return 0;
926 error_out:
927  E_ERROR_SYSTEM("Failed to write frame to senone file");
928  return -1;
929 }
930 
934 static int
935 acmod_read_scores_internal(acmod_t *acmod)
936 {
937  FILE *senfh = acmod->insenfh;
938  int16 n_active;
939  int rv;
940 
941  if (acmod->n_feat_frame == acmod->n_feat_alloc) {
942  if (acmod->grow_feat)
943  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
944  else
945  return 0;
946  }
947 
948  if (senfh == NULL)
949  return -1;
950  if ((rv = fread(&n_active, 2, 1, senfh)) < 0)
951  goto error_out;
952  else if (rv == 0)
953  return 0;
954 
955  acmod->n_senone_active = n_active;
956  if (acmod->n_senone_active == bin_mdef_n_sen(acmod->mdef)) {
957  if ((rv = fread(acmod->senone_scores, 2,
958  acmod->n_senone_active, senfh)) < 0)
959  goto error_out;
960  else if (rv != acmod->n_senone_active)
961  return 0;
962  }
963  else {
964  int i, n;
965  if ((rv = fread(acmod->senone_active, 1,
966  acmod->n_senone_active, senfh)) < 0)
967  goto error_out;
968  else if (rv != acmod->n_senone_active)
969  return 0;
970  for (i = 0, n = 0; i < acmod->n_senone_active; ++i) {
971  int j, sen = n + acmod->senone_active[i];
972  for (j = n + 1; j < sen; ++j)
973  acmod->senone_scores[j] = SENSCR_DUMMY;
974  if ((rv = fread(acmod->senone_scores + sen, 2, 1, senfh)) < 0)
975  goto error_out;
976  else if (rv == 0)
977  return 0;
978  n = sen;
979  }
980  ++n;
981  while (n < bin_mdef_n_sen(acmod->mdef))
982  acmod->senone_scores[n++] = SENSCR_DUMMY;
983  }
984  return 1;
985 error_out:
986  E_ERROR_SYSTEM("Failed to read frame from senone file");
987  return -1;
988 }
989 
990 int
992 {
993  int inptr, rv;
994 
995  if (acmod->grow_feat) {
996  /* Grow to avoid wraparound if grow_feat == TRUE. */
997  inptr = acmod->feat_outidx + acmod->n_feat_frame;
998  /* Has to be +1, otherwise, next time acmod_advance() is
999  * called, this will wrap around. */
1000  while (inptr + 1 >= acmod->n_feat_alloc)
1001  acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
1002  }
1003  else {
1004  inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
1005  }
1006 
1007  if ((rv = acmod_read_scores_internal(acmod)) != 1)
1008  return rv;
1009 
1010  /* Set acmod->senscr_frame appropriately so that these scores
1011  get reused below in acmod_score(). */
1012  acmod->senscr_frame = acmod->output_frame + acmod->n_feat_frame;
1013 
1014  E_DEBUG(1,("Frame %d has %d active states\n",
1015  acmod->senscr_frame, acmod->n_senone_active));
1016 
1017  /* Increment the "feature frame counter" and record the file
1018  * position for the relevant frame in the (possibly circular)
1019  * buffer. */
1020  ++acmod->n_feat_frame;
1021  acmod->framepos[inptr] = ftell(acmod->insenfh);
1022 
1023  return 1;
1024 }
1025 
1026 static int
1027 calc_frame_idx(acmod_t *acmod, int *inout_frame_idx)
1028 {
1029  int frame_idx;
1030 
1031  /* Calculate the absolute frame index to be scored. */
1032  if (inout_frame_idx == NULL)
1033  frame_idx = acmod->output_frame;
1034  else if (*inout_frame_idx < 0)
1035  frame_idx = acmod->output_frame + 1 + *inout_frame_idx;
1036  else
1037  frame_idx = *inout_frame_idx;
1038 
1039  return frame_idx;
1040 }
1041 
1042 static int
1043 calc_feat_idx(acmod_t *acmod, int frame_idx)
1044 {
1045  int n_backfr, feat_idx;
1046 
1047  n_backfr = acmod->n_feat_alloc - acmod->n_feat_frame;
1048  if (frame_idx < 0 || acmod->output_frame - frame_idx > n_backfr) {
1049  E_ERROR("Frame %d outside queue of %d frames, %d alloc (%d > %d), cannot score\n",
1050  frame_idx, acmod->n_feat_frame, acmod->n_feat_alloc,
1051  acmod->output_frame - frame_idx, n_backfr);
1052  return -1;
1053  }
1054 
1055  /* Get the index in feat_buf/framepos of the frame to be scored. */
1056  feat_idx = ((acmod->feat_outidx + frame_idx - acmod->output_frame)
1057  % acmod->n_feat_alloc);
1058  if (feat_idx < 0) feat_idx += acmod->n_feat_alloc;
1059 
1060  return feat_idx;
1061 }
1062 
1063 mfcc_t **
1064 acmod_get_frame(acmod_t *acmod, int *inout_frame_idx)
1065 {
1066  int frame_idx, feat_idx;
1067 
1068  /* Calculate the absolute frame index requested. */
1069  frame_idx = calc_frame_idx(acmod, inout_frame_idx);
1070 
1071  /* Calculate position of requested frame in circular buffer. */
1072  if ((feat_idx = calc_feat_idx(acmod, frame_idx)) < 0)
1073  return NULL;
1074 
1075  if (inout_frame_idx)
1076  *inout_frame_idx = frame_idx;
1077 
1078  return acmod->feat_buf[feat_idx];
1079 }
1080 
1081 int16 const *
1082 acmod_score(acmod_t *acmod, int *inout_frame_idx)
1083 {
1084  int frame_idx, feat_idx;
1085 
1086  /* Calculate the absolute frame index to be scored. */
1087  frame_idx = calc_frame_idx(acmod, inout_frame_idx);
1088 
1089  /* If all senones are being computed, or we are using a senone file,
1090  then we can reuse existing scores. */
1091  if ((acmod->compallsen || acmod->insenfh)
1092  && frame_idx == acmod->senscr_frame) {
1093  if (inout_frame_idx)
1094  *inout_frame_idx = frame_idx;
1095  return acmod->senone_scores;
1096  }
1097 
1098  /* Calculate position of requested frame in circular buffer. */
1099  if ((feat_idx = calc_feat_idx(acmod, frame_idx)) < 0)
1100  return NULL;
1101 
1102  /* If there is an input senone file locate the appropriate frame and read it. */
1103  if (acmod->insenfh) {
1104  fseek(acmod->insenfh, acmod->framepos[feat_idx], SEEK_SET);
1105  if (acmod_read_scores_internal(acmod) < 0)
1106  return NULL;
1107  }
1108  else {
1109  /* Build active senone list. */
1110  acmod_flags2list(acmod);
1111 
1112  /* Generate scores for the next available frame */
1113  ps_mgau_frame_eval(acmod->mgau,
1114  acmod->senone_scores,
1115  acmod->senone_active,
1116  acmod->n_senone_active,
1117  acmod->feat_buf[feat_idx],
1118  frame_idx,
1119  acmod->compallsen);
1120  }
1121 
1122  if (inout_frame_idx)
1123  *inout_frame_idx = frame_idx;
1124  acmod->senscr_frame = frame_idx;
1125 
1126  /* Dump scores to the senone dump file if one exists. */
1127  if (acmod->senfh) {
1128  if (acmod_write_scores(acmod, acmod->n_senone_active,
1129  acmod->senone_active,
1130  acmod->senone_scores,
1131  acmod->senfh) < 0)
1132  return NULL;
1133  E_DEBUG(1,("Frame %d has %d active states\n", frame_idx, acmod->n_senone_active));
1134  }
1135 
1136  return acmod->senone_scores;
1137 }
1138 
1139 int
1140 acmod_best_score(acmod_t *acmod, int *out_best_senid)
1141 {
1142  int i, best;
1143 
1144  best = SENSCR_DUMMY;
1145  if (acmod->compallsen) {
1146  for (i = 0; i < bin_mdef_n_sen(acmod->mdef); ++i) {
1147  if (acmod->senone_scores[i] < best) {
1148  best = acmod->senone_scores[i];
1149  *out_best_senid = i;
1150  }
1151  }
1152  }
1153  else {
1154  int16 *senscr;
1155  senscr = acmod->senone_scores;
1156  for (i = 0; i < acmod->n_senone_active; ++i) {
1157  senscr += acmod->senone_active[i];
1158  if (*senscr < best) {
1159  best = *senscr;
1160  *out_best_senid = i;
1161  }
1162  }
1163  }
1164  return best;
1165 }
1166 
1167 
1168 void
1170 {
1171  if (acmod->compallsen)
1172  return;
1173  bitvec_clear_all(acmod->senone_active_vec, bin_mdef_n_sen(acmod->mdef));
1174  acmod->n_senone_active = 0;
1175 }
1176 
/* Flag the senone of state i of multiplex HMM h as active in a, unless
 * that state has no senone sequence (BAD_SSID).  Wrapped in
 * do { } while (0) so the macro acts as a single statement and its
 * internal `if` cannot capture an `else` at the call site. */
#define MPX_BITVEC_SET(a,h,i)                                       \
    do {                                                            \
        if (hmm_mpx_ssid(h,i) != BAD_SSID)                          \
            bitvec_set((a)->senone_active_vec, hmm_mpx_senid(h,i)); \
    } while (0)
/* Flag the senone of state i of non-multiplex HMM h as active in a. */
#define NONMPX_BITVEC_SET(a,h,i)                                    \
    bitvec_set((a)->senone_active_vec,                              \
               hmm_nonmpx_senid(h,i))
1183 
1184 void
1186 {
1187  int i;
1188 
1189  if (acmod->compallsen)
1190  return;
1191  if (hmm_is_mpx(hmm)) {
1192  switch (hmm_n_emit_state(hmm)) {
1193  case 5:
1194  MPX_BITVEC_SET(acmod, hmm, 4);
1195  MPX_BITVEC_SET(acmod, hmm, 3);
1196  case 3:
1197  MPX_BITVEC_SET(acmod, hmm, 2);
1198  MPX_BITVEC_SET(acmod, hmm, 1);
1199  MPX_BITVEC_SET(acmod, hmm, 0);
1200  break;
1201  default:
1202  for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
1203  MPX_BITVEC_SET(acmod, hmm, i);
1204  }
1205  }
1206  }
1207  else {
1208  switch (hmm_n_emit_state(hmm)) {
1209  case 5:
1210  NONMPX_BITVEC_SET(acmod, hmm, 4);
1211  NONMPX_BITVEC_SET(acmod, hmm, 3);
1212  case 3:
1213  NONMPX_BITVEC_SET(acmod, hmm, 2);
1214  NONMPX_BITVEC_SET(acmod, hmm, 1);
1215  NONMPX_BITVEC_SET(acmod, hmm, 0);
1216  break;
1217  default:
1218  for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
1219  NONMPX_BITVEC_SET(acmod, hmm, i);
1220  }
1221  }
1222  }
1223 }
1224 
1225 int32
1227 {
1228  int32 w, l, n, b, total_dists, total_words, extra_bits;
1229  bitvec_t *flagptr;
1230 
1231  total_dists = bin_mdef_n_sen(acmod->mdef);
1232  if (acmod->compallsen) {
1233  acmod->n_senone_active = total_dists;
1234  return total_dists;
1235  }
1236  total_words = total_dists / BITVEC_BITS;
1237  extra_bits = total_dists % BITVEC_BITS;
1238  w = n = l = 0;
1239  for (flagptr = acmod->senone_active_vec; w < total_words; ++w, ++flagptr) {
1240  if (*flagptr == 0)
1241  continue;
1242  for (b = 0; b < BITVEC_BITS; ++b) {
1243  if (*flagptr & (1UL << b)) {
1244  int32 sen = w * BITVEC_BITS + b;
1245  int32 delta = sen - l;
1246  /* Handle excessive deltas "lossily" by adding a few
1247  extra senones to bridge the gap. */
1248  while (delta > 255) {
1249  acmod->senone_active[n++] = 255;
1250  delta -= 255;
1251  }
1252  acmod->senone_active[n++] = delta;
1253  l = sen;
1254  }
1255  }
1256  }
1257 
1258  for (b = 0; b < extra_bits; ++b) {
1259  if (*flagptr & (1UL << b)) {
1260  int32 sen = w * BITVEC_BITS + b;
1261  int32 delta = sen - l;
1262  /* Handle excessive deltas "lossily" by adding a few
1263  extra senones to bridge the gap. */
1264  while (delta > 255) {
1265  acmod->senone_active[n++] = 255;
1266  delta -= 255;
1267  }
1268  acmod->senone_active[n++] = delta;
1269  l = sen;
1270  }
1271  }
1272 
1273  acmod->n_senone_active = n;
1274  E_DEBUG(1, ("acmod_flags2list: %d active in frame %d\n",
1275  acmod->n_senone_active, acmod->output_frame));
1276  return n;
1277 }