PocketSphinx  0.6
s2_semi_mgau.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /* System headers */
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <assert.h>
43 #include <limits.h>
44 #include <math.h>
45 #if defined(__ADSPBLACKFIN__)
46 #elif !defined(_WIN32_WCE)
47 #include <sys/types.h>
48 #endif
49 
50 #ifndef M_PI
51 #define M_PI 3.14159265358979323846
52 #endif
53 
54 /* SphinxBase headers */
55 #include <sphinx_config.h>
56 #include <sphinxbase/cmd_ln.h>
57 #include <sphinxbase/fixpoint.h>
58 #include <sphinxbase/ckd_alloc.h>
59 #include <sphinxbase/bio.h>
60 #include <sphinxbase/err.h>
61 #include <sphinxbase/prim_type.h>
62 
63 /* Local headers */
64 #include "s2_semi_mgau.h"
65 #include "tied_mgau_common.h"
66 #include "posixwin32.h"
67 
/*
 * Function table for this module: a name string followed by the
 * frame evaluation, transform and free entry points, in that order.
 */
static ps_mgaufuncs_t s2_semi_mgau_funcs = {
    "s2_semi",
    &s2_semi_mgau_frame_eval,      /* frame_eval */
    &s2_semi_mgau_mllr_transform,  /* transform */
    &s2_semi_mgau_free             /* free */
};
74 
/*
 * One entry of a per-feature top-N list: a codeword index paired with
 * the score computed for it.
 */
struct vqFeature_s {
    int32 score; /* score or distance */
    int32 codeword; /* codeword (vector index) */
};
79 
80 static void
81 eval_topn(s2_semi_mgau_t *s, int32 feat, mfcc_t *z)
82 {
83  int i, ceplen;
84  vqFeature_t *topn;
85 
86  topn = s->f[feat];
87  ceplen = s->veclen[feat];
88 
89  for (i = 0; i < s->max_topn; i++) {
90  mfcc_t *mean, diff, sqdiff, compl; /* diff, diff^2, component likelihood */
91  vqFeature_t vtmp;
92  mfcc_t *var, d;
93  mfcc_t *obs;
94  int32 cw, j;
95 
96  cw = topn[i].codeword;
97  mean = s->means[feat][0] + cw * ceplen;
98  var = s->vars[feat][0] + cw * ceplen;
99  d = s->dets[feat][cw];
100  obs = z;
101  for (j = 0; j < ceplen; j++) {
102  diff = *obs++ - *mean++;
103  sqdiff = MFCCMUL(diff, diff);
104  compl = MFCCMUL(sqdiff, *var);
105  d = GMMSUB(d, compl);
106  ++var;
107  }
108  topn[i].score = (int32)d;
109  if (i == 0)
110  continue;
111  vtmp = topn[i];
112  for (j = i - 1; j >= 0 && (int32)d > topn[j].score; j--) {
113  topn[j + 1] = topn[j];
114  }
115  topn[j + 1] = vtmp;
116  }
117 }
118 
/*
 * Scan every codeword of feature stream `feat` (s->n_density of them)
 * against observation `z` and insert into the stream's top-N list any
 * codeword that scores at least as well as the current worst entry and
 * is not already in the list.
 *
 * The list s->f[feat] is kept in descending score order; inserting a new
 * entry shifts lower-ranked entries down by one and overwrites the
 * previous worst entry.
 */
static void
eval_cb(s2_semi_mgau_t *s, int32 feat, mfcc_t *z)
{
    vqFeature_t *worst, *best, *topn;
    mfcc_t *mean;
    mfcc_t *var, *det, *detP, *detE;
    int32 i, ceplen;

    best = topn = s->f[feat];
    worst = topn + (s->max_topn - 1);
    /* mean and var walk through the whole codebook in step with detP. */
    mean = s->means[feat][0];
    var = s->vars[feat][0];
    det = s->dets[feat];
    detE = det + s->n_density;
    ceplen = s->veclen[feat];

    for (detP = det; detP < detE; ++detP) {
        mfcc_t diff, sqdiff, compl; /* diff, diff^2, component likelihood */
        mfcc_t d;
        mfcc_t *obs;
        vqFeature_t *cur;
        int32 cw, j;

        d = *detP;
        obs = z;
        /* Codeword index is the position of detP within the det array. */
        cw = detP - det;
        /* Stop accumulating as soon as the running score falls below the
         * worst score currently in the list. */
        for (j = 0; (j < ceplen) && (d >= worst->score); ++j) {
            diff = *obs++ - *mean++;
            sqdiff = MFCCMUL(diff, diff);
            compl = MFCCMUL(sqdiff, *var);
            d = GMMSUB(d, compl);
            ++var;
        }
        if (j < ceplen) {
            /* terminated early, so not in topn */
            /* Skip the unread components so mean/var point at the next
             * codeword. */
            mean += (ceplen - j);
            var += (ceplen - j);
            continue;
        }
        /* The last component may have pushed the score below the worst. */
        if ((int32)d < worst->score)
            continue;
        for (i = 0; i < s->max_topn; i++) {
            /* already there, so don't need to insert */
            if (topn[i].codeword == cw)
                break;
        }
        if (i < s->max_topn)
            continue;       /* already there.  Don't insert */
        /* remaining code inserts codeword and dist in correct spot */
        /* Shift entries scoring no better than d down by one, starting
         * just above the worst slot (which is overwritten). */
        for (cur = worst - 1; cur >= best && (int32)d >= cur->score; --cur)
            memcpy(cur + 1, cur, sizeof(vqFeature_t));
        /* cur stopped one slot before the insertion point. */
        ++cur;
        cur->codeword = cw;
        cur->score = (int32)d;
    }
}
175 
176 static void
177 mgau_dist(s2_semi_mgau_t * s, int32 frame, int32 feat, mfcc_t * z)
178 {
179  eval_topn(s, feat, z);
180 
181  /* If this frame is skipped, do nothing else. */
182  if (frame % s->ds_ratio)
183  return;
184 
185  /* Evaluate the rest of the codebook (or subset thereof). */
186  eval_cb(s, feat, z);
187 }
188 
189 static int
190 mgau_norm(s2_semi_mgau_t *s, int feat)
191 {
192  int32 norm;
193  int j;
194 
195  /* Compute quantized normalizing constant. */
196  norm = s->f[feat][0].score >> SENSCR_SHIFT;
197 
198  /* Normalize the scores, negate them, and clamp their dynamic range. */
199  for (j = 0; j < s->max_topn; ++j) {
200  s->f[feat][j].score = -((s->f[feat][j].score >> SENSCR_SHIFT) - norm);
201  if (s->f[feat][j].score > MAX_NEG_ASCR)
202  s->f[feat][j].score = MAX_NEG_ASCR;
203  if (s->topn_beam[feat] && s->f[feat][j].score > s->topn_beam[feat])
204  break;
205  }
206  return j;
207 }
208 
209 static int32
210 get_scores_8b_feat_6(s2_semi_mgau_t * s, int i,
211  int16 *senone_scores, uint8 *senone_active,
212  int32 n_senone_active)
213 {
214  int32 j, l;
215  uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5;
216 
217  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
218  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
219  pid_cw2 = s->mixw[i][s->f[i][2].codeword];
220  pid_cw3 = s->mixw[i][s->f[i][3].codeword];
221  pid_cw4 = s->mixw[i][s->f[i][4].codeword];
222  pid_cw5 = s->mixw[i][s->f[i][5].codeword];
223 
224  for (l = j = 0; j < n_senone_active; j++) {
225  int sen = senone_active[j] + l;
226  int32 tmp = pid_cw0[sen] + s->f[i][0].score;
227 
228  tmp = fast_logmath_add(s->lmath_8b, tmp,
229  pid_cw1[sen] + s->f[i][1].score);
230  tmp = fast_logmath_add(s->lmath_8b, tmp,
231  pid_cw2[sen] + s->f[i][2].score);
232  tmp = fast_logmath_add(s->lmath_8b, tmp,
233  pid_cw3[sen] + s->f[i][3].score);
234  tmp = fast_logmath_add(s->lmath_8b, tmp,
235  pid_cw4[sen] + s->f[i][4].score);
236  tmp = fast_logmath_add(s->lmath_8b, tmp,
237  pid_cw5[sen] + s->f[i][5].score);
238 
239  senone_scores[sen] += tmp;
240  l = sen;
241  }
242  return 0;
243 }
244 
245 static int32
246 get_scores_8b_feat_5(s2_semi_mgau_t * s, int i,
247  int16 *senone_scores, uint8 *senone_active,
248  int32 n_senone_active)
249 {
250  int32 j, l;
251  uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4;
252 
253  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
254  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
255  pid_cw2 = s->mixw[i][s->f[i][2].codeword];
256  pid_cw3 = s->mixw[i][s->f[i][3].codeword];
257  pid_cw4 = s->mixw[i][s->f[i][4].codeword];
258 
259  for (l = j = 0; j < n_senone_active; j++) {
260  int sen = senone_active[j] + l;
261  int32 tmp = pid_cw0[sen] + s->f[i][0].score;
262 
263  tmp = fast_logmath_add(s->lmath_8b, tmp,
264  pid_cw1[sen] + s->f[i][1].score);
265  tmp = fast_logmath_add(s->lmath_8b, tmp,
266  pid_cw2[sen] + s->f[i][2].score);
267  tmp = fast_logmath_add(s->lmath_8b, tmp,
268  pid_cw3[sen] + s->f[i][3].score);
269  tmp = fast_logmath_add(s->lmath_8b, tmp,
270  pid_cw4[sen] + s->f[i][4].score);
271 
272  senone_scores[sen] += tmp;
273  l = sen;
274  }
275  return 0;
276 }
277 
278 static int32
279 get_scores_8b_feat_4(s2_semi_mgau_t * s, int i,
280  int16 *senone_scores, uint8 *senone_active,
281  int32 n_senone_active)
282 {
283  int32 j, l;
284  uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
285 
286  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
287  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
288  pid_cw2 = s->mixw[i][s->f[i][2].codeword];
289  pid_cw3 = s->mixw[i][s->f[i][3].codeword];
290 
291  for (l = j = 0; j < n_senone_active; j++) {
292  int sen = senone_active[j] + l;
293  int32 tmp = pid_cw0[sen] + s->f[i][0].score;
294 
295  tmp = fast_logmath_add(s->lmath_8b, tmp,
296  pid_cw1[sen] + s->f[i][1].score);
297  tmp = fast_logmath_add(s->lmath_8b, tmp,
298  pid_cw2[sen] + s->f[i][2].score);
299  tmp = fast_logmath_add(s->lmath_8b, tmp,
300  pid_cw3[sen] + s->f[i][3].score);
301 
302  senone_scores[sen] += tmp;
303  l = sen;
304  }
305  return 0;
306 }
307 
308 static int32
309 get_scores_8b_feat_3(s2_semi_mgau_t * s, int i,
310  int16 *senone_scores, uint8 *senone_active,
311  int32 n_senone_active)
312 {
313  int32 j, l;
314  uint8 *pid_cw0, *pid_cw1, *pid_cw2;
315 
316  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
317  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
318  pid_cw2 = s->mixw[i][s->f[i][2].codeword];
319 
320  for (l = j = 0; j < n_senone_active; j++) {
321  int sen = senone_active[j] + l;
322  int32 tmp = pid_cw0[sen] + s->f[i][0].score;
323 
324  tmp = fast_logmath_add(s->lmath_8b, tmp,
325  pid_cw1[sen] + s->f[i][1].score);
326  tmp = fast_logmath_add(s->lmath_8b, tmp,
327  pid_cw2[sen] + s->f[i][2].score);
328 
329  senone_scores[sen] += tmp;
330  l = sen;
331  }
332  return 0;
333 }
334 
335 static int32
336 get_scores_8b_feat_2(s2_semi_mgau_t * s, int i,
337  int16 *senone_scores, uint8 *senone_active,
338  int32 n_senone_active)
339 {
340  int32 j, l;
341  uint8 *pid_cw0, *pid_cw1;
342 
343  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
344  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
345 
346  for (l = j = 0; j < n_senone_active; j++) {
347  int sen = senone_active[j] + l;
348  int32 tmp = pid_cw0[sen] + s->f[i][0].score;
349 
350  tmp = fast_logmath_add(s->lmath_8b, tmp,
351  pid_cw1[sen] + s->f[i][1].score);
352 
353  senone_scores[sen] += tmp;
354  l = sen;
355  }
356  return 0;
357 }
358 
359 static int32
360 get_scores_8b_feat_1(s2_semi_mgau_t * s, int i,
361  int16 *senone_scores, uint8 *senone_active,
362  int32 n_senone_active)
363 {
364  int32 j, l;
365  uint8 *pid_cw0;
366 
367  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
368  for (l = j = 0; j < n_senone_active; j++) {
369  int sen = senone_active[j] + l;
370  int32 tmp = pid_cw0[sen] + s->f[i][0].score;
371  senone_scores[sen] += tmp;
372  l = sen;
373  }
374  return 0;
375 }
376 
377 static int32
378 get_scores_8b_feat_any(s2_semi_mgau_t * s, int i, int topn,
379  int16 *senone_scores, uint8 *senone_active,
380  int32 n_senone_active)
381 {
382  int32 j, k, l;
383 
384  for (l = j = 0; j < n_senone_active; j++) {
385  int sen = senone_active[j] + l;
386  uint8 *pid_cw;
387  int32 tmp;
388  pid_cw = s->mixw[i][s->f[i][0].codeword];
389  tmp = pid_cw[sen] + s->f[i][0].score;
390  for (k = 1; k < topn; ++k) {
391  pid_cw = s->mixw[i][s->f[i][k].codeword];
392  tmp = fast_logmath_add(s->lmath_8b, tmp,
393  pid_cw[sen] + s->f[i][k].score);
394  }
395  senone_scores[sen] += tmp;
396  l = sen;
397  }
398  return 0;
399 }
400 
401 static int32
402 get_scores_8b_feat(s2_semi_mgau_t * s, int i, int topn,
403  int16 *senone_scores, uint8 *senone_active, int32 n_senone_active)
404 {
405  switch (topn) {
406  case 6:
407  return get_scores_8b_feat_6(s, i, senone_scores,
408  senone_active, n_senone_active);
409  case 5:
410  return get_scores_8b_feat_5(s, i, senone_scores,
411  senone_active, n_senone_active);
412  case 4:
413  return get_scores_8b_feat_4(s, i, senone_scores,
414  senone_active, n_senone_active);
415  case 3:
416  return get_scores_8b_feat_3(s, i, senone_scores,
417  senone_active, n_senone_active);
418  case 2:
419  return get_scores_8b_feat_2(s, i, senone_scores,
420  senone_active, n_senone_active);
421  case 1:
422  return get_scores_8b_feat_1(s, i, senone_scores,
423  senone_active, n_senone_active);
424  default:
425  return get_scores_8b_feat_any(s, i, topn, senone_scores,
426  senone_active, n_senone_active);
427  }
428 }
429 
430 static int32
431 get_scores_8b_feat_all(s2_semi_mgau_t * s, int i, int topn, int16 *senone_scores)
432 {
433  int32 j, k;
434 
435  for (j = 0; j < s->n_sen; j++) {
436  uint8 *pid_cw;
437  int32 tmp;
438  pid_cw = s->mixw[i][s->f[i][0].codeword];
439  tmp = pid_cw[j] + s->f[i][0].score;
440  for (k = 1; k < topn; ++k) {
441  pid_cw = s->mixw[i][s->f[i][k].codeword];
442  tmp = fast_logmath_add(s->lmath_8b, tmp,
443  pid_cw[j] + s->f[i][k].score);
444  }
445  senone_scores[j] += tmp;
446  }
447  return 0;
448 }
449 
450 static int32
451 get_scores_4b_feat_6(s2_semi_mgau_t * s, int i,
452  int16 *senone_scores, uint8 *senone_active,
453  int32 n_senone_active)
454 {
455  int32 j, l;
456  uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5;
457  uint8 w_den[6][16];
458 
459  /* Precompute scaled densities. */
460  for (j = 0; j < 16; ++j) {
461  w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
462  w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
463  w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
464  w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
465  w_den[4][j] = s->mixw_cb[j] + s->f[i][4].score;
466  w_den[5][j] = s->mixw_cb[j] + s->f[i][5].score;
467  }
468 
469  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
470  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
471  pid_cw2 = s->mixw[i][s->f[i][2].codeword];
472  pid_cw3 = s->mixw[i][s->f[i][3].codeword];
473  pid_cw4 = s->mixw[i][s->f[i][4].codeword];
474  pid_cw5 = s->mixw[i][s->f[i][5].codeword];
475 
476  for (l = j = 0; j < n_senone_active; j++) {
477  int n = senone_active[j] + l;
478  int tmp, cw;
479 
480  if (n & 1) {
481  cw = pid_cw0[n/2] >> 4;
482  tmp = w_den[0][cw];
483  cw = pid_cw1[n/2] >> 4;
484  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
485  cw = pid_cw2[n/2] >> 4;
486  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
487  cw = pid_cw3[n/2] >> 4;
488  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
489  cw = pid_cw4[n/2] >> 4;
490  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
491  cw = pid_cw5[n/2] >> 4;
492  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[5][cw]);
493  }
494  else {
495  cw = pid_cw0[n/2] & 0x0f;
496  tmp = w_den[0][cw];
497  cw = pid_cw1[n/2] & 0x0f;
498  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
499  cw = pid_cw2[n/2] & 0x0f;
500  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
501  cw = pid_cw3[n/2] & 0x0f;
502  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
503  cw = pid_cw4[n/2] & 0x0f;
504  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
505  cw = pid_cw5[n/2] & 0x0f;
506  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[5][cw]);
507  }
508  senone_scores[n] += tmp;
509  l = n;
510  }
511  return 0;
512 }
513 
514 static int32
515 get_scores_4b_feat_5(s2_semi_mgau_t * s, int i,
516  int16 *senone_scores, uint8 *senone_active,
517  int32 n_senone_active)
518 {
519  int32 j, l;
520  uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4;
521  uint8 w_den[5][16];
522 
523  /* Precompute scaled densities. */
524  for (j = 0; j < 16; ++j) {
525  w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
526  w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
527  w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
528  w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
529  w_den[4][j] = s->mixw_cb[j] + s->f[i][4].score;
530  }
531 
532  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
533  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
534  pid_cw2 = s->mixw[i][s->f[i][2].codeword];
535  pid_cw3 = s->mixw[i][s->f[i][3].codeword];
536  pid_cw4 = s->mixw[i][s->f[i][4].codeword];
537 
538  for (l = j = 0; j < n_senone_active; j++) {
539  int n = senone_active[j] + l;
540  int tmp, cw;
541 
542  if (n & 1) {
543  cw = pid_cw0[n/2] >> 4;
544  tmp = w_den[0][cw];
545  cw = pid_cw1[n/2] >> 4;
546  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
547  cw = pid_cw2[n/2] >> 4;
548  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
549  cw = pid_cw3[n/2] >> 4;
550  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
551  cw = pid_cw4[n/2] >> 4;
552  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
553  }
554  else {
555  cw = pid_cw0[n/2] & 0x0f;
556  tmp = w_den[0][cw];
557  cw = pid_cw1[n/2] & 0x0f;
558  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
559  cw = pid_cw2[n/2] & 0x0f;
560  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
561  cw = pid_cw3[n/2] & 0x0f;
562  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
563  cw = pid_cw4[n/2] & 0x0f;
564  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]);
565  }
566  senone_scores[n] += tmp;
567  l = n;
568  }
569  return 0;
570 }
571 
572 static int32
573 get_scores_4b_feat_4(s2_semi_mgau_t * s, int i,
574  int16 *senone_scores, uint8 *senone_active,
575  int32 n_senone_active)
576 {
577  int32 j, l;
578  uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3;
579  uint8 w_den[4][16];
580 
581  /* Precompute scaled densities. */
582  for (j = 0; j < 16; ++j) {
583  w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
584  w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
585  w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
586  w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score;
587  }
588 
589  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
590  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
591  pid_cw2 = s->mixw[i][s->f[i][2].codeword];
592  pid_cw3 = s->mixw[i][s->f[i][3].codeword];
593 
594  for (l = j = 0; j < n_senone_active; j++) {
595  int n = senone_active[j] + l;
596  int tmp, cw;
597 
598  if (n & 1) {
599  cw = pid_cw0[n/2] >> 4;
600  tmp = w_den[0][cw];
601  cw = pid_cw1[n/2] >> 4;
602  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
603  cw = pid_cw2[n/2] >> 4;
604  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
605  cw = pid_cw3[n/2] >> 4;
606  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
607  }
608  else {
609  cw = pid_cw0[n/2] & 0x0f;
610  tmp = w_den[0][cw];
611  cw = pid_cw1[n/2] & 0x0f;
612  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
613  cw = pid_cw2[n/2] & 0x0f;
614  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
615  cw = pid_cw3[n/2] & 0x0f;
616  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]);
617  }
618  senone_scores[n] += tmp;
619  l = n;
620  }
621  return 0;
622 }
623 
624 static int32
625 get_scores_4b_feat_3(s2_semi_mgau_t * s, int i,
626  int16 *senone_scores, uint8 *senone_active,
627  int32 n_senone_active)
628 {
629  int32 j, l;
630  uint8 *pid_cw0, *pid_cw1, *pid_cw2;
631  uint8 w_den[3][16];
632 
633  /* Precompute scaled densities. */
634  for (j = 0; j < 16; ++j) {
635  w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
636  w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
637  w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score;
638  }
639 
640  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
641  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
642  pid_cw2 = s->mixw[i][s->f[i][2].codeword];
643 
644  for (l = j = 0; j < n_senone_active; j++) {
645  int n = senone_active[j] + l;
646  int tmp, cw;
647 
648  if (n & 1) {
649  cw = pid_cw0[n/2] >> 4;
650  tmp = w_den[0][cw];
651  cw = pid_cw1[n/2] >> 4;
652  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
653  cw = pid_cw2[n/2] >> 4;
654  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
655  }
656  else {
657  cw = pid_cw0[n/2] & 0x0f;
658  tmp = w_den[0][cw];
659  cw = pid_cw1[n/2] & 0x0f;
660  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
661  cw = pid_cw2[n/2] & 0x0f;
662  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]);
663  }
664  senone_scores[n] += tmp;
665  l = n;
666  }
667  return 0;
668 }
669 
670 static int32
671 get_scores_4b_feat_2(s2_semi_mgau_t * s, int i,
672  int16 *senone_scores, uint8 *senone_active,
673  int32 n_senone_active)
674 {
675  int32 j, l;
676  uint8 *pid_cw0, *pid_cw1;
677  uint8 w_den[2][16];
678 
679  /* Precompute scaled densities. */
680  for (j = 0; j < 16; ++j) {
681  w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score;
682  w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score;
683  }
684 
685  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
686  pid_cw1 = s->mixw[i][s->f[i][1].codeword];
687 
688  for (l = j = 0; j < n_senone_active; j++) {
689  int n = senone_active[j] + l;
690  int tmp, cw;
691 
692  if (n & 1) {
693  cw = pid_cw0[n/2] >> 4;
694  tmp = w_den[0][cw];
695  cw = pid_cw1[n/2] >> 4;
696  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
697  }
698  else {
699  cw = pid_cw0[n/2] & 0x0f;
700  tmp = w_den[0][cw];
701  cw = pid_cw1[n/2] & 0x0f;
702  tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]);
703  }
704  senone_scores[n] += tmp;
705  l = n;
706  }
707  return 0;
708 }
709 
710 static int32
711 get_scores_4b_feat_1(s2_semi_mgau_t * s, int i,
712  int16 *senone_scores, uint8 *senone_active,
713  int32 n_senone_active)
714 {
715  int32 j, l;
716  uint8 *pid_cw0;
717  uint8 w_den[16];
718 
719  /* Precompute scaled densities. */
720  for (j = 0; j < 16; ++j) {
721  w_den[j] = s->mixw_cb[j] + s->f[i][0].score;
722  }
723 
724  pid_cw0 = s->mixw[i][s->f[i][0].codeword];
725 
726  for (l = j = 0; j < n_senone_active; j++) {
727  int n = senone_active[j] + l;
728  int tmp, cw;
729 
730  if (n & 1) {
731  cw = pid_cw0[n/2] >> 4;
732  tmp = w_den[cw];
733  }
734  else {
735  cw = pid_cw0[n/2] & 0x0f;
736  tmp = w_den[cw];
737  }
738  senone_scores[n] += tmp;
739  l = n;
740  }
741  return 0;
742 }
743 
744 static int32
745 get_scores_4b_feat_any(s2_semi_mgau_t * s, int i, int topn,
746  int16 *senone_scores, uint8 *senone_active,
747  int32 n_senone_active)
748 {
749  int32 j, k, l;
750 
751  for (l = j = 0; j < n_senone_active; j++) {
752  int n = senone_active[j] + l;
753  int tmp, cw;
754  uint8 *pid_cw;
755 
756  pid_cw = s->mixw[i][s->f[i][0].codeword];
757  if (n & 1)
758  cw = pid_cw[n/2] >> 4;
759  else
760  cw = pid_cw[n/2] & 0x0f;
761  tmp = s->mixw_cb[cw] + s->f[i][0].score;
762  for (k = 1; k < topn; ++k) {
763  pid_cw = s->mixw[i][s->f[i][k].codeword];
764  if (n & 1)
765  cw = pid_cw[n/2] >> 4;
766  else
767  cw = pid_cw[n/2] & 0x0f;
768  tmp = fast_logmath_add(s->lmath_8b, tmp,
769  s->mixw_cb[cw] + s->f[i][k].score);
770  }
771  senone_scores[n] += tmp;
772  l = n;
773  }
774  return 0;
775 }
776 
777 static int32
778 get_scores_4b_feat(s2_semi_mgau_t * s, int i, int topn,
779  int16 *senone_scores, uint8 *senone_active, int32 n_senone_active)
780 {
781  switch (topn) {
782  case 6:
783  return get_scores_4b_feat_6(s, i, senone_scores,
784  senone_active, n_senone_active);
785  case 5:
786  return get_scores_4b_feat_5(s, i, senone_scores,
787  senone_active, n_senone_active);
788  case 4:
789  return get_scores_4b_feat_4(s, i, senone_scores,
790  senone_active, n_senone_active);
791  case 3:
792  return get_scores_4b_feat_3(s, i, senone_scores,
793  senone_active, n_senone_active);
794  case 2:
795  return get_scores_4b_feat_2(s, i, senone_scores,
796  senone_active, n_senone_active);
797  case 1:
798  return get_scores_4b_feat_1(s, i, senone_scores,
799  senone_active, n_senone_active);
800  default:
801  return get_scores_4b_feat_any(s, i, topn, senone_scores,
802  senone_active, n_senone_active);
803  }
804 }
805 
806 static int32
807 get_scores_4b_feat_all(s2_semi_mgau_t * s, int i, int topn, int16 *senone_scores)
808 {
809  int j, last_sen;
810 
811  j = 0;
812  /* Number of senones is always even, but don't overrun if it isn't. */
813  last_sen = s->n_sen & ~1;
814  while (j < last_sen) {
815  uint8 *pid_cw;
816  int32 tmp0, tmp1;
817  int k;
818 
819  pid_cw = s->mixw[i][s->f[i][0].codeword];
820  tmp0 = s->mixw_cb[pid_cw[j/2] & 0x0f] + s->f[i][0].score;
821  tmp1 = s->mixw_cb[pid_cw[j/2] >> 4] + s->f[i][0].score;
822  for (k = 1; k < topn; ++k) {
823  int32 w_den0, w_den1;
824 
825  pid_cw = s->mixw[i][s->f[i][k].codeword];
826  w_den0 = s->mixw_cb[pid_cw[j/2] & 0x0f] + s->f[i][k].score;
827  w_den1 = s->mixw_cb[pid_cw[j/2] >> 4] + s->f[i][k].score;
828  tmp0 = fast_logmath_add(s->lmath_8b, tmp0, w_den0);
829  tmp1 = fast_logmath_add(s->lmath_8b, tmp1, w_den1);
830  }
831  senone_scores[j++] += tmp0;
832  senone_scores[j++] += tmp1;
833  }
834  return 0;
835 }
836 
837 /*
838  * Compute senone scores for the active senones.
839  */
840 int32
841 s2_semi_mgau_frame_eval(ps_mgau_t *ps,
842  int16 *senone_scores,
843  uint8 *senone_active,
844  int32 n_senone_active,
845  mfcc_t ** featbuf, int32 frame,
846  int32 compallsen)
847 {
848  s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
849  int i, topn_idx;
850 
851  memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
852  /* No bounds checking is done here, which just means you'll get
853  * semi-random crap if you request a frame in the future or one
854  * that's too far in the past. */
855  topn_idx = frame % s->n_topn_hist;
856  s->f = s->topn_hist[topn_idx];
857  for (i = 0; i < s->n_feat; ++i) {
858  /* For past frames this will already be computed. */
859  if (frame >= ps_mgau_base(ps)->frame_idx) {
860  vqFeature_t **lastf;
861  if (topn_idx == 0)
862  lastf = s->topn_hist[s->n_topn_hist-1];
863  else
864  lastf = s->topn_hist[topn_idx-1];
865  memcpy(s->f[i], lastf[i], sizeof(vqFeature_t) * s->max_topn);
866  mgau_dist(s, frame, i, featbuf[i]);
867  s->topn_hist_n[topn_idx][i] = mgau_norm(s, i);
868  }
869  if (s->mixw_cb) {
870  if (compallsen)
871  get_scores_4b_feat_all(s, i, s->topn_hist_n[topn_idx][i], senone_scores);
872  else
873  get_scores_4b_feat(s, i, s->topn_hist_n[topn_idx][i], senone_scores,
874  senone_active, n_senone_active);
875  }
876  else {
877  if (compallsen)
878  get_scores_8b_feat_all(s, i, s->topn_hist_n[topn_idx][i], senone_scores);
879  else
880  get_scores_8b_feat(s, i, s->topn_hist_n[topn_idx][i], senone_scores,
881  senone_active, n_senone_active);
882  }
883  }
884 
885  return 0;
886 }
887 
888 static int32
889 read_sendump(s2_semi_mgau_t *s, bin_mdef_t *mdef, char const *file)
890 {
891  FILE *fp;
892  char line[1000];
893  int32 i, n, r, c;
894  int32 do_swap, do_mmap;
895  size_t filesize, offset;
896  int n_clust = 0;
897  int n_feat = s->n_feat;
898  int n_density = s->n_density;
899  int n_sen = bin_mdef_n_sen(mdef);
900  int n_bits = 8;
901 
902  s->n_sen = n_sen; /* FIXME: Should have been done earlier */
903  do_mmap = cmd_ln_boolean_r(s->config, "-mmap");
904 
905  if ((fp = fopen(file, "rb")) == NULL)
906  return -1;
907 
908  E_INFO("Loading senones from dump file %s\n", file);
909  /* Read title size, title */
910  if (fread(&n, sizeof(int32), 1, fp) != 1) {
911  E_ERROR_SYSTEM("Failed to read title size from %s", file);
912  goto error_out;
913  }
914  /* This is extremely bogus */
915  do_swap = 0;
916  if (n < 1 || n > 999) {
917  SWAP_INT32(&n);
918  if (n < 1 || n > 999) {
919  E_ERROR("Title length %x in dump file %s out of range\n", n, file);
920  goto error_out;
921  }
922  do_swap = 1;
923  }
924  if (fread(line, sizeof(char), n, fp) != n) {
925  E_ERROR_SYSTEM("Cannot read title");
926  goto error_out;
927  }
928  if (line[n - 1] != '\0') {
929  E_ERROR("Bad title in dump file\n");
930  goto error_out;
931  }
932  E_INFO("%s\n", line);
933 
934  /* Read header size, header */
935  if (fread(&n, sizeof(n), 1, fp) != 1) {
936  E_ERROR_SYSTEM("Failed to read header size from %s", file);
937  goto error_out;
938  }
939  if (do_swap) SWAP_INT32(&n);
940  if (fread(line, sizeof(char), n, fp) != n) {
941  E_ERROR_SYSTEM("Cannot read header");
942  goto error_out;
943  }
944  if (line[n - 1] != '\0') {
945  E_ERROR("Bad header in dump file\n");
946  goto error_out;
947  }
948 
949  /* Read other header strings until string length = 0 */
950  for (;;) {
951  if (fread(&n, sizeof(n), 1, fp) != 1) {
952  E_ERROR_SYSTEM("Failed to read header string size from %s", file);
953  goto error_out;
954  }
955  if (do_swap) SWAP_INT32(&n);
956  if (n == 0)
957  break;
958  if (fread(line, sizeof(char), n, fp) != n) {
959  E_ERROR_SYSTEM("Cannot read header");
960  goto error_out;
961  }
962  /* Look for a cluster count, if present */
963  if (!strncmp(line, "feature_count ", strlen("feature_count "))) {
964  n_feat = atoi(line + strlen("feature_count "));
965  }
966  if (!strncmp(line, "mixture_count ", strlen("mixture_count "))) {
967  n_density = atoi(line + strlen("mixture_count "));
968  }
969  if (!strncmp(line, "model_count ", strlen("model_count "))) {
970  n_sen = atoi(line + strlen("model_count "));
971  }
972  if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) {
973  n_clust = atoi(line + strlen("cluster_count "));
974  }
975  if (!strncmp(line, "cluster_bits ", strlen("cluster_bits "))) {
976  n_bits = atoi(line + strlen("cluster_bits "));
977  }
978  }
979 
980  /* Defaults for #rows, #columns in mixw array. */
981  c = n_sen;
982  r = n_density;
983  if (n_clust == 0) {
984  /* Older mixw files have them here, and they might be padded. */
985  if (fread(&r, sizeof(r), 1, fp) != 1) {
986  E_ERROR_SYSTEM("Cannot read #rows");
987  goto error_out;
988  }
989  if (do_swap) SWAP_INT32(&r);
990  if (fread(&c, sizeof(c), 1, fp) != 1) {
991  E_ERROR_SYSTEM("Cannot read #columns");
992  goto error_out;
993  }
994  if (do_swap) SWAP_INT32(&c);
995  E_INFO("Rows: %d, Columns: %d\n", r, c);
996  }
997 
998  if (n_feat != s->n_feat) {
999  E_ERROR("Number of feature streams mismatch: %d != %d\n",
1000  n_feat, s->n_feat);
1001  goto error_out;
1002  }
1003  if (n_density != s->n_density) {
1004  E_ERROR("Number of densities mismatch: %d != %d\n",
1005  n_density, s->n_density);
1006  goto error_out;
1007  }
1008  if (n_sen != s->n_sen) {
1009  E_ERROR("Number of senones mismatch: %d != %d\n",
1010  n_sen, s->n_sen);
1011  goto error_out;
1012  }
1013 
1014  if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) {
1015  E_ERROR("Cluster count must be 0, 15, or 16\n");
1016  goto error_out;
1017  }
1018  if (n_clust == 15)
1019  ++n_clust;
1020 
1021  if (!((n_bits == 8) || (n_bits == 4))) {
1022  E_ERROR("Cluster count must be 4 or 8\n");
1023  goto error_out;
1024  }
1025 
1026  if (do_mmap) {
1027  E_INFO("Using memory-mapped I/O for senones\n");
1028  }
1029  offset = ftell(fp);
1030  fseek(fp, 0, SEEK_END);
1031  filesize = ftell(fp);
1032  fseek(fp, offset, SEEK_SET);
1033 
1034  /* Allocate memory for pdfs (or memory map them) */
1035  if (do_mmap) {
1036  s->sendump_mmap = mmio_file_read(file);
1037  /* Get cluster codebook if any. */
1038  if (n_clust) {
1039  s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
1040  offset += n_clust;
1041  }
1042  }
1043  else {
1044  /* Get cluster codebook if any. */
1045  if (n_clust) {
1046  s->mixw_cb = ckd_calloc(1, n_clust);
1047  if (fread(s->mixw_cb, 1, n_clust, fp) != (size_t) n_clust) {
1048  E_ERROR("Failed to read %d bytes from sendump\n", n_clust);
1049  goto error_out;
1050  }
1051  }
1052  }
1053 
1054  /* Set up pointers, or read, or whatever */
1055  if (s->sendump_mmap) {
1056  s->mixw = ckd_calloc_2d(s->n_feat, n_density, sizeof(*s->mixw));
1057  for (n = 0; n < n_feat; n++) {
1058  int step = c;
1059  if (n_bits == 4)
1060  step = (step + 1) / 2;
1061  for (i = 0; i < r; i++) {
1062  s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
1063  offset += step;
1064  }
1065  }
1066  }
1067  else {
1068  s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen, sizeof(***s->mixw));
1069  /* Read pdf values and ids */
1070  for (n = 0; n < n_feat; n++) {
1071  int step = c;
1072  if (n_bits == 4)
1073  step = (step + 1) / 2;
1074  for (i = 0; i < r; i++) {
1075  if (fread(s->mixw[n][i], sizeof(***s->mixw), step, fp)
1076  != (size_t) step) {
1077  E_ERROR("Failed to read %d bytes from sendump\n", step);
1078  goto error_out;
1079  }
1080  }
1081  }
1082  }
1083 
1084  fclose(fp);
1085  return 0;
1086 error_out:
1087  fclose(fp);
1088  return -1;
1089 }
1090 
1091 static int32
1092 read_mixw(s2_semi_mgau_t * s, char const *file_name, double SmoothMin)
1093 {
1094  char **argname, **argval;
1095  char eofchk;
1096  FILE *fp;
1097  int32 byteswap, chksum_present;
1098  uint32 chksum;
1099  float32 *pdf;
1100  int32 i, f, c, n;
1101  int32 n_sen;
1102  int32 n_feat;
1103  int32 n_comp;
1104  int32 n_err;
1105 
1106  E_INFO("Reading mixture weights file '%s'\n", file_name);
1107 
1108  if ((fp = fopen(file_name, "rb")) == NULL)
1109  E_FATAL("Failed to open mixture weights file '%s' for reading: %s\n", file_name, strerror(errno));
1110 
1111  /* Read header, including argument-value info and 32-bit byteorder magic */
1112  if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
1113  E_FATAL("Failed to read header from file '%s'\n", file_name);
1114 
1115  /* Parse argument-value list */
1116  chksum_present = 0;
1117  for (i = 0; argname[i]; i++) {
1118  if (strcmp(argname[i], "version") == 0) {
1119  if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
1120  E_WARN("Version mismatch(%s): %s, expecting %s\n",
1121  file_name, argval[i], MGAU_MIXW_VERSION);
1122  }
1123  else if (strcmp(argname[i], "chksum0") == 0) {
1124  chksum_present = 1; /* Ignore the associated value */
1125  }
1126  }
1127  bio_hdrarg_free(argname, argval);
1128  argname = argval = NULL;
1129 
1130  chksum = 0;
1131 
1132  /* Read #senones, #features, #codewords, arraysize */
1133  if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
1134  || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) !=
1135  1)
1136  || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) !=
1137  1)
1138  || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
1139  E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
1140  }
1141  if (n_feat != s->n_feat)
1142  E_FATAL("#Features streams(%d) != %d\n", n_feat, s->n_feat);
1143  if (n != n_sen * n_feat * n_comp) {
1144  E_FATAL
1145  ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
1146  file_name, i, n_sen, n_feat, n_comp);
1147  }
1148 
1149  /* n_sen = number of mixture weights per codeword, which is
1150  * fixed at the number of senones since we have only one codebook.
1151  */
1152  s->n_sen = n_sen;
1153 
1154  /* Quantized mixture weight arrays. */
1155  s->mixw = ckd_calloc_3d(s->n_feat, s->n_density, n_sen, sizeof(***s->mixw));
1156 
1157  /* Temporary structure to read in floats before conversion to (int32) logs3 */
1158  pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32));
1159 
1160  /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
1161  n_err = 0;
1162  for (i = 0; i < n_sen; i++) {
1163  for (f = 0; f < n_feat; f++) {
1164  if (bio_fread((void *) pdf, sizeof(float32),
1165  n_comp, fp, byteswap, &chksum) != n_comp) {
1166  E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
1167  }
1168 
1169  /* Normalize and floor */
1170  if (vector_sum_norm(pdf, n_comp) <= 0.0)
1171  n_err++;
1172  vector_floor(pdf, n_comp, SmoothMin);
1173  vector_sum_norm(pdf, n_comp);
1174 
1175  /* Convert to LOG, quantize, and transpose */
1176  for (c = 0; c < n_comp; c++) {
1177  int32 qscr;
1178 
1179  qscr = -logmath_log(s->lmath_8b, pdf[c]);
1180  if ((qscr > MAX_NEG_MIXW) || (qscr < 0))
1181  qscr = MAX_NEG_MIXW;
1182  s->mixw[f][c][i] = qscr;
1183  }
1184  }
1185  }
1186  if (n_err > 0)
1187  E_WARN("Weight normalization failed for %d senones\n", n_err);
1188 
1189  ckd_free(pdf);
1190 
1191  if (chksum_present)
1192  bio_verify_chksum(fp, byteswap, chksum);
1193 
1194  if (fread(&eofchk, 1, 1, fp) == 1)
1195  E_FATAL("More data than expected in %s\n", file_name);
1196 
1197  fclose(fp);
1198 
1199  E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
1200  return n_sen;
1201 }
1202 
1203 
1204 static int
1205 split_topn(char const *str, uint8 *out, int nfeat)
1206 {
1207  char *topn_list = ckd_salloc(str);
1208  char *c, *cc;
1209  int i, maxn;
1210 
1211  c = topn_list;
1212  i = 0;
1213  maxn = 0;
1214  while (i < nfeat && (cc = strchr(c, ',')) != NULL) {
1215  *cc = '\0';
1216  out[i] = atoi(c);
1217  if (out[i] > maxn) maxn = out[i];
1218  c = cc + 1;
1219  ++i;
1220  }
1221  if (i < nfeat && *c != '\0') {
1222  out[i] = atoi(c);
1223  if (out[i] > maxn) maxn = out[i];
1224  ++i;
1225  }
1226  while (i < nfeat)
1227  out[i++] = maxn;
1228 
1229  ckd_free(topn_list);
1230  return maxn;
1231 }
1232 
1233 
1234 ps_mgau_t *
1235 s2_semi_mgau_init(acmod_t *acmod)
1236 {
1237  s2_semi_mgau_t *s;
1238  ps_mgau_t *ps;
1239  char const *sendump_path;
1240  int i;
1241 
1242  s = ckd_calloc(1, sizeof(*s));
1243  s->config = acmod->config;
1244 
1245  s->lmath = logmath_retain(acmod->lmath);
1246  /* Log-add table. */
1247  s->lmath_8b = logmath_init(logmath_get_base(acmod->lmath), SENSCR_SHIFT, TRUE);
1248  if (s->lmath_8b == NULL)
1249  goto error_out;
1250  /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. */
1251  if (logmath_get_width(s->lmath_8b) != 1) {
1252  E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
1253  logmath_get_base(s->lmath_8b));
1254  goto error_out;
1255  }
1256 
1257  /* Read means and variances. */
1258  if ((s->g = gauden_init(cmd_ln_str_r(s->config, "-mean"),
1259  cmd_ln_str_r(s->config, "-var"),
1260  cmd_ln_float32_r(s->config, "-varfloor"),
1261  s->lmath)) == NULL)
1262  goto error_out;
1263  /* Currently only a single codebook is supported. */
1264  if (s->g->n_mgau != 1)
1265  goto error_out;
1266  /* FIXME: maintaining pointers for convenience for now */
1267  s->means = s->g->mean[0];
1268  s->vars = s->g->var[0];
1269  s->dets = s->g->det[0];
1270  s->veclen = s->g->featlen;
1271  /* Verify n_feat and veclen, against acmod. */
1272  s->n_feat = s->g->n_feat;
1273  if (s->n_feat != feat_dimension1(acmod->fcb)) {
1274  E_ERROR("Number of streams does not match: %d != %d\n",
1275  s->n_feat, feat_dimension(acmod->fcb));
1276  goto error_out;
1277  }
1278  for (i = 0; i < s->n_feat; ++i) {
1279  if (s->veclen[i] != feat_dimension2(acmod->fcb, i)) {
1280  E_ERROR("Dimension of stream %d does not match: %d != %d\n",
1281  s->veclen[i], feat_dimension2(acmod->fcb, i));
1282  goto error_out;
1283  }
1284  }
1285  s->n_density = s->g->n_density;
1286  /* Read mixture weights */
1287  if ((sendump_path = cmd_ln_str_r(s->config, "-sendump"))) {
1288  if (read_sendump(s, acmod->mdef, sendump_path) < 0) {
1289  goto error_out;
1290  }
1291  }
1292  else {
1293  if (read_mixw(s, cmd_ln_str_r(s->config, "-mixw"),
1294  cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) {
1295  goto error_out;
1296  }
1297  }
1298  s->ds_ratio = cmd_ln_int32_r(s->config, "-ds");
1299 
1300  /* Determine top-N for each feature */
1301  s->topn_beam = ckd_calloc(s->n_feat, sizeof(*s->topn_beam));
1302  s->max_topn = cmd_ln_int32_r(s->config, "-topn");
1303  split_topn(cmd_ln_str_r(s->config, "-topn_beam"), s->topn_beam, s->n_feat);
1304  E_INFO("Maximum top-N: %d ", s->max_topn);
1305  E_INFOCONT("Top-N beams:");
1306  for (i = 0; i < s->n_feat; ++i) {
1307  E_INFOCONT(" %d", s->topn_beam[i]);
1308  }
1309  E_INFOCONT("\n");
1310 
1311  /* Top-N scores from recent frames */
1312  s->n_topn_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2;
1313  s->topn_hist = (vqFeature_t ***)
1314  ckd_calloc_3d(s->n_topn_hist, s->n_feat, s->max_topn,
1315  sizeof(***s->topn_hist));
1316  s->topn_hist_n = ckd_calloc_2d(s->n_topn_hist, s->n_feat,
1317  sizeof(**s->topn_hist_n));
1318  for (i = 0; i < s->n_topn_hist; ++i) {
1319  int j;
1320  for (j = 0; j < s->n_feat; ++j) {
1321  int k;
1322  for (k = 0; k < s->max_topn; ++k) {
1323  s->topn_hist[i][j][k].score = WORST_DIST;
1324  s->topn_hist[i][j][k].codeword = k;
1325  }
1326  }
1327  }
1328 
1329  ps = (ps_mgau_t *)s;
1330  ps->vt = &s2_semi_mgau_funcs;
1331  return ps;
1332 error_out:
1333  s2_semi_mgau_free(ps_mgau_base(s));
1334  return NULL;
1335 }
1336 
1337 int
1338 s2_semi_mgau_mllr_transform(ps_mgau_t *ps,
1339  ps_mllr_t *mllr)
1340 {
1341  s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
1342  return gauden_mllr_transform(s->g, mllr, s->config);
1343 }
1344 
1345 void
1346 s2_semi_mgau_free(ps_mgau_t *ps)
1347 {
1348  s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps;
1349 
1350  logmath_free(s->lmath);
1351  logmath_free(s->lmath_8b);
1352  if (s->sendump_mmap) {
1353  ckd_free_2d(s->mixw);
1354  mmio_file_unmap(s->sendump_mmap);
1355  }
1356  else {
1357  ckd_free_3d(s->mixw);
1358  }
1359  gauden_free(s->g);
1360  ckd_free(s->topn_beam);
1361  ckd_free_2d(s->topn_hist_n);
1362  ckd_free_3d((void **)s->topn_hist);
1363  ckd_free(s);
1364 }