PocketSphinx  0.6
mdef.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /*
39  * mdef.c -- HMM model definition: base (CI) phones and triphones
40  *
41  * **********************************************
42  * CMU ARPA Speech Project
43  *
44  * Copyright (c) 1999 Carnegie Mellon University.
45  * ALL RIGHTS RESERVED.
46  * **********************************************
47  *
48  * HISTORY
49  *
50  *
51  * 22-Nov-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
52  * Imported from s3.2, for supporting s3 format continuous
53  * acoustic models.
54  *
55  * 14-Oct-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
56  * Added mdef_sseq2sen_active().
57  *
58  * 06-May-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
59  * In mdef_phone_id(), added backing off to silence phone context from filler
60  * context if original triphone not found.
61  *
62  * 30-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
63  * Added senone-sequence id (ssid) to phone_t and appropriate functions to
64  * maintain it. Instead, moved state sequence info to mdef_t.
65  *
66  * 13-Jul-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
67  * Added mdef_phone_str().
68  *
69  * 01-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
70  * Allowed mdef_phone_id_nearest to return base phone id if either
71  * left or right context (or both) is undefined.
72  *
73  * 01-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University.
74  * Created.
75  */
76 
77 
78 /*
79  * Major assumptions:
80  * All phones have same #states, same topology.
81  * Every phone has exactly one non-emitting, final state--the last one.
82  * CI phones must appear first in model definition file.
83  */
84 
85 /* System headers. */
86 #include <stdio.h>
87 #include <string.h>
88 #include <stdlib.h>
89 #include <assert.h>
90 
91 /* SphinxBase headers. */
92 #include <sphinxbase/ckd_alloc.h>
93 #include <sphinxbase/err.h>
94 
95 /* Local headers. */
96 #include "mdef.h"
97 
98 
99 #define MODEL_DEF_VERSION "0.3"
100 
101 static void
102 ciphone_add(mdef_t * m, char *ci, int p)
103 {
104  assert(p < m->n_ciphone);
105 
106  m->ciphone[p].name = (char *) ckd_salloc(ci); /* freed in mdef_free */
107  if (hash_table_enter(m->ciphone_ht, m->ciphone[p].name,
108  (void *)(long)p) != (void *)(long)p)
109  E_FATAL("hash_table_enter(%s) failed; duplicate CIphone?\n",
110  m->ciphone[p].name);
111 }
112 
113 
114 static ph_lc_t *
115 find_ph_lc(ph_lc_t * lclist, int lc)
116 {
117  ph_lc_t *lcptr;
118 
119  for (lcptr = lclist; lcptr && (lcptr->lc != lc); lcptr = lcptr->next);
120  return lcptr;
121 }
122 
123 
124 static ph_rc_t *
125 find_ph_rc(ph_rc_t * rclist, int rc)
126 {
127  ph_rc_t *rcptr;
128 
129  for (rcptr = rclist; rcptr && (rcptr->rc != rc); rcptr = rcptr->next);
130  return rcptr;
131 }
132 
133 
134 static void
135 triphone_add(mdef_t * m,
136  int ci, int lc, int rc, word_posn_t wpos,
137  int p)
138 {
139  ph_lc_t *lcptr;
140  ph_rc_t *rcptr;
141 
142  assert(p < m->n_phone);
143 
144  /* Fill in phone[p] information (state and tmat mappings added later) */
145  m->phone[p].ci = ci;
146  m->phone[p].lc = lc;
147  m->phone[p].rc = rc;
148  m->phone[p].wpos = wpos;
149 
150  /* Create <ci,lc,rc,wpos> -> p mapping if not a CI phone */
151  if (p >= m->n_ciphone) {
152  if ((lcptr = find_ph_lc(m->wpos_ci_lclist[wpos][(int) ci], lc))
153  == NULL) {
154  lcptr = (ph_lc_t *) ckd_calloc(1, sizeof(ph_lc_t)); /* freed at mdef_free, I believe */
155  lcptr->lc = lc;
156  lcptr->next = m->wpos_ci_lclist[wpos][(int) ci];
157  m->wpos_ci_lclist[wpos][(int) ci] = lcptr; /* This is what needs to be freed */
158  }
159  if ((rcptr = find_ph_rc(lcptr->rclist, rc)) != NULL) {
160  __BIGSTACKVARIABLE__ char buf[4096];
161 
162  mdef_phone_str(m, rcptr->pid, buf);
163  E_FATAL("Duplicate triphone: %s\n", buf);
164  }
165 
166  rcptr = (ph_rc_t *) ckd_calloc(1, sizeof(ph_rc_t)); /* freed in mdef_free, I believe */
167  rcptr->rc = rc;
168  rcptr->pid = p;
169  rcptr->next = lcptr->rclist;
170  lcptr->rclist = rcptr;
171  }
172 }
173 
174 
175 int
176 mdef_ciphone_id(mdef_t * m, char *ci)
177 {
178  int32 id;
179  if (hash_table_lookup_int32(m->ciphone_ht, ci, &id) < 0)
180  return -1;
181  return id;
182 }
183 
184 
185 const char *
186 mdef_ciphone_str(mdef_t * m, int id)
187 {
188  assert(m);
189  assert((id >= 0) && (id < m->n_ciphone));
190 
191  return (m->ciphone[id].name);
192 }
193 
194 
195 int
196 mdef_phone_str(mdef_t * m, int pid, char *buf)
197 {
198  char *wpos_name;
199 
200  assert(m);
201  assert((pid >= 0) && (pid < m->n_phone));
202  wpos_name = WPOS_NAME;
203 
204  buf[0] = '\0';
205  if (pid < m->n_ciphone)
206  sprintf(buf, "%s", mdef_ciphone_str(m, pid));
207  else {
208  sprintf(buf, "%s %s %s %c",
209  mdef_ciphone_str(m, m->phone[pid].ci),
210  mdef_ciphone_str(m, m->phone[pid].lc),
211  mdef_ciphone_str(m, m->phone[pid].rc),
212  wpos_name[m->phone[pid].wpos]);
213  }
214  return 0;
215 }
216 
217 
218 int
219 mdef_phone_id(mdef_t * m,
220  int ci, int lc, int rc, word_posn_t wpos)
221 {
222  ph_lc_t *lcptr;
223  ph_rc_t *rcptr;
224  int newl, newr;
225 
226  assert(m);
227  assert((ci >= 0) && (ci < m->n_ciphone));
228  assert((lc >= 0) && (lc < m->n_ciphone));
229  assert((rc >= 0) && (rc < m->n_ciphone));
230  assert((wpos >= 0) && (wpos < N_WORD_POSN));
231 
232  if (((lcptr =
233  find_ph_lc(m->wpos_ci_lclist[wpos][(int) ci], lc)) == NULL)
234  || ((rcptr = find_ph_rc(lcptr->rclist, rc)) == NULL)) {
235  /* Not found; backoff to silence context if non-silence filler context */
236  if (m->sil < 0)
237  return -1;
238 
239  newl = m->ciphone[(int) lc].filler ? m->sil : lc;
240  newr = m->ciphone[(int) rc].filler ? m->sil : rc;
241  if ((newl == lc) && (newr == rc))
242  return -1;
243 
244  return (mdef_phone_id(m, ci, newl, newr, wpos));
245  }
246 
247  return (rcptr->pid);
248 }
249 
250 int
251 mdef_is_ciphone(mdef_t * m, int p)
252 {
253  assert(m);
254  assert((p >= 0) && (p < m->n_phone));
255 
256  return ((p < m->n_ciphone) ? 1 : 0);
257 }
258 
259 int
260 mdef_is_cisenone(mdef_t * m, int s)
261 {
262  assert(m);
263  if (s >= m->n_sen) {
264  return 0;
265  }
266  assert(s >= 0);
267  return ((s == m->cd2cisen[s]) ? 1 : 0);
268 }
269 
270 
271 /* Parse tmat and state->senone mappings for phone p and fill in structure */
272 static void
273 parse_tmat_senmap(mdef_t * m, char *line, int32 off, int p)
274 {
275  int32 wlen, n, s;
276  __BIGSTACKVARIABLE__ char word[1024], *lp;
277 
278  lp = line + off;
279 
280  /* Read transition matrix id */
281  if ((sscanf(lp, "%d%n", &n, &wlen) != 1) || (n < 0))
282  E_FATAL("Missing or bad transition matrix id: %s\n", line);
283  m->phone[p].tmat = n;
284  if (m->n_tmat <= n)
285  E_FATAL("tmat-id(%d) > #tmat in header(%d): %s\n", n, m->n_tmat,
286  line);
287  lp += wlen;
288 
289  /* Read senone mappings for each emitting state */
290  for (n = 0; n < m->n_emit_state; n++) {
291  if ((sscanf(lp, "%d%n", &s, &wlen) != 1) || (s < 0))
292  E_FATAL("Missing or bad state[%d]->senone mapping: %s\n", n,
293  line);
294 
295  /*20040821 ARCHAN, This line is added to allow 3.x/3.0 compatability. */
296  m->phone[p].state[n] = s;
297 
298  if ((p < m->n_ciphone) && (m->n_ci_sen <= s))
299  E_FATAL("CI-senone-id(%d) > #CI-senones(%d): %s\n", s,
300  m->n_ci_sen, line);
301  if (m->n_sen <= s)
302  E_FATAL("Senone-id(%d) > #senones(%d): %s\n", s, m->n_sen,
303  line);
304 
305  m->sseq[p][n] = s;
306  lp += wlen;
307  }
308 
309  /* Check for the last non-emitting state N */
310  if ((sscanf(lp, "%s%n", word, &wlen) != 1) || (strcmp(word, "N") != 0))
311  E_FATAL("Missing non-emitting state spec: %s\n", line);
312  lp += wlen;
313 
314  /* Check for end of line */
315  if (sscanf(lp, "%s%n", word, &wlen) == 1)
316  E_FATAL("Non-empty beyond non-emitting final state: %s\n", line);
317 }
318 
319 
320 static void
321 parse_base_line(mdef_t * m, char *line, int p)
322 {
323  int32 wlen, n;
324  __BIGSTACKVARIABLE__ char word[1024], *lp;
325  int ci;
326 
327  lp = line;
328 
329  /* Read base phone name */
330  if (sscanf(lp, "%s%n", word, &wlen) != 1)
331  E_FATAL("Missing base phone name: %s\n", line);
332  lp += wlen;
333 
334  /* Make sure it's not a duplicate */
335  ci = mdef_ciphone_id(m, word);
336  if (ci >= 0)
337  E_FATAL("Duplicate base phone: %s\n", line);
338 
339  /* Add ciphone to ciphone table with id p */
340  ciphone_add(m, word, p);
341  ci = (int) p;
342 
343  /* Read and skip "-" for lc, rc, wpos */
344  for (n = 0; n < 3; n++) {
345  if ((sscanf(lp, "%s%n", word, &wlen) != 1)
346  || (strcmp(word, "-") != 0))
347  E_FATAL("Bad context info for base phone: %s\n", line);
348  lp += wlen;
349  }
350 
351  /* Read filler attribute, if present */
352  if (sscanf(lp, "%s%n", word, &wlen) != 1)
353  E_FATAL("Missing filler atribute field: %s\n", line);
354  lp += wlen;
355  if (strcmp(word, "filler") == 0)
356  m->ciphone[(int) ci].filler = 1;
357  else if (strcmp(word, "n/a") == 0)
358  m->ciphone[(int) ci].filler = 0;
359  else
360  E_FATAL("Bad filler attribute field: %s\n", line);
361 
362  triphone_add(m, ci, -1, -1, WORD_POSN_UNDEFINED, p);
363 
364  /* Parse remainder of line: transition matrix and state->senone mappings */
365  parse_tmat_senmap(m, line, lp - line, p);
366 }
367 
368 
369 static void
370 parse_tri_line(mdef_t * m, char *line, int p)
371 {
372  int32 wlen;
373  __BIGSTACKVARIABLE__ char word[1024], *lp;
374  int ci, lc, rc;
376 
377  lp = line;
378 
379  /* Read base phone name */
380  if (sscanf(lp, "%s%n", word, &wlen) != 1)
381  E_FATAL("Missing base phone name: %s\n", line);
382  lp += wlen;
383 
384  ci = mdef_ciphone_id(m, word);
385  if (ci < 0)
386  E_FATAL("Unknown base phone: %s\n", line);
387 
388  /* Read lc */
389  if (sscanf(lp, "%s%n", word, &wlen) != 1)
390  E_FATAL("Missing left context: %s\n", line);
391  lp += wlen;
392  lc = mdef_ciphone_id(m, word);
393  if (lc < 0)
394  E_FATAL("Unknown left context: %s\n", line);
395 
396  /* Read rc */
397  if (sscanf(lp, "%s%n", word, &wlen) != 1)
398  E_FATAL("Missing right context: %s\n", line);
399  lp += wlen;
400  rc = mdef_ciphone_id(m, word);
401  if (rc < 0)
402  E_FATAL("Unknown right context: %s\n", line);
403 
404  /* Read tripone word-position within word */
405  if ((sscanf(lp, "%s%n", word, &wlen) != 1) || (word[1] != '\0'))
406  E_FATAL("Missing or bad word-position spec: %s\n", line);
407  lp += wlen;
408  switch (word[0]) {
409  case 'b':
410  wpos = WORD_POSN_BEGIN;
411  break;
412  case 'e':
413  wpos = WORD_POSN_END;
414  break;
415  case 's':
416  wpos = WORD_POSN_SINGLE;
417  break;
418  case 'i':
419  wpos = WORD_POSN_INTERNAL;
420  break;
421  default:
422  E_FATAL("Bad word-position spec: %s\n", line);
423  }
424 
425  /* Read filler attribute, if present. Must match base phone attribute */
426  if (sscanf(lp, "%s%n", word, &wlen) != 1)
427  E_FATAL("Missing filler attribute field: %s\n", line);
428  lp += wlen;
429  if (((strcmp(word, "filler") == 0) && (m->ciphone[(int) ci].filler)) ||
430  ((strcmp(word, "n/a") == 0) && (!m->ciphone[(int) ci].filler))) {
431  /* Everything is fine */
432  }
433  else
434  E_FATAL("Bad filler attribute field: %s\n", line);
435 
436  triphone_add(m, ci, lc, rc, wpos, p);
437 
438  /* Parse remainder of line: transition matrix and state->senone mappings */
439  parse_tmat_senmap(m, line, lp - line, p);
440 }
441 
442 
443 static void
444 sseq_compress(mdef_t * m)
445 {
446  hash_table_t *h;
447  uint16 **sseq;
448  int32 n_sseq;
449  int32 p, j, k;
450  glist_t g;
451  gnode_t *gn;
452  hash_entry_t *he;
453 
454  k = m->n_emit_state * sizeof(int16);
455 
456  h = hash_table_new(m->n_phone, HASH_CASE_YES);
457  n_sseq = 0;
458 
459  /* Identify unique senone-sequence IDs. BUG: tmat-id not being considered!! */
460  for (p = 0; p < m->n_phone; p++) {
461  /* Add senone sequence to hash table */
462  if (n_sseq
463  == (j = hash_table_enter_bkey_int32(h, (char *)m->sseq[p], k, n_sseq)))
464  n_sseq++;
465 
466  m->phone[p].ssid = j;
467  }
468 
469  /* Generate compacted sseq table */
470  sseq = ckd_calloc_2d(n_sseq, m->n_emit_state, sizeof(**sseq)); /* freed in mdef_free() */
471 
472  g = hash_table_tolist(h, &j);
473  assert(j == n_sseq);
474 
475  for (gn = g; gn; gn = gnode_next(gn)) {
476  he = (hash_entry_t *) gnode_ptr(gn);
477  j = (long)hash_entry_val(he);
478  memcpy(sseq[j], hash_entry_key(he), k);
479  }
480  glist_free(g);
481 
482  /* Free the old, temporary senone sequence table, replace with compacted one */
483  ckd_free_2d(m->sseq);
484  m->sseq = sseq;
485  m->n_sseq = n_sseq;
486 
487  hash_table_free(h);
488 }
489 
490 
491 static int32
492 noncomment_line(char *line, int32 size, FILE * fp)
493 {
494  while (fgets(line, size, fp) != NULL) {
495  if (line[0] != '#')
496  return 0;
497  }
498  return -1;
499 }
500 
501 
502 /*
503  * Initialize phones (ci and triphones) and state->senone mappings from .mdef file.
504  */
505 mdef_t *
506 mdef_init(char *mdeffile, int32 breport)
507 {
508  FILE *fp;
509  int32 n_ci, n_tri, n_map, n;
510  __BIGSTACKVARIABLE__ char tag[1024], buf[1024];
511  uint16 **senmap;
512  int p;
513  int32 s, ci, cd;
514  mdef_t *m;
515 
516  if (!mdeffile)
517  E_FATAL("No mdef-file\n");
518 
519  if (breport)
520  E_INFO("Reading model definition: %s\n", mdeffile);
521 
522  m = (mdef_t *) ckd_calloc(1, sizeof(mdef_t)); /* freed in mdef_free */
523 
524  if ((fp = fopen(mdeffile, "r")) == NULL)
525  E_FATAL_SYSTEM("Failed to open mdef file '%s' for reading", mdeffile);
526 
527  if (noncomment_line(buf, sizeof(buf), fp) < 0)
528  E_FATAL("Empty file: %s\n", mdeffile);
529 
530  if (strncmp(buf, "BMDF", 4) == 0 || strncmp(buf, "FDMB", 4) == 0) {
531  E_INFO
532  ("Found byte-order mark %.4s, assuming this is a binary mdef file\n",
533  buf);
534  fclose(fp);
535  ckd_free(m);
536  return NULL;
537  }
538  if (strncmp(buf, MODEL_DEF_VERSION, strlen(MODEL_DEF_VERSION)) != 0)
539  E_FATAL("Version error: Expecing %s, but read %s\n",
540  MODEL_DEF_VERSION, buf);
541 
542  /* Read #base phones, #triphones, #senone mappings defined in header */
543  n_ci = -1;
544  n_tri = -1;
545  n_map = -1;
546  m->n_ci_sen = -1;
547  m->n_sen = -1;
548  m->n_tmat = -1;
549  do {
550  if (noncomment_line(buf, sizeof(buf), fp) < 0)
551  E_FATAL("Incomplete header\n");
552 
553  if ((sscanf(buf, "%d %s", &n, tag) != 2) || (n < 0))
554  E_FATAL("Error in header: %s\n", buf);
555 
556  if (strcmp(tag, "n_base") == 0)
557  n_ci = n;
558  else if (strcmp(tag, "n_tri") == 0)
559  n_tri = n;
560  else if (strcmp(tag, "n_state_map") == 0)
561  n_map = n;
562  else if (strcmp(tag, "n_tied_ci_state") == 0)
563  m->n_ci_sen = n;
564  else if (strcmp(tag, "n_tied_state") == 0)
565  m->n_sen = n;
566  else if (strcmp(tag, "n_tied_tmat") == 0)
567  m->n_tmat = n;
568  else
569  E_FATAL("Unknown header line: %s\n", buf);
570  } while ((n_ci < 0) || (n_tri < 0) || (n_map < 0) ||
571  (m->n_ci_sen < 0) || (m->n_sen < 0) || (m->n_tmat < 0));
572 
573  if ((n_ci == 0) || (m->n_ci_sen == 0) || (m->n_tmat == 0)
574  || (m->n_ci_sen > m->n_sen))
575  E_FATAL("%s: Error in header\n", mdeffile);
576 
577  /* Check typesize limits */
578  if (n_ci >= MAX_INT16)
579  E_FATAL("%s: #CI phones (%d) exceeds limit (%d)\n", mdeffile, n_ci,
580  MAX_INT16);
581  if (n_ci + n_tri >= MAX_INT32) /* Comparison is always false... */
582  E_FATAL("%s: #Phones (%d) exceeds limit (%d)\n", mdeffile,
583  n_ci + n_tri, MAX_INT32);
584  if (m->n_sen >= MAX_INT16)
585  E_FATAL("%s: #senones (%d) exceeds limit (%d)\n", mdeffile,
586  m->n_sen, MAX_INT16);
587  if (m->n_tmat >= MAX_INT32) /* Comparison is always false... */
588  E_FATAL("%s: #tmats (%d) exceeds limit (%d)\n", mdeffile,
589  m->n_tmat, MAX_INT32);
590 
591  m->n_emit_state = (n_map / (n_ci + n_tri)) - 1;
592  if ((m->n_emit_state + 1) * (n_ci + n_tri) != n_map)
593  E_FATAL
594  ("Header error: n_state_map not a multiple of n_ci*n_tri\n");
595 
596  /* Initialize ciphone info */
597  m->n_ciphone = n_ci;
598  m->ciphone_ht = hash_table_new(n_ci, HASH_CASE_YES); /* With case-insensitive string names *//* freed in mdef_free */
599  m->ciphone = (ciphone_t *) ckd_calloc(n_ci, sizeof(ciphone_t)); /* freed in mdef_free */
600 
601  /* Initialize phones info (ciphones + triphones) */
602  m->n_phone = n_ci + n_tri;
603  m->phone = (phone_t *) ckd_calloc(m->n_phone, sizeof(phone_t)); /* freed in mdef_free */
604 
605  /* Allocate space for state->senone map for each phone */
606  senmap = ckd_calloc_2d(m->n_phone, m->n_emit_state, sizeof(**senmap)); /* freed in mdef_free */
607  m->sseq = senmap; /* TEMPORARY; until it is compressed into just the unique ones */
608 
609 
611  /* Flat decoder-specific */
612  /* Allocate space for state->senone map for each phone */
613 
614  /* ARCHAN 20040820, this sacrifice readability and may cause pointer
615  problems in future. However, this is a less evil than
616  duplication of code. This is trick point all the state mapping
617  to the global mapping and avoid duplicated memory.
618  */
619 
620  /* S3 xwdpid_compress will compress the below list phone list.
621  */
622 
623  /* ARCHAN, this part should not be used when one of the recognizer is used. */
624  m->st2senmap =
625  (int16 *) ckd_calloc(m->n_phone * m->n_emit_state,
626  sizeof(*m->st2senmap));
627  for (p = 0; p < m->n_phone; p++)
628  m->phone[p].state = m->st2senmap + (p * m->n_emit_state);
629  /******************************************************************************************************/
630 
631 
632  /* Allocate initial space for <ci,lc,rc,wpos> -> pid mapping */
633  m->wpos_ci_lclist = (ph_lc_t ***) ckd_calloc_2d(N_WORD_POSN, m->n_ciphone, sizeof(ph_lc_t *)); /* freed in mdef_free */
634 
635  /*
636  * Read base phones and triphones. They'll simply be assigned a running sequence
637  * number as their "phone-id". If the phone-id < n_ci, it's a ciphone.
638  */
639 
640  /* Read base phones */
641  for (p = 0; p < n_ci; p++) {
642  if (noncomment_line(buf, sizeof(buf), fp) < 0)
643  E_FATAL("Premature EOF reading CIphone %d\n", p);
644  parse_base_line(m, buf, p);
645  }
646  m->sil = mdef_ciphone_id(m, S3_SILENCE_CIPHONE);
647 
648  /* Read triphones, if any */
649  for (; p < m->n_phone; p++) {
650  if (noncomment_line(buf, sizeof(buf), fp) < 0)
651  E_FATAL("Premature EOF reading phone %d\n", p);
652  parse_tri_line(m, buf, p);
653  }
654 
655  if (noncomment_line(buf, sizeof(buf), fp) >= 0)
656  E_ERROR("Non-empty file beyond expected #phones (%d)\n",
657  m->n_phone);
658 
659  /* Build CD senones to CI senones map */
660  if (m->n_ciphone * m->n_emit_state != m->n_ci_sen)
661  E_FATAL
662  ("#CI-senones(%d) != #CI-phone(%d) x #emitting-states(%d)\n",
663  m->n_ci_sen, m->n_ciphone, m->n_emit_state);
664  m->cd2cisen = (int16 *) ckd_calloc(m->n_sen, sizeof(*m->cd2cisen)); /* freed in mdef_free */
665 
666  m->sen2cimap = (int16 *) ckd_calloc(m->n_sen, sizeof(*m->sen2cimap)); /* freed in mdef_free */
667 
668  for (s = 0; s < m->n_sen; s++)
669  m->sen2cimap[s] = -1;
670  for (s = 0; s < m->n_ci_sen; s++) { /* CI senones */
671  m->cd2cisen[s] = s;
672  m->sen2cimap[s] = s / m->n_emit_state;
673  }
674  for (p = n_ci; p < m->n_phone; p++) { /* CD senones */
675  for (s = 0; s < m->n_emit_state; s++) {
676  cd = m->sseq[p][s];
677  ci = m->sseq[m->phone[p].ci][s];
678  m->cd2cisen[cd] = ci;
679  m->sen2cimap[cd] = m->phone[p].ci;
680  }
681  }
682 
683  sseq_compress(m);
684  fclose(fp);
685 
686  return m;
687 }
688 
689 void
690 mdef_report(mdef_t * m)
691 {
692  E_INFO_NOFN("Initialization of mdef_t, report:\n");
693  E_INFO_NOFN
694  ("%d CI-phone, %d CD-phone, %d emitstate/phone, %d CI-sen, %d Sen, %d Sen-Seq\n",
695  m->n_ciphone, m->n_phone - m->n_ciphone, m->n_emit_state,
696  m->n_ci_sen, m->n_sen, m->n_sseq);
697  E_INFO_NOFN("\n");
698 
699 }
700 
701 /* RAH 4.23.01, Need to step down the ->next list to see if there are
702  any more things to free
703  */
704 
705 
706 
707 /* RAH 4.19.01, Attempt to free memory that was allocated within this module
708  I have not verified that all the memory has been freed. I've taken only a
709  reasonable effort for now.
710  RAH 4.24.01 - verified that all memory is released.
711  */
712 void
713 mdef_free_recursive_lc(ph_lc_t * lc)
714 {
715  if (lc == NULL)
716  return;
717 
718  if (lc->rclist)
719  mdef_free_recursive_rc(lc->rclist);
720 
721  if (lc->next)
722  mdef_free_recursive_lc(lc->next);
723 
724  ckd_free((void *) lc);
725 }
726 
727 void
728 mdef_free_recursive_rc(ph_rc_t * rc)
729 {
730  if (rc == NULL)
731  return;
732 
733  if (rc->next)
734  mdef_free_recursive_rc(rc->next);
735 
736  ckd_free((void *) rc);
737 }
738 
739 
740 /* RAH, Free memory that was allocated in mdef_init
741  Rational purify shows that no leaks exist
742  */
743 
744 void
745 mdef_free(mdef_t * m)
746 {
747  int i, j;
748 
749  if (m) {
750  if (m->sen2cimap)
751  ckd_free((void *) m->sen2cimap);
752  if (m->cd2cisen)
753  ckd_free((void *) m->cd2cisen);
754 
755  /* RAH, go down the ->next list and delete all the pieces */
756  for (i = 0; i < N_WORD_POSN; i++)
757  for (j = 0; j < m->n_ciphone; j++)
758  if (m->wpos_ci_lclist[i][j]) {
759  mdef_free_recursive_lc(m->wpos_ci_lclist[i][j]->next);
760  mdef_free_recursive_rc(m->wpos_ci_lclist[i][j]->
761  rclist);
762  }
763 
764  for (i = 0; i < N_WORD_POSN; i++)
765  for (j = 0; j < m->n_ciphone; j++)
766  if (m->wpos_ci_lclist[i][j])
767  ckd_free((void *) m->wpos_ci_lclist[i][j]);
768 
769 
770  if (m->wpos_ci_lclist)
771  ckd_free_2d((void *) m->wpos_ci_lclist);
772  if (m->sseq)
773  ckd_free_2d((void *) m->sseq);
774  /* Free phone context */
775  if (m->phone)
776  ckd_free((void *) m->phone);
777  if (m->ciphone_ht)
778  hash_table_free(m->ciphone_ht);
779 
780  for (i = 0; i < m->n_ciphone; i++) {
781  if (m->ciphone[i].name)
782  ckd_free((void *) m->ciphone[i].name);
783  }
784 
785 
786  if (m->ciphone)
787  ckd_free((void *) m->ciphone);
788 
789  if (m->st2senmap)
790  ckd_free((void *) m->st2senmap);
791 
792  ckd_free((void *) m);
793  }
794 }