fsg_search.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  *
19  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * ====================================================================
32  *
33  */
34 /*
35  * fsg_search.h -- Search structures for FSM decoding.
36  *
37  * **********************************************
38  * CMU ARPA Speech Project
39  *
40  * Copyright (c) 2004 Carnegie Mellon University.
41  * ALL RIGHTS RESERVED.
42  * **********************************************
43  *
44  * HISTORY
45  *
46  * $Log: fsg_search.h,v $
47  * Revision 1.2 2006/02/23 05:12:43 arthchan2003
48  * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: Adaptation of Sphinx 2's FSG search into Sphinx 3
49  *
50  * Revision 1.1.2.7 2006/01/16 18:20:46 arthchan2003
51  * Remove junks in the code, change the reporting from printf to log_hypstr.
52  *
53  * Revision 1.1.2.6 2005/07/24 19:34:46 arthchan2003
54  * Removed search_hyp_t, used srch_hyp_t instead
55  *
56  * Revision 1.1.2.5 2005/07/24 01:34:54 arthchan2003
57  * Mode 2 is basically running. Still need to fix function such as resulting and build the correct utterance ID
58  *
59  * Revision 1.1.2.4 2005/07/17 05:44:32 arthchan2003
60  * Added dag_write_header so that DAG header writer could be shared between 3.x and 3.0. However, because the backtrack pointer structure is different in 3.x and 3.0. The DAG writer still can't be shared yet.
61  *
62  * Revision 1.1.2.3 2005/07/13 18:39:48 arthchan2003
63  * (For Fun) Remove the hmm_t hack. Consider each s2 global functions one-by-one and replace them by sphinx 3's macro. There are 8 minor HACKs where functions need to be removed temporarily. Also, there are three major hacks. 1, there are no concept of "phone" in sphinx3 dict_t, there is only ciphone. That is to say we need to build it ourselves. 2, sphinx2 dict_t will be a bunch of left and right context tables. This is currently bypass. 3, the fsg routine is using fsg_hmm_t which is just a duplication of CHAN_T in sphinx2, I will guess using hmm_evaluate should be a good replacement. But I haven't figure it out yet.
64  *
65  * Revision 1.1.2.2 2005/06/28 07:01:20 arthchan2003
66  * General fix of fsg routines to make a prototype of fsg_init and fsg_read. Not completed. The number of empty functions in fsg_search is now decreased from 35 to 30.
67  *
68  * Revision 1.1.2.1 2005/06/27 05:26:29 arthchan2003
69  * Sphinx 2 fsg manipulation routines. Compiled with faked functions. Currently fended off from users.
70  *
71  * Revision 1.2 2004/07/23 23:36:34 egouvea
72  * Ravi's merge, with the latest fixes in the FSG code, and making the log files generated by FSG, LM, and allphone have the same 'look and feel', with the backtrace information presented consistently
73  *
74  * Revision 1.6 2004/07/20 13:40:55 rkm
75  * Added FSG get/set start/final state functions.
76  *
77  * Revision 1.5 2004/07/16 19:55:28 rkm
78  * Added state information to hypothesis.
79  *
80  * Revision 1.1 2004/07/16 00:57:12 egouvea
81  * Added Ravi's implementation of FSG support.
82  *
83  * Revision 1.4 2004/07/07 13:56:33 rkm
84  * Added reporting of (acoustic score - best senone score)/frame
85  *
86  * Revision 1.3 2004/06/22 15:36:12 rkm
87  * Added partial result handling in FSG mode
88  *
89  * Revision 1.2 2004/05/27 14:22:57 rkm
90  * FSG cross-word triphones completed (but for single-phone words)
91  *
92  * Revision 1.1.1.1 2004/03/01 14:30:31 rkm
93  *
94  *
95  * Revision 1.6 2004/02/27 16:15:13 rkm
96  * Added FSG switching
97  *
98  * Revision 1.5 2004/02/27 15:05:21 rkm
99  * *** empty log message ***
100  *
101  * Revision 1.4 2004/02/26 14:48:20 rkm
102  * *** empty log message ***
103  *
104  * Revision 1.3 2004/02/26 01:14:48 rkm
105  * *** empty log message ***
106  *
107  * Revision 1.2 2004/02/24 18:13:05 rkm
108  * Added NULL transition handling
109  *
110  * Revision 1.1 2004/02/23 15:53:45 rkm
111  * Renamed from fst to fsg
112  *
113  * Revision 1.1 2004/02/19 21:16:54 rkm
114  * Added fsg_search.{c,h}
115  *
116  *
117  * 18-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
118  * Started.
119  */
120 
121 
122 #ifndef __S2_FSG_SEARCH_H__
123 #define __S2_FSG_SEARCH_H__
124 
125 #define HYP_SZ 1024 /* NOTE(review): not referenced in the visible part of this header; presumably a hypothesis buffer size -- confirm */
126 
127 
128 #include <stdio.h>
129 
130 #include <cmd_ln.h>
131 #include <logmath.h>
132 #include <s3types.h>
133 #include <glist.h>
134 #include <word_fsg.h>
135 #include <fsg_lextree.h>
136 #include <fsg_history.h>
137 #include <ascr.h>
138 #include <search.h>
139 #include <dict.h>
140 #include <mdef.h>
141 #include <tmat.h>
142 #include <hmm.h>
143 
144 
145 #ifdef __cplusplus
146 extern "C" {
147 #endif
148 #if 0
149 /* Fool Emacs. */
150 }
151 #endif
152 
153 typedef struct fsg_search_s { /* FSG search module: loaded FSGs, the active FSG, search state and results */
154  glist_t fsglist; /* List of all FSGs loaded */
155 
156  word_fsg_t *fsg; /* Currently active FSG; NULL if none. One
157  must be made active before starting FSG
158  decoding */
159  fsg_lextree_t *lextree; /* Lextree structure for the currently
160  active FSG */
161  fsg_history_t *history; /* For storing the Viterbi search history */
162 
163  glist_t pnode_active; /* Pnodes active in this frame */
164  glist_t pnode_active_next; /* Pnodes activated for the next frame */
165 
166  int32 beam; /* Global pruning threshold */
167  int32 pbeam; /* Threshold for phone transition */
168  int32 wbeam; /* Threshold for word exit */
169 
170  int32 frame; /* Current frame */
171 
172  int32 bestscore; /* Best score, used for beam pruning */
173  int32 bpidx_start; /* First history entry index this frame */
174 
175  srch_hyp_t *hyp; /* Search hypothesis */
176  int32 ascr, lscr; /* Total acoustic and LM score for the utterance */
177 
178  int32 n_hmm_eval; /* Total HMMs evaluated this utterance */
179 
180  int32 state; /* Whether IDLE or BUSY */
181 
183 
184  /* Added by Arthur at 20050627 */
186  int32 isUseFiller; /* NOTE(review): presumably a flag enabling filler words -- confirm */
187  int32 isBacktrace; /* NOTE(review): presumably a flag enabling backtrace reporting -- confirm */
189  char* DumpLatdir; /* NOTE(review): presumably the directory for lattice dumps -- confirm */
190  int32 n_ci_phone; /* NOTE(review): presumably the number of CI phones -- confirm */
191 
196  char* uttid; /* NOTE(review): presumably the current utterance ID; ownership not stated -- confirm */
197  int32 *senscale; /* NOTE(review): presumably senone score scaling values; length not stated -- confirm */
199  FILE* matchfp; /* NOTE(review): presumably the output stream for match results -- confirm */
200  FILE* matchsegfp; /* NOTE(review): presumably the output stream for match segmentations -- confirm */
201 
202  cmd_ln_t *config; /* NOTE(review): presumably the command-line configuration -- confirm */
203  logmath_t *logmath; /* NOTE(review): presumably the log-math object used for score computation -- confirm */
204 } fsg_search_t;
205 
206 
207 /* Access macros. fsg_search_frame(s) yields the frame field (the current frame) of the fsg_search_t pointed to by s. */
208 #define fsg_search_frame(s) ((s)->frame)
209 
210 
211 /*
212  * Create, initialize and return a search module for the given FSG. If no
213  * FSG is given (i.e., the argument is NULL), a search structure is still
214  * created; otherwise the given FSG is made the currently active FSG.
215  * NOTE(review): srch_struct is opaque here; presumably the owning search object -- confirm.
216  */
217 fsg_search_t *fsg_search_init (word_fsg_t *, void *srch_struct);
218 
219 
224 
225 /*
226  * Lookup the FSG associated with the given name and return it, or NULL if
227  * no match found.
228  */
230 
231 
232 /*
233  * Add the given FSG to the collection of FSGs known to this search object.
234  * The given fsg is simply added to the collection. It is not automatically
235  * made the currently active one.
236  * The name of the new FSG must not match any of the existing ones. If so,
237  * FALSE is returned. If successfully added, TRUE is returned.
238  */
240 
241 
242 /*
243  * Delete the given FSG from the known collection. Free the FSG itself,
244  * and if it was the currently active FSG, also free the associated search
245  * structures and leave the current FSG undefined.
246  */
248 
249 
250 /* Like fsg_search_del_fsg(), but identifies the FSG to delete by its name. NOTE(review): the meaning of the int return value is not stated here -- confirm. */
251 int fsg_search_del_fsg_byname (fsg_search_t *, char *name);
252 
253 
254 /*
255  * Switch to a new FSG (identified by its string name). Must not be invoked
256  * when search is busy (i.e., in the midst of an utterance); that is an error
257  * and FALSE is returned. If successful, returns TRUE.
258  */
260 
261 
262 /*
263  * Deallocate search structure.
264  */
266 
267 
268 /*
269  * Prepare the FSG search structure for beginning decoding of the next
270  * utterance.
271  */
273 
274 
275 /*
276  * Wind up and clean the FSG search structure after an utterance. Fill in the
277  * results of search: fsg_search_t.{hyp,ascr,lscr,frame}. (But some fields
278  * of hyp are left unfilled for now: conf, latden, phone_perp.)
279  */
281 
282 
283 /*
284  * Step one frame forward through the Viterbi search.
285  */
287 
288 
289 /*
290  * Compute the partial or final Viterbi backtrace result. (The result can
291  * be retrieved using the API functions search_result or search_get_hyp().)
292  * If "check_fsg_final_state" is TRUE, the backtrace starts from the best
293  * history entry ending in the final state (if it exists). Otherwise it
294  * starts from the best entry, regardless of the terminating state (usually
295  * used for partial results).
296  */
298  int check_fsg_final_state);
299 
300 /*
301  * Return the start (or final) state of the currently active FSG, if any.
302  * Otherwise return -1.
303  */
306 
307 
308 /*
309  * Set the start (or final) state of the currently active FSG, if any, to
310  * the given state. This operation can only be done in between utterances,
311  * not in the midst of one. Return the previous start (or final) state if
312  * successful. Return -1 on any error.
313  */
314 int32 fsg_search_set_start_state (fsg_search_t *, int32 state);
315 int32 fsg_search_set_final_state (fsg_search_t *, int32 state);
316 
317 
318 void fsg_search_sen_active (fsg_search_t *search); /* NOTE(review): undocumented here; presumably flags the senones needed by the currently active HMMs -- confirm against fsg_search.c */
319 
320 #ifdef __cplusplus
321 }
322 #endif
323 
324 
325 #endif