• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

src/libsphinxbase/lm/lm3g_model.h

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2007 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * \file lm3g_model.h Core Sphinx 3-gram code used in
00039  * DMP/DMP32/ARPA (for now) model code.
00040  *
00041  * Author: A cast of thousands, probably.
00042  */
00043 
00044 #ifndef __NGRAM_MODEL_LM3G_H__
00045 #define __NGRAM_MODEL_LM3G_H__
00046 
00047 #include <listelem_alloc.h>
00048 
00049 #include "ngram_model_internal.h"
00050 
00054 typedef union {             /* One slot readable either as float32 (f) or as int32 (l); the members share storage. */
00055     float32 f;              /* floating-point view of the slot */
00056     int32 l;                /* integer view of the same storage */
00057 } lmprob_t;                 /* NOTE(review): presumably holds an LM probability/weight that starts as a float and is later replaced by an integer form - confirm in the model loaders */
00058 
00068 typedef struct sorted_entry_s {  /* Element type stored behind sorted_list_t.list: one value plus two 16-bit link fields. */
00069     lmprob_t val;               /* the value carried by this entry */
00070     uint16 lower;               /* NOTE(review): presumably the index of an entry holding a smaller value - confirm in the sorted-list implementation */
00073     uint16 higher;              /* NOTE(review): presumably the index of an entry holding a larger value - confirm in the sorted-list implementation */
00076 } sorted_entry_t;
00077 
00082 typedef struct {            /* Pairs a pointer to sorted_entry_t storage with an int32 bookkeeping field; operated on by the *_sorted_list / sorted_id functions declared below. */
00083     sorted_entry_t *list;   /* entry storage; NOTE(review): ownership/allocation is not visible in this header - presumably managed by init_sorted_list()/free_sorted_list() */
00084     int32 free;                 /* NOTE(review): presumably the index of the first unused slot in list - confirm */
00085 } sorted_list_t;
00086 
00087 #define MAX_SORTED_ENTRIES      65534   /* Fits in the uint16 range used by sorted_entry_t.lower/higher. NOTE(review): presumably the capacity limit of a sorted_list_t - confirm */
00088 
00092 typedef struct unigram_s {  /* Per-unigram record: two lmprob_t values and one int32. */
00093     lmprob_t prob1;     /* NOTE(review): presumably the unigram probability - confirm */
00094     lmprob_t bo_wt1;    /* NOTE(review): presumably the unigram backoff weight - confirm */
00095     int32 bigrams;      /* NOTE(review): presumably the index of this unigram's first bigram entry - confirm */
00096 } unigram_t;
00097 
00101 typedef struct bigram_s bigram_t;    /* forward declaration only; struct bigram_s is not defined in this header */
00105 typedef struct trigram_s trigram_t;  /* forward declaration only; struct trigram_s is not defined in this header */
00106 
00107 
00108 /*
00109  * To conserve space, bigram info is kept in many tables.  Since the number
00110  * of distinct values << #bigrams, these table indices can be 16-bit values.
00111  * prob2 and bo_wt2 are such indices, but keeping trigram index is less easy.
00112  * It is supposed to be the index of the first trigram entry for each bigram.
00113  * But such an index cannot be represented in 16-bits, hence the following
00114  * segmentation scheme: Partition bigrams into segments of BG_SEG_SZ
00115  * consecutive entries, such that #trigrams in each segment <= 2**16 (the
00116  * corresponding trigram segment).  The bigram_t.trigrams value is then a
00117  * 16-bit relative index within the trigram segment.  A separate table--
00118  * lm3g_model_t.tseg_base--has the index of the 1st trigram for each bigram segment.
00119  */
00120 #define BG_SEG_SZ       512     /* chosen so that #trigram/segment <= 2**16 */
00121 #define LOG_BG_SEG_SZ   9       /* log2 of BG_SEG_SZ (1 << 9 == 512); the two macros must be changed together */
00122 
00130 typedef struct tginfo_s {       /* Node of a singly linked chain (linked through next) that carries a trigram_t pointer and related int32 fields. */
00131     int32 w1;                   /* NOTE(review): presumably a word id identifying which history this node belongs to - confirm */
00133     int32 n_tg;                 /* NOTE(review): presumably the number of trigram entries reachable through tg - confirm */
00134     int32 bowt;                 /* NOTE(review): presumably a backoff weight - confirm */
00135     int32 used;                 /* NOTE(review): presumably a "recently used" marker - confirm how lm3g_tginfo_reset() treats it */
00136     trigram_t *tg;              /* pointer to trigram_t data; trigram_t is only forward-declared in this header */
00137     struct tginfo_s *next;      /* next node in the chain */
00138 } tginfo_t;
00139 
00143 typedef struct lm3g_model_s {   /* Aggregate of the unigram/bigram/trigram tables, the value tables with their lengths, and trigram lookup bookkeeping. */
00144     unigram_t *unigrams;        /* pointer to unigram_t records */
00145     bigram_t *bigrams;          /* pointer to bigram_t records (type defined outside this header) */
00146     trigram_t *trigrams;        /* pointer to trigram_t records (type defined outside this header) */
00147     lmprob_t *prob2;         /* NOTE(review): presumably the table that the 16-bit bigram prob2 indices refer to - confirm */
00148     int32 n_prob2;           /* NOTE(review): presumably the number of entries in prob2 - confirm */
00149     lmprob_t *bo_wt2;        /* NOTE(review): presumably the table that the 16-bit bigram bo_wt2 indices refer to - confirm */
00150     int32 n_bo_wt2;          /* NOTE(review): presumably the number of entries in bo_wt2 - confirm */
00151     lmprob_t *prob3;         /* NOTE(review): presumably the table of distinct trigram probability values - confirm */
00152     int32 n_prob3;           /* NOTE(review): presumably the number of entries in prob3 - confirm */
00153     int32 *tseg_base;    /* per the segmentation comment above BG_SEG_SZ: index of the 1st trigram for each bigram segment */
00155     tginfo_t **tginfo;   /* pointers to tginfo_t chains; NOTE(review): presumably one chain head per word id - confirm */
00157     listelem_alloc_t *le; /* allocator handle from <listelem_alloc.h>; NOTE(review): presumably used for tginfo_t nodes - confirm */
00158 } lm3g_model_t;
00159 
00160 void lm3g_tginfo_free(ngram_model_t *base, lm3g_model_t *lm3g);   /* NOTE(review): presumably releases the tginfo data held by lm3g - confirm in the implementation */
00161 void lm3g_tginfo_reset(ngram_model_t *base, lm3g_model_t *lm3g);  /* NOTE(review): presumably prunes or clears cached tginfo nodes - confirm in the implementation */
00162 void lm3g_apply_weights(ngram_model_t *base,
00163                         lm3g_model_t *lm3g,
00164                         float32 lw, float32 wip, float32 uw);     /* NOTE(review): lw/wip/uw are presumably language weight, word insertion penalty and unigram weight - confirm */
00165 int32 lm3g_add_ug(ngram_model_t *base,
00166                   lm3g_model_t *lm3g, int32 wid, int32 lweight);  /* NOTE(review): presumably adds a unigram for word id wid with weight lweight; meaning of the int32 return is not visible here */
00167 
00168 
00173 void init_sorted_list(sorted_list_t *l);          /* NOTE(review): presumably prepares l for use (allocating l->list) - confirm in the implementation */
00174 void free_sorted_list(sorted_list_t *l);          /* NOTE(review): presumably releases the storage behind l->list - confirm in the implementation */
00175 lmprob_t *vals_in_sorted_list(sorted_list_t *l);  /* returns an lmprob_t pointer; NOTE(review): presumably a newly allocated array of the list's values - confirm who frees it */
00176 int32 sorted_id(sorted_list_t * l, int32 *val);   /* NOTE(review): presumably returns the list index for *val, inserting it if absent - confirm in the implementation */
00177 
00178 #endif /* __NGRAM_MODEL_LM3G_H__ */

Generated on Thu Jan 6 2011 for SphinxBase by  doxygen 1.7.1