SphinxBase
0.6
Main Page
Related Pages
Data Structures
Files
File List
Globals
lm3g_model.h
1
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
/* ====================================================================
3
* Copyright (c) 1999-2007 Carnegie Mellon University. All rights
4
* reserved.
5
*
6
* Redistribution and use in source and binary forms, with or without
7
* modification, are permitted provided that the following conditions
8
* are met:
9
*
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
*
13
* 2. Redistributions in binary form must reproduce the above copyright
14
* notice, this list of conditions and the following disclaimer in
15
* the documentation and/or other materials provided with the
16
* distribution.
17
*
18
* This work was supported in part by funding from the Defense Advanced
19
* Research Projects Agency and the National Science Foundation of the
20
* United States of America, and the CMU Sphinx Speech Consortium.
21
*
22
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
*
34
* ====================================================================
35
*
36
*/
37
/*
38
* \file lm3g_model.h Core Sphinx 3-gram code used in
39
* DMP/DMP32/ARPA (for now) model code.
40
*
41
* Author: A cast of thousands, probably.
42
*/
43
44
#ifndef __NGRAM_MODEL_LM3G_H__
45
#define __NGRAM_MODEL_LM3G_H__
46
47
#include "
sphinxbase/listelem_alloc.h
"
48
49
#include "ngram_model_internal.h"
50
54
typedef
union
{
55
float32 f;
56
int32 l;
57
}
lmprob_t
;
58
68
typedef
struct
sorted_entry_s
{
69
lmprob_t
val
;
70
uint16
lower
;
73
uint16
higher
;
76
}
sorted_entry_t
;
77
82
typedef
struct
{
83
sorted_entry_t
*list;
84
int32
free
;
85
}
sorted_list_t
;
86
87
/*
 * NOTE(review): by its name, the maximum number of sorted_entry_t
 * elements in a sorted list.  65534 is representable in the uint16
 * lower/higher link fields of sorted_entry_t, which is presumably why
 * this limit was chosen -- confirm against the list implementation.
 */
#define MAX_SORTED_ENTRIES 65534
88
92
typedef
struct
unigram_s
{
93
lmprob_t
prob1
;
94
lmprob_t
bo_wt1
;
95
int32
bigrams
;
96
}
unigram_t
;
97
101
/*
 * Forward declarations only: struct bigram_s and struct trigram_s are
 * not defined in the part of this header shown here, so other
 * declarations in this file can only hold pointers to them.
 */
typedef struct bigram_s bigram_t;
typedef struct trigram_s trigram_t;
106
107
108
/*
 * To conserve space, bigram info is kept in many tables.  Since the
 * number of distinct values << #bigrams, these table indices can be
 * 16-bit values.  prob2 and bo_wt2 are such indices, but keeping the
 * trigram index is less easy.  It is supposed to be the index of the
 * first trigram entry for each bigram, but such an index cannot be
 * represented in 16 bits, hence the following segmentation scheme:
 *
 * Partition bigrams into segments of BG_SEG_SZ consecutive entries,
 * such that #trigrams in each segment <= 2**16 (the corresponding
 * trigram segment).  The bigram_t.trigrams value is then a 16-bit
 * relative index within the trigram segment.  A separate table,
 * tseg_base (referred to historically as lm_t.tseg_base), has the
 * index of the 1st trigram for each bigram segment.
 */

/* log2 of the bigram segment size. */
#define LOG_BG_SEG_SZ 9

/*
 * Bigram segment size, chosen so that #trigram/segment <= 2**16.
 * Derived from LOG_BG_SEG_SZ so the two constants can never disagree;
 * evaluates to 512 and remains usable in #if and array bounds.
 */
#define BG_SEG_SZ (1 << LOG_BG_SEG_SZ)
122
130
typedef
struct
tginfo_s
{
131
int32
w1
;
133
int32
n_tg
;
134
int32
bowt
;
135
int32
used
;
136
trigram_t
*
tg
;
137
struct
tginfo_s
*
next
;
138
}
tginfo_t
;
139
143
typedef
struct
lm3g_model_s
{
144
unigram_t
*unigrams;
145
bigram_t
*bigrams;
146
trigram_t
*trigrams;
147
lmprob_t
*
prob2
;
148
int32
n_prob2
;
149
lmprob_t
*
bo_wt2
;
150
int32
n_bo_wt2
;
151
lmprob_t
*
prob3
;
152
int32
n_prob3
;
153
int32 *
tseg_base
;
155
tginfo_t
**
tginfo
;
157
listelem_alloc_t
*
le
;
158
}
lm3g_model_t
;
159
160
void
lm3g_tginfo_free(
ngram_model_t
*base,
lm3g_model_t
*lm3g);
161
void
lm3g_tginfo_reset(
ngram_model_t
*base,
lm3g_model_t
*lm3g);
162
void
lm3g_apply_weights(
ngram_model_t
*base,
163
lm3g_model_t
*lm3g,
164
float32 lw, float32 wip, float32 uw);
165
int32 lm3g_add_ug(
ngram_model_t
*base,
166
lm3g_model_t
*lm3g, int32 wid, int32 lweight);
167
168
173
void
init_sorted_list(
sorted_list_t
*l);
174
void
free_sorted_list(
sorted_list_t
*l);
175
lmprob_t
*vals_in_sorted_list(
sorted_list_t
*l);
176
int32 sorted_id(
sorted_list_t
* l, int32 *val);
177
178
#endif
/* __NGRAM_MODEL_LM3G_H__ */
src
libsphinxbase
lm
lm3g_model.h
Generated by
1.8.1.1