SphinxBase
0.6
Main Page
Related Pages
Data Structures
Files
File List
Globals
sphinx_lm_convert.c
Go to the documentation of this file.
1
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
/* ====================================================================
3
* Copyright (c) 2009 Carnegie Mellon University. All rights
4
* reserved.
5
*
6
* Redistribution and use in source and binary forms, with or without
7
* modification, are permitted provided that the following conditions
8
* are met:
9
*
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
*
13
* 2. Redistributions in binary form must reproduce the above copyright
14
* notice, this list of conditions and the following disclaimer in
15
* the documentation and/or other materials provided with the
16
* distribution.
17
*
18
* This work was supported in part by funding from the Defense Advanced
19
* Research Projects Agency and the National Science Foundation of the
20
* United States of America, and the CMU Sphinx Speech Consortium.
21
*
22
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
*
34
* ====================================================================
35
*
36
*/
41
#include <
sphinxbase/logmath.h
>
42
#include <
sphinxbase/ngram_model.h
>
43
#include <
sphinxbase/cmd_ln.h
>
44
#include <
sphinxbase/ckd_alloc.h
>
45
#include <
sphinxbase/err.h
>
46
#include <
sphinxbase/pio.h
>
47
#include <
sphinxbase/strfuncs.h
>
48
49
#include <stdio.h>
50
#include <string.h>
51
#include <math.h>
52
53
static
const
arg_t
defn[] = {
54
{
"-help"
,
55
ARG_BOOLEAN
,
56
"no"
,
57
"Shows the usage of the tool"
},
58
59
{
"-logbase"
,
60
ARG_FLOAT64
,
61
"1.0001"
,
62
"Base in which all log-likelihoods calculated"
},
63
64
{
"-i"
,
65
REQARG_STRING
,
66
NULL,
67
"Input language model file (required)"
},
68
69
{
"-o"
,
70
REQARG_STRING
,
71
NULL,
72
"Output language model file (required)"
},
73
74
{
"-ifmt"
,
75
ARG_STRING
,
76
NULL,
77
"Input language model format (will guess if not specified)"
},
78
79
{
"-ofmt"
,
80
ARG_STRING
,
81
NULL,
82
"Output language model file (will guess if not specified)"
},
83
84
{
"-ienc"
,
85
ARG_STRING
,
86
NULL,
87
"Input language model text encoding (no conversion done if not specified)"
},
88
89
{
"-oenc"
,
90
ARG_STRING
,
91
"utf8"
,
92
"Output language model text encoding"
},
93
94
{
"-case"
,
95
ARG_STRING
,
96
NULL,
97
"Ether 'lower' or 'upper' - case fold to lower/upper case (NOT UNICODE AWARE)"
},
98
99
{
"-mmap"
,
100
ARG_BOOLEAN
,
101
"no"
,
102
"Use memory-mapped I/O for reading binary LM files"
},
103
104
{
"-debug"
,
105
ARG_INT32
,
106
NULL,
107
"Verbosity level for debugging messages"
108
},
109
110
{ NULL, 0, NULL, NULL }
111
};
112
113
/**
 * Emit a short usage summary and terminate the program.
 *
 * The summary is written with E_INFO()/E_INFOCONT() as three pieces:
 * the program name with the input option, the optional format options,
 * and the output option.
 *
 * @param pgm Program name inserted into the usage line (main() passes
 *            argv[0]).
 *
 * This function never returns; it ends the process with exit status 0.
 */
static void
usagemsg(char *pgm)
{
    E_INFO("Usage: %s -i <input.lm> \\\n", pgm);
    E_INFOCONT("\t[-ifmt txt] [-ofmt dmp]\n");
    E_INFOCONT("\t-o <output.lm.DMP>\n");

    exit(0);
}
122
123
124
int
125
main(
int
argc,
char
*argv[])
126
{
127
cmd_ln_t
*
config
;
128
ngram_model_t
*lm = NULL;
129
logmath_t
*lmath;
130
int
itype, otype;
131
char
const
*kase;
132
133
if
((config =
cmd_ln_parse_r
(NULL, defn, argc, argv, TRUE)) == NULL)
134
return
1;
135
136
if
(
cmd_ln_boolean_r
(config,
"-help"
)) {
137
usagemsg(argv[0]);
138
}
139
140
err_set_debug_level
(cmd_ln_int32_r(config,
"-debug"
));
141
142
/* Create log math object. */
143
if
((lmath = logmath_init
144
(cmd_ln_float64_r(config,
"-logbase"
), 0, 0)) == NULL) {
145
E_FATAL
(
"Failed to initialize log math\n"
);
146
}
147
148
if
(
cmd_ln_str_r
(config,
"-i"
) == NULL ||
cmd_ln_str_r
(config,
"-i"
) == NULL) {
149
E_ERROR
(
"Please specify both input and output models\n"
);
150
goto
error_out;
151
}
152
153
154
/* Load the input language model. */
155
if
(
cmd_ln_str_r
(config,
"-ifmt"
)) {
156
if
((itype =
ngram_str_to_type
(
cmd_ln_str_r
(config,
"-ifmt"
)))
157
==
NGRAM_INVALID
) {
158
E_ERROR
(
"Invalid input type %s\n"
,
cmd_ln_str_r
(config,
"-ifmt"
));
159
goto
error_out;
160
}
161
lm =
ngram_model_read
(config,
cmd_ln_str_r
(config,
"-i"
),
162
itype, lmath);
163
}
164
else
{
165
lm =
ngram_model_read
(config,
cmd_ln_str_r
(config,
"-i"
),
166
NGRAM_AUTO
, lmath);
167
}
168
169
/* Guess or set the output language model type. */
170
if
(
cmd_ln_str_r
(config,
"-ofmt"
)) {
171
if
((otype =
ngram_str_to_type
(
cmd_ln_str_r
(config,
"-ofmt"
)))
172
==
NGRAM_INVALID
) {
173
E_ERROR
(
"Invalid output type %s\n"
,
cmd_ln_str_r
(config,
"-ofmt"
));
174
goto
error_out;
175
}
176
}
177
else
{
178
otype =
ngram_file_name_to_type
(
cmd_ln_str_r
(config,
"-o"
));
179
}
180
181
/* Recode the language model if desired. */
182
if
(
cmd_ln_str_r
(config,
"-ienc"
)) {
183
if
(
ngram_model_recode
(lm,
cmd_ln_str_r
(config,
"-ienc"
),
184
cmd_ln_str_r
(config,
"-oenc"
)) != 0) {
185
E_ERROR
(
"Failed to recode language model from %s to %s\n"
,
186
cmd_ln_str_r
(config,
"-ienc"
),
187
cmd_ln_str_r
(config,
"-oenc"
));
188
goto
error_out;
189
}
190
}
191
192
/* Case fold if requested. */
193
if
((kase =
cmd_ln_str_r
(config,
"-case"
))) {
194
if
(0 == strcmp(kase,
"lower"
)) {
195
ngram_model_casefold
(lm, NGRAM_LOWER);
196
}
197
else
if
(0 == strcmp(kase,
"upper"
)) {
198
ngram_model_casefold
(lm, NGRAM_UPPER);
199
}
200
else
{
201
E_ERROR
(
"Unknown value for -case: %s\n"
, kase);
202
goto
error_out;
203
}
204
}
205
206
/* Write the output language model. */
207
if
(
ngram_model_write
(lm,
cmd_ln_str_r
(config,
"-o"
), otype) != 0) {
208
E_ERROR
(
"Failed to write language model in format %s to %s\n"
,
209
ngram_type_to_str
(otype),
cmd_ln_str_r
(config,
"-o"
));
210
goto
error_out;
211
}
212
213
/* That's all folks! */
214
ngram_model_free
(lm);
215
return
0;
216
217
error_out:
218
ngram_model_free
(lm);
219
return
1;
220
}
src
sphinx_lmtools
sphinx_lm_convert.c
Generated by
1.8.1.1