• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

include/fe.h

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1996-2004 Carnegie Mellon University.  All rights 
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 /*
00039  * fe.h
00040  * 
00041  * $Log: fe.h,v $
00042  * Revision 1.11  2005/02/05 02:15:02  egouvea
00043  * Removed fe_process(), never used
00044  *
00045  * Revision 1.10  2004/12/10 16:48:55  rkm
00046  * Added continuous density acoustic model handling
00047  *
00048  *
00049  */
00050 
/*
 * On WIN32 builds that are not GNUWINCE, route srand48()/lrand48() to the
 * standard srand()/rand().
 * NOTE(review): presumably the rand48 family is unavailable on those
 * targets -- confirm. rand() and lrand48() need not share a value range, so
 * callers should not rely on a particular maximum.
 * These defines sit outside the _NEW_FE_H_ include guard that follows.
 */
00051 #if defined(WIN32) && !defined(GNUWINCE)
00052 #define srand48(x) srand(x)
00053 #define lrand48() rand()
00054 #endif
00055 
00056 #ifndef _NEW_FE_H_
00057 #define _NEW_FE_H_
00058 
00059 /* Win32/WinCE DLL gunk */
00060 #include <sphinxbase_export.h>
00061 
00062 #include <sphinx_config.h>
00063 #include <cmd_ln.h>
00064 #include <fixpoint.h>
00065 
00066 #ifdef __cplusplus
00067 extern "C" {
00068 #endif
00069 #if 0
00070 /* Fool Emacs. */
00071 }
00072 #endif
00073 
/*
 * NATIVE_ENDIAN is the string "big" when WORDS_BIGENDIAN is defined and
 * "little" otherwise. It is the default value of the -input_endian option
 * in waveform_to_cepstral_command_line_macro().
 * NOTE(review): WORDS_BIGENDIAN is presumably supplied by sphinx_config.h
 * -- confirm.
 */
00074 #ifdef WORDS_BIGENDIAN
00075 #define NATIVE_ENDIAN "big"
00076 #else
00077 #define NATIVE_ENDIAN "little"
00078 #endif
00079 
/*
 * Default values for the front-end options. Most are turned into the
 * default-value strings of waveform_to_cepstral_command_line_macro() via
 * ARG_STRINGIFY(), so their spelling here is what ends up in the string.
 */
/* Default for -samprate. NOTE(review): presumably in Hz -- confirm. */
00081 #define DEFAULT_SAMPLING_RATE 16000
00082 
/* Default for -frate. NOTE(review): presumably frames per second -- confirm. */
00083 #define DEFAULT_FRAME_RATE 100
00084 
/*
 * Not referenced by the option table in this header. The value equals
 * DEFAULT_SAMPLING_RATE / DEFAULT_FRAME_RATE (16000 / 100).
 * NOTE(review): presumably samples per frame shift -- confirm.
 */
00086 #define DEFAULT_FRAME_SHIFT 160
00087 
/* Default for -wlen ("Hamming window length"). NOTE(review): presumably seconds -- confirm. */
00088 #define DEFAULT_WINDOW_LENGTH 0.025625 
00089 
/* Default for -nfft ("Size of FFT"). */
00090 #define DEFAULT_FFT_SIZE 512
00091 
/* Default for -ncep ("Number of cep coefficients"). */
00092 #define DEFAULT_NUM_CEPSTRA 13
00093 
/* Default for -nfilt ("Number of filter banks"). */
00094 #define DEFAULT_NUM_FILTERS 40
00095 
/* Default for -lowerf ("Lower edge of filters"). NOTE(review): presumably Hz -- confirm. */
00096 #define DEFAULT_LOWER_FILT_FREQ 133.33334
00097 
/* Default for -upperf ("Upper edge of filters"). NOTE(review): presumably Hz -- confirm. */
00098 #define DEFAULT_UPPER_FILT_FREQ 6855.4976
00099 
/* Default for -alpha ("Preemphasis parameter"). */
00100 #define DEFAULT_PRE_EMPHASIS_ALPHA 0.97
00101 
/* Default for -warp_type; used directly as a string literal, without ARG_STRINGIFY(). */
00102 #define DEFAULT_WARP_TYPE "inverse_linear"
00103 
/*
 * Default for -seed; the option's help text says a negative value means the
 * library picks its own seed.
 * NOTE(review): deliberately left unparenthesized -- it is passed to
 * ARG_STRINGIFY(), and wrapping it in parentheses would presumably change
 * the resulting default string. Confirm before "fixing".
 */
00104 #define SEED  -1
00105 
/*
 * Expands to a comma-separated list of brace initializers, one per front-end
 * command-line option. Each entry has four fields, in order: option name,
 * an ARG_* type constant, the default value as a string (or NULL for
 * -warp_params), and a help string.
 * The last entry (-verbose) has no trailing comma, so the caller supplies
 * any separator or terminator that follows the expansion.
 * NOTE(review): presumably meant to be placed inside an arg_t array
 * initializer (arg_t and the ARG_* names come from cmd_ln.h, not shown
 * here) -- confirm against the implementation of fe_get_args().
 */
00106 #define waveform_to_cepstral_command_line_macro() \
00107   { "-logspec", \
00108     ARG_BOOLEAN, \
00109     "no", \
00110     "Write out logspectral files instead of cepstra" }, \
00111    \
00112   { "-smoothspec", \
00113     ARG_BOOLEAN, \
00114     "no", \
00115     "Write out cepstral-smoothed logspectral files" }, \
00116    \
00117   { "-transform", \
00118     ARG_STRING, \
00119     "legacy", \
00120     "Which type of transform to use to calculate cepstra (legacy, dct, or htk)" }, \
00121    \
00122   { "-alpha", \
00123     ARG_FLOAT32, \
00124     ARG_STRINGIFY(DEFAULT_PRE_EMPHASIS_ALPHA), \
00125     "Preemphasis parameter" }, \
00126    \
00127   { "-samprate", \
00128     ARG_FLOAT32, \
00129     ARG_STRINGIFY(DEFAULT_SAMPLING_RATE), \
00130     "Sampling rate" }, \
00131    \
00132   { "-frate", \
00133     ARG_INT32, \
00134     ARG_STRINGIFY(DEFAULT_FRAME_RATE), \
00135     "Frame rate" }, \
00136    \
00137   { "-wlen", \
00138     ARG_FLOAT32, \
00139     ARG_STRINGIFY(DEFAULT_WINDOW_LENGTH), \
00140     "Hamming window length" }, \
00141    \
00142   { "-nfft", \
00143     ARG_INT32, \
00144     ARG_STRINGIFY(DEFAULT_FFT_SIZE), \
00145     "Size of FFT" }, \
00146    \
00147   { "-nfilt", \
00148     ARG_INT32, \
00149     ARG_STRINGIFY(DEFAULT_NUM_FILTERS), \
00150     "Number of filter banks" }, \
00151    \
00152   { "-lowerf", \
00153     ARG_FLOAT32, \
00154     ARG_STRINGIFY(DEFAULT_LOWER_FILT_FREQ), \
00155     "Lower edge of filters" }, \
00156    \
00157   { "-upperf", \
00158     ARG_FLOAT32, \
00159     ARG_STRINGIFY(DEFAULT_UPPER_FILT_FREQ), \
00160     "Upper edge of filters" }, \
00161    \
00162   { "-unit_area", \
00163     ARG_BOOLEAN, \
00164     "yes", \
00165     "Normalize mel filters to unit area" }, \
00166    \
00167   { "-round_filters", \
00168     ARG_BOOLEAN, \
00169     "yes", \
00170     "Round mel filter frequencies to DFT points" }, \
00171    \
00172   { "-ncep", \
00173     ARG_INT32, \
00174     ARG_STRINGIFY(DEFAULT_NUM_CEPSTRA), \
00175     "Number of cep coefficients" }, \
00176    \
00177   { "-doublebw", \
00178     ARG_BOOLEAN, \
00179     "no", \
00180     "Use double bandwidth filters (same center freq)" }, \
00181    \
00182   { "-lifter", \
00183     ARG_INT32, \
00184     "0", \
00185     "Length of sin-curve for liftering, or 0 for no liftering." }, \
00186    \
00187   { "-input_endian", \
00188     ARG_STRING, \
00189     NATIVE_ENDIAN, \
00190     "Endianness of input data, big or little, ignored if NIST or MS Wav" }, \
00191    \
00192   { "-warp_type", \
00193     ARG_STRING, \
00194     DEFAULT_WARP_TYPE, \
00195     "Warping function type (or shape)" }, \
00196    \
00197   { "-warp_params", \
00198     ARG_STRING, \
00199     NULL, \
00200     "Parameters defining the warping function" }, \
00201    \
00202   { "-dither", \
00203     ARG_BOOLEAN, \
00204     "no", \
00205     "Add 1/2-bit noise" }, \
00206    \
00207   { "-seed", \
00208     ARG_INT32, \
00209     ARG_STRINGIFY(SEED), \
00210     "Seed for random number generator; if less than zero, pick our own" }, \
00211    \
00212   { "-remove_dc", \
00213     ARG_BOOLEAN, \
00214     "no", \
00215     "Remove DC offset from each frame" }, \
00216                                           \
00217   { "-verbose", \
00218     ARG_BOOLEAN, \
00219     "no", \
00220     "Show input filenames" } \
00221   
00222   
/*
 * mfcc_t is the numeric type used for feature values in this API, chosen at
 * compile time, together with helper macros that hide the difference:
 *   FLOAT2MFCC(x)     convert a floating-point value to mfcc_t
 *   MFCC2FLOAT(x)     convert an mfcc_t back to floating point
 *   MFCCMUL(a,b)      multiply two mfcc_t values
 *   MFCCLN(x,in,out)  logarithm of x
 */
00223 #ifdef FIXED_POINT
00224 
/* Fixed-point build: mfcc_t is fixed32 and every helper defers to the
 * FLOAT2FIX / FIX2FLOAT / FIXMUL / FIXLN_ANY macros (presumably from
 * fixpoint.h -- confirm). */
00225 typedef fixed32 mfcc_t;
00226 
00228 #define FLOAT2MFCC(x) FLOAT2FIX(x)
00229 
00230 #define MFCC2FLOAT(x) FIX2FLOAT(x)
00231 
00232 #define MFCCMUL(a,b) FIXMUL(a,b)
00233 #define MFCCLN(x,in,out) FIXLN_ANY(x,in,out)
00234 #else /* !FIXED_POINT */
00235 
/* Floating-point build: mfcc_t is float32, the conversions are identity
 * macros and MFCCMUL is an ordinary multiplication. */
00237 typedef float32 mfcc_t;
00239 #define FLOAT2MFCC(x) (x)
00240 
00241 #define MFCC2FLOAT(x) (x)
00242 
00243 #define MFCCMUL(a,b) ((a)*(b))
/* The in/out arguments are ignored here; only x is passed to log().
 * NOTE(review): this header does not visibly include <math.h>, so users of
 * MFCCLN presumably need log() declared some other way -- confirm. */
00244 #define MFCCLN(x,in,out) log(x)
00245 #endif /* !FIXED_POINT */
00246 
/* Front-end handle type. struct fe_s is only forward-declared in this
 * header, so callers handle fe_t through pointers. */
00250 typedef struct fe_s fe_t;
00251 
/*
 * Status codes for the front end: zero means success, every error is a
 * distinct negative value from -1 down to -10.
 * NOTE(review): the prototypes in this header return plain int; presumably
 * those ints carry these codes -- confirm per function.
 */
00255 enum fe_error_e {
00256         FE_SUCCESS = 0,
00257         FE_OUTPUT_FILE_SUCCESS  = 0, /* same value as FE_SUCCESS */
00258         FE_CONTROL_FILE_ERROR = -1,
00259         FE_START_ERROR = -2,
00260         FE_UNKNOWN_SINGLE_OR_BATCH = -3,
00261         FE_INPUT_FILE_OPEN_ERROR = -4,
00262         FE_INPUT_FILE_READ_ERROR = -5,
00263         FE_MEM_ALLOC_ERROR = -6,
00264         FE_OUTPUT_FILE_WRITE_ERROR = -7,
00265         FE_OUTPUT_FILE_OPEN_ERROR = -8,
00266         FE_ZERO_ENERGY_ERROR = -9,
00267         FE_INVALID_PARAM_ERROR =  -10
00268 };
00269 
/*
 * Takes no arguments and returns an fe_t pointer.
 * NOTE(review): presumably builds a front end from a global/default
 * configuration; the original doc comment is absent from this listing --
 * confirm behaviour and failure return in the implementation.
 */
00277 SPHINXBASE_EXPORT
00278 fe_t* fe_init_auto(void);
00279 
/*
 * Returns a pointer to const arg_t.
 * NOTE(review): presumably the option table generated from
 * waveform_to_cepstral_command_line_macro() -- confirm.
 */
00287 SPHINXBASE_EXPORT
00288 arg_t const *fe_get_args(void);
00289 
/*
 * Like fe_init_auto() in return type, but takes an explicit cmd_ln_t
 * configuration.
 * NOTE(review): whether the returned object keeps a reference to config is
 * not visible here -- confirm ownership.
 */
00300 SPHINXBASE_EXPORT
00301 fe_t *fe_init_auto_r(cmd_ln_t *config);
00302 
/*
 * Returns a cmd_ln_t pointer for the given front end.
 * NOTE(review): presumably the configuration passed at init time; caller
 * ownership is not visible here -- confirm.
 */
00310 SPHINXBASE_EXPORT
00311 cmd_ln_t *fe_get_config(fe_t *fe);
00312 
/*
 * Takes a front end and returns int.
 * NOTE(review): presumably resets per-utterance state and returns an
 * fe_error_e code -- confirm.
 */
00317 SPHINXBASE_EXPORT
00318 int fe_start_utt(fe_t *fe);
00319 
/*
 * Returns an int for the given front end.
 * NOTE(review): presumably the number of values in one output feature
 * vector -- confirm.
 */
00332 SPHINXBASE_EXPORT
00333 int fe_get_output_size(fe_t *fe);
00334 
/*
 * Returns nothing; both results come back through the non-const int
 * pointers out_frame_shift and out_frame_size.
 * NOTE(review): presumably both are sample counts, and whether NULL is
 * accepted for either pointer is not visible here -- confirm.
 */
00347 SPHINXBASE_EXPORT
00348 void fe_get_input_size(fe_t *fe, int *out_frame_shift,
00349                        int *out_frame_size);
00350 
/*
 * Takes a front end plus two output pointers (a feature buffer and a frame
 * count) and returns int.
 * NOTE(review): presumably flushes any buffered samples at the end of an
 * utterance and reports how many frames were written -- confirm.
 */
00365 SPHINXBASE_EXPORT
00366 int fe_end_utt(fe_t *fe, mfcc_t *out_cepvector, int32 *out_nframes);
00367 
/*
 * Takes and returns an fe_t pointer.
 * NOTE(review): the name suggests reference counting paired with fe_free()
 * -- confirm.
 */
00373 SPHINXBASE_EXPORT
00374 fe_t *fe_retain(fe_t *fe);
00375 
/*
 * Takes a front end and returns int.
 * NOTE(review): the meaning of the return value (status code versus
 * remaining reference count) is not visible here -- confirm.
 */
00383 SPHINXBASE_EXPORT
00384 int fe_free(fe_t *fe);
00385 
/*
 * Takes read-only 16-bit samples (spch, nsamps of them) and an output
 * feature pointer; returns int.
 * NOTE(review): presumably processes exactly one frame into out_cep --
 * confirm required nsamps and buffer size.
 * Note that nsamps is int32 here but size_t in the two functions below.
 */
00394 SPHINXBASE_EXPORT
00395 int fe_process_frame(fe_t *fe, int16 const *spch,
00396                      int32 nsamps, mfcc_t *out_cep);
00397 
/*
 * Streaming-style entry point: the sample pointer, sample count and frame
 * count are all passed by address.
 * NOTE(review): the inout_ prefixes suggest each is read and then updated
 * to reflect what was consumed/produced; buf_cep is presumably a
 * caller-provided array of frame buffers -- confirm.
 */
00445 SPHINXBASE_EXPORT
00446 int fe_process_frames(fe_t *fe,
00447                       int16 const **inout_spch,
00448                       size_t *inout_nsamps,
00449                       mfcc_t **buf_cep,
00450                       int32 *inout_nframes);
00451 
/*
 * Takes read-only samples and returns results through cep_block (pointer to
 * a 2-D mfcc_t array pointer) and nframes.
 * NOTE(review): presumably allocates *cep_block, which the caller releases
 * with fe_free_2d() -- confirm ownership.
 */
00467 SPHINXBASE_EXPORT
00468 int fe_process_utt(fe_t *fe,  
00469                    int16 const *spch, 
00470                    size_t nsamps, 
00471                    mfcc_t ***cep_block, 
00472                    int32 *nframes 
00473         );
00474 
/*
 * Takes an untyped pointer and returns nothing.
 * NOTE(review): presumably frees a 2-D array such as the one returned via
 * fe_process_utt()'s cep_block -- confirm.
 */
00478 SPHINXBASE_EXPORT
00479 void fe_free_2d(void *arr);
00480 
/*
 * Takes nframes rows of mfcc_t input and float32 output; returns int.
 * NOTE(review): presumably converts each value with MFCC2FLOAT; whether
 * input and output may alias is not visible here -- confirm.
 */
00484 SPHINXBASE_EXPORT
00485 int fe_mfcc_to_float(fe_t *fe,
00486                      mfcc_t **input,
00487                      float32 **output,
00488                      int32 nframes);
00489 
/*
 * Mirror of fe_mfcc_to_float(): float32 input rows, mfcc_t output rows.
 * NOTE(review): presumably converts each value with FLOAT2MFCC -- confirm.
 */
00493 SPHINXBASE_EXPORT
00494 int fe_float_to_mfcc(fe_t *fe,
00495                      float32 **input,
00496                      mfcc_t **output,
00497                      int32 nframes);
00498 
/*
 * Takes one read-only frame (fr_spec) and writes one frame (fr_cep);
 * returns int.
 * NOTE(review): presumably converts a log spectrum to cepstra using the
 * configured transform; the vector lengths are not visible here -- confirm.
 */
00522 SPHINXBASE_EXPORT
00523 int fe_logspec_to_mfcc(fe_t *fe,  
00524                        const mfcc_t *fr_spec, 
00525                        mfcc_t *fr_cep 
00526         );
00527 
/*
 * Same parameter shape as fe_logspec_to_mfcc().
 * NOTE(review): the name suggests a type-II DCT from log spectrum to
 * cepstrum -- confirm.
 */
00536 SPHINXBASE_EXPORT
00537 int fe_logspec_dct2(fe_t *fe,  
00538                     const mfcc_t *fr_spec, 
00539                     mfcc_t *fr_cep 
00540         );
00541 
/*
 * Opposite direction: reads fr_cep (const) and writes fr_spec.
 * NOTE(review): the name suggests a type-III DCT from cepstrum back to log
 * spectrum -- confirm.
 */
00550 SPHINXBASE_EXPORT
00551 int fe_mfcc_dct3(fe_t *fe,  
00552                  const mfcc_t *fr_cep, 
00553                  mfcc_t *fr_spec 
00554         );
00555 
00556 #ifdef __cplusplus
00557 }
00558 #endif
00559 
00560 
00561 #endif

Generated on Thu Jan 6 2011 for SphinxBase by  doxygen 1.7.1