• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

include/cont_ad.h

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2001 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * cont_ad.h -- Continuous A/D listening and silence filtering module.
00039  * 
00040  * **********************************************
00041  * CMU ARPA Speech Project
00042  *
00043  * Copyright (c) 1996 Carnegie Mellon University.
00044  * ALL RIGHTS RESERVED.
00045  * **********************************************
00046  * 
00047  * HISTORY
00048  * 
00049  * 13-Jul-98    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00050  *              Added spf and adbufsize to cont_ad_t in order to support variable
00051  *              frame sizes depending on audio sampling rate.
00052  * 
00053  * 30-Jun-98    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00054  *              Added FILE* argument to cont_ad_powhist_dump().
00055  * 
00056  * 16-Jan-98    Paul Placeway (pwp@cs.cmu.edu) at Carnegie Mellon University
00057  *              Changed to use dB instead of the weird power measure.
00058  *              Added most system parameters to cont_ad_t instead of hardwiring
00059  *              them in cont_ad.c.
00060  *              Added cont_ad_set_params() and cont_ad_get_params().
00061  * 
00062  * 28-Jul-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00063  *              Added cont_ad_t.siglvl.
00064  * 
00065  * 27-Jun-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00066  *              Added the option for cont_ad_read to return -1 on EOF.
00067  * 
00068  * 21-Jun-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00069  *              Added cont_ad_set_thresh().
00070  * 
00071  * 20-Jun-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00072  *              Separated thresholds for speech and silence.
00073  * 
00074  * 17-Jun-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00075  *              Created, based loosely on Steve Reed's original implementation.
00076  */
00077 
00078 
00079 #ifndef _CONT_AD_H_
00080 #define _CONT_AD_H_
00081 
00082 /* Win32/WinCE DLL gunk */
00083 #include <sphinxbase_export.h>
00084 #include <prim_type.h>
00085 #include <ad.h>
00086 
00114 #include <stdio.h>
00115 
00116 
00117 #ifdef __cplusplus
00118 extern "C" {
00119 #endif
00120 #if 0
00121 /* Fool Emacs. */
00122 }
00123 #endif
00124 
00125 /* States of continuous listening module */
00126 #define CONT_AD_STATE_SIL       0 /* silence state; presumably a value of cont_ad_t.state */
00127 #define CONT_AD_STATE_SPEECH    1 /* speech state; presumably a value of cont_ad_t.state */
00128 
00129 
/**
 * Self-referential node holding a start frame, a frame count and a link to
 * another node of the same type.
 * NOTE(review): the name suggests "speech segment", and cont_ad_t keeps
 * head/tail pointers of this type -- confirm exact semantics in cont_ad.c.
 */
00135 typedef struct spseg_s {
00136     int32 startfrm;     /* presumably the index of the segment's first frame */
00137     int32 nfrm;         /* presumably the number of frames in the segment */
00138     struct spseg_s *next;       /* link to the following node */
00139 } spseg_t;
00140 
00141 
/**
 * Per-session object of the continuous listening / silence filtering module.
 * Per the in-struct note below, clients may read state, read_ts and siglvl
 * (updated by cont_ad_read()); all other members should be left alone.
 * NOTE(review): member comments worded "presumably" are inferred from member
 * names and this header's HISTORY notes only -- confirm against cont_ad.c.
 */
00151 typedef struct {
00152     /* Function to be called for obtaining A/D data (see prototype for ad_read in ad.h) */
00153     int32 (*adfunc)(ad_rec_t *ad, int16 *buf, int32 max);
00154     ad_rec_t *ad;       /* A/D handle; presumably the one handed to adfunc */
00156     int32 rawmode;      /* presumably nonzero when created via cont_ad_init_rawmode() */
00158     int16 *adbuf;       /* 16-bit sample buffer; presumably sized by adbufsize */
00160     /* **************************************************************************
00161      * state, read_ts, and siglvl are provided for READ-ONLY use by client
00162      * applications, and are updated by calls to cont_ad_read() (see below).  All
00163      * other variables should be left alone.
00164      */
00165     int32 state;        /* read-only for clients; presumably CONT_AD_STATE_SIL or CONT_AD_STATE_SPEECH */
00167     int32 read_ts;      /* read-only for clients; presumably a timestamp (units unverified) */
00171     int32 seglen;       /* presumably a segment length (units unverified) */
00175     int32 siglvl;       /* read-only for clients; presumably the current signal level */
00178     /* ************************************************************************ */
00179     
00180     int32 sps;          /* presumably samples per second */
00183     int32 eof;          /* presumably an end-of-input flag (cf. cont_ad_read returning -1 on EOF) */
00185     int32 spf;          /* presumably samples per frame; added to support variable frame sizes */
00186     int32 adbufsize;    /* presumably the size of adbuf; added together with spf */
00187     int32 prev_sample;  /* presumably the previous sample value */
00188     int32 headfrm;      /* presumably the index of the first buffered frame */
00189     int32 n_frm;        /* presumably the number of buffered frames */
00190     int32 n_sample;     /* presumably the number of buffered samples */
00191     int32 tot_frm;      /* presumably the total number of frames seen */
00192     int32 noise_level;  /* presumably the estimated noise level */
00194     int32 *pow_hist;    /* presumably a power histogram (cf. cont_ad_powhist_dump) */
00195     char *frm_pow;      /* presumably per-frame power values */
00197     int32 auto_thresh;  /* presumably enables automatic threshold updating */
00198     int32 delta_sil;    /* same name as a cont_ad_set_params()/cont_ad_get_params() argument */
00199     int32 delta_speech; /* same name as a cont_ad_set_params()/cont_ad_get_params() argument */
00200     int32 min_noise;    /* same name as a cont_ad_set_params()/cont_ad_get_params() argument */
00201     int32 max_noise;    /* same name as a cont_ad_set_params()/cont_ad_get_params() argument */
00202     int32 winsize;      /* same name as a cont_ad_set_params()/cont_ad_get_params() argument */
00203     int32 speech_onset; /* same name as a cont_ad_set_params()/cont_ad_get_params() argument */
00204     int32 sil_onset;    /* same name as a cont_ad_set_params()/cont_ad_get_params() argument */
00205     int32 leader;       /* same name as a cont_ad_set_params()/cont_ad_get_params() argument */
00206     int32 trailer;      /* same name as a cont_ad_set_params()/cont_ad_get_params() argument */
00208     int32 thresh_speech; /* presumably the speech threshold (HISTORY: thresholds were separated) */
00210     int32 thresh_sil;   /* presumably the silence threshold (HISTORY: thresholds were separated) */
00212     int32 thresh_update; /* presumably controls how often thresholds are updated */
00213     float32 adapt_rate; /* same name as a cont_ad_set_params()/cont_ad_get_params() argument */
00217     int32 tail_state;   /* NOTE(review): meaning not derivable from this header */
00220     int32 win_startfrm; /* presumably the first frame of the analysis window */
00221     int32 win_validfrm; /* presumably the number of valid frames in the window */
00222     int32 n_other;      /* NOTE(review): meaning not derivable from this header */
00224     spseg_t *spseg_head; /* presumably the first node of the spseg_t chain */
00225     spseg_t *spseg_tail; /* presumably the last node of the spseg_t chain */
00227     FILE *rawfp;        /* presumably the file given to cont_ad_set_rawfp() */
00231     FILE *logfp;        /* presumably the file given to cont_ad_set_logfp() */
00236     int32 n_calib_frame; /* presumably the frame count used for calibration */
00237 } cont_ad_t;
00238 
00239 
/**
 * Create a cont_ad_t object; cont_ad_reset() and cont_ad_close() document
 * their argument as the "object pointer from cont_ad_init".
 * NOTE(review): ownership of `ad`, NULL handling and the failure return are
 * not visible in this header -- confirm in cont_ad.c.
 */
00255 SPHINXBASE_EXPORT
00256 cont_ad_t *cont_ad_init (ad_rec_t *ad,  /* A/D handle, same type adfunc receives */
00257                          int32 (*adfunc)(ad_rec_t *ad, int16 *buf, int32 max) /* reader with the same signature as cont_ad_t.adfunc */
00261                          );
00262 
/**
 * Variant of cont_ad_init() taking the same arguments and returning the same
 * object type.
 * NOTE(review): presumably sets cont_ad_t.rawmode; what "raw mode" changes is
 * not visible in this header -- confirm in cont_ad.c.
 */
00269 SPHINXBASE_EXPORT
00270 cont_ad_t *cont_ad_init_rawmode (ad_rec_t *ad,
00271                                  int32 (*adfunc)(ad_rec_t *ad, int16 *buf, int32 max));
00272 
00273 
/**
 * Read through the continuous listening module into a caller-supplied buffer.
 * The note inside cont_ad_t states that state, read_ts and siglvl are updated
 * by calls to this function; HISTORY mentions an option to return -1 on EOF.
 * NOTE(review): the meaning of other return values and any constraint on
 * `max` are not visible in this header -- confirm in cont_ad.c.
 */
00302 SPHINXBASE_EXPORT
00303 int32 cont_ad_read (cont_ad_t *r,       /* cont_ad_t object to read through */
00304                     int16 *buf,         /* caller buffer of 16-bit values; presumably receives samples */
00307                     int32 max           /* presumably the capacity of buf in samples */
00310         );
00311 
/**
 * Query the given cont_ad_t object and return an int32.
 * NOTE(review): presumably the free space left in the internal buffer; units
 * and error values are not visible in this header.
 */
00315 SPHINXBASE_EXPORT
00316 int32 cont_ad_buffer_space(cont_ad_t *r);
00317 
/**
 * Calibration entry point taking only the cont_ad_t object.
 * NOTE(review): presumably reads audio itself through the object's adfunc and
 * estimates noise/threshold levels; return-value meaning is not visible here.
 */
00330 SPHINXBASE_EXPORT
00331 int32 cont_ad_calib (cont_ad_t *cont    /* cont_ad_t object to calibrate */
00332                      );
00333 
/**
 * Calibration entry point that, unlike cont_ad_calib(), also takes a
 * caller-supplied int16 buffer and a count.
 * NOTE(review): presumably meant to be called repeatedly with fresh audio
 * until calibration completes; return-value meaning is not visible here.
 */
00345 SPHINXBASE_EXPORT
00346 int32 cont_ad_calib_loop (cont_ad_t *r, int16 *buf, int32 max); 
00347 
/**
 * Query the given cont_ad_t object and return an int32.
 * NOTE(review): presumably the amount of audio calibration needs; units are
 * not visible in this header.
 */
00359 SPHINXBASE_EXPORT
00360 int32 cont_ad_calib_size(cont_ad_t *r);
00361 
/**
 * Set thresholds on a cont_ad_t object. HISTORY records that this function
 * was added on 21-Jun-96 and that speech and silence thresholds were
 * separated on 20-Jun-96.
 * NOTE(review): valid ranges, units and return-value meaning are not visible
 * in this header.
 */
00374 SPHINXBASE_EXPORT
00375 int32 cont_ad_set_thresh (cont_ad_t *cont,      /* cont_ad_t object to modify */
00376                           int32 sil,    /* presumably the silence threshold */
00377                           int32 sp      /* presumably the speech threshold */
00378                           );
00379 
00380 
/**
 * Set tunable parameters on a cont_ad_t object. Each argument after `r` has
 * the same name and type as a cont_ad_t member; HISTORY notes these system
 * parameters were moved into cont_ad_t instead of being hardwired.
 * NOTE(review): validation rules and return-value meaning are not visible in
 * this header.
 */
00388 SPHINXBASE_EXPORT
00389 int32 cont_ad_set_params (cont_ad_t *r, int32 delta_sil, int32 delta_speech,
00390                           int32 min_noise, int32 max_noise,
00391                           int32 winsize, int32 speech_onset, int32 sil_onset,
00392                           int32 leader, int32 trailer,
00393                           float32 adapt_rate);
00394 
/**
 * Counterpart of cont_ad_set_params(): takes a pointer for each of the same
 * parameter names.
 * NOTE(review): presumably writes the current values through the pointers;
 * whether NULL pointers are accepted and the return-value meaning are not
 * visible in this header.
 */
00402 SPHINXBASE_EXPORT
00403 int32 cont_ad_get_params (cont_ad_t *r, int32 *delta_sil, int32 *delta_speech,
00404                           int32 *min_noise, int32 *max_noise,
00405                           int32 *winsize, int32 *speech_onset, int32 *sil_onset,
00406                           int32 *leader, int32 *trailer,
00407                           float32 *adapt_rate);
00408 
/**
 * Reset a cont_ad_t object.
 * NOTE(review): which members are reset and the return-value meaning are not
 * visible in this header.
 */
00413 SPHINXBASE_EXPORT
00414 int32 cont_ad_reset (cont_ad_t *cont);  /* In: Object pointer from cont_ad_init */
00415 
00416 
/**
 * Close a cont_ad_t object.
 * NOTE(review): presumably releases the object so it must not be used
 * afterwards; whether the attached A/D handle is also closed is not visible
 * in this header.
 */
00420 SPHINXBASE_EXPORT
00421 int32 cont_ad_close (cont_ad_t *cont);  /* In: Object pointer from cont_ad_init */
00422 
00423 
/**
 * Dump routine taking an output stream and a cont_ad_t object; HISTORY notes
 * the FILE* argument was added on 30-Jun-98.
 * NOTE(review): presumably writes the contents of cont_ad_t.pow_hist to `fp`;
 * the output format is not visible in this header.
 */
00427 SPHINXBASE_EXPORT
00428 void cont_ad_powhist_dump (FILE *fp, cont_ad_t *cont);
00429 
00430 
/**
 * Detach operation on a cont_ad_t object; see cont_ad_attach() for the
 * counterpart that supplies an A/D handle and read function.
 * NOTE(review): presumably clears the object's ad/adfunc members;
 * return-value meaning is not visible in this header.
 */
00435 SPHINXBASE_EXPORT
00436 int32 cont_ad_detach (cont_ad_t *c);
00437 
00438 
/**
 * Attach operation: takes a cont_ad_t object, an A/D handle and a read
 * function whose signature matches cont_ad_t.adfunc.
 * NOTE(review): presumably stores `a` and `func` in the object's ad/adfunc
 * members; return-value meaning is not visible in this header.
 */
00444 SPHINXBASE_EXPORT
00445 int32 cont_ad_attach (cont_ad_t *c, ad_rec_t *a, int32 (*func)(ad_rec_t *, int16 *, int32));
00446 
00447 
/**
 * Select the file to which raw audio data is dumped, or stop dumping by
 * passing NULL (per the parameter comments below).
 * NOTE(review): who owns/closes `fp` and the return-value meaning are not
 * visible in this header.
 */
00459 SPHINXBASE_EXPORT
00460 int32 cont_ad_set_rawfp (cont_ad_t *c,  /* The cont_ad object being addressed */
00461                          FILE *fp);     /* File to which raw audio data is to
00462                                            be dumped; NULL to stop dumping. */
00463 
/**
 * Select the file to which logs are written, or stop logging by passing NULL
 * (per the parameter comments below).
 * NOTE(review): who owns/closes `fp` and the return-value meaning are not
 * visible in this header.
 */
00471 SPHINXBASE_EXPORT
00472 int32 cont_ad_set_logfp (cont_ad_t *c,  /* The cont_ad object being addressed */
00473                          FILE *fp);     /* File to which logs are written;
00474                                            NULL to stop logging. */
00475 
/**
 * Threshold setter taking a cont_ad_t object plus silence and speech values.
 * NOTE(review): the name lacks the "cont_ad_" prefix used by every other
 * function here, and its relationship to cont_ad_set_thresh() is not visible
 * in this header -- confirm in cont_ad.c before relying on either.
 */
00484 SPHINXBASE_EXPORT
00485 int32 cont_set_thresh(cont_ad_t *r, int32 silence, int32 speech);
00486 
00487 #ifdef __cplusplus
00488 }
00489 #endif
00490 
00491 
00492 #endif

Generated on Thu Jan 6 2011 for SphinxBase by  doxygen 1.7.1