• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

src/sphinx_adtools/cont_adseg.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2001 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * cont_adseg.c -- Continuously listen and segment input speech into utterances.
00039  * 
00040  * HISTORY
00041  * 
00042  * 27-Jun-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00043  *              Created.
00044  */
00045 
00046 #include <stdio.h>
00047 #include <stdlib.h>
00048 #include <string.h>
00049 #include <assert.h>
00050 #include <math.h>
00051 
00052 #include <prim_type.h>
00053 #include <ad.h>
00054 #include <cont_ad.h>
00055 #include <err.h>
00056 
00057 /*
00058  * Segment raw A/D input data into utterances whenever silence region of given
00059  * duration is encountered.
00060  * Utterances are written to files named 0001.raw, 0002.raw, 0003.raw, etc.
00061  */
00062 int
00063 main(int32 argc, char **argv)
00064 {
00065     ad_rec_t *ad;
00066     cont_ad_t *cont;
00067     int32 k, uttno, ts, uttlen, sps, endsilsamples;
00068     float endsil;
00069     int16 buf[4096];
00070     FILE *fp;
00071     char file[1024];
00072 
00073     if ((argc != 3) ||
00074         (sscanf(argv[1], "%d", &sps) != 1) ||
00075         (sscanf(argv[2], "%f", &endsil) != 1) || (endsil <= 0.0)) {
00076         E_FATAL("Usage: %s <sampling-rate> <utt-end-sil(sec)>\n", argv[0]);
00077     }
00078 
00079     /* Convert desired min. inter-utterance silence duration to #samples */
00080     endsilsamples = (int32) (endsil * sps);
00081 
00082     /* Open raw A/D device */
00083     if ((ad = ad_open_sps(sps)) == NULL)
00084         E_FATAL("ad_open_sps(%d) failed\n", sps);
00085 
00086     /* Associate new continuous listening module with opened raw A/D device */
00087     if ((cont = cont_ad_init(ad, ad_read)) == NULL)
00088         E_FATAL("cont_ad_init failed\n");
00089 
00090     /* Calibrate continuous listening for background noise/silence level */
00091     printf("Calibrating ...");
00092     fflush(stdout);
00093     ad_start_rec(ad);
00094     if (cont_ad_calib(cont) < 0)
00095         printf(" failed\n");
00096     else
00097         printf(" done\n");
00098 
00099     /* Forever listen for utterances */
00100     printf("You may speak now\n");
00101     fflush(stdout);
00102     uttno = 0;
00103     for (;;) {
00104         /* Wait for beginning of next utterance; for non-silence data */
00105         while ((k = cont_ad_read(cont, buf, 4096)) == 0);
00106         if (k < 0)
00107             E_FATAL("cont_ad_read failed\n");
00108 
00109         /* Non-silence data received; open and write to new logging file */
00110         uttno++;
00111         sprintf(file, "%04d.raw", uttno);
00112         if ((fp = fopen(file, "wb")) == NULL)
00113             E_FATAL("fopen(%s,wb) failed\n", file);
00114         fwrite(buf, sizeof(int16), k, fp);
00115         uttlen = k;
00116         printf("Utterance %04d, logging to %s\n", uttno, file);
00117 
00118         /* Note current timestamp */
00119         ts = cont->read_ts;
00120 
00121         /* Read utterance data until a gap of at least 1 sec observed */
00122         for (;;) {
00123             if ((k = cont_ad_read(cont, buf, 4096)) < 0)
00124                 E_FATAL("cont_ad_read failed\n");
00125             if (k == 0) {
00126                 /*
00127                  * No speech data available; check current timestamp.  End of
00128                  * utterance if no non-silence data been read for at least 1 sec.
00129                  */
00130                 if ((cont->read_ts - ts) > endsilsamples)
00131                     break;
00132             }
00133             else {
00134                 /* Note timestamp at the end of most recently read speech data */
00135                 ts = cont->read_ts;
00136                 uttlen += k;
00137                 fwrite(buf, sizeof(int16), k, fp);
00138             }
00139         }
00140         fclose(fp);
00141 
00142         printf("\tUtterance %04d = %d samples (%.1fsec)\n\n",
00143                uttno, uttlen, (double) uttlen / (double) sps);
00144     }
00145 
00146     ad_stop_rec(ad);
00147     cont_ad_close(cont);
00148     ad_close(ad);
00149     return 0;
00150 }

Generated on Thu Jan 6 2011 for SphinxBase by  doxygen 1.7.1