spandsp 0.0.6
|
00001 /* 00002 * SpanDSP - a series of DSP components for telephony 00003 * 00004 * time_scale.h - Time scaling for linear speech data 00005 * 00006 * Written by Steve Underwood <steveu@coppice.org> 00007 * 00008 * Copyright (C) 2004 Steve Underwood 00009 * 00010 * All rights reserved. 00011 * 00012 * This program is free software; you can redistribute it and/or modify 00013 * it under the terms of the GNU Lesser General Public License version 2.1, 00014 * as published by the Free Software Foundation. 00015 * 00016 * This program is distributed in the hope that it will be useful, 00017 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00019 * GNU Lesser General Public License for more details. 00020 * 00021 * You should have received a copy of the GNU Lesser General Public 00022 * License along with this program; if not, write to the Free Software 00023 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 00024 */ 00025 00026 #if !defined(_SPANDSP_TIME_SCALE_H_) 00027 #define _SPANDSP_TIME_SCALE_H_ 00028 00029 /*! \page time_scale_page Time scaling speech 00030 \section time_scale_page_sec_1 What does it do? 00031 The time scaling module allows speech files to be played back at a 00032 different speed from the speed at which they were recorded. If this 00033 were done by simply speeding up or slowing down replay, the pitch of 00034 the voice would change, and sound very odd. This module keeps the pitch 00035 of the voice at its original level. 00036 00037 The speed of the voice may be altered over a wide range. However, the practical 00038 useful rates are between about half normal speed and twice normal speed. 00039 00040 \section time_scale_page_sec_2 How does it work? 00041 The time scaling module is based on the Pointer Interval Controlled 00042 OverLap and Add (PICOLA) method, developed by Morita Naotaka. 
00043 Mikio Ikeda has an excellent web page on this subject at 00044 http://keizai.yokkaichi-u.ac.jp/~ikeda/research/picola.html 00045 There is also working code there. This implementation uses 00046 exactly the same algorithms, but the code is a complete rewrite. 00047 Mikio's code batch processes files. This version works incrementally 00048 on streams, and allows multiple streams to be processed concurrently. 00049 00050 \section time_scale_page_sec_3 How do I use it? 00051 The output buffer must be big enough to hold the maximum number of samples which 00052 could result from the data in the input buffer, which is: 00053 00054 input_len*playout_rate + sample_rate/TIME_SCALE_MIN_PITCH + 1 00055 */ 00056 00057 /*! Audio time scaling descriptor. */ 00058 typedef struct time_scale_state_s time_scale_state_t; 00059 00060 #if defined(__cplusplus) 00061 extern "C" 00062 { 00063 #endif 00064 00065 /*! Initialise a time scale context. This must be called before the first 00066 use of the context, to initialise its contents. 00067 \brief Initialise a time scale context. 00068 \param s The time scale context. 00069 \param sample_rate The sample rate of the signal. 00070 \param playout_rate The ratio between the output speed and the input speed. 00071 \return A pointer to the context, or NULL if there was a problem. */ 00072 SPAN_DECLARE(time_scale_state_t *) time_scale_init(time_scale_state_t *s, int sample_rate, float playout_rate); 00073 00074 /*! \brief Release a time scale context. 00075 \param s The time scale context. 00076 \return 0 for OK, else -1. */ 00077 SPAN_DECLARE(int) time_scale_release(time_scale_state_t *s); 00078 00079 /*! \brief Free a time scale context. 00080 \param s The time scale context. 00081 \return 0 for OK, else -1. */ 00082 SPAN_DECLARE(int) time_scale_free(time_scale_state_t *s); 00083 00084 /*! Change the time scale rate. 00085 \brief Change the time scale rate. 00086 \param s The time scale context. 
00087 \param playout_rate The ratio between the output speed and the input speed. 00088 \return 0 if changed OK, else -1. */ 00089 SPAN_DECLARE(int) time_scale_rate(time_scale_state_t *s, float playout_rate); 00090 00091 /*! Find the maximum possible samples which could result from scaling the specified 00092 number of input samples, at the current playback rate. 00093 \brief Find the maximum possible output samples. 00094 \param s The time scale context. 00095 \param input_len The number of input samples. 00096 \return The maximum possible output samples. */ 00097 SPAN_DECLARE(int) time_scale_max_output_len(time_scale_state_t *s, int input_len); 00098 00099 /*! Time scale a chunk of audio samples. 00100 \brief Time scale a chunk of audio samples. 00101 \param s The time scale context. 00102 \param out The output audio sample buffer. This must be large enough to accept 00103 the longest possible result from processing the input data. See the 00104 algorithm documentation for how the longest possible result may be calculated. 00105 \param in The input audio sample buffer. 00106 \param len The number of input samples. 00107 \return The number of output samples. 00108 */ 00109 SPAN_DECLARE(int) time_scale(time_scale_state_t *s, int16_t out[], int16_t in[], int len); 00110 00111 #if defined(__cplusplus) 00112 } 00113 #endif 00114 00115 #endif 00116 /*- End of file ------------------------------------------------------------*/