spandsp
0.0.6
|
/*
 * SpanDSP - a series of DSP components for telephony
 *
 * fast_convert.h - Quick ways to convert floating point numbers to integers
 *
 * Written by Steve Underwood <steveu@coppice.org>
 *
 * Copyright (C) 2009 Steve Underwood
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 2.1,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#if !defined(_SPANDSP_FAST_CONVERT_H_)
#define _SPANDSP_FAST_CONVERT_H_

#if defined(__cplusplus)
extern "C"
{
#endif

/* The following code, to handle issues with lrint() and lrintf() on various
 * platforms, is adapted from similar code in libsndfile, which is:
 *
 * Copyright (C) 2001-2004 Erik de Castro Lopo <erikd@mega-nerd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 */

/*
 *    On Intel Pentium processors (especially PIII and probably P4), converting
 *    from float to int is very slow. To meet the C specs, the code produced by
 *    most C compilers targeting Pentium needs to change the FPU rounding mode
 *    before the float to int conversion is performed.
 *
 *    Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
 *    is this flushing of the pipeline which is so slow.
 *
 *    Fortunately the ISO C99 specification defines the functions lrint, lrintf,
 *    llrint and llrintf which fix this problem as a side effect.
 *
 *    On Unix-like systems, the configure process should have detected the
 *    presence of these functions. If they weren't found we have to replace them
 *    here with a standard C cast.
 */

/*
 *    The C99 prototypes for these functions are as follows:
 *
 *        float rintf(float x);
 *        double rint(double x);
 *        long int lrintf(float x);
 *        long int lrint(double x);
 *        long long int llrintf(float x);
 *        long long int llrint(double x);
 *
 *    The presence of the required functions is detected during the configure
 *    process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in
 *    the config file.
 *
 *    Every branch below provides lfastrint() and lfastrintf(): the quickest
 *    available double/float to long int conversion for that platform. Some
 *    branches round using the FPU's current rounding mode and others use a
 *    plain C cast, so results for non-integral inputs differ between platforms.
 */

#if defined(__CYGWIN__)  &&  defined(__i386__)
    /* This branch stores the result with "fistpl", which writes exactly 32 bits.
       It is therefore restricted to 32 bit x86, where long int is 32 bits wide.
       64 bit Cygwin builds use the generic GCC x86_64 code further down. */
#if !defined(__cplusplus)  &&  (__GNUC__ < 4)
    /*
     *    CYGWIN versions prior to 1.7.1 have lrint and lrintf functions, but
     *    they are slow and buggy:
     *        http://sourceware.org/ml/cygwin/2005-06/msg00153.html
     *        http://sourceware.org/ml/cygwin/2005-09/msg00047.html
     *    These replacement functions (pulled from the Public Domain MinGW
     *    math.h header) replace the native versions.
     */

    /* Replacement lrint(): convert a double to long int with a single x87 store. */
    static __inline__ long int lrint(double x)
    {
        long int retval;

        /* "t" places x on the top of the x87 stack; fistpl stores it as an
           integer and pops it, hence the "st" clobber. */
        __asm__ __volatile__
        (
            "fistpl %0"
            : "=m" (retval)
            : "t" (x)
            : "st"
        );

        return retval;
    }

    /* Replacement lrintf(): float flavour of the lrint() replacement above. */
    static __inline__ long int lrintf(float x)
    {
        long int retval;

        __asm__ __volatile__
        (
            "fistpl %0"
            : "=m" (retval)
            : "t" (x)
            : "st"
        );
        return retval;
    }
#endif

    /* The fastest way to convert is the equivalent of lrint() */
    static __inline__ long int lfastrint(double x)
    {
        long int retval;

        __asm__ __volatile__
        (
            "fistpl %0"
            : "=m" (retval)
            : "t" (x)
            : "st"
        );

        return retval;
    }

    /* Float flavour of lfastrint(). */
    static __inline__ long int lfastrintf(float x)
    {
        long int retval;

        __asm__ __volatile__
        (
            "fistpl %0"
            : "=m" (retval)
            : "t" (x)
            : "st"
        );
        return retval;
    }
#elif defined(__GNUC__)  ||  (defined(__SUNPRO_C)  &&  (__SUNPRO_C >= 0x0590))

#if defined(__i386__)
    /* These routines are guaranteed fast on an i386 machine. Using the built in
       lrint() and lrintf() should be similar, but they may not always be enabled.
       Sometimes, especially with "-O0", you might get slow calls to routines. */

    /* Convert a double to long int with a single x87 integer store. */
    static __inline__ long int lfastrint(double x)
    {
        long int retval;

        __asm__ __volatile__
        (
            "fistpl %0"
            : "=m" (retval)
            : "t" (x)
            : "st"
        );

        return retval;
    }

    /* Convert a float to long int with a single x87 integer store. */
    static __inline__ long int lfastrintf(float x)
    {
        long int retval;

        __asm__ __volatile__
        (
            "fistpl %0"
            : "=m" (retval)
            : "t" (x)
            : "st"
        );
        return retval;
    }
#elif defined(__x86_64__)
    /* On an x86_64 machine, the fastest thing seems to be a pure assignment from a
       double or float to an int. It looks like the design on the x86_64 took account
       of the default behaviour specified for C. */

    /* Convert a double to long int with a plain C cast. */
    static __inline__ long int lfastrint(double x)
    {
        return (long int) (x);
    }

    /* Convert a float to long int with a plain C cast. */
    static __inline__ long int lfastrintf(float x)
    {
        return (long int) (x);
    }
#elif defined(__ppc__)  ||  defined(__powerpc__)
    /* PowerPC: fctiw converts to an integer held in a floating point register,
       stfd stores that 8 byte register to memory, and the result is read back
       from the second word of the stored pair.
       NOTE(review): fctiw overwrites %1, which is declared as an input-only
       operand - confirm this is acceptable for the compilers in use. */

    /* Convert a double to long int using fctiw. */
    static __inline__ long int lfastrint(register double x)
    {
        int res[2];

        __asm__ __volatile__
        (
            "fctiw %1, %1\n\t"
            "stfd %1, %0"
            : "=m" (res)    /* Output */
            : "f" (x)       /* Input */
            : "memory"
        );

        return res[1];
    }

    /* Convert a float to long int using fctiw. */
    static __inline__ long int lfastrintf(register float x)
    {
        int res[2];

        __asm__ __volatile__
        (
            "fctiw %1, %1\n\t"
            "stfd %1, %0"
            : "=m" (res)    /* Output */
            : "f" (x)       /* Input */
            : "memory"
        );

        return res[1];
    }
#else
    /* Fallback routines, for unrecognised platforms */

    /* Convert a double to long int with a plain C cast. */
    static __inline__ long int lfastrint(double x)
    {
        return (long int) x;
    }

    /* Convert a float to long int with a plain C cast. */
    static __inline__ long int lfastrintf(float x)
    {
        return (long int) x;
    }
#endif

#elif defined(_M_IX86)
    /* Visual Studio i386 */
    /*
     *    Win32 doesn't seem to have the lrint() and lrintf() functions.
     *    Therefore implement inline versions of these functions here.
     */

    /* lrint() replacement: load x onto the x87 stack and store it as an integer. */
    __inline long int lrint(double x)
    {
        long int i;

        _asm
        {
            fld x
            fistp i
        };
        return i;
    }

    /* lrintf() replacement: float flavour of lrint() above. */
    __inline long int lrintf(float x)
    {
        long int i;

        _asm
        {
            fld x
            fistp i
        };
        return i;
    }

    /* rintf() replacement. There is no return statement: the rounded value is
       left on the top of the x87 stack by frndint. */
    __inline float rintf(float flt)
    {
        _asm
        {
            fld flt
            frndint
        }
    }

    /* rint() replacement. As with rintf(), the result is left on the x87 stack. */
    __inline double rint(double dbl)
    {
        _asm
        {
            fld dbl
            frndint
        }
    }

    /* Fast double to long int conversion; same instruction sequence as lrint(). */
    __inline long int lfastrint(double x)
    {
        long int i;

        _asm
        {
            fld x
            fistp i
        };
        return i;
    }

    /* Fast float to long int conversion; same instruction sequence as lrintf(). */
    __inline long int lfastrintf(float x)
    {
        long int i;

        _asm
        {
            fld x
            fistp i
        };
        return i;
    }
#elif defined(_M_X64)
    /* Visual Studio x86_64 */
    /* x86_64 machines will do best with a simple assignment. */
#include <intrin.h>

    /* lrint() replacement built on the SSE2 scalar double to integer conversion. */
    __inline long int lrint(double x)
    {
        /* _mm_load_sd() reads exactly one double. A packed load such as
           _mm_loadu_pd() would read 16 bytes from the address of an 8 byte
           parameter. */
        return (long int) _mm_cvtsd_si64x(_mm_load_sd((const double *) &x));
    }

    /* lrintf() replacement built on the SSE scalar float to integer conversion. */
    __inline long int lrintf(float x)
    {
        return _mm_cvt_ss2si(_mm_load_ss((const float *) &x));
    }

    /* Convert a double to long int with a plain C cast. */
    __inline long int lfastrint(double x)
    {
        return (long int) (x);
    }

    /* Convert a float to long int with a plain C cast. */
    __inline long int lfastrintf(float x)
    {
        return (long int) (x);
    }
#elif defined(__MWERKS__)  &&  defined(macintosh)
    /* This MacOS 9 solution was provided by Stephane Letz */

    /* Convert a double to long int using fctiw, then read the second stored word. */
    long int __inline__ lfastrint(register double x)
    {
        long int res[2];

        asm
        {
            fctiw x, x
            stfd x, res
        }
        return res[1];
    }

    /* Convert a float to long int using fctiw, then read the second stored word. */
    long int __inline__ lfastrintf(register float x)
    {
        long int res[2];

        asm
        {
            fctiw x, x
            stfd x, res
        }
        return res[1];
    }
#elif defined(__MACH__)  &&  defined(__APPLE__)  &&  (defined(__ppc__)  ||  defined(__powerpc__))
    /* For Apple Mac OS/X - do recent versions still need this? */

    /* Convert a double to long int using fctiw. */
    static __inline__ long int lfastrint(register double x)
    {
        int res[2];

        __asm__ __volatile__
        (
            "fctiw %1, %1\n\t"
            "stfd %1, %0"
            : "=m" (res)    /* Output */
            : "f" (x)       /* Input */
            : "memory"
        );

        return res[1];
    }

    /* Convert a float to long int using fctiw. */
    static __inline__ long int lfastrintf(register float x)
    {
        int res[2];

        __asm__ __volatile__
        (
            "fctiw %1, %1\n\t"
            "stfd %1, %0"
            : "=m" (res)    /* Output */
            : "f" (x)       /* Input */
            : "memory"
        );

        return res[1];
    }
#else
    /* There is nothing else to do, but use a simple casting operation, instead of a real
       rint() type function. Since we are only trying to use rint() to speed up conversions,
       the accuracy issues related to changing the rounding scheme are of little concern
       to us. */

    #if !defined(__sgi)  &&  !defined(__sunos)  &&  !defined(__solaris)  &&  !defined(__sun)
        #warning "No usable lrint() and lrintf() functions available."
        #warning "Replacing these functions with a simple C cast."
    #endif

    /* lrint() stand-in: plain C cast. */
    static __inline__ long int lrint(double x)
    {
        return (long int) (x);
    }

    /* lrintf() stand-in: plain C cast. */
    static __inline__ long int lrintf(float x)
    {
        return (long int) (x);
    }

    /* Convert a double to long int with a plain C cast. */
    static __inline__ long int lfastrint(double x)
    {
        return (long int) (x);
    }

    /* Convert a float to long int with a plain C cast. */
    static __inline__ long int lfastrintf(float x)
    {
        return (long int) (x);
    }
#endif

#if defined(__cplusplus)
}
#endif

#endif

/*- End of file ------------------------------------------------------------*/