1 #ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H
2 #define INCLUDED_volk_32f_s32f_convert_32i_a_H
17 static inline void volk_32f_s32f_convert_32i_a_avx(
int32_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
18 unsigned int number = 0;
20 const unsigned int eighthPoints = num_points / 8;
22 const float* inputVectorPtr = (
const float*)inputVector;
23 int32_t* outputVectorPtr = outputVector;
25 float min_val = -2147483647;
26 float max_val = 2147483647;
29 __m256 vScalar = _mm256_set1_ps(scalar);
32 __m256 vmin_val = _mm256_set1_ps(min_val);
33 __m256 vmax_val = _mm256_set1_ps(max_val);
35 for(;number < eighthPoints; number++){
36 inputVal1 = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8;
38 inputVal1 = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
39 intInputVal1 = _mm256_cvtps_epi32(inputVal1);
41 _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1);
45 number = eighthPoints * 8;
46 for(; number < num_points; number++){
47 r = inputVector[number] * scalar;
52 outputVector[number] = (
int32_t)(r);
58 #include <emmintrin.h>
/*!
 * \brief Multiplies each input float by a scalar and converts the product to
 *        a saturated 32-bit signed integer (SSE2, aligned buffers).
 *
 * \param outputVector Destination for the converted values. Written with
 *                     _mm_store_si128, which requires a 16-byte aligned
 *                     address.
 * \param inputVector  Source floats. Read with _mm_load_ps, which requires a
 *                     16-byte aligned address.
 * \param scalar       Factor applied to every input value before conversion.
 * \param num_points   Number of elements to convert.
 *
 * Products are clamped to [-2^31, 2147483520] before conversion. 2147483520
 * is the largest float strictly below 2^31; clamping to 2147483647 does not
 * work because that literal rounds up to 2^31 as a float, and converting 2^31
 * yields 0x80000000 (INT32_MIN) from _mm_cvtps_epi32 and is undefined
 * behaviour for a C cast.
 *
 * Note: the vector body converts with _mm_cvtps_epi32 (current SSE rounding
 * mode), while the scalar tail uses a C cast (truncation toward zero). NaN
 * inputs are not handled specially by the scalar tail.
 */
static inline void volk_32f_s32f_convert_32i_a_sse2(
    int32_t* outputVector,
    const float* inputVector,
    const float scalar,
    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    const float* inputVectorPtr = inputVector;
    int32_t* outputVectorPtr = outputVector;

    /* Both bounds are exactly representable as float and as int32_t. */
    const float min_val = -2147483648.0f;
    const float max_val = 2147483520.0f;
    float r;

    const __m128 vScalar = _mm_set_ps1(scalar);
    const __m128 vmin_val = _mm_set_ps1(min_val);
    const __m128 vmax_val = _mm_set_ps1(max_val);
    __m128 inputVal1;
    __m128i intInputVal1;

    /* Vector body: four points per iteration. */
    for (; number < quarterPoints; number++) {
        inputVal1 = _mm_load_ps(inputVectorPtr);
        inputVectorPtr += 4;

        inputVal1 = _mm_max_ps(
            _mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
        intInputVal1 = _mm_cvtps_epi32(inputVal1);

        _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
        /* Advance the destination so each store lands on fresh elements. */
        outputVectorPtr += 4;
    }

    /* Scalar tail: the remaining num_points % 4 elements. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        r = inputVector[number] * scalar;
        if (r > max_val) {
            r = max_val;
        } else if (r < min_val) {
            r = min_val;
        }
        outputVector[number] = (int32_t)(r);
    }
}
107 #include <xmmintrin.h>
/*!
 * \brief Multiplies each input float by a scalar and converts the product to
 *        a saturated 32-bit signed integer (SSE only, aligned input).
 *
 * \param outputVector Destination for the converted values; written one
 *                     element at a time with ordinary stores.
 * \param inputVector  Source floats. Read with _mm_load_ps, which requires a
 *                     16-byte aligned address.
 * \param scalar       Factor applied to every input value before conversion.
 * \param num_points   Number of elements to convert.
 *
 * This variant uses no packed float-to-int conversion: the scaled and clamped
 * floats are spilled to a small stack buffer and converted with C casts, so
 * both the vector body and the tail truncate toward zero.
 *
 * Products are clamped to [-2^31, 2147483520] before conversion. 2147483520
 * is the largest float strictly below 2^31; clamping to 2147483647 does not
 * work because that literal rounds up to 2^31 as a float, and casting 2^31
 * to int32_t is undefined behaviour. NaN inputs are not handled specially by
 * the scalar tail.
 */
static inline void volk_32f_s32f_convert_32i_a_sse(
    int32_t* outputVector,
    const float* inputVector,
    const float scalar,
    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    const float* inputVectorPtr = inputVector;
    int32_t* outputVectorPtr = outputVector;

    /* Both bounds are exactly representable as float and as int32_t. */
    const float min_val = -2147483648.0f;
    const float max_val = 2147483520.0f;
    float r;

    const __m128 vScalar = _mm_set_ps1(scalar);
    const __m128 vmin_val = _mm_set_ps1(min_val);
    const __m128 vmax_val = _mm_set_ps1(max_val);
    __m128 ret;

    /* Spill area for one vector; stored unaligned so that no alignment
     * attribute is needed on this local. */
    float outputFloatBuffer[4];

    /* Vector body: four points per iteration. */
    for (; number < quarterPoints; number++) {
        ret = _mm_load_ps(inputVectorPtr);
        /* Advance the source so each iteration reads fresh elements. */
        inputVectorPtr += 4;

        ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);

        _mm_storeu_ps(outputFloatBuffer, ret);
        *outputVectorPtr++ = (int32_t)(outputFloatBuffer[0]);
        *outputVectorPtr++ = (int32_t)(outputFloatBuffer[1]);
        *outputVectorPtr++ = (int32_t)(outputFloatBuffer[2]);
        *outputVectorPtr++ = (int32_t)(outputFloatBuffer[3]);
    }

    /* Scalar tail: the remaining num_points % 4 elements. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        r = inputVector[number] * scalar;
        if (r > max_val) {
            r = max_val;
        } else if (r < min_val) {
            r = min_val;
        }
        outputVector[number] = (int32_t)(r);
    }
}
159 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Multiplies each input float by a scalar and converts the product to
 *        a saturated 32-bit signed integer (portable C implementation).
 *
 * \param outputVector Destination for the converted values.
 * \param inputVector  Source floats.
 * \param scalar       Factor applied to every input value before conversion.
 * \param num_points   Number of elements to convert.
 *
 * Products are clamped to [-2^31, 2147483520] and then converted with a C
 * cast, which truncates toward zero. 2147483520 is the largest float strictly
 * below 2^31; clamping to 2147483647 does not work because that literal
 * rounds up to 2^31 as a float, and casting 2^31 to int32_t is undefined
 * behaviour. NaN inputs are not handled specially.
 */
static inline void volk_32f_s32f_convert_32i_a_generic(
    int32_t* outputVector,
    const float* inputVector,
    const float scalar,
    unsigned int num_points)
{
    int32_t* outputVectorPtr = outputVector;
    const float* inputVectorPtr = inputVector;
    unsigned int number = 0;

    /* Both bounds are exactly representable as float and as int32_t. */
    const float min_val = -2147483648.0f;
    const float max_val = 2147483520.0f;
    float r;

    for (number = 0; number < num_points; number++) {
        r = *inputVectorPtr++ * scalar;
        if (r > max_val) {
            r = max_val;
        } else if (r < min_val) {
            r = min_val;
        }
        *outputVectorPtr++ = (int32_t)(r);
    }
}