1 #ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
2 #define INCLUDED_volk_32f_s32f_multiply_32f_u_H
/*!
 * \brief Multiplies each element of a float vector by a scalar (SSE path).
 *
 * Computes cVector[i] = aVector[i] * scalar for every i in [0, num_points).
 * Groups of four floats go through SSE; the remaining num_points % 4
 * elements are handled by a scalar loop.
 *
 * \param cVector    Output buffer; receives num_points products.
 * \param aVector    Input buffer; num_points floats are read from it.
 * \param scalar     Factor applied to every input element.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_32f_s32f_multiply_32f_u_sse(
    float* cVector,
    const float* aVector,
    const float scalar,
    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    /* Broadcast the scalar into all four lanes once, outside the loop. */
    const __m128 bVal = _mm_set_ps1(scalar);

    for (; number < quarterPoints; number++) {
        /* loadu/storeu are used, so neither buffer needs 16-byte alignment. */
        const __m128 aVal = _mm_loadu_ps(aPtr);
        const __m128 cVal = _mm_mul_ps(aVal, bVal);
        _mm_storeu_ps(cPtr, cVal);

        /* Step both cursors to the next group of four floats; without this
         * the loop would keep rewriting the first group. */
        aPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: picks up exactly where the vector loop stopped. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * scalar;
    }
}
45 #include <immintrin.h>
/*!
 * \brief Multiplies each element of a float vector by a scalar (AVX path).
 *
 * Computes cVector[i] = aVector[i] * scalar for every i in [0, num_points).
 * Groups of eight floats go through AVX; the remaining num_points % 8
 * elements are handled by a scalar loop.
 *
 * \param cVector    Output buffer; receives num_points products.
 * \param aVector    Input buffer; num_points floats are read from it.
 * \param scalar     Factor applied to every input element.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_32f_s32f_multiply_32f_u_avx(
    float* cVector,
    const float* aVector,
    const float scalar,
    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    /* Broadcast the scalar into all eight lanes once, outside the loop. */
    const __m256 bVal = _mm256_set1_ps(scalar);

    for (; number < eighthPoints; number++) {
        /* loadu/storeu are used, so neither buffer needs 32-byte alignment. */
        const __m256 aVal = _mm256_loadu_ps(aPtr);
        const __m256 cVal = _mm256_mul_ps(aVal, bVal);
        _mm256_storeu_ps(cPtr, cVal);

        /* Step both cursors to the next group of eight floats; without this
         * the loop would keep rewriting the first group. */
        aPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail: picks up exactly where the vector loop stopped. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * scalar;
    }
}
81 #ifdef LV_HAVE_GENERIC
89 static inline void volk_32f_s32f_multiply_32f_u_generic(
float* cVector,
const float* aVector,
const float scalar,
unsigned int num_points){
90 unsigned int number = 0;
91 const float* inputPtr = aVector;
92 float* outputPtr = cVector;
93 for(number = 0; number < num_points; number++){
94 *outputPtr = (*inputPtr) * scalar;