71 #ifndef INCLUDED_volk_32f_64f_multiply_64f_H
72 #define INCLUDED_volk_32f_64f_multiply_64f_H
77 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Element-wise product of a float vector and a double vector,
 *        computed and stored in double precision.
 *
 * For every i in [0, num_points): cVector[i] = (double)aVector[i] * bVector[i].
 * Each float is widened to double before the multiply, so the product is
 * formed in double precision.
 *
 * \param cVector    Output buffer; receives num_points double products.
 * \param aVector    Single-precision input; num_points elements are read.
 * \param bVector    Double-precision input; num_points elements are read.
 * \param num_points Number of elements to process. Zero performs no reads
 *                   or writes.
 */
static inline void volk_32f_64f_multiply_64f_generic(double* cVector,
                                                     const float* aVector,
                                                     const double* bVector,
                                                     unsigned int num_points)
{
    double* cPtr = cVector;
    const float* aPtr = aVector;
    const double* bPtr = bVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        /* Widen the float first so the multiply happens in double. */
        *cPtr++ = ((double)(*aPtr++)) * (*bPtr++);
    }
}
103 #include <immintrin.h>
104 #include <xmmintrin.h>
/*
 * Unaligned AVX kernel (volk_32f_64f_multiply_64f_u_avx): multiplies each
 * float of aVector by the matching double of bVector and writes the double
 * product to cVector. Full groups of eight elements go through 256-bit
 * vector operations; the remaining num_points % 8 elements are handled by
 * a scalar loop.
 *
 * NOTE(review): this text is a numbered listing with gaps. The leading
 * numbers are listing line numbers, not code, and they jump (106, 118-119,
 * 138-143). The head of the signature, the braces, the declarations of
 * aVal / aVal1 / aVal2, and any advance of aPtr / bPtr / cPtr inside the
 * vector loop are therefore not visible here. Confirm against the upstream
 * header before reusing this code.
 */
107 const float* aVector,
108 const double* bVector,
109 unsigned int num_points)
111 unsigned int number = 0;
/* Number of complete 8-element groups handled by the vector loop. */
112 const unsigned int eighth_points = num_points / 8;
/* Working cursors over the output and the two inputs. */
114 double* cPtr = cVector;
115 const float* aPtr = aVector;
116 const double* bPtr = bVector;
/* Double-precision vector temporaries: widened a, loaded b, products. */
120 __m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
121 for (; number < eighth_points; number++) {
/* Unaligned loads: 8 floats from a, and 8 doubles from b as two 4-wide halves. */
123 aVal = _mm256_loadu_ps(aPtr);
124 bVal1 = _mm256_loadu_pd(bPtr);
125 bVal2 = _mm256_loadu_pd(bPtr + 4);
/* Split the 8 floats into the low (index 0) and high (index 1) 128-bit lanes. */
127 aVal1 = _mm256_extractf128_ps(aVal, 0);
128 aVal2 = _mm256_extractf128_ps(aVal, 1);
/* Widen each group of 4 floats to 4 doubles before multiplying. */
130 aDbl1 = _mm256_cvtps_pd(aVal1);
131 aDbl2 = _mm256_cvtps_pd(aVal2);
/* Element-wise double-precision products for both halves. */
133 cVal1 = _mm256_mul_pd(aDbl1, bVal1);
134 cVal2 = _mm256_mul_pd(aDbl2, bVal2);
/* Unaligned stores of the 8 products, again as two 4-wide halves. */
136 _mm256_storeu_pd(cPtr, cVal1);
137 _mm256_storeu_pd(cPtr + 4, cVal2);
/* Scalar tail: resume at the first element not covered by a full group. */
144 number = eighth_points * 8;
145 for (; number < num_points; number++) {
/* Same arithmetic as the vector path: widen the float, then multiply. */
146 *cPtr++ = ((double)(*aPtr++)) * (*bPtr++);
155 #include <immintrin.h>
156 #include <xmmintrin.h>
/*
 * Aligned AVX kernel (volk_32f_64f_multiply_64f_a_avx): multiplies each
 * float of aVector by the matching double of bVector and writes the double
 * product to cVector. Full groups of eight elements go through 256-bit
 * vector operations; the remaining num_points % 8 elements are handled by
 * a scalar loop.
 *
 * This variant uses the aligned load/store intrinsics (_mm256_load_ps,
 * _mm256_load_pd, _mm256_store_pd), which require 32-byte aligned addresses.
 *
 * NOTE(review): this text is a numbered listing with gaps. The leading
 * numbers are listing line numbers, not code, and they jump (158, 170-171,
 * 190-195). The head of the signature, the braces, the declarations of
 * aVal / aVal1 / aVal2, and any advance of aPtr / bPtr / cPtr inside the
 * vector loop are therefore not visible here. Confirm against the upstream
 * header before reusing this code.
 */
159 const float* aVector,
160 const double* bVector,
161 unsigned int num_points)
163 unsigned int number = 0;
/* Number of complete 8-element groups handled by the vector loop. */
164 const unsigned int eighth_points = num_points / 8;
/* Working cursors over the output and the two inputs. */
166 double* cPtr = cVector;
167 const float* aPtr = aVector;
168 const double* bPtr = bVector;
/* Double-precision vector temporaries: widened a, loaded b, products. */
172 __m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
173 for (; number < eighth_points; number++) {
/* Aligned loads: 8 floats from a, and 8 doubles from b as two 4-wide halves. */
175 aVal = _mm256_load_ps(aPtr);
176 bVal1 = _mm256_load_pd(bPtr);
177 bVal2 = _mm256_load_pd(bPtr + 4);
/* Split the 8 floats into the low (index 0) and high (index 1) 128-bit lanes. */
179 aVal1 = _mm256_extractf128_ps(aVal, 0);
180 aVal2 = _mm256_extractf128_ps(aVal, 1);
/* Widen each group of 4 floats to 4 doubles before multiplying. */
182 aDbl1 = _mm256_cvtps_pd(aVal1);
183 aDbl2 = _mm256_cvtps_pd(aVal2);
/* Element-wise double-precision products for both halves. */
185 cVal1 = _mm256_mul_pd(aDbl1, bVal1);
186 cVal2 = _mm256_mul_pd(aDbl2, bVal2);
/* Aligned stores of the 8 products, again as two 4-wide halves. */
188 _mm256_store_pd(cPtr, cVal1);
189 _mm256_store_pd(cPtr + 4, cVal2);
/* Scalar tail: resume at the first element not covered by a full group. */
196 number = eighth_points * 8;
197 for (; number < num_points; number++) {
/* Same arithmetic as the vector path: widen the float, then multiply. */
198 *cPtr++ = ((double)(*aPtr++)) * (*bPtr++);
static void volk_32f_64f_multiply_64f_generic(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
Definition: volk_32f_64f_multiply_64f.h:79
static void volk_32f_64f_multiply_64f_u_avx(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
Definition: volk_32f_64f_multiply_64f.h:106
static void volk_32f_64f_multiply_64f_a_avx(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
Definition: volk_32f_64f_multiply_64f.h:158