71 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H
72 #define INCLUDED_volk_32fc_magnitude_squared_32f_u_H
79 #include <immintrin.h>
84 unsigned int num_points)
86 unsigned int number = 0;
87 const unsigned int eighthPoints = num_points / 8;
89 const float* complexVectorPtr = (
float*)complexVector;
90 float* magnitudeVectorPtr = magnitudeVector;
92 __m256 cplxValue1, cplxValue2, result;
94 for (; number < eighthPoints; number++) {
95 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
96 cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
98 _mm256_storeu_ps(magnitudeVectorPtr, result);
100 complexVectorPtr += 16;
101 magnitudeVectorPtr += 8;
104 number = eighthPoints * 8;
105 for (; number < num_points; number++) {
106 float val1Real = *complexVectorPtr++;
107 float val1Imag = *complexVectorPtr++;
108 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
115 #include <pmmintrin.h>
120 unsigned int num_points)
122 unsigned int number = 0;
123 const unsigned int quarterPoints = num_points / 4;
125 const float* complexVectorPtr = (
float*)complexVector;
126 float* magnitudeVectorPtr = magnitudeVector;
128 __m128 cplxValue1, cplxValue2, result;
129 for (; number < quarterPoints; number++) {
130 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
131 complexVectorPtr += 4;
133 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
134 complexVectorPtr += 4;
137 _mm_storeu_ps(magnitudeVectorPtr, result);
138 magnitudeVectorPtr += 4;
141 number = quarterPoints * 4;
142 for (; number < num_points; number++) {
143 float val1Real = *complexVectorPtr++;
144 float val1Imag = *complexVectorPtr++;
145 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
153 #include <xmmintrin.h>
157 unsigned int num_points)
159 unsigned int number = 0;
160 const unsigned int quarterPoints = num_points / 4;
162 const float* complexVectorPtr = (
float*)complexVector;
163 float* magnitudeVectorPtr = magnitudeVector;
165 __m128 cplxValue1, cplxValue2, result;
167 for (; number < quarterPoints; number++) {
168 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
169 complexVectorPtr += 4;
171 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
172 complexVectorPtr += 4;
175 _mm_storeu_ps(magnitudeVectorPtr, result);
176 magnitudeVectorPtr += 4;
179 number = quarterPoints * 4;
180 for (; number < num_points; number++) {
181 float val1Real = *complexVectorPtr++;
182 float val1Imag = *complexVectorPtr++;
183 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
189 #ifdef LV_HAVE_GENERIC
193 unsigned int num_points)
195 const float* complexVectorPtr = (
float*)complexVector;
196 float* magnitudeVectorPtr = magnitudeVector;
197 unsigned int number = 0;
198 for (number = 0; number < num_points; number++) {
199 const float real = *complexVectorPtr++;
200 const float imag = *complexVectorPtr++;
201 *magnitudeVectorPtr++ = (real * real) + (imag * imag);
208 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H
209 #define INCLUDED_volk_32fc_magnitude_squared_32f_a_H
211 #include <inttypes.h>
216 #include <immintrin.h>
221 unsigned int num_points)
223 unsigned int number = 0;
224 const unsigned int eighthPoints = num_points / 8;
226 const float* complexVectorPtr = (
float*)complexVector;
227 float* magnitudeVectorPtr = magnitudeVector;
229 __m256 cplxValue1, cplxValue2, result;
230 for (; number < eighthPoints; number++) {
231 cplxValue1 = _mm256_load_ps(complexVectorPtr);
232 complexVectorPtr += 8;
234 cplxValue2 = _mm256_load_ps(complexVectorPtr);
235 complexVectorPtr += 8;
238 _mm256_store_ps(magnitudeVectorPtr, result);
239 magnitudeVectorPtr += 8;
242 number = eighthPoints * 8;
243 for (; number < num_points; number++) {
244 float val1Real = *complexVectorPtr++;
245 float val1Imag = *complexVectorPtr++;
246 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
253 #include <pmmintrin.h>
258 unsigned int num_points)
260 unsigned int number = 0;
261 const unsigned int quarterPoints = num_points / 4;
263 const float* complexVectorPtr = (
float*)complexVector;
264 float* magnitudeVectorPtr = magnitudeVector;
266 __m128 cplxValue1, cplxValue2, result;
267 for (; number < quarterPoints; number++) {
268 cplxValue1 = _mm_load_ps(complexVectorPtr);
269 complexVectorPtr += 4;
271 cplxValue2 = _mm_load_ps(complexVectorPtr);
272 complexVectorPtr += 4;
275 _mm_store_ps(magnitudeVectorPtr, result);
276 magnitudeVectorPtr += 4;
279 number = quarterPoints * 4;
280 for (; number < num_points; number++) {
281 float val1Real = *complexVectorPtr++;
282 float val1Imag = *complexVectorPtr++;
283 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
291 #include <xmmintrin.h>
295 unsigned int num_points)
297 unsigned int number = 0;
298 const unsigned int quarterPoints = num_points / 4;
300 const float* complexVectorPtr = (
float*)complexVector;
301 float* magnitudeVectorPtr = magnitudeVector;
303 __m128 cplxValue1, cplxValue2, result;
304 for (; number < quarterPoints; number++) {
305 cplxValue1 = _mm_load_ps(complexVectorPtr);
306 complexVectorPtr += 4;
308 cplxValue2 = _mm_load_ps(complexVectorPtr);
309 complexVectorPtr += 4;
312 _mm_store_ps(magnitudeVectorPtr, result);
313 magnitudeVectorPtr += 4;
316 number = quarterPoints * 4;
317 for (; number < num_points; number++) {
318 float val1Real = *complexVectorPtr++;
319 float val1Imag = *complexVectorPtr++;
320 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
327 #include <arm_neon.h>
/*
 * Squared magnitude (re*re + im*im) of each complex point, ARM NEON version.
 * Four points are handled per vector iteration; the leftover
 * (num_points % 4) points are handled by the scalar tail loop.
 *
 * magnitudeVector  output, one float written per complex point
 * complexVector    input, read as interleaved (real, imag) float pairs
 * num_points       number of complex points to process
 *
 * NOTE(review): the declaration of `result` and the left-hand sides of the
 * vmulq_f32/vaddq_f32 statements are restored here on the assumption that the
 * squares are written back into cmplx_val and their sum into `result` --
 * confirm against upstream.
 */
static inline void volk_32fc_magnitude_squared_32f_neon(float* magnitudeVector,
                                                        const lv_32fc_t* complexVector,
                                                        unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    const float* complexVectorPtr = (const float*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;

    float32x4x2_t cmplx_val;
    float32x4_t result;

    for (; number < quarterPoints; number++) {
        /* vld2q_f32 de-interleaves 8 floats: even elements land in val[0],
         * odd elements in val[1]. */
        cmplx_val = vld2q_f32(complexVectorPtr);
        complexVectorPtr += 8;

        /* Square each lane of both halves. */
        cmplx_val.val[0] = vmulq_f32(cmplx_val.val[0], cmplx_val.val[0]);
        cmplx_val.val[1] = vmulq_f32(cmplx_val.val[1], cmplx_val.val[1]);

        /* Sum the two squared halves lane by lane. */
        result = vaddq_f32(cmplx_val.val[0], cmplx_val.val[1]);

        vst1q_f32(magnitudeVectorPtr, result);
        magnitudeVectorPtr += 4;
    }

    /* Scalar tail: points that did not fill a whole vector iteration. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        float val1Real = *complexVectorPtr++;
        float val1Imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
    }
}
367 #ifdef LV_HAVE_GENERIC
370 float* magnitudeVector,
const lv_32fc_t* complexVector,
unsigned int num_points)
372 const float* complexVectorPtr = (
float*)complexVector;
373 float* magnitudeVectorPtr = magnitudeVector;
374 unsigned int number = 0;
375 for (number = 0; number < num_points; number++) {
376 const float real = *complexVectorPtr++;
377 const float imag = *complexVectorPtr++;
378 *magnitudeVectorPtr++ = (real * real) + (imag * imag);
static void volk_32fc_magnitude_squared_32f_a_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:369
static void volk_32fc_magnitude_squared_32f_neon(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:329
static void volk_32fc_magnitude_squared_32f_u_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:82
static void volk_32fc_magnitude_squared_32f_a_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:256
static void volk_32fc_magnitude_squared_32f_a_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:293
static void volk_32fc_magnitude_squared_32f_a_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:219
static void volk_32fc_magnitude_squared_32f_u_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:155
static void volk_32fc_magnitude_squared_32f_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:191
static void volk_32fc_magnitude_squared_32f_u_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:118
static __m256 _mm256_magnitudesquared_ps(__m256 cplxValue1, __m256 cplxValue2)
Definition: volk_avx_intrinsics.h:73
float complex lv_32fc_t
Definition: volk_complex.h:65
static __m128 _mm_magnitudesquared_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse3_intrinsics.h:51
static __m128 _mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse_intrinsics.h:32