Vector Optimized Library of Kernels  2.5.0
Architecture-tuned implementations of math kernels
volk_32f_convert_64f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
66 #ifndef INCLUDED_volk_32f_convert_64f_u_H
67 #define INCLUDED_volk_32f_convert_64f_u_H
68 
69 #include <inttypes.h>
70 #include <stdio.h>
71 
72 #ifdef LV_HAVE_AVX
73 #include <immintrin.h>
74 
/*
 * Widen num_points single-precision values from inputVector into
 * double-precision values written to outputVector (AVX, unaligned variant).
 *
 * outputVector: destination; receives num_points doubles.
 * inputVector:  source; num_points floats are read.
 * num_points:   number of elements to convert.
 *
 * Uses the unaligned load/store intrinsics for the vector body.
 */
static inline void volk_32f_convert_64f_u_avx(double* outputVector,
                                              const float* inputVector,
                                              unsigned int num_points)
{
    const unsigned int vectorBlocks = num_points / 4;
    const float* src = inputVector;
    double* dst = outputVector;
    unsigned int block;
    unsigned int idx;

    /* Vector body: each pass loads 4 floats (128 bits) and stores them
     * widened as 4 doubles (256 bits). */
    for (block = 0; block < vectorBlocks; block++, src += 4, dst += 4) {
        const __m128 floats = _mm_loadu_ps(src);
        _mm256_storeu_pd(dst, _mm256_cvtps_pd(floats));
    }

    /* Scalar tail: the 0-3 elements that do not fill a whole vector. */
    for (idx = vectorBlocks * 4; idx < num_points; idx++) {
        outputVector[idx] = (double)inputVector[idx];
    }
}
103 
104 #endif /* LV_HAVE_AVX */
105 
106 #ifdef LV_HAVE_SSE2
107 #include <emmintrin.h>
108 
/*
 * Widen num_points single-precision values from inputVector into
 * double-precision values written to outputVector (SSE2, unaligned variant).
 *
 * outputVector: destination; receives num_points doubles.
 * inputVector:  source; num_points floats are read.
 * num_points:   number of elements to convert.
 *
 * Uses the unaligned load/store intrinsics for the vector body.
 */
static inline void volk_32f_convert_64f_u_sse2(double* outputVector,
                                               const float* inputVector,
                                               unsigned int num_points)
{
    const unsigned int vectorBlocks = num_points / 4;
    const float* src = inputVector;
    double* dst = outputVector;
    unsigned int block;
    unsigned int idx;

    for (block = 0; block < vectorBlocks; block++, src += 4, dst += 4) {
        const __m128 floats = _mm_loadu_ps(src);

        /* _mm_cvtps_pd widens only the two low lanes, so each group of
         * four floats is handled as two halves. */
        _mm_storeu_pd(dst, _mm_cvtps_pd(floats));

        /* Bring the two high lanes down into the low lanes, then widen. */
        _mm_storeu_pd(dst + 2, _mm_cvtps_pd(_mm_movehl_ps(floats, floats)));
    }

    /* Scalar tail: the 0-3 elements that do not fill a whole vector. */
    for (idx = vectorBlocks * 4; idx < num_points; idx++) {
        outputVector[idx] = (double)inputVector[idx];
    }
}
144 #endif /* LV_HAVE_SSE2 */
145 
146 
147 #ifdef LV_HAVE_GENERIC
148 
/*
 * Portable scalar conversion: widen num_points single-precision values from
 * inputVector into double-precision values written to outputVector.
 *
 * outputVector: destination; receives num_points doubles.
 * inputVector:  source; num_points floats are read.
 * num_points:   number of elements to convert.
 */
static inline void volk_32f_convert_64f_generic(double* outputVector,
                                                const float* inputVector,
                                                unsigned int num_points)
{
    unsigned int idx;

    for (idx = 0; idx < num_points; idx++) {
        outputVector[idx] = (double)inputVector[idx];
    }
}
161 #endif /* LV_HAVE_GENERIC */
162 
163 
164 #endif /* INCLUDED_volk_32f_convert_64f_u_H */
165 
166 
167 #ifndef INCLUDED_volk_32f_convert_64f_a_H
168 #define INCLUDED_volk_32f_convert_64f_a_H
169 
170 #include <inttypes.h>
171 #include <stdio.h>
172 
173 #ifdef LV_HAVE_AVX
174 #include <immintrin.h>
175 
/*
 * Widen num_points single-precision values from inputVector into
 * double-precision values written to outputVector (AVX, aligned variant).
 *
 * outputVector: destination; receives num_points doubles.
 * inputVector:  source; num_points floats are read.
 * num_points:   number of elements to convert.
 *
 * The vector body uses the aligned intrinsics _mm_load_ps and
 * _mm256_store_pd, so both buffers must satisfy the alignment those
 * intrinsics require.
 */
static inline void volk_32f_convert_64f_a_avx(double* outputVector,
                                              const float* inputVector,
                                              unsigned int num_points)
{
    const unsigned int vectorBlocks = num_points / 4;
    const float* src = inputVector;
    double* dst = outputVector;
    unsigned int block;
    unsigned int idx;

    /* Vector body: each pass loads 4 floats (128 bits) and stores them
     * widened as 4 doubles (256 bits). */
    for (block = 0; block < vectorBlocks; block++, src += 4, dst += 4) {
        const __m128 floats = _mm_load_ps(src);
        _mm256_store_pd(dst, _mm256_cvtps_pd(floats));
    }

    /* Scalar tail: the 0-3 elements that do not fill a whole vector. */
    for (idx = vectorBlocks * 4; idx < num_points; idx++) {
        outputVector[idx] = (double)inputVector[idx];
    }
}
204 #endif /* LV_HAVE_AVX */
205 
206 #ifdef LV_HAVE_SSE2
207 #include <emmintrin.h>
208 
/*
 * Widen num_points single-precision values from inputVector into
 * double-precision values written to outputVector (SSE2, aligned variant).
 *
 * outputVector: destination; receives num_points doubles.
 * inputVector:  source; num_points floats are read.
 * num_points:   number of elements to convert.
 *
 * The vector body uses the aligned intrinsics _mm_load_ps and _mm_store_pd,
 * so both buffers must satisfy the alignment those intrinsics require.
 */
static inline void volk_32f_convert_64f_a_sse2(double* outputVector,
                                               const float* inputVector,
                                               unsigned int num_points)
{
    const unsigned int vectorBlocks = num_points / 4;
    const float* src = inputVector;
    double* dst = outputVector;
    unsigned int block;
    unsigned int idx;

    for (block = 0; block < vectorBlocks; block++, src += 4, dst += 4) {
        const __m128 floats = _mm_load_ps(src);

        /* _mm_cvtps_pd widens only the two low lanes, so each group of
         * four floats is handled as two halves. */
        _mm_store_pd(dst, _mm_cvtps_pd(floats));

        /* Bring the two high lanes down into the low lanes, then widen. */
        _mm_store_pd(dst + 2, _mm_cvtps_pd(_mm_movehl_ps(floats, floats)));
    }

    /* Scalar tail: the 0-3 elements that do not fill a whole vector. */
    for (idx = vectorBlocks * 4; idx < num_points; idx++) {
        outputVector[idx] = (double)inputVector[idx];
    }
}
244 #endif /* LV_HAVE_SSE2 */
245 
246 
247 #ifdef LV_HAVE_GENERIC
248 
/*
 * Portable scalar conversion: widen num_points single-precision values from
 * inputVector into double-precision values written to outputVector.
 * The body performs only scalar element accesses.
 *
 * outputVector: destination; receives num_points doubles.
 * inputVector:  source; num_points floats are read.
 * num_points:   number of elements to convert.
 */
static inline void volk_32f_convert_64f_a_generic(double* outputVector,
                                                  const float* inputVector,
                                                  unsigned int num_points)
{
    unsigned int idx;

    for (idx = 0; idx < num_points; idx++) {
        outputVector[idx] = (double)inputVector[idx];
    }
}
261 #endif /* LV_HAVE_GENERIC */
262 
263 
264 #endif /* INCLUDED_volk_32f_convert_64f_a_H */
static void volk_32f_convert_64f_a_generic(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:249
static void volk_32f_convert_64f_u_sse2(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:109
static void volk_32f_convert_64f_a_avx(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:176
static void volk_32f_convert_64f_a_sse2(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:209
static void volk_32f_convert_64f_generic(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:149
static void volk_32f_convert_64f_u_avx(double *outputVector, const float *inputVector, unsigned int num_points)
Definition: volk_32f_convert_64f.h:75