/* * Conversion of byte IQ data from RTLSDR device to complex floats. * These exist because the pure Haskell implementations are slow. * Uses SIMD instructions for performance. */ #include <stdio.h> #include <stdint.h> #include <x86intrin.h> /* * Conversion */ void convertC(int num, uint8_t *in, float *out){ int i; for(i=0; i<num; i++){ out[i] = ((float) in[i] - 128.0) * (1/128.0); } } void convertCSSE(int num, uint8_t *in, float *out){ int i; __m128 sub = _mm_set1_ps(128.0); __m128 mul = _mm_set1_ps(1/128.0); for(i=0; i<num; i+=4){ __m128i val = _mm_loadu_si128((__m128i *)(in + i)); __m128i ints = _mm_cvtepu8_epi32(val); __m128 cvtd = _mm_cvtepi32_ps(ints); __m128 res = _mm_mul_ps(_mm_sub_ps(cvtd, sub), mul); _mm_storeu_ps(out + i, res); } } void convertCAVX(int num, uint8_t *in, float *out){ int i; __m256 sub = _mm256_set1_ps(128.0); __m256 mul = _mm256_set1_ps(1/128.0); for(i=0; i<num; i+=8){ __m128i val = _mm_loadu_si128((__m128i *)(in + i)); __m256i ints = _mm256_cvtepu8_epi32(val); __m256 cvtd = _mm256_cvtepi32_ps(ints); __m256 res = _mm256_mul_ps(_mm256_sub_ps(cvtd, sub), mul); _mm256_storeu_ps(out + i, res); } } void convertCBladeRF(int num, int16_t *in, float *out){ int i; for(i=0; i<num; i++){ out[i] = (float)in[i] * (1/2048.0); } } void convertCSSEBladeRF(int num, int16_t *in, float *out){ int i; __m128 mul = _mm_set1_ps(1/2048.0); for(i=0; i<num; i+=4){ __m128i val = _mm_loadu_si128((__m128i *)(in + i)); __m128i ints = _mm_cvtepi16_epi32(val); __m128 cvtd = _mm_cvtepi32_ps(ints); __m128 res = _mm_mul_ps(cvtd, mul); _mm_storeu_ps(out + i, res); } } void convertCAVXBladeRF(int num, int16_t *in, float *out){ int i; __m256 mul = _mm256_set1_ps(1/2048.0); for(i=0; i<num; i+=8){ __m128i val = _mm_loadu_si128((__m128i *)(in + i)); __m256i ints = _mm256_cvtepi16_epi32(val); __m256 cvtd = _mm256_cvtepi32_ps(ints); __m256 res = _mm256_mul_ps(cvtd, mul); _mm256_storeu_ps(out + i, res); } } void convertBladeRFTransmit(int num, float *in, int16_t *out){ int i; for(i=0; i<num; i++){ float val = in[i]; // [-1, 1) val = val + 1; // [0, 2) val = val * 2048; // [0, 4096) int16_t res = (int16_t)val; // [0, 4095] res = res - 2048; // [-2048, 2047] if(res > 2047) res = 2047; if(res < -2048) res = -2048; out[i] = res; } }