fix16_fft.c 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. /* Real-input FFT implementation using the libfixmath fix16_t datatype.
  2. * Not the fastest implementation ever, but has a small code size.
  3. *
  4. * Refer to http://www.dspguide.com/ch12/2.htm for information on the
  5. * algorithm.
  6. *
  7. * (c) 2012 Petteri Aimonen <jpa @ kapsi.fi>
  8. * This file is released to public domain.
  9. */
  10. #include <stdint.h>
  11. #include <fix16.h>
  12. // You can change the input datatype and intermediate scaling here.
  13. // By default, the output is divided by the transform length to get a normalized FFT.
  14. // Input_convert determines the scaling of intermediate values. Multiplication by 256
  15. // gives a nice compromise between precision and numeric range.
  16. #ifndef INPUT_TYPE
  17. #define INPUT_TYPE uint8_t
  18. #endif
  19. #ifndef INPUT_CONVERT
  20. #define INPUT_CONVERT(x) ((x) << 8)
  21. #endif
  22. #ifndef INPUT_INDEX
  23. #define INPUT_INDEX(x) (x)
  24. #endif
  25. #ifndef OUTPUT_SCALE
  26. #define OUTPUT_SCALE(transform_size) (fix16_one * 256 / transform_size)
  27. #endif
  28. // Fast calculation of DFT for a 4-point signal. Based on the simplicity
  29. // of 4-point sinewave.
  30. static void four_point_dft(INPUT_TYPE *input, unsigned input_stride,
  31. fix16_t *real, fix16_t *imag)
  32. {
  33. fix16_t x0 = INPUT_CONVERT(input[0 * input_stride]);
  34. fix16_t x1 = INPUT_CONVERT(input[1 * input_stride]);
  35. fix16_t x2 = INPUT_CONVERT(input[2 * input_stride]);
  36. fix16_t x3 = INPUT_CONVERT(input[3 * input_stride]);
  37. real[0] = x0 + x1 + x2 + x3;
  38. imag[0] = 0;
  39. real[1] = x0 - x2;
  40. imag[1] = -x1 + x3;
  41. real[2] = x0 - x1 + x2 - x3;
  42. imag[2] = 0;
  43. real[3] = x0 - x2;
  44. imag[3] = x1 - x3;
  45. }
  46. // Mix N blocksize-sized transforms pairwise together to get N/2 2*blocksize-sized transforms.
  47. static void butterfly(fix16_t *real, fix16_t *imag, unsigned blocksize, unsigned blockpairs)
  48. {
  49. unsigned i, j;
  50. for (i = 0; i < blocksize; i++)
  51. {
  52. fix16_t angle = fix16_pi * i / blocksize;
  53. fix16_t c = fix16_cos(angle);
  54. fix16_t s = -fix16_sin(angle);
  55. fix16_t *rp = real + i;
  56. fix16_t *ip = imag + i;
  57. for (j = 0; j < blockpairs; j++)
  58. {
  59. // Get the odd-indexed tranform and multiply by sine
  60. fix16_t re = fix16_mul(rp[blocksize], c) - fix16_mul(ip[blocksize], s);
  61. fix16_t im = fix16_mul(ip[blocksize], c) + fix16_mul(rp[blocksize], s);
  62. // Update the transforms
  63. rp[blocksize] = rp[0] - re;
  64. ip[blocksize] = ip[0] - im;
  65. rp[0] += re;
  66. ip[0] += im;
  67. rp += blocksize * 2;
  68. ip += blocksize * 2;
  69. }
  70. }
  71. }
  72. // Reverse bits in a 32-bit number
  73. static uint32_t rbit_32(uint32_t x)
  74. {
  75. #if defined(__GNUC__) && defined(__ARM_ARCH_7M__)
  76. __asm__("rbit %0,%0" : "=r"(x) : "0"(x));
  77. return x;
  78. #else
  79. x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1));
  80. x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2));
  81. x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4));
  82. x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8));
  83. return((x >> 16) | (x << 16));
  84. #endif
  85. }
  86. // Reverse bits in an n-bit number.
  87. static uint32_t rbit_n(uint32_t x, unsigned n)
  88. {
  89. return rbit_32(x << (32 - n));
  90. }
  91. // Base-2 integer logarithm
  92. static int ilog2(unsigned x)
  93. {
  94. int result = -1;
  95. while (x)
  96. {
  97. x >>= 1;
  98. result++;
  99. }
  100. return result;
  101. }
  102. // Compute a transform of the real-valued input array, and store results in two arrays.
  103. // Size of each array is the same as transform_length.
  104. // Transform length must be a power of two and atleast 4.
  105. void fix16_fft(INPUT_TYPE *input, fix16_t *real, fix16_t *imag, unsigned transform_length)
  106. {
  107. int log_length = ilog2(transform_length);
  108. transform_length = 1 << log_length;
  109. unsigned i;
  110. for (i = 0; i < transform_length / 4; i++)
  111. {
  112. four_point_dft(input + INPUT_INDEX(rbit_n(i, log_length - 2)), transform_length / 4, real + 4*i, imag + 4*i);
  113. }
  114. for (i = 2; i < log_length; i++)
  115. {
  116. butterfly(real, imag, 1 << i, transform_length / (2 << i));
  117. }
  118. #ifdef OUTPUT_SCALE
  119. fix16_t scale = OUTPUT_SCALE(transform_length);
  120. for (i = 0; i < transform_length; i++)
  121. {
  122. real[i] = fix16_mul(real[i], scale);
  123. imag[i] = fix16_mul(imag[i], scale);
  124. }
  125. #endif
  126. }
  127. /* Just some test code
  128. #include <stdio.h>
  129. int main()
  130. {
  131. INPUT_TYPE input[16] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
  132. fix16_t real[16], imag[16];
  133. fix16_fft(input, real, imag, 16);
  134. int count = 16;
  135. int i;
  136. for (i = 0; i < count; i++)
  137. {
  138. printf("%d: %0.4f, %0.4f\n", i, fix16_to_float(real[i]), fix16_to_float(imag[i]));
  139. }
  140. return 0;
  141. }
  142. */