cpuid.cpp 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. /*! \file CPUID.cpp
  2. Functions which probe the CPU to discover what SIMD functionality is present
  3. */
  4. /************************************************************************************************
  5. Copyright 2008 Gregory W Heckler
  6. This file is part of the GPS Software Defined Radio (GPS-SDR)
  7. The GPS-SDR is free software; you can redistribute it and/or modify it under the terms of the
  8. GNU General Public License as published by the Free Software Foundation; either version 2 of the
  9. License, or (at your option) any later version.
  10. The GPS-SDR is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
  11. even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. General Public License for more details.
  13. You should have received a copy of the GNU General Public License along with GPS-SDR; if not,
  14. write to the:
  15. Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  16. ************************************************************************************************/
  17. #include "includes.h"
  18. #include "defines.h"
  19. #include "simd.h"
  20. #include <cpuid.h>
  21. /* https://stackoverflow.com/questions/6121792/how-to-check-if-a-cpu-supports-the-sse3-instruction-set */
  22. void cpuid(int info[4], int InfoType){
  23. __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]);
  24. }
  25. /* https://stackoverflow.com/questions/6121792/how-to-check-if-a-cpu-supports-the-sse3-instruction-set */
  26. bool CPU_CHECK_SIMD()
  27. {
  28. // Misc.
  29. bool HW_MMX;
  30. bool HW_x64;
  31. bool HW_ABM; // Advanced Bit Manipulation
  32. bool HW_RDRAND;
  33. bool HW_BMI1;
  34. bool HW_BMI2;
  35. bool HW_ADX;
  36. bool HW_PREFETCHWT1;
  37. // SIMD: 128-bit
  38. bool HW_SSE;
  39. bool HW_SSE2;
  40. bool HW_SSE3;
  41. bool HW_SSSE3;
  42. bool HW_SSE41;
  43. bool HW_SSE42;
  44. bool HW_SSE4a;
  45. bool HW_AES;
  46. bool HW_SHA;
  47. // SIMD: 256-bit
  48. bool HW_AVX;
  49. bool HW_XOP;
  50. bool HW_FMA3;
  51. bool HW_FMA4;
  52. bool HW_AVX2;
  53. // SIMD: 512-bit
  54. bool HW_AVX512F; // AVX512 Foundation
  55. bool HW_AVX512CD; // AVX512 Conflict Detection
  56. bool HW_AVX512PF; // AVX512 Prefetch
  57. bool HW_AVX512ER; // AVX512 Exponential + Reciprocal
  58. bool HW_AVX512VL; // AVX512 Vector Length Extensions
  59. bool HW_AVX512BW; // AVX512 Byte + Word
  60. bool HW_AVX512DQ; // AVX512 Doubleword + Quadword
  61. bool HW_AVX512IFMA; // AVX512 Integer 52-bit Fused Multiply-Add
  62. bool HW_AVX512VBMI; // AVX512 Vector Byte Manipulation Instructions
  63. int info[4];
  64. cpuid(info, 0);
  65. int nIds = info[0];
  66. cpuid(info, 0x80000000);
  67. unsigned nExIds = info[0];
  68. // Detect Features
  69. if (nIds >= 0x00000001){
  70. cpuid(info,0x00000001);
  71. HW_MMX = (info[3] & ((int)1 << 23)) != 0;
  72. HW_SSE = (info[3] & ((int)1 << 25)) != 0;
  73. HW_SSE2 = (info[3] & ((int)1 << 26)) != 0;
  74. HW_SSE3 = (info[2] & ((int)1 << 0)) != 0;
  75. HW_SSSE3 = (info[2] & ((int)1 << 9)) != 0;
  76. HW_SSE41 = (info[2] & ((int)1 << 19)) != 0;
  77. HW_SSE42 = (info[2] & ((int)1 << 20)) != 0;
  78. HW_AES = (info[2] & ((int)1 << 25)) != 0;
  79. HW_AVX = (info[2] & ((int)1 << 28)) != 0;
  80. HW_FMA3 = (info[2] & ((int)1 << 12)) != 0;
  81. HW_RDRAND = (info[2] & ((int)1 << 30)) != 0;
  82. }
  83. if (nIds >= 0x00000007){
  84. cpuid(info,0x00000007);
  85. HW_AVX2 = (info[1] & ((int)1 << 5)) != 0;
  86. HW_BMI1 = (info[1] & ((int)1 << 3)) != 0;
  87. HW_BMI2 = (info[1] & ((int)1 << 8)) != 0;
  88. HW_ADX = (info[1] & ((int)1 << 19)) != 0;
  89. HW_SHA = (info[1] & ((int)1 << 29)) != 0;
  90. HW_PREFETCHWT1 = (info[2] & ((int)1 << 0)) != 0;
  91. HW_AVX512F = (info[1] & ((int)1 << 16)) != 0;
  92. HW_AVX512CD = (info[1] & ((int)1 << 28)) != 0;
  93. HW_AVX512PF = (info[1] & ((int)1 << 26)) != 0;
  94. HW_AVX512ER = (info[1] & ((int)1 << 27)) != 0;
  95. HW_AVX512VL = (info[1] & ((int)1 << 31)) != 0;
  96. HW_AVX512BW = (info[1] & ((int)1 << 30)) != 0;
  97. HW_AVX512DQ = (info[1] & ((int)1 << 17)) != 0;
  98. HW_AVX512IFMA = (info[1] & ((int)1 << 21)) != 0;
  99. HW_AVX512VBMI = (info[2] & ((int)1 << 1)) != 0;
  100. }
  101. if (nExIds >= 0x80000001){
  102. cpuid(info,0x80000001);
  103. HW_x64 = (info[3] & ((int)1 << 29)) != 0;
  104. HW_ABM = (info[2] & ((int)1 << 5)) != 0;
  105. HW_SSE4a = (info[2] & ((int)1 << 6)) != 0;
  106. HW_FMA4 = (info[2] & ((int)1 << 16)) != 0;
  107. HW_XOP = (info[2] & ((int)1 << 11)) != 0;
  108. }
  109. }
  110. bool CPU_IS_SIMD_OPTION_ENABLED(enum CPU_SIMD_OPTIONS cpu_option)
  111. {
  112. bool HW_MMX = false;
  113. bool HW_SSE = false;
  114. bool HW_SSE2 = false;
  115. bool HW_SSE3 = false;
  116. bool HW_SSSE3 = false;
  117. bool HW_SSE41 = false;
  118. bool HW_SSE42 = false;
  119. int info[4];
  120. cpuid(info, 0);
  121. int nIds = info[0];
  122. // Detect Features
  123. if (nIds >= 0x00000001){
  124. cpuid(info,0x00000001);
  125. HW_MMX = (info[3] & ((int)1 << 23)) != 0;
  126. HW_SSE = (info[3] & ((int)1 << 25)) != 0;
  127. HW_SSE2 = (info[3] & ((int)1 << 26)) != 0;
  128. HW_SSE3 = (info[2] & ((int)1 << 0)) != 0;
  129. HW_SSSE3 = (info[2] & ((int)1 << 9)) != 0;
  130. HW_SSE41 = (info[2] & ((int)1 << 19)) != 0;
  131. HW_SSE42 = (info[2] & ((int)1 << 20)) != 0;
  132. }
  133. switch(cpu_option)
  134. {
  135. case CPU_MMX_EN:
  136. {
  137. return HW_MMX;
  138. break;
  139. }
  140. case CPU_SSE_EN:
  141. {
  142. return HW_SSE;
  143. break;
  144. }
  145. case CPU_SSE2_EN:
  146. {
  147. return HW_SSE2;
  148. break;
  149. }
  150. case CPU_SSE3_EN:
  151. {
  152. return HW_SSE3;
  153. break;
  154. }
  155. case CPU_SSSE3_EN:
  156. {
  157. return HW_SSSE3;
  158. break;
  159. }
  160. case CPU_SSE41_EN:
  161. {
  162. return HW_SSE41;
  163. break;
  164. }
  165. case CPU_SSE42_EN:
  166. {
  167. return HW_SSE42;
  168. break;
  169. }
  170. default:
  171. {
  172. return false;
  173. break;
  174. }
  175. }
  176. }
  177. // Default definitions of simd_xxx functions. In function Init_SIMD() they cen be changed if required.
  178. void (*simd_add)(int16 *A, int16 *B, int32 cnt) = &x86_add; //simd_add = &x86_add;
  179. void (*simd_sub)(int16 *A, int16 *B, int32 cnt) = &x86_sub; //simd_sub = &x86_sub;
  180. void (*simd_mul)(int16 *A, int16 *B, int32 cnt) = &x86_mul; //simd_mul = &x86_mul;
  181. int32 (*simd_dot)(int16 *A, int16 *B, int32 cnt) = &x86_dot; //simd_dot = &x86_dot;
  182. void (*simd_conj)(CPX *A, int32 cnt) = &x86_conj; //simd_conj = &x86_conj;
  183. void (*simd_cacc)(CPX *A, MIX *B, int32 cnt, int32 *iaccum, int32 *baccum) = &x86_cacc; //simd_cacc = &x86_cacc;
  184. void (*simd_cmul)(CPX *A, CPX *B, int32 cnt) = &x86_cmul; //simd_cmul = &x86_cmul;
  185. void (*simd_cmuls)(CPX *A, CPX *B, int32 cnt, int32 shift) = &x86_cmuls; //simd_cmuls = &x86_cmuls;
  186. void (*simd_cmulsc)(CPX *A, CPX *B, CPX *C, int32 cnt, int32 shift) = &x86_cmulsc; //simd_cmulsc = &x86_cmulsc;
  187. void (*simd_cmag)(CPX *A, int32 cnt) = &x86_cmag; //simd_cmag = &x86_cmag;
  188. void (*simd_prn_accum)(CPX *A, CPX *E, CPX *P, CPX *L, int32 cnt, CPX *accum) = &x86_prn_accum;
  189. void (*simd_prn_accum_new)(CPX *A, MIX *E, MIX *P, MIX *L, int32 cnt, CPX_ACCUM *accum) = &x86_prn_accum_new;
  190. void (*simd_max)(int32 *A, int32 *index, int32 *magt, int32 cnt) = &x86_max; //simd_max = &x86_max;
  191. void Init_SIMD()
  192. {
  193. if (CPU_IS_SIMD_OPTION_ENABLED(CPU_SSE3_EN))
  194. {
  195. void (*simd_add)(int16 *A, int16 *B, int32 cnt) = &sse_add; //simd_add = &sse_add;
  196. void (*simd_sub)(int16 *A, int16 *B, int32 cnt) = &sse_sub; //simd_sub = &sse_sub;
  197. void (*simd_mul)(int16 *A, int16 *B, int32 cnt) = &sse_mul; //simd_mul = &sse_mul;
  198. int32 (*simd_dot)(int16 *A, int16 *B, int32 cnt) = &sse_dot; //simd_dot = &sse_dot;
  199. void (*simd_conj)(CPX *A, int32 cnt) = &sse_conj; //simd_conj = &sse_conj;
  200. void (*simd_cacc)(CPX *A, MIX *B, int32 cnt, int32 *iaccum, int32 *baccum) = &sse_cacc; //simd_cacc = &sse_cacc;
  201. void (*simd_cmul)(CPX *A, CPX *B, int32 cnt) = &sse_cmul; //simd_cmul = &sse_cmul;
  202. void (*simd_cmuls)(CPX *A, CPX *B, int32 cnt, int32 shift) = &sse_cmuls; //simd_cmuls = &sse_cmuls;
  203. void (*simd_cmulsc)(CPX *A, CPX *B, CPX *C, int32 cnt, int32 shift) = &sse_cmulsc; //simd_cmulsc = &sse_cmulsc;
  204. }
  205. else
  206. {
  207. void (*simd_add)(int16 *A, int16 *B, int32 cnt) = &x86_add; //simd_add = &x86_add;
  208. void (*simd_sub)(int16 *A, int16 *B, int32 cnt) = &x86_sub; //simd_sub = &x86_sub;
  209. void (*simd_mul)(int16 *A, int16 *B, int32 cnt) = &x86_mul; //simd_mul = &x86_mul;
  210. int32 (*simd_dot)(int16 *A, int16 *B, int32 cnt) = &x86_dot; //simd_dot = &x86_dot;
  211. void (*simd_conj)(CPX *A, int32 cnt) = &x86_conj; //simd_conj = &x86_conj;
  212. void (*simd_cacc)(CPX *A, MIX *B, int32 cnt, int32 *iaccum, int32 *baccum) = &x86_cacc; //simd_cacc = &x86_cacc;
  213. void (*simd_cmul)(CPX *A, CPX *B, int32 cnt) = &x86_cmul; //simd_cmul = &x86_cmul;
  214. void (*simd_cmuls)(CPX *A, CPX *B, int32 cnt, int32 shift) = &x86_cmuls; //simd_cmuls = &x86_cmuls;
  215. void (*simd_cmulsc)(CPX *A, CPX *B, CPX *C, int32 cnt, int32 shift) = &x86_cmulsc; //simd_cmulsc = &x86_cmulsc;
  216. }
  217. void (*simd_cmag)(CPX *A, int32 cnt) = &x86_cmag; //simd_cmag = &x86_cmag;
  218. void (*simd_max)(int32 *A, int32 *index, int32 *magt, int32 cnt) = &x86_max; //simd_max = &x86_max;
  219. void (*simd_prn_accum_new)(CPX *A, MIX *E, MIX *P, MIX *L, int32 cnt, CPX_ACCUM *accum) = &x86_prn_accum_new;
  220. }