Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: CPL - Common Portability Library
4 : * Purpose: CPU features detection
5 : * Author: Even Rouault, <even dot rouault at spatialys dot com>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2016, Even Rouault <even dot rouault at spatialys dot com>
9 : *
10 : * Permission is hereby granted, free of charge, to any person obtaining a
11 : * copy of this software and associated documentation files (the "Software"),
12 : * to deal in the Software without restriction, including without limitation
13 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 : * and/or sell copies of the Software, and to permit persons to whom the
15 : * Software is furnished to do so, subject to the following conditions:
16 : *
17 : * The above copyright notice and this permission notice shall be included
18 : * in all copies or substantial portions of the Software.
19 : *
20 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 : * DEALINGS IN THE SOFTWARE.
27 : ****************************************************************************/
28 :
29 : #include "cpl_port.h"
30 : #include "cpl_string.h"
31 : #include "cpl_cpu_features.h"
32 :
33 : //! @cond Doxygen_Suppress
34 :
// Bit positions inside the ECX register returned by CPUID leaf 1.
#define CPUID_SSSE3_ECX_BIT 9
#define CPUID_OSXSAVE_ECX_BIT 27
#define CPUID_AVX_ECX_BIT 28

// Bit position inside the EDX register returned by CPUID leaf 1.
#define CPUID_SSE_EDX_BIT 25

// Masks tested against the low 32 bits returned by XGETBV for register 0:
// bit 1 is the XMM state, bit 2 is the YMM state.
#define BIT_XMM_STATE (1 << 1)
#define BIT_YMM_STATE (1 << 2)

// Index of each register in the int[4] array filled by CPL_CPUID().
#define REG_EAX 0
#define REG_EBX 1
#define REG_ECX 2
#define REG_EDX 3
48 :
// CPL_CPUID(leaf, regs) runs the CPUID instruction for the requested leaf.
// With GCC-compatible compilers the EAX, EBX, ECX and EDX results are stored
// in regs[0], regs[1], regs[2] and regs[3] respectively. With MSVC the work
// is delegated to the __cpuid() intrinsic.
#if defined(__GNUC__)

// The EBX/RBX result is exchanged with a compiler-chosen scratch register
// around the CPUID instruction instead of being declared as a direct output,
// so the register holds its previous content again after the sequence.
// NOTE(review): presumably needed because EBX may be reserved by the
// compiler (e.g. for PIC) - confirm.
#if defined(__x86_64)
#define GCC_CPUID(leaf, eax_out, ebx_out, ecx_out, edx_out)                    \
    __asm__("xchgq %%rbx, %q1\n"                                               \
            "cpuid\n"                                                          \
            "xchgq %%rbx, %q1"                                                 \
            : "=a"(eax_out), "=r"(ebx_out), "=c"(ecx_out), "=d"(edx_out)       \
            : "0"(leaf))
#else
#define GCC_CPUID(leaf, eax_out, ebx_out, ecx_out, edx_out)                    \
    __asm__("xchgl %%ebx, %1\n"                                                \
            "cpuid\n"                                                          \
            "xchgl %%ebx, %1"                                                  \
            : "=a"(eax_out), "=r"(ebx_out), "=c"(ecx_out), "=d"(edx_out)       \
            : "0"(leaf))
#endif

#define CPL_CPUID(leaf, regs)                                                  \
    GCC_CPUID(leaf, regs[0], regs[1], regs[2], regs[3])

#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))

#include <intrin.h>
#define CPL_CPUID(leaf, regs) __cpuid(regs, leaf)

#endif
75 :
76 : #if defined(HAVE_SSE_AT_COMPILE_TIME) && !defined(HAVE_INLINE_SSE)
77 :
78 : /************************************************************************/
79 : /* CPLHaveRuntimeSSE() */
80 : /************************************************************************/
81 :
82 : bool CPLHaveRuntimeSSE()
83 : {
84 : int cpuinfo[4] = {0, 0, 0, 0};
85 : CPL_CPUID(1, cpuinfo);
86 : return (cpuinfo[REG_EDX] & (1 << CPUID_SSE_EDX_BIT)) != 0;
87 : }
88 :
89 : #endif
90 :
91 : #if defined(HAVE_SSSE3_AT_COMPILE_TIME) && !defined(HAVE_INLINE_SSSE3)
92 :
93 : /************************************************************************/
94 : /* CPLHaveRuntimeSSSE3() */
95 : /************************************************************************/
96 :
97 238181 : static inline bool CPLDetectSSSE3()
98 : {
99 238181 : int cpuinfo[4] = {0, 0, 0, 0};
100 238181 : CPL_CPUID(1, cpuinfo);
101 238181 : return (cpuinfo[REG_ECX] & (1 << CPUID_SSSE3_ECX_BIT)) != 0;
102 : }
103 :
104 : #if defined(__GNUC__) && !defined(DEBUG)
105 : bool bCPLHasSSSE3 = false;
106 : static void CPLHaveRuntimeSSSE3Initialize() __attribute__((constructor));
107 :
108 : static void CPLHaveRuntimeSSSE3Initialize()
109 : {
110 : bCPLHasSSSE3 = CPLDetectSSSE3();
111 : }
112 : #else
113 238180 : bool CPLHaveRuntimeSSSE3()
114 : {
115 : #ifdef DEBUG
116 238180 : if (!CPLTestBool(CPLGetConfigOption("GDAL_USE_SSSE3", "YES")))
117 3 : return false;
118 : #endif
119 238180 : return CPLDetectSSSE3();
120 : }
121 : #endif
122 :
123 : #endif // defined(HAVE_SSSE3_AT_COMPILE_TIME) && !defined(HAVE_INLINE_SSSE3)
124 :
125 : #if defined(HAVE_AVX_AT_COMPILE_TIME) && !defined(HAVE_INLINE_AVX)
126 :
127 : /************************************************************************/
128 : /* CPLHaveRuntimeAVX() */
129 : /************************************************************************/
130 :
131 : #if defined(__GNUC__)
132 :
133 1242 : static bool CPLDetectRuntimeAVX()
134 : {
135 1242 : int cpuinfo[4] = {0, 0, 0, 0};
136 1242 : CPL_CPUID(1, cpuinfo);
137 :
138 : // Check OSXSAVE feature.
139 1242 : if ((cpuinfo[REG_ECX] & (1 << CPUID_OSXSAVE_ECX_BIT)) == 0)
140 : {
141 0 : return false;
142 : }
143 :
144 : // Check AVX feature.
145 1242 : if ((cpuinfo[REG_ECX] & (1 << CPUID_AVX_ECX_BIT)) == 0)
146 : {
147 0 : return false;
148 : }
149 :
150 : // Issue XGETBV and check the XMM and YMM state bit.
151 : unsigned int nXCRLow;
152 : unsigned int nXCRHigh;
153 1242 : __asm__("xgetbv" : "=a"(nXCRLow), "=d"(nXCRHigh) : "c"(0));
154 1242 : if ((nXCRLow & (BIT_XMM_STATE | BIT_YMM_STATE)) !=
155 : (BIT_XMM_STATE | BIT_YMM_STATE))
156 : {
157 0 : return false;
158 : }
159 1242 : CPL_IGNORE_RET_VAL(nXCRHigh); // unused
160 :
161 1242 : return true;
162 : }
163 :
164 : bool bCPLHasAVX = false;
165 : static void CPLHaveRuntimeAVXInitialize() __attribute__((constructor));
166 :
167 1242 : static void CPLHaveRuntimeAVXInitialize()
168 : {
169 1242 : bCPLHasAVX = CPLDetectRuntimeAVX();
170 1242 : }
171 :
172 : #elif defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) && \
173 : (defined(_M_IX86) || defined(_M_X64))
174 : // _xgetbv available only in Visual Studio 2010 SP1 or later
175 :
176 : bool CPLHaveRuntimeAVX()
177 : {
178 : int cpuinfo[4] = {0, 0, 0, 0};
179 : CPL_CPUID(1, cpuinfo);
180 :
181 : // Check OSXSAVE feature.
182 : if ((cpuinfo[REG_ECX] & (1 << CPUID_OSXSAVE_ECX_BIT)) == 0)
183 : {
184 : return false;
185 : }
186 :
187 : // Check AVX feature.
188 : if ((cpuinfo[REG_ECX] & (1 << CPUID_AVX_ECX_BIT)) == 0)
189 : {
190 : return false;
191 : }
192 :
193 : // Issue XGETBV and check the XMM and YMM state bit.
194 : unsigned __int64 xcrFeatureMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
195 : if ((xcrFeatureMask & (BIT_XMM_STATE | BIT_YMM_STATE)) !=
196 : (BIT_XMM_STATE | BIT_YMM_STATE))
197 : {
198 : return false;
199 : }
200 :
201 : return true;
202 : }
203 :
204 : #else
205 :
// Fallback used when neither of the compiler-specific implementations above
// is selected: no detection is attempted and AVX is reported as unavailable.
bool CPLHaveRuntimeAVX()
{
    const bool bHasAVX = false;
    return bHasAVX;
}
210 :
211 : #endif
212 :
213 : #endif // defined(HAVE_AVX_AT_COMPILE_TIME) && !defined(HAVE_INLINE_AVX)
214 :
215 : //! @endcond
|