cpuid_arm.hpp
Go to the documentation of this file.
1 #include <nil/crypto3/utilities/cpuid/cpuid.hpp>
2 
3 #if defined(BOOST_ARCH_ARM)
4 
5 #if defined(CRYPTO3_TARGET_OS_HAS_GETAUXVAL)
6 #include <sys/auxv.h>
7 
8 #elif defined(CRYPTO3_TARGET_OS_IS_IOS)
9 #include <sys/types.h>
10 #include <sys/sysctl.h>
11 
12 #else
13 #include <nil/crypto3/utilities/os_utils.h>
14 
15 #endif
16 
17 #endif
18 
19 namespace nil {
20  namespace crypto3 {
21 
22 #if defined(BOOST_ARCH_ARM)
23 
24 #if defined(CRYPTO3_TARGET_OS_IS_IOS)
25 
26  namespace {
27 
28  uint64_t flags_by_ios_machine_type(const std::string &machine) {
29  /*
30  * This relies on a map of known machine names to features. This
31  * will quickly grow out of date as new products are introduced, but
32  * is apparently the best we can do for iOS.
33  */
34 
35  struct version_info {
36  std::string name;
37  size_t min_version_neon;
38  size_t min_version_armv8;
39  };
40 
41  static const version_info min_versions[] = {
42  {"iPhone", 2, 6},
43  {"iPad", 1, 4},
44  {"iPod", 4, 7},
45  {"AppleTV", 2, 5},
46  };
47 
48  if (machine.size() < 3)
49  return 0;
50 
51  auto comma = machine.find(',');
52 
53  // Simulator, or something we don't know about
54  if (comma == std::string::npos)
55  return 0;
56 
57  std::string product = machine.substr(0, comma);
58 
59  size_t version = 0;
60  size_t place = 1;
61  while (product.size() > 1 && ::isdigit(product.back())) {
62  const size_t digit = product.back() - '0';
63  version += digit * place;
64  place *= 10;
65  product.pop_back();
66  }
67 
68  if (version == 0)
69  return 0;
70 
71  for (const version_info &info : min_versions) {
72  if (info.name != product)
73  continue;
74 
75  if (version >= info.min_version_armv8) {
76  return cpuid::CPUID_ARM_RIJNDAEL_BIT | cpuid::CPUID_ARM_PMULL_BIT | cpuid::CPUID_ARM_SHA1_BIT |
77  cpuid::CPUID_ARM_SHA2_BIT | cpuid::CPUID_ARM_NEON_BIT;
78  }
79 
80  if (version >= info.min_version_neon)
81  return cpuid::CPUID_ARM_NEON_BIT;
82  }
83 
84  // Some other product we don't know about
85  return 0;
86  }
87 
88  } // namespace
89 
90 #endif
91 
92  uint64_t cpuid::detect_cpu_features(size_t *cache_line_size) {
93  uint64_t detected_features = 0;
94 
95 #if defined(CRYPTO3_TARGET_OS_HAS_GETAUXVAL)
96  /*
97  * On systems with getauxval these bits should normally be defined
98  * in bits/auxv.h but some buggy? glibc installs seem to miss them.
99  * These following values are all fixed, for the Linux ELF format,
100  * so we just hardcode them in ARM_hwcap_bit enum.
101  */
102 
103  enum ARM_hwcap_bit {
104 #if defined(CRYPTO3_TARGET_ARCHITECTURE_IS_ARM32)
105  NEON_bit = (1 << 12),
106  AES_bit = (1 << 0),
107  PMULL_bit = (1 << 1),
108  SHA1_bit = (1 << 2),
109  SHA2_bit = (1 << 3),
110 
111  ARCH_hwcap_neon = 16, // AT_HWCAP
112  ARCH_hwcap_crypto = 26, // AT_HWCAP2
113 #elif defined(CRYPTO3_TARGET_ARCHITECTURE_IS_ARM64)
114  NEON_bit = (1 << 1),
115  AES_bit = (1 << 3),
116  PMULL_bit = (1 << 4),
117  SHA1_bit = (1 << 5),
118  SHA2_bit = (1 << 6),
119 
120  ARCH_hwcap_neon = 16, // AT_HWCAP
121  ARCH_hwcap_crypto = 16, // AT_HWCAP
122 #endif
123  };
124 
125 #if defined(AT_DCACHEBSIZE)
126  const unsigned long dcache_line = ::getauxval(AT_DCACHEBSIZE);
127 
128  // plausibility check
129  if (dcache_line == 32 || dcache_line == 64 || dcache_line == 128)
130  *cache_line_size = static_cast<size_t>(dcache_line);
131 #endif
132 
133  const unsigned long hwcap_neon = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_neon);
134  if (hwcap_neon & ARM_hwcap_bit::NEON_bit)
135  detected_features |= cpuid::CPUID_ARM_NEON_BIT;
136 
137  /*
138  On aarch64 this ends up calling getauxval twice with AT_HWCAP
139  It doesn't seem worth optimizing this out, since getauxval is
140  just reading a field in the ELF header.
141  */
142  const unsigned long hwcap_crypto = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_crypto);
143  if (hwcap_crypto & ARM_hwcap_bit::AES_bit)
144  detected_features |= cpuid::CPUID_ARM_RIJNDAEL_BIT;
145  if (hwcap_crypto & ARM_hwcap_bit::PMULL_bit)
146  detected_features |= cpuid::CPUID_ARM_PMULL_BIT;
147  if (hwcap_crypto & ARM_hwcap_bit::SHA1_bit)
148  detected_features |= cpuid::CPUID_ARM_SHA1_BIT;
149  if (hwcap_crypto & ARM_hwcap_bit::SHA2_bit)
150  detected_features |= cpuid::CPUID_ARM_SHA2_BIT;
151 
152 #elif defined(CRYPTO3_TARGET_OS_IS_IOS)
153 
154  char machine[64] = {0};
155  size_t size = sizeof(machine) - 1;
156  ::sysctlbyname("hw.machine", machine, &size, nullptr, 0);
157 
158  detected_features = flags_by_ios_machine_type(machine);
159 
160 #elif defined(CRYPTO3_USE_GCC_INLINE_ASM) && defined(CRYPTO3_TARGET_ARCHITECTURE_IS_ARM64)
161 
162  /*
163  No getauxval API available, fall back on probe functions. We only
164  bother with Aarch64 here to simplify the code and because going to
165  extreme contortions to support detect NEON on devices that probably
166  don't support it doesn't seem worthwhile.
167 
168  NEON registers v0-v7 are caller saved in Aarch64
169  */
170 
171  auto neon_probe = []() -> int {
172  asm("and v0.16b, v0.16b, v0.16b");
173  return 1;
174  };
175  auto aes_probe = []() -> int {
176  asm(".word 0x4e284800");
177  return 1;
178  };
179  auto pmull_probe = []() -> int {
180  asm(".word 0x0ee0e000");
181  return 1;
182  };
183  auto sha1_probe = []() -> int {
184  asm(".word 0x5e280800");
185  return 1;
186  };
187  auto sha2_probe = []() -> int {
188  asm(".word 0x5e282800");
189  return 1;
190  };
191 
192  // Only bother running the crypto3 detection if we found NEON
193 
194  if (run_cpu_instruction_probe(neon_probe) == 1) {
195  detected_features |= cpuid::CPUID_ARM_NEON_BIT;
196 
197  if (run_cpu_instruction_probe(aes_probe) == 1)
198  detected_features |= cpuid::CPUID_ARM_RIJNDAEL_BIT;
199  if (run_cpu_instruction_probe(pmull_probe) == 1)
200  detected_features |= cpuid::CPUID_ARM_PMULL_BIT;
201  if (run_cpu_instruction_probe(sha1_probe) == 1)
202  detected_features |= cpuid::CPUID_ARM_SHA1_BIT;
203  if (run_cpu_instruction_probe(sha2_probe) == 1)
204  detected_features |= cpuid::CPUID_ARM_SHA2_BIT;
205  }
206 
207 #endif
208 
209  return detected_features;
210  }
211 
212 #endif
213  } // namespace crypto3
214 } // namespace nil
static size_t cache_line_size()
Definition: cpuid.hpp:134
Definition: pair.hpp:31