rijndael_armv8_impl.hpp
Go to the documentation of this file.
1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2018-2020 Mikhail Komarov <nemo@nil.foundation>
3 //
4 // MIT License
5 //
6 // Permission is hereby granted, free of charge, to any person obtaining a copy
7 // of this software and associated documentation files (the "Software"), to deal
8 // in the Software without restriction, including without limitation the rights
9 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 // copies of the Software, and to permit persons to whom the Software is
11 // furnished to do so, subject to the following conditions:
12 //
13 // The above copyright notice and this permission notice shall be included in all
14 // copies or substantial portions of the Software.
15 //
16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 // SOFTWARE.
23 //---------------------------------------------------------------------------//
24 
25 #ifndef CRYPTO3_RIJNDAEL_ARMV8_IMPL_HPP
26 #define CRYPTO3_RIJNDAEL_ARMV8_IMPL_HPP
27 
28 #include <nil/crypto3/block/detail/rijndael_impl.hpp>
29 
30 #include <cstddef>
31 #include <arm_neon.h>
32 
33 namespace nil {
34  namespace crypto3 {
35  namespace block {
36  namespace detail {
// Runs one encryption round step on four state vectors at once: each of
// B0..B3 is passed through vaeseq_u8 with round key K and the result through
// vaesmcq_u8. B0, B1, B2 and B3 must be assignable variables in scope at the
// expansion site; K is expanded (and therefore evaluated) four times.
#define AES_ENC_4_ROUNDS(K)                    \
    do {                                       \
        B0 = vaesmcq_u8(vaeseq_u8(B0, K));     \
        B1 = vaesmcq_u8(vaeseq_u8(B1, K));     \
        B2 = vaesmcq_u8(vaeseq_u8(B2, K));     \
        B3 = vaesmcq_u8(vaeseq_u8(B3, K));     \
    } while (false)
44 
// Final encryption step on four state vectors at once: each of B0..B3 is
// passed through vaeseq_u8 with round key K and the result is XORed
// (veorq_u8) with K2. No vaesmcq_u8 is applied here. B0, B1, B2 and B3 must
// be assignable variables in scope at the expansion site; K and K2 are each
// expanded four times.
#define AES_ENC_4_LAST_ROUNDS(K, K2)             \
    do {                                         \
        B0 = veorq_u8(vaeseq_u8(B0, K), K2);     \
        B1 = veorq_u8(vaeseq_u8(B1, K), K2);     \
        B2 = veorq_u8(vaeseq_u8(B2, K), K2);     \
        B3 = veorq_u8(vaeseq_u8(B3, K), K2);     \
    } while (false)
52 
// Runs one decryption round step on four state vectors at once: each of
// B0..B3 is passed through vaesdq_u8 with round key K and the result through
// vaesimcq_u8. B0, B1, B2 and B3 must be assignable variables in scope at the
// expansion site; K is expanded (and therefore evaluated) four times.
#define AES_DEC_4_ROUNDS(K)                     \
    do {                                        \
        B0 = vaesimcq_u8(vaesdq_u8(B0, K));     \
        B1 = vaesimcq_u8(vaesdq_u8(B1, K));     \
        B2 = vaesimcq_u8(vaesdq_u8(B2, K));     \
        B3 = vaesimcq_u8(vaesdq_u8(B3, K));     \
    } while (false)
60 
// Final decryption step on four state vectors at once: each of B0..B3 is
// passed through vaesdq_u8 with round key K and the result is XORed
// (veorq_u8) with K2. No vaesimcq_u8 is applied here. B0, B1, B2 and B3 must
// be assignable variables in scope at the expansion site; K and K2 are each
// expanded four times.
#define AES_DEC_4_LAST_ROUNDS(K, K2)             \
    do {                                         \
        B0 = veorq_u8(vaesdq_u8(B0, K), K2);     \
        B1 = veorq_u8(vaesdq_u8(B1, K), K2);     \
        B2 = veorq_u8(vaesdq_u8(B2, K), K2);     \
        B3 = veorq_u8(vaesdq_u8(B3, K), K2);     \
    } while (false)
68 
/*!
 * @brief Primary template of the ARMv8 Rijndael base class.
 *
 * Only the 128-bit block size is supported; that case is handled by the
 * partial specialization declared below. The primary template is therefore
 * selected only for other block sizes, and instantiating it is rejected at
 * compile time.
 *
 * @tparam KeyBitsImpl Key size in bits.
 * @tparam BlockBitsImpl Block size in bits.
 * @tparam PolicyType Policy type handed through to the specializations.
 */
template<std::size_t KeyBitsImpl, std::size_t BlockBitsImpl, typename PolicyType>
class basic_armv8_rijndael_impl {
    // The condition must be the one that FAILS here: this body is only ever
    // instantiated when BlockBitsImpl is not 128.
    static_assert(BlockBitsImpl == 128, "Wrong block size!");
};

// Forward declaration of the 128-bit-block partial specialization.
template<std::size_t KeyBitsImpl, typename PolicyType>
class basic_armv8_rijndael_impl<KeyBitsImpl, 128, PolicyType>;
76 
77  template<std::size_t KeyBitsImpl, std::size_t BlockBitsImpl, typename PolicyType>
78  class rijndael_armv8_impl : public basic_armv8_rijndael_impl<KeyBitsImpl, BlockBitsImpl, PolicyType> {
79  static_assert(BlockBitsImpl != 128, "Wrong block size!");
80  };
81 
82  template<std::size_t KeyBitsImpl, typename PolicyType>
83  class rijndael_armv8_impl<KeyBitsImpl, 128, PolicyType>;
84 
85  template<std::size_t KeyBitsImpl, typename PolicyType>
86  class basic_armv8_rijndael_impl<KeyBitsImpl, 128, PolicyType> {
87  protected:
88  typedef PolicyType policy_type;
89  typedef typename policy_type::block_type block_type;
90  typedef typename policy_type::key_type key_type;
91  typedef typename policy_type::key_schedule_type key_schedule_type;
92 
93  public:
94  static inline void schedule_key(const key_type &key,
95  key_schedule_type encryption_key,
96  key_schedule_type &decryption_key) {
97  rijndael_impl<KeyBitsImpl, 128>::schedule_key(key, encryption_key, decryption_key);
98 
99  for (typename basic_type::key_schedule_type::value_type &c : encryption_key) {
100  c = reverse_bytes(c);
101  }
102  for (typename basic_type::key_schedule_type::value_type &c : decryption_key) {
103  c = reverse_bytes(c);
104  }
105  }
106  };
107  } // namespace detail
108 
109  template<typename PolicyType>
110  class rijndael_armv8_impl<128, 128, PolicyType> : public basic_armv8_rijndael_impl<128, 128, PolicyType> {
111  public:
112  static block_type encrypt_block(const block_type &plaintext, const key_schedule_type &encryption_key) {
113  typename basic_type::block_type out = {0};
114 
115  const uint8_t *skey = reinterpret_cast<const uint8_t *>(encryption_key.data());
116  const uint8_t *mkey = reinterpret_cast<const uint8_t *>(m_ME.data());
117 
118  const uint8x16_t K0 = vld1q_u8(skey + 0);
119  const uint8x16_t K1 = vld1q_u8(skey + 16);
120  const uint8x16_t K2 = vld1q_u8(skey + 32);
121  const uint8x16_t K3 = vld1q_u8(skey + 48);
122  const uint8x16_t K4 = vld1q_u8(skey + 64);
123  const uint8x16_t K5 = vld1q_u8(skey + 80);
124  const uint8x16_t K6 = vld1q_u8(skey + 96);
125  const uint8x16_t K7 = vld1q_u8(skey + 112);
126  const uint8x16_t K8 = vld1q_u8(skey + 128);
127  const uint8x16_t K9 = vld1q_u8(skey + 144);
128  const uint8x16_t K10 = vld1q_u8(mkey);
129 
130  uint8x16_t B = vld1q_u8(plaintext.data());
131  B = vaesmcq_u8(vaeseq_u8(B, K0));
132  B = vaesmcq_u8(vaeseq_u8(B, K1));
133  B = vaesmcq_u8(vaeseq_u8(B, K2));
134  B = vaesmcq_u8(vaeseq_u8(B, K3));
135  B = vaesmcq_u8(vaeseq_u8(B, K4));
136  B = vaesmcq_u8(vaeseq_u8(B, K5));
137  B = vaesmcq_u8(vaeseq_u8(B, K6));
138  B = vaesmcq_u8(vaeseq_u8(B, K7));
139  B = vaesmcq_u8(vaeseq_u8(B, K8));
140  B = veorq_u8(vaeseq_u8(B, K9), K10);
141  vst1q_u8(out.data(), B);
142 
143  return out;
144  }
145 
146  static block_type decrypt_block(const block_type &plaintext, const key_schedule_type &decryption_key) {
147  block_type out = {0};
148 
149  const uint8_t *skey = reinterpret_cast<const uint8_t *>(decryption_key.data());
150  const uint8_t *mkey = reinterpret_cast<const uint8_t *>(m_MD.data());
151 
152  const uint8x16_t K0 = vld1q_u8(skey + 0);
153  const uint8x16_t K1 = vld1q_u8(skey + 16);
154  const uint8x16_t K2 = vld1q_u8(skey + 32);
155  const uint8x16_t K3 = vld1q_u8(skey + 48);
156  const uint8x16_t K4 = vld1q_u8(skey + 64);
157  const uint8x16_t K5 = vld1q_u8(skey + 80);
158  const uint8x16_t K6 = vld1q_u8(skey + 96);
159  const uint8x16_t K7 = vld1q_u8(skey + 112);
160  const uint8x16_t K8 = vld1q_u8(skey + 128);
161  const uint8x16_t K9 = vld1q_u8(skey + 144);
162  const uint8x16_t K10 = vld1q_u8(mkey);
163 
164  uint8x16_t B = vld1q_u8(plaintext.data());
165  B = vaesimcq_u8(vaesdq_u8(B, K0));
166  B = vaesimcq_u8(vaesdq_u8(B, K1));
167  B = vaesimcq_u8(vaesdq_u8(B, K2));
168  B = vaesimcq_u8(vaesdq_u8(B, K3));
169  B = vaesimcq_u8(vaesdq_u8(B, K4));
170  B = vaesimcq_u8(vaesdq_u8(B, K5));
171  B = vaesimcq_u8(vaesdq_u8(B, K6));
172  B = vaesimcq_u8(vaesdq_u8(B, K7));
173  B = vaesimcq_u8(vaesdq_u8(B, K8));
174  B = veorq_u8(vaesdq_u8(B, K9), K10);
175 
176  vst1q_u8(out.data(), B);
177 
178  return out;
179  }
180  };
181 
182  template<typename PolicyType>
183  class rijndael_armv8_impl<192, 128, PolicyType> : public basic_armv8_rijndael_impl<192, 128, PolicyType> {
184  public:
185  static block_type encrypt_block(const block_type &plaintext, const key_schedule_type &encryption_key) {
186  block_type out = {0};
187 
188  const uint8_t *skey = reinterpret_cast<const uint8_t *>(encryption_key.data());
189  const uint8_t *mkey = reinterpret_cast<const uint8_t *>(m_ME.data());
190 
191  const uint8x16_t K0 = vld1q_u8(skey + 0);
192  const uint8x16_t K1 = vld1q_u8(skey + 16);
193  const uint8x16_t K2 = vld1q_u8(skey + 32);
194  const uint8x16_t K3 = vld1q_u8(skey + 48);
195  const uint8x16_t K4 = vld1q_u8(skey + 64);
196  const uint8x16_t K5 = vld1q_u8(skey + 80);
197  const uint8x16_t K6 = vld1q_u8(skey + 96);
198  const uint8x16_t K7 = vld1q_u8(skey + 112);
199  const uint8x16_t K8 = vld1q_u8(skey + 128);
200  const uint8x16_t K9 = vld1q_u8(skey + 144);
201  const uint8x16_t K10 = vld1q_u8(skey + 160);
202  const uint8x16_t K11 = vld1q_u8(skey + 176);
203  const uint8x16_t K12 = vld1q_u8(mkey);
204 
205  uint8x16_t B = vld1q_u8(plaintext.data());
206  B = vaesmcq_u8(vaeseq_u8(B, K0));
207  B = vaesmcq_u8(vaeseq_u8(B, K1));
208  B = vaesmcq_u8(vaeseq_u8(B, K2));
209  B = vaesmcq_u8(vaeseq_u8(B, K3));
210  B = vaesmcq_u8(vaeseq_u8(B, K4));
211  B = vaesmcq_u8(vaeseq_u8(B, K5));
212  B = vaesmcq_u8(vaeseq_u8(B, K6));
213  B = vaesmcq_u8(vaeseq_u8(B, K7));
214  B = vaesmcq_u8(vaeseq_u8(B, K8));
215  B = vaesmcq_u8(vaeseq_u8(B, K9));
216  B = vaesmcq_u8(vaeseq_u8(B, K10));
217  B = veorq_u8(vaeseq_u8(B, K11), K12);
218  vst1q_u8(out.data(), B);
219 
220  return out;
221  }
222 
223  static block_type decrypt_block(const block_type &plaintext, const key_schedule_type &decryption_key) {
224  block_type out = {0};
225  const uint8_t *skey = reinterpret_cast<const uint8_t *>(decryption_key.data());
226  const uint8_t *mkey = reinterpret_cast<const uint8_t *>(m_MD.data());
227 
228  const uint8x16_t K0 = vld1q_u8(skey + 0);
229  const uint8x16_t K1 = vld1q_u8(skey + 16);
230  const uint8x16_t K2 = vld1q_u8(skey + 32);
231  const uint8x16_t K3 = vld1q_u8(skey + 48);
232  const uint8x16_t K4 = vld1q_u8(skey + 64);
233  const uint8x16_t K5 = vld1q_u8(skey + 80);
234  const uint8x16_t K6 = vld1q_u8(skey + 96);
235  const uint8x16_t K7 = vld1q_u8(skey + 112);
236  const uint8x16_t K8 = vld1q_u8(skey + 128);
237  const uint8x16_t K9 = vld1q_u8(skey + 144);
238  const uint8x16_t K10 = vld1q_u8(skey + 160);
239  const uint8x16_t K11 = vld1q_u8(skey + 176);
240  const uint8x16_t K12 = vld1q_u8(mkey);
241 
242  uint8x16_t B = vld1q_u8(plaintext.data());
243  B = vaesimcq_u8(vaesdq_u8(B, K0));
244  B = vaesimcq_u8(vaesdq_u8(B, K1));
245  B = vaesimcq_u8(vaesdq_u8(B, K2));
246  B = vaesimcq_u8(vaesdq_u8(B, K3));
247  B = vaesimcq_u8(vaesdq_u8(B, K4));
248  B = vaesimcq_u8(vaesdq_u8(B, K5));
249  B = vaesimcq_u8(vaesdq_u8(B, K6));
250  B = vaesimcq_u8(vaesdq_u8(B, K7));
251  B = vaesimcq_u8(vaesdq_u8(B, K8));
252  B = vaesimcq_u8(vaesdq_u8(B, K9));
253  B = vaesimcq_u8(vaesdq_u8(B, K10));
254  B = veorq_u8(vaesdq_u8(B, K11), K12);
255  vst1q_u8(out.data(), B);
256 
257  return out;
258  }
259  };
260 
261  template<typename PolicyType>
262  class rijndael_armv8_impl<256, 128, PolicyType> : public basic_armv8_rijndael_impl<256, 128, PolicyType> {
263  public:
264  static block_type encrypt_block(const block_type &plaintext, const key_schedule_type &encryption_key) {
265  block_type out = {0};
266  const uint8_t *skey = reinterpret_cast<const uint8_t *>(encryption_key.data());
267  const uint8_t *mkey = reinterpret_cast<const uint8_t *>(m_ME.data());
268 
269  const uint8x16_t K0 = vld1q_u8(skey + 0);
270  const uint8x16_t K1 = vld1q_u8(skey + 16);
271  const uint8x16_t K2 = vld1q_u8(skey + 32);
272  const uint8x16_t K3 = vld1q_u8(skey + 48);
273  const uint8x16_t K4 = vld1q_u8(skey + 64);
274  const uint8x16_t K5 = vld1q_u8(skey + 80);
275  const uint8x16_t K6 = vld1q_u8(skey + 96);
276  const uint8x16_t K7 = vld1q_u8(skey + 112);
277  const uint8x16_t K8 = vld1q_u8(skey + 128);
278  const uint8x16_t K9 = vld1q_u8(skey + 144);
279  const uint8x16_t K10 = vld1q_u8(skey + 160);
280  const uint8x16_t K11 = vld1q_u8(skey + 176);
281  const uint8x16_t K12 = vld1q_u8(skey + 192);
282  const uint8x16_t K13 = vld1q_u8(skey + 208);
283  const uint8x16_t K14 = vld1q_u8(mkey);
284 
285  uint8x16_t B = vld1q_u8(plaintext.data());
286  B = vaesmcq_u8(vaeseq_u8(B, K0));
287  B = vaesmcq_u8(vaeseq_u8(B, K1));
288  B = vaesmcq_u8(vaeseq_u8(B, K2));
289  B = vaesmcq_u8(vaeseq_u8(B, K3));
290  B = vaesmcq_u8(vaeseq_u8(B, K4));
291  B = vaesmcq_u8(vaeseq_u8(B, K5));
292  B = vaesmcq_u8(vaeseq_u8(B, K6));
293  B = vaesmcq_u8(vaeseq_u8(B, K7));
294  B = vaesmcq_u8(vaeseq_u8(B, K8));
295  B = vaesmcq_u8(vaeseq_u8(B, K9));
296  B = vaesmcq_u8(vaeseq_u8(B, K10));
297  B = vaesmcq_u8(vaeseq_u8(B, K11));
298  B = vaesmcq_u8(vaeseq_u8(B, K12));
299  B = veorq_u8(vaeseq_u8(B, K13), K14);
300  vst1q_u8(out.data(), B);
301 
302  return out;
303  }
304 
305  static block_type decrypt_block(const block_type &plaintext, const key_schedule_type &decryption_key) {
306  const uint8_t *skey = reinterpret_cast<const uint8_t *>(decryption_key.data());
307  const uint8_t *mkey = reinterpret_cast<const uint8_t *>(m_MD.data());
308 
309  const uint8x16_t K0 = vld1q_u8(skey + 0);
310  const uint8x16_t K1 = vld1q_u8(skey + 16);
311  const uint8x16_t K2 = vld1q_u8(skey + 32);
312  const uint8x16_t K3 = vld1q_u8(skey + 48);
313  const uint8x16_t K4 = vld1q_u8(skey + 64);
314  const uint8x16_t K5 = vld1q_u8(skey + 80);
315  const uint8x16_t K6 = vld1q_u8(skey + 96);
316  const uint8x16_t K7 = vld1q_u8(skey + 112);
317  const uint8x16_t K8 = vld1q_u8(skey + 128);
318  const uint8x16_t K9 = vld1q_u8(skey + 144);
319  const uint8x16_t K10 = vld1q_u8(skey + 160);
320  const uint8x16_t K11 = vld1q_u8(skey + 176);
321  const uint8x16_t K12 = vld1q_u8(skey + 192);
322  const uint8x16_t K13 = vld1q_u8(skey + 208);
323  const uint8x16_t K14 = vld1q_u8(mkey);
324 
325  uint8x16_t B = vld1q_u8(plaintext.data());
326  B = vaesimcq_u8(vaesdq_u8(B, K0));
327  B = vaesimcq_u8(vaesdq_u8(B, K1));
328  B = vaesimcq_u8(vaesdq_u8(B, K2));
329  B = vaesimcq_u8(vaesdq_u8(B, K3));
330  B = vaesimcq_u8(vaesdq_u8(B, K4));
331  B = vaesimcq_u8(vaesdq_u8(B, K5));
332  B = vaesimcq_u8(vaesdq_u8(B, K6));
333  B = vaesimcq_u8(vaesdq_u8(B, K7));
334  B = vaesimcq_u8(vaesdq_u8(B, K8));
335  B = vaesimcq_u8(vaesdq_u8(B, K9));
336  B = vaesimcq_u8(vaesdq_u8(B, K10));
337  B = vaesimcq_u8(vaesdq_u8(B, K11));
338  B = vaesimcq_u8(vaesdq_u8(B, K12));
339  B = veorq_u8(vaesdq_u8(B, K13), K14);
340  vst1q_u8(out.data(), B);
341 
342  return out;
343  }
344  };
345  } // namespace block
346  } // namespace crypto3
347 } // namespace nil
348 
349 #endif // CRYPTO3_RIJNDAEL_ARMV8_IMPL_HPP
policy_type::block_type block_type
Definition: rijndael_armv8_impl.hpp:89
policy_type::key_type key_type
Definition: rijndael_armv8_impl.hpp:90
policy_type::key_schedule_type key_schedule_type
Definition: rijndael_armv8_impl.hpp:91
static void schedule_key(const key_type &key, key_schedule_type encryption_key, key_schedule_type &decryption_key)
Definition: rijndael_armv8_impl.hpp:94
Definition: rijndael_armv8_impl.hpp:70
Definition: rijndael_armv8_impl.hpp:78
static block_type decrypt_block(const block_type &plaintext, const key_schedule_type &decryption_key)
Definition: rijndael_armv8_impl.hpp:146
static block_type encrypt_block(const block_type &plaintext, const key_schedule_type &encryption_key)
Definition: rijndael_armv8_impl.hpp:112
static block_type encrypt_block(const block_type &plaintext, const key_schedule_type &encryption_key)
Definition: rijndael_armv8_impl.hpp:185
static block_type decrypt_block(const block_type &plaintext, const key_schedule_type &decryption_key)
Definition: rijndael_armv8_impl.hpp:223
static block_type encrypt_block(const block_type &plaintext, const key_schedule_type &encryption_key)
Definition: rijndael_armv8_impl.hpp:264
static block_type decrypt_block(const block_type &plaintext, const key_schedule_type &decryption_key)
Definition: rijndael_armv8_impl.hpp:305
boost::mpl::apply< AccumulatorSet, tag::block< Mode > >::type::result_type block(const AccumulatorSet &acc)
Definition: accumulators/block.hpp:259
Definition: pair.hpp:31