25 #ifndef CRYPTO3_RIJNDAEL_ARMV8_IMPL_HPP
26 #define CRYPTO3_RIJNDAEL_ARMV8_IMPL_HPP
28 #include <nil/crypto3/block/detail/rijndael_impl.hpp>
// Runs one full encryption round with round key K on each of the four state
// vectors B0..B3: vaeseq_u8 (AESE) followed by vaesmcq_u8 (AESMC).
// B0..B3 are not macro parameters; they must be variables visible at the
// point of expansion. The macro is not expanded anywhere in the visible part
// of this listing.
37 #define AES_ENC_4_ROUNDS(K) \
39 B0 = vaesmcq_u8(vaeseq_u8(B0, K)); \
40 B1 = vaesmcq_u8(vaeseq_u8(B1, K)); \
41 B2 = vaesmcq_u8(vaeseq_u8(B2, K)); \
42 B3 = vaesmcq_u8(vaeseq_u8(B3, K)); \
// Final encryption step for the four state vectors B0..B3: vaeseq_u8 (AESE)
// with K, then XOR with K2 via veorq_u8. No vaesmcq_u8 is applied here.
// B0..B3 must be variables visible at the point of expansion.
45 #define AES_ENC_4_LAST_ROUNDS(K, K2) \
47 B0 = veorq_u8(vaeseq_u8(B0, K), K2); \
48 B1 = veorq_u8(vaeseq_u8(B1, K), K2); \
49 B2 = veorq_u8(vaeseq_u8(B2, K), K2); \
50 B3 = veorq_u8(vaeseq_u8(B3, K), K2); \
// Runs one full decryption round with round key K on each of the four state
// vectors B0..B3: vaesdq_u8 (AESD) followed by vaesimcq_u8 (AESIMC).
// B0..B3 must be variables visible at the point of expansion.
53 #define AES_DEC_4_ROUNDS(K) \
55 B0 = vaesimcq_u8(vaesdq_u8(B0, K)); \
56 B1 = vaesimcq_u8(vaesdq_u8(B1, K)); \
57 B2 = vaesimcq_u8(vaesdq_u8(B2, K)); \
58 B3 = vaesimcq_u8(vaesdq_u8(B3, K)); \
// Final decryption step for the four state vectors B0..B3: vaesdq_u8 (AESD)
// with K, then XOR with K2 via veorq_u8. No vaesimcq_u8 is applied here.
// B0..B3 must be variables visible at the point of expansion.
61 #define AES_DEC_4_LAST_ROUNDS(K, K2) \
63 B0 = veorq_u8(vaesdq_u8(B0, K), K2); \
64 B1 = veorq_u8(vaesdq_u8(B1, K), K2); \
65 B2 = veorq_u8(vaesdq_u8(B2, K), K2); \
66 B3 = veorq_u8(vaesdq_u8(B3, K), K2); \
// Template header (key bits, block bits, policy) and compile-time check of a
// declaration whose own line is not visible in this listing.
// NOTE(review): the assertion passes for every BlockBitsImpl other than 128
// and fails only for 128 itself, yet every specialization visible further
// down is written for a 128-bit block. Confirm the condition is not inverted.
69 template<std::
size_t KeyBitsImpl, std::
size_t BlockBitsImpl,
typename PolicyType>
71 static_assert(BlockBitsImpl != 128,
"Wrong block size!");
74 template<std::
size_t KeyBitsImpl,
typename PolicyType>
// Template header (key bits, block bits, policy) and compile-time check of a
// second declaration whose own line is not visible in this listing.
// NOTE(review): the assertion passes for every BlockBitsImpl other than 128
// and fails only for 128 itself, yet every specialization visible further
// down is written for a 128-bit block. Confirm the condition is not inverted.
77 template<std::
size_t KeyBitsImpl, std::
size_t BlockBitsImpl,
typename PolicyType>
79 static_assert(BlockBitsImpl != 128,
"Wrong block size!");
82 template<std::
size_t KeyBitsImpl,
typename PolicyType>
// Template header (key bits, policy) of a declaration whose own line, and the
// signature of the function whose body follows, are missing from this listing.
85 template<std::
size_t KeyBitsImpl,
typename PolicyType>
// Key type alias taken from the policy.
90 typedef typename policy_type::key_type
key_type;
// Key schedule set-up: delegate to rijndael_impl<KeyBitsImpl, 128>::schedule_key
// to fill both schedules, then replace every element of each schedule with
// reverse_bytes() of itself.
// NOTE(review): the signature summary near the end of this listing shows
// encryption_key declared without '&' (by value) while decryption_key is a
// reference. If that is the real signature, everything written to
// encryption_key here is lost when the function returns - confirm.
97 rijndael_impl<KeyBitsImpl, 128>::schedule_key(key, encryption_key, decryption_key);
99 for (
typename basic_type::key_schedule_type::value_type &c : encryption_key) {
100 c = reverse_bytes(c);
102 for (
typename basic_type::key_schedule_type::value_type &c : decryption_key) {
103 c = reverse_bytes(c);
// Specialization for a 128-bit key and a 128-bit block, derived from
// basic_armv8_rijndael_impl<128, 128, PolicyType>.
109 template<
typename PolicyType>
110 class rijndael_armv8_impl<128, 128, PolicyType> :
public basic_armv8_rijndael_impl<128, 128, PolicyType> {
// Encrypts a single block with the ARMv8 AES intrinsics.
//   plaintext      - input block; 16 bytes are loaded from plaintext.data().
//   encryption_key - key schedule; its first 160 bytes are read as K0..K9.
// The result is stored into the local block `out`. The statement that returns
// it is not visible in this listing.
112 static block_type
encrypt_block(
const block_type &plaintext,
const key_schedule_type &encryption_key) {
113 typename basic_type::block_type out = {0};
// Byte views of the key material for vld1q_u8.
// NOTE(review): m_ME is not declared anywhere in the visible listing -
// confirm where it is defined.
115 const uint8_t *skey =
reinterpret_cast<const uint8_t *
>(encryption_key.data());
116 const uint8_t *mkey =
reinterpret_cast<const uint8_t *
>(m_ME.data());
// Round keys K0..K9 come from the schedule at 16-byte strides; the last key
// K10 is read from m_ME instead.
118 const uint8x16_t K0 = vld1q_u8(skey + 0);
119 const uint8x16_t K1 = vld1q_u8(skey + 16);
120 const uint8x16_t K2 = vld1q_u8(skey + 32);
121 const uint8x16_t K3 = vld1q_u8(skey + 48);
122 const uint8x16_t K4 = vld1q_u8(skey + 64);
123 const uint8x16_t K5 = vld1q_u8(skey + 80);
124 const uint8x16_t K6 = vld1q_u8(skey + 96);
125 const uint8x16_t K7 = vld1q_u8(skey + 112);
126 const uint8x16_t K8 = vld1q_u8(skey + 128);
127 const uint8x16_t K9 = vld1q_u8(skey + 144);
128 const uint8x16_t K10 = vld1q_u8(mkey);
// Nine AESE+AESMC rounds (K0..K8), then a final AESE with K9 whose result is
// XORed with K10.
130 uint8x16_t B = vld1q_u8(plaintext.data());
131 B = vaesmcq_u8(vaeseq_u8(B, K0));
132 B = vaesmcq_u8(vaeseq_u8(B, K1));
133 B = vaesmcq_u8(vaeseq_u8(B, K2));
134 B = vaesmcq_u8(vaeseq_u8(B, K3));
135 B = vaesmcq_u8(vaeseq_u8(B, K4));
136 B = vaesmcq_u8(vaeseq_u8(B, K5));
137 B = vaesmcq_u8(vaeseq_u8(B, K6));
138 B = vaesmcq_u8(vaeseq_u8(B, K7));
139 B = vaesmcq_u8(vaeseq_u8(B, K8));
140 B = veorq_u8(vaeseq_u8(B, K9), K10);
141 vst1q_u8(out.data(), B);
// Decrypts a single block with the ARMv8 AES intrinsics (128-bit key).
//   plaintext      - input block; 16 bytes are loaded from plaintext.data().
//                    NOTE(review): presumably this is the ciphertext; the
//                    parameter name is the one used in the declared signature.
//   decryption_key - key schedule; its first 160 bytes are read as K0..K9.
// The result is stored into the local block `out`. The statement that returns
// it is not visible in this listing.
146 static block_type
decrypt_block(
const block_type &plaintext,
const key_schedule_type &decryption_key) {
147 block_type out = {0};
// Byte views of the key material for vld1q_u8.
// NOTE(review): m_MD is not declared anywhere in the visible listing -
// confirm where it is defined.
149 const uint8_t *skey =
reinterpret_cast<const uint8_t *
>(decryption_key.data());
150 const uint8_t *mkey =
reinterpret_cast<const uint8_t *
>(m_MD.data());
// Round keys K0..K9 come from the schedule at 16-byte strides; the last key
// K10 is read from m_MD instead.
152 const uint8x16_t K0 = vld1q_u8(skey + 0);
153 const uint8x16_t K1 = vld1q_u8(skey + 16);
154 const uint8x16_t K2 = vld1q_u8(skey + 32);
155 const uint8x16_t K3 = vld1q_u8(skey + 48);
156 const uint8x16_t K4 = vld1q_u8(skey + 64);
157 const uint8x16_t K5 = vld1q_u8(skey + 80);
158 const uint8x16_t K6 = vld1q_u8(skey + 96);
159 const uint8x16_t K7 = vld1q_u8(skey + 112);
160 const uint8x16_t K8 = vld1q_u8(skey + 128);
161 const uint8x16_t K9 = vld1q_u8(skey + 144);
162 const uint8x16_t K10 = vld1q_u8(mkey);
// Nine AESD+AESIMC rounds (K0..K8), then a final AESD with K9 whose result is
// XORed with K10.
164 uint8x16_t B = vld1q_u8(plaintext.data());
165 B = vaesimcq_u8(vaesdq_u8(B, K0));
166 B = vaesimcq_u8(vaesdq_u8(B, K1));
167 B = vaesimcq_u8(vaesdq_u8(B, K2));
168 B = vaesimcq_u8(vaesdq_u8(B, K3));
169 B = vaesimcq_u8(vaesdq_u8(B, K4));
170 B = vaesimcq_u8(vaesdq_u8(B, K5));
171 B = vaesimcq_u8(vaesdq_u8(B, K6));
172 B = vaesimcq_u8(vaesdq_u8(B, K7));
173 B = vaesimcq_u8(vaesdq_u8(B, K8));
174 B = veorq_u8(vaesdq_u8(B, K9), K10);
176 vst1q_u8(out.data(), B);
// Specialization for a 192-bit key and a 128-bit block, derived from
// basic_armv8_rijndael_impl<192, 128, PolicyType>.
182 template<
typename PolicyType>
183 class rijndael_armv8_impl<192, 128, PolicyType> :
public basic_armv8_rijndael_impl<192, 128, PolicyType> {
// Encrypts a single block with the ARMv8 AES intrinsics.
//   plaintext      - input block; 16 bytes are loaded from plaintext.data().
//   encryption_key - key schedule; its first 192 bytes are read as K0..K11.
// The result is stored into the local block `out`. The statement that returns
// it is not visible in this listing.
185 static block_type
encrypt_block(
const block_type &plaintext,
const key_schedule_type &encryption_key) {
186 block_type out = {0};
// Byte views of the key material for vld1q_u8.
// NOTE(review): m_ME is not declared anywhere in the visible listing -
// confirm where it is defined.
188 const uint8_t *skey =
reinterpret_cast<const uint8_t *
>(encryption_key.data());
189 const uint8_t *mkey =
reinterpret_cast<const uint8_t *
>(m_ME.data());
// Round keys K0..K11 come from the schedule at 16-byte strides; the last key
// K12 is read from m_ME instead.
191 const uint8x16_t K0 = vld1q_u8(skey + 0);
192 const uint8x16_t K1 = vld1q_u8(skey + 16);
193 const uint8x16_t K2 = vld1q_u8(skey + 32);
194 const uint8x16_t K3 = vld1q_u8(skey + 48);
195 const uint8x16_t K4 = vld1q_u8(skey + 64);
196 const uint8x16_t K5 = vld1q_u8(skey + 80);
197 const uint8x16_t K6 = vld1q_u8(skey + 96);
198 const uint8x16_t K7 = vld1q_u8(skey + 112);
199 const uint8x16_t K8 = vld1q_u8(skey + 128);
200 const uint8x16_t K9 = vld1q_u8(skey + 144);
201 const uint8x16_t K10 = vld1q_u8(skey + 160);
202 const uint8x16_t K11 = vld1q_u8(skey + 176);
203 const uint8x16_t K12 = vld1q_u8(mkey);
// Eleven AESE+AESMC rounds (K0..K10), then a final AESE with K11 whose result
// is XORed with K12.
205 uint8x16_t B = vld1q_u8(plaintext.data());
206 B = vaesmcq_u8(vaeseq_u8(B, K0));
207 B = vaesmcq_u8(vaeseq_u8(B, K1));
208 B = vaesmcq_u8(vaeseq_u8(B, K2));
209 B = vaesmcq_u8(vaeseq_u8(B, K3));
210 B = vaesmcq_u8(vaeseq_u8(B, K4));
211 B = vaesmcq_u8(vaeseq_u8(B, K5));
212 B = vaesmcq_u8(vaeseq_u8(B, K6));
213 B = vaesmcq_u8(vaeseq_u8(B, K7));
214 B = vaesmcq_u8(vaeseq_u8(B, K8));
215 B = vaesmcq_u8(vaeseq_u8(B, K9));
216 B = vaesmcq_u8(vaeseq_u8(B, K10));
217 B = veorq_u8(vaeseq_u8(B, K11), K12);
218 vst1q_u8(out.data(), B);
// Decrypts a single block with the ARMv8 AES intrinsics (192-bit key).
//   plaintext      - input block; 16 bytes are loaded from plaintext.data().
//                    NOTE(review): presumably this is the ciphertext; the
//                    parameter name is the one used in the declared signature.
//   decryption_key - key schedule; its first 192 bytes are read as K0..K11.
// The result is stored into the local block `out`. The statement that returns
// it is not visible in this listing.
223 static block_type
decrypt_block(
const block_type &plaintext,
const key_schedule_type &decryption_key) {
224 block_type out = {0};
// Byte views of the key material for vld1q_u8.
// NOTE(review): m_MD is not declared anywhere in the visible listing -
// confirm where it is defined.
225 const uint8_t *skey =
reinterpret_cast<const uint8_t *
>(decryption_key.data());
226 const uint8_t *mkey =
reinterpret_cast<const uint8_t *
>(m_MD.data());
// Round keys K0..K11 come from the schedule at 16-byte strides; the last key
// K12 is read from m_MD instead.
228 const uint8x16_t K0 = vld1q_u8(skey + 0);
229 const uint8x16_t K1 = vld1q_u8(skey + 16);
230 const uint8x16_t K2 = vld1q_u8(skey + 32);
231 const uint8x16_t K3 = vld1q_u8(skey + 48);
232 const uint8x16_t K4 = vld1q_u8(skey + 64);
233 const uint8x16_t K5 = vld1q_u8(skey + 80);
234 const uint8x16_t K6 = vld1q_u8(skey + 96);
235 const uint8x16_t K7 = vld1q_u8(skey + 112);
236 const uint8x16_t K8 = vld1q_u8(skey + 128);
237 const uint8x16_t K9 = vld1q_u8(skey + 144);
238 const uint8x16_t K10 = vld1q_u8(skey + 160);
239 const uint8x16_t K11 = vld1q_u8(skey + 176);
240 const uint8x16_t K12 = vld1q_u8(mkey);
// Eleven AESD+AESIMC rounds (K0..K10), then a final AESD with K11 whose result
// is XORed with K12.
242 uint8x16_t B = vld1q_u8(plaintext.data());
243 B = vaesimcq_u8(vaesdq_u8(B, K0));
244 B = vaesimcq_u8(vaesdq_u8(B, K1));
245 B = vaesimcq_u8(vaesdq_u8(B, K2));
246 B = vaesimcq_u8(vaesdq_u8(B, K3));
247 B = vaesimcq_u8(vaesdq_u8(B, K4));
248 B = vaesimcq_u8(vaesdq_u8(B, K5));
249 B = vaesimcq_u8(vaesdq_u8(B, K6));
250 B = vaesimcq_u8(vaesdq_u8(B, K7));
251 B = vaesimcq_u8(vaesdq_u8(B, K8));
252 B = vaesimcq_u8(vaesdq_u8(B, K9));
253 B = vaesimcq_u8(vaesdq_u8(B, K10));
254 B = veorq_u8(vaesdq_u8(B, K11), K12);
255 vst1q_u8(out.data(), B);
// Specialization for a 256-bit key and a 128-bit block, derived from
// basic_armv8_rijndael_impl<256, 128, PolicyType>.
261 template<
typename PolicyType>
262 class rijndael_armv8_impl<256, 128, PolicyType> :
public basic_armv8_rijndael_impl<256, 128, PolicyType> {
// Encrypts a single block with the ARMv8 AES intrinsics.
//   plaintext      - input block; 16 bytes are loaded from plaintext.data().
//   encryption_key - key schedule; its first 224 bytes are read as K0..K13.
// The result is stored into the local block `out`. The statement that returns
// it is not visible in this listing.
264 static block_type
encrypt_block(
const block_type &plaintext,
const key_schedule_type &encryption_key) {
265 block_type out = {0};
// Byte views of the key material for vld1q_u8.
// NOTE(review): m_ME is not declared anywhere in the visible listing -
// confirm where it is defined.
266 const uint8_t *skey =
reinterpret_cast<const uint8_t *
>(encryption_key.data());
267 const uint8_t *mkey =
reinterpret_cast<const uint8_t *
>(m_ME.data());
// Round keys K0..K13 come from the schedule at 16-byte strides; the last key
// K14 is read from m_ME instead.
269 const uint8x16_t K0 = vld1q_u8(skey + 0);
270 const uint8x16_t K1 = vld1q_u8(skey + 16);
271 const uint8x16_t K2 = vld1q_u8(skey + 32);
272 const uint8x16_t K3 = vld1q_u8(skey + 48);
273 const uint8x16_t K4 = vld1q_u8(skey + 64);
274 const uint8x16_t K5 = vld1q_u8(skey + 80);
275 const uint8x16_t K6 = vld1q_u8(skey + 96);
276 const uint8x16_t K7 = vld1q_u8(skey + 112);
277 const uint8x16_t K8 = vld1q_u8(skey + 128);
278 const uint8x16_t K9 = vld1q_u8(skey + 144);
279 const uint8x16_t K10 = vld1q_u8(skey + 160);
280 const uint8x16_t K11 = vld1q_u8(skey + 176);
281 const uint8x16_t K12 = vld1q_u8(skey + 192);
282 const uint8x16_t K13 = vld1q_u8(skey + 208);
283 const uint8x16_t K14 = vld1q_u8(mkey);
// Thirteen AESE+AESMC rounds (K0..K12), then a final AESE with K13 whose
// result is XORed with K14.
285 uint8x16_t B = vld1q_u8(plaintext.data());
286 B = vaesmcq_u8(vaeseq_u8(B, K0));
287 B = vaesmcq_u8(vaeseq_u8(B, K1));
288 B = vaesmcq_u8(vaeseq_u8(B, K2));
289 B = vaesmcq_u8(vaeseq_u8(B, K3));
290 B = vaesmcq_u8(vaeseq_u8(B, K4));
291 B = vaesmcq_u8(vaeseq_u8(B, K5));
292 B = vaesmcq_u8(vaeseq_u8(B, K6));
293 B = vaesmcq_u8(vaeseq_u8(B, K7));
294 B = vaesmcq_u8(vaeseq_u8(B, K8));
295 B = vaesmcq_u8(vaeseq_u8(B, K9));
296 B = vaesmcq_u8(vaeseq_u8(B, K10));
297 B = vaesmcq_u8(vaeseq_u8(B, K11));
298 B = vaesmcq_u8(vaeseq_u8(B, K12));
299 B = veorq_u8(vaeseq_u8(B, K13), K14);
300 vst1q_u8(out.data(), B);
// Decrypts a single block with the ARMv8 AES intrinsics (256-bit key).
//   plaintext      - input block; 16 bytes are loaded from plaintext.data().
//                    NOTE(review): presumably this is the ciphertext; the
//                    parameter name is the one used in the declared signature.
//   decryption_key - key schedule; its first 224 bytes are read as K0..K13.
// The result is stored into the local block `out`. The statement that returns
// it is not visible in this listing.
305 static block_type
decrypt_block(
const block_type &plaintext,
const key_schedule_type &decryption_key) {
// Fix: `out` was written through below without ever being declared in this
// function. Declare it exactly as the other encrypt/decrypt routines do.
block_type out = {0};
// Byte views of the key material for vld1q_u8.
// NOTE(review): m_MD is not declared anywhere in the visible listing -
// confirm where it is defined.
306 const uint8_t *skey =
reinterpret_cast<const uint8_t *
>(decryption_key.data());
307 const uint8_t *mkey =
reinterpret_cast<const uint8_t *
>(m_MD.data());
// Round keys K0..K13 come from the schedule at 16-byte strides; the last key
// K14 is read from m_MD instead.
309 const uint8x16_t K0 = vld1q_u8(skey + 0);
310 const uint8x16_t K1 = vld1q_u8(skey + 16);
311 const uint8x16_t K2 = vld1q_u8(skey + 32);
312 const uint8x16_t K3 = vld1q_u8(skey + 48);
313 const uint8x16_t K4 = vld1q_u8(skey + 64);
314 const uint8x16_t K5 = vld1q_u8(skey + 80);
315 const uint8x16_t K6 = vld1q_u8(skey + 96);
316 const uint8x16_t K7 = vld1q_u8(skey + 112);
317 const uint8x16_t K8 = vld1q_u8(skey + 128);
318 const uint8x16_t K9 = vld1q_u8(skey + 144);
319 const uint8x16_t K10 = vld1q_u8(skey + 160);
320 const uint8x16_t K11 = vld1q_u8(skey + 176);
321 const uint8x16_t K12 = vld1q_u8(skey + 192);
322 const uint8x16_t K13 = vld1q_u8(skey + 208);
323 const uint8x16_t K14 = vld1q_u8(mkey);
// Thirteen AESD+AESIMC rounds (K0..K12), then a final AESD with K13 whose
// result is XORed with K14.
325 uint8x16_t B = vld1q_u8(plaintext.data());
326 B = vaesimcq_u8(vaesdq_u8(B, K0));
327 B = vaesimcq_u8(vaesdq_u8(B, K1));
328 B = vaesimcq_u8(vaesdq_u8(B, K2));
329 B = vaesimcq_u8(vaesdq_u8(B, K3));
330 B = vaesimcq_u8(vaesdq_u8(B, K4));
331 B = vaesimcq_u8(vaesdq_u8(B, K5));
332 B = vaesimcq_u8(vaesdq_u8(B, K6));
333 B = vaesimcq_u8(vaesdq_u8(B, K7));
334 B = vaesimcq_u8(vaesdq_u8(B, K8));
335 B = vaesimcq_u8(vaesdq_u8(B, K9));
336 B = vaesimcq_u8(vaesdq_u8(B, K10));
337 B = vaesimcq_u8(vaesdq_u8(B, K11));
338 B = vaesimcq_u8(vaesdq_u8(B, K12));
339 B = veorq_u8(vaesdq_u8(B, K13), K14);
340 vst1q_u8(out.data(), B);
policy_type::block_type block_type
Definition: rijndael_armv8_impl.hpp:89
policy_type::key_type key_type
Definition: rijndael_armv8_impl.hpp:90
PolicyType policy_type
Definition: rijndael_armv8_impl.hpp:88
policy_type::key_schedule_type key_schedule_type
Definition: rijndael_armv8_impl.hpp:91
static void schedule_key(const key_type &key, key_schedule_type encryption_key, key_schedule_type &decryption_key)
Definition: rijndael_armv8_impl.hpp:94
Definition: rijndael_armv8_impl.hpp:70
Definition: rijndael_armv8_impl.hpp:78
static block_type decrypt_block(const block_type &plaintext, const key_schedule_type &decryption_key)
Definition: rijndael_armv8_impl.hpp:146
static block_type encrypt_block(const block_type &plaintext, const key_schedule_type &encryption_key)
Definition: rijndael_armv8_impl.hpp:112
static block_type encrypt_block(const block_type &plaintext, const key_schedule_type &encryption_key)
Definition: rijndael_armv8_impl.hpp:185
static block_type decrypt_block(const block_type &plaintext, const key_schedule_type &decryption_key)
Definition: rijndael_armv8_impl.hpp:223
static block_type encrypt_block(const block_type &plaintext, const key_schedule_type &encryption_key)
Definition: rijndael_armv8_impl.hpp:264
static block_type decrypt_block(const block_type &plaintext, const key_schedule_type &decryption_key)
Definition: rijndael_armv8_impl.hpp:305