1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <sys/zfs_context.h>
26 #include <modes/modes.h>
27 #include <sys/crypto/common.h>
28 #include <sys/crypto/icp.h>
29 #include <sys/crypto/impl.h>
30 #include <sys/byteorder.h>
31 #include <sys/simd.h>
32 #include <modes/gcm_impl.h>
33 #ifdef CAN_USE_GCM_ASM
34 #include <aes/aes_impl.h>
35 #endif
36
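/*
 * GHASH one block: XOR block 'd' into the running hash (c)->gcm_ghash and
 * multiply the result by the hash subkey (c)->gcm_H with the selected
 * implementation 'o', storing the 128-bit product in 't'.
 */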
37 #define GHASH(c, d, t, o) \
38 xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
39 (o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
40 (uint64_t *)(void *)(t));
41
42 /* Select GCM implementation */
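/*
 * Special selector values; anything below these is treated as an index
 * into the gcm_supp_impl table of supported implementations.
 */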
43 #define IMPL_FASTEST (UINT32_MAX)
44 #define IMPL_CYCLE (UINT32_MAX-1)
45 #ifdef CAN_USE_GCM_ASM
46 #define IMPL_AVX (UINT32_MAX-2)
47 #endif
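/*
 * icp_gcm_impl holds the currently active implementation selector, while
 * user_sel_impl caches a selection made before gcm_impl_init() has run.
 * GCM_IMPL_READ() forces a fresh (volatile) read, since the selector may be
 * changed at runtime via the icp_gcm_impl module parameter.
 */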
48 #define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
49 static uint32_t icp_gcm_impl = IMPL_FASTEST;
50 static uint32_t user_sel_impl = IMPL_FASTEST;
51
52 #ifdef CAN_USE_GCM_ASM
53 /* Does the architecture we run on support the MOVBE instruction? */
54 boolean_t gcm_avx_can_use_movbe = B_FALSE;
55 /*
56 * Whether to use the optimized openssl gcm and ghash implementations.
57 * Set to true if module parameter icp_gcm_impl == "avx".
58 */
59 static boolean_t gcm_use_avx = B_FALSE;
60 #define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx)
61
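/* Atomically toggle the boolean pointed to and return its new value. */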
62 extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);
63
64 static inline boolean_t gcm_avx_will_work(void);
65 static inline void gcm_set_avx(boolean_t);
66 static inline boolean_t gcm_toggle_avx(void);
67 static inline size_t gcm_simd_get_htab_size(boolean_t);
68
69 static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
70 crypto_data_t *, size_t);
71
72 static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
73 static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
74 static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *,
75 size_t, size_t);
76 #endif /* ifdef CAN_USE_GCM_ASM */
77
78 /*
79 * Encrypt multiple blocks of data in GCM mode. Decryption for GCM mode
80 * is done in a separate function.
81 */
82 int
83 gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
84 crypto_data_t *out, size_t block_size,
85 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
86 void (*copy_block)(uint8_t *, uint8_t *),
87 void (*xor_block)(uint8_t *, uint8_t *))
88 {
89 #ifdef CAN_USE_GCM_ASM
90 if (ctx->gcm_use_avx == B_TRUE)
91 return (gcm_mode_encrypt_contiguous_blocks_avx(
92 ctx, data, length, out, block_size));
93 #endif
94
95 const gcm_impl_ops_t *gops;
96 size_t remainder = length;
97 size_t need = 0;
98 uint8_t *datap = (uint8_t *)data;
99 uint8_t *blockp;
100 uint8_t *lastp;
101 void *iov_or_mp;
102 offset_t offset;
103 uint8_t *out_data_1;
104 uint8_t *out_data_2;
105 size_t out_data_1_len;
106 uint64_t counter;
107 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
108
109 if (length + ctx->gcm_remainder_len < block_size) {
110 /* accumulate bytes here and return */
111 bcopy(datap,
112 (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
113 length);
114 ctx->gcm_remainder_len += length;
115 if (ctx->gcm_copy_to == NULL) {
116 ctx->gcm_copy_to = datap;
117 }
118 return (CRYPTO_SUCCESS);
119 }
120
121 lastp = (uint8_t *)ctx->gcm_cb;
122 crypto_init_ptrs(out, &iov_or_mp, &offset);
123
124 gops = gcm_impl_get_ops();
125 do {
126 /* Unprocessed data from last call. */
127 if (ctx->gcm_remainder_len > 0) {
128 need = block_size - ctx->gcm_remainder_len;
129
130 if (need > remainder)
131 return (CRYPTO_DATA_LEN_RANGE);
132
133 bcopy(datap, &((uint8_t *)ctx->gcm_remainder)
134 [ctx->gcm_remainder_len], need);
135
136 blockp = (uint8_t *)ctx->gcm_remainder;
137 } else {
138 blockp = datap;
139 }
140
141 /*
142 * Increment counter. Counter bits are confined
143 * to the bottom 32 bits of the counter block.
144 */
145 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
146 counter = htonll(counter + 1);
147 counter &= counter_mask;
148 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
149
150 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
151 (uint8_t *)ctx->gcm_tmp);
152 xor_block(blockp, (uint8_t *)ctx->gcm_tmp);
153
154 lastp = (uint8_t *)ctx->gcm_tmp;
155
156 ctx->gcm_processed_data_len += block_size;
157
158 crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
159 &out_data_1_len, &out_data_2, block_size);
160
161 /* copy block to where it belongs */
162 if (out_data_1_len == block_size) {
163 copy_block(lastp, out_data_1);
164 } else {
165 bcopy(lastp, out_data_1, out_data_1_len);
166 if (out_data_2 != NULL) {
167 bcopy(lastp + out_data_1_len,
168 out_data_2,
169 block_size - out_data_1_len);
170 }
171 }
172 /* update offset */
173 out->cd_offset += block_size;
174
175 /* add ciphertext to the hash */
176 GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);
177
178 /* Update pointer to next block of data to be processed. */
179 if (ctx->gcm_remainder_len != 0) {
180 datap += need;
181 ctx->gcm_remainder_len = 0;
182 } else {
183 datap += block_size;
184 }
185
186 remainder = (size_t)&data[length] - (size_t)datap;
187
188 /* Incomplete last block. */
189 if (remainder > 0 && remainder < block_size) {
190 bcopy(datap, ctx->gcm_remainder, remainder);
191 ctx->gcm_remainder_len = remainder;
192 ctx->gcm_copy_to = datap;
193 goto out;
194 }
195 ctx->gcm_copy_to = NULL;
196
197 } while (remainder > 0);
198 out:
199 return (CRYPTO_SUCCESS);
200 }
201
202 /* ARGSUSED */
203 int
204 gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
205 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
206 void (*copy_block)(uint8_t *, uint8_t *),
207 void (*xor_block)(uint8_t *, uint8_t *))
208 {
209 #ifdef CAN_USE_GCM_ASM
210 if (ctx->gcm_use_avx == B_TRUE)
211 return (gcm_encrypt_final_avx(ctx, out, block_size));
212 #endif
213
214 const gcm_impl_ops_t *gops;
215 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
216 uint8_t *ghash, *macp = NULL;
217 int i, rv;
218
219 if (out->cd_length <
220 (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
221 return (CRYPTO_DATA_LEN_RANGE);
222 }
223
224 gops = gcm_impl_get_ops();
225 ghash = (uint8_t *)ctx->gcm_ghash;
226
227 if (ctx->gcm_remainder_len > 0) {
228 uint64_t counter;
229 uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;
230
231 /*
232 * Here is where we deal with data that is not a
233 * multiple of the block size.
234 */
235
236 /*
237 * Increment counter.
238 */
239 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
240 counter = htonll(counter + 1);
241 counter &= counter_mask;
242 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
243
244 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
245 (uint8_t *)ctx->gcm_tmp);
246
247 macp = (uint8_t *)ctx->gcm_remainder;
248 bzero(macp + ctx->gcm_remainder_len,
249 block_size - ctx->gcm_remainder_len);
250
251 /* XOR with counter block */
252 for (i = 0; i < ctx->gcm_remainder_len; i++) {
253 macp[i] ^= tmpp[i];
254 }
255
256 /* add ciphertext to the hash */
257 GHASH(ctx, macp, ghash, gops);
258
259 ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
260 }
261
262 ctx->gcm_len_a_len_c[1] =
263 htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
264 GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
265 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
266 (uint8_t *)ctx->gcm_J0);
267 xor_block((uint8_t *)ctx->gcm_J0, ghash);
268
269 if (ctx->gcm_remainder_len > 0) {
270 rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
271 if (rv != CRYPTO_SUCCESS)
272 return (rv);
273 }
274 out->cd_offset += ctx->gcm_remainder_len;
275 ctx->gcm_remainder_len = 0;
276 rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
277 if (rv != CRYPTO_SUCCESS)
278 return (rv);
279 out->cd_offset += ctx->gcm_tag_len;
280
281 return (CRYPTO_SUCCESS);
282 }
283
284 /*
285 * Decrypt the last, incomplete block of the input; the resulting
286 * plaintext is written to ctx->gcm_pt_buf starting at byte offset 'index'.
287 */
288 static void
289 gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
290 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
291 void (*xor_block)(uint8_t *, uint8_t *))
292 {
293 uint8_t *datap, *outp, *counterp;
294 uint64_t counter;
295 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
296 int i;
297
298 /*
299 * Increment counter.
300 * Counter bits are confined to the bottom 32 bits
301 */
302 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
303 counter = htonll(counter + 1);
304 counter &= counter_mask;
305 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
306
307 datap = (uint8_t *)ctx->gcm_remainder;
308 outp = &((ctx->gcm_pt_buf)[index]);
309 counterp = (uint8_t *)ctx->gcm_tmp;
310
311 /* zero pad the incomplete last ciphertext block for hashing */
312 bzero((uint8_t *)ctx->gcm_tmp, block_size);
313 bcopy(datap, (uint8_t *)ctx->gcm_tmp, ctx->gcm_remainder_len);
314
315 /* add ciphertext to the hash */
316 GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops());
317
318 /* decrypt remaining ciphertext */
319 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);
320
321 /* XOR with counter block */
322 for (i = 0; i < ctx->gcm_remainder_len; i++) {
323 outp[i] = datap[i] ^ counterp[i];
324 }
325 }
326
327 /* ARGSUSED */
328 int
329 gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
330 crypto_data_t *out, size_t block_size,
331 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
332 void (*copy_block)(uint8_t *, uint8_t *),
333 void (*xor_block)(uint8_t *, uint8_t *))
334 {
335 size_t new_len;
336 uint8_t *new;
337
338 /*
339 * Copy contiguous ciphertext input blocks to the plaintext buffer.
340 * The ciphertext will be decrypted in gcm_decrypt_final().
341 */
342 if (length > 0) {
343 new_len = ctx->gcm_pt_buf_len + length;
344 new = vmem_alloc(new_len, ctx->gcm_kmflag);
345 if (new == NULL) {
346 vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
347 ctx->gcm_pt_buf = NULL;
348 return (CRYPTO_HOST_MEMORY);
349 }
350 bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len);
351 vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
352 ctx->gcm_pt_buf = new;
353 ctx->gcm_pt_buf_len = new_len;
354 bcopy(data, &ctx->gcm_pt_buf[ctx->gcm_processed_data_len],
355 length);
356 ctx->gcm_processed_data_len += length;
357 }
358
359 ctx->gcm_remainder_len = 0;
360 return (CRYPTO_SUCCESS);
361 }
362
363 int
364 gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
365 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
366 void (*xor_block)(uint8_t *, uint8_t *))
367 {
368 #ifdef CAN_USE_GCM_ASM
369 if (ctx->gcm_use_avx == B_TRUE)
370 return (gcm_decrypt_final_avx(ctx, out, block_size));
371 #endif
372
373 const gcm_impl_ops_t *gops;
374 size_t pt_len;
375 size_t remainder;
376 uint8_t *ghash;
377 uint8_t *blockp;
378 uint8_t *cbp;
379 uint64_t counter;
380 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
381 int processed = 0, rv;
382
383 ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);
384
385 gops = gcm_impl_get_ops();
386 pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
387 ghash = (uint8_t *)ctx->gcm_ghash;
388 blockp = ctx->gcm_pt_buf;
389 remainder = pt_len;
390 while (remainder > 0) {
391 /* Incomplete last block */
392 if (remainder < block_size) {
393 bcopy(blockp, ctx->gcm_remainder, remainder);
394 ctx->gcm_remainder_len = remainder;
395 /*
396 * not expecting any more ciphertext, just
397 * compute plaintext for the remaining input
398 */
399 gcm_decrypt_incomplete_block(ctx, block_size,
400 processed, encrypt_block, xor_block);
401 ctx->gcm_remainder_len = 0;
402 goto out;
403 }
404 /* add ciphertext to the hash */
405 GHASH(ctx, blockp, ghash, gops);
406
407 /*
408 * Increment counter.
409 * Counter bits are confined to the bottom 32 bits
410 */
411 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
412 counter = htonll(counter + 1);
413 counter &= counter_mask;
414 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
415
416 cbp = (uint8_t *)ctx->gcm_tmp;
417 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);
418
419 /* XOR with ciphertext */
420 xor_block(cbp, blockp);
421
422 processed += block_size;
423 blockp += block_size;
424 remainder -= block_size;
425 }
426 out:
427 ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
428 GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
429 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
430 (uint8_t *)ctx->gcm_J0);
431 xor_block((uint8_t *)ctx->gcm_J0, ghash);
432
433 /* compare the input authentication tag with what we calculated */
434 if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
435 /* They don't match */
436 return (CRYPTO_INVALID_MAC);
437 } else {
438 rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
439 if (rv != CRYPTO_SUCCESS)
440 return (rv);
441 out->cd_offset += pt_len;
442 }
443 return (CRYPTO_SUCCESS);
444 }
445
446 static int
447 gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
448 {
449 size_t tag_len;
450
451 /*
452 * Check the length of the authentication tag (in bits).
453 */
454 tag_len = gcm_param->ulTagBits;
455 switch (tag_len) {
456 case 32:
457 case 64:
458 case 96:
459 case 104:
460 case 112:
461 case 120:
462 case 128:
463 break;
464 default:
465 return (CRYPTO_MECHANISM_PARAM_INVALID);
466 }
467
468 if (gcm_param->ulIvLen == 0)
469 return (CRYPTO_MECHANISM_PARAM_INVALID);
470
471 return (CRYPTO_SUCCESS);
472 }
473
474 static void
475 gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
476 gcm_ctx_t *ctx, size_t block_size,
477 void (*copy_block)(uint8_t *, uint8_t *),
478 void (*xor_block)(uint8_t *, uint8_t *))
479 {
480 const gcm_impl_ops_t *gops;
481 uint8_t *cb;
482 ulong_t remainder = iv_len;
483 ulong_t processed = 0;
484 uint8_t *datap, *ghash;
485 uint64_t len_a_len_c[2];
486
487 gops = gcm_impl_get_ops();
488 ghash = (uint8_t *)ctx->gcm_ghash;
489 cb = (uint8_t *)ctx->gcm_cb;
490 if (iv_len == 12) {
491 bcopy(iv, cb, 12);
492 cb[12] = 0;
493 cb[13] = 0;
494 cb[14] = 0;
495 cb[15] = 1;
496 /* J0 will be used again in the final */
497 copy_block(cb, (uint8_t *)ctx->gcm_J0);
498 } else {
499 /* GHASH the IV */
500 do {
501 if (remainder < block_size) {
502 bzero(cb, block_size);
503 bcopy(&(iv[processed]), cb, remainder);
504 datap = (uint8_t *)cb;
505 remainder = 0;
506 } else {
507 datap = (uint8_t *)(&(iv[processed]));
508 processed += block_size;
509 remainder -= block_size;
510 }
511 GHASH(ctx, datap, ghash, gops);
512 } while (remainder > 0);
513
514 len_a_len_c[0] = 0;
515 len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
516 GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops);
517
518 /* J0 will be used again in the final */
519 copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
520 }
521 }
522
523 static int
524 gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
525 unsigned char *auth_data, size_t auth_data_len, size_t block_size,
526 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
527 void (*copy_block)(uint8_t *, uint8_t *),
528 void (*xor_block)(uint8_t *, uint8_t *))
529 {
530 const gcm_impl_ops_t *gops;
531 uint8_t *ghash, *datap, *authp;
532 size_t remainder, processed;
533
534 /* encrypt zero block to get subkey H */
535 bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
536 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
537 (uint8_t *)ctx->gcm_H);
538
539 gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
540 copy_block, xor_block);
541
542 gops = gcm_impl_get_ops();
543 authp = (uint8_t *)ctx->gcm_tmp;
544 ghash = (uint8_t *)ctx->gcm_ghash;
545 bzero(authp, block_size);
546 bzero(ghash, block_size);
547
548 processed = 0;
549 remainder = auth_data_len;
550 do {
551 if (remainder < block_size) {
552 /*
553 * There's not a full block of data; pad the rest of
554 * the buffer with zeros.
555 */
556 bzero(authp, block_size);
557 bcopy(&(auth_data[processed]), authp, remainder);
558 datap = (uint8_t *)authp;
559 remainder = 0;
560 } else {
561 datap = (uint8_t *)(&(auth_data[processed]));
562 processed += block_size;
563 remainder -= block_size;
564 }
565
566 /* add auth data to the hash */
567 GHASH(ctx, datap, ghash, gops);
568
569 } while (remainder > 0);
570
571 return (CRYPTO_SUCCESS);
572 }
573
574 /*
575 * The following function is called at encrypt or decrypt init time
576 * for AES GCM mode.
577 *
578 * Init the GCM context struct. Handle the cycle and avx implementations here.
579 */
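/*
 * Illustrative call sketch (comment only): 'encrypt_cb', 'copy_cb' and
 * 'xor_cb' stand in for the caller's AES block callbacks and are not real
 * symbols in this file; gcm_ctx->gcm_keysched and gcm_ctx->gcm_kmflag are
 * assumed to have been set up by the caller beforehand.
 *
 *	CK_AES_GCM_PARAMS params = {
 *		.pIv = iv, .ulIvLen = 12,
 *		.pAAD = aad, .ulAADLen = aad_len,
 *		.ulTagBits = 128
 *	};
 *	rv = gcm_init_ctx(gcm_ctx, (char *)&params, 16,
 *	    encrypt_cb, copy_cb, xor_cb);
 */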
580 int
581 gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
582 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
583 void (*copy_block)(uint8_t *, uint8_t *),
584 void (*xor_block)(uint8_t *, uint8_t *))
585 {
586 int rv;
587 CK_AES_GCM_PARAMS *gcm_param;
588
589 if (param != NULL) {
590 gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;
591
592 if ((rv = gcm_validate_args(gcm_param)) != 0) {
593 return (rv);
594 }
595
596 gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
597 gcm_ctx->gcm_tag_len >>= 3;
598 gcm_ctx->gcm_processed_data_len = 0;
599
600 /* these values are in bits */
601 gcm_ctx->gcm_len_a_len_c[0]
602 = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));
603
604 rv = CRYPTO_SUCCESS;
605 gcm_ctx->gcm_flags |= GCM_MODE;
606 } else {
607 return (CRYPTO_MECHANISM_PARAM_INVALID);
608 }
609
610 #ifdef CAN_USE_GCM_ASM
611 if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
612 gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
613 } else {
614 /*
615 * Handle the "cycle" implementation by creating avx and
616 * non-avx contexts alternately.
617 */
618 gcm_ctx->gcm_use_avx = gcm_toggle_avx();
619 /*
620 * We don't handle byte swapped key schedules in the avx
621 * code path.
622 */
623 aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
624 if (ks->ops->needs_byteswap == B_TRUE) {
625 gcm_ctx->gcm_use_avx = B_FALSE;
626 }
627 /* Use the MOVBE and the BSWAP variants alternately. */
628 if (gcm_ctx->gcm_use_avx == B_TRUE &&
629 zfs_movbe_available() == B_TRUE) {
630 (void) atomic_toggle_boolean_nv(
631 (volatile boolean_t *)&gcm_avx_can_use_movbe);
632 }
633 }
634 /* Allocate Htab memory as needed. */
635 if (gcm_ctx->gcm_use_avx == B_TRUE) {
636 size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);
637
638 if (htab_len == 0) {
639 return (CRYPTO_MECHANISM_PARAM_INVALID);
640 }
641 gcm_ctx->gcm_htab_len = htab_len;
642 gcm_ctx->gcm_Htable =
643 (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag);
644
645 if (gcm_ctx->gcm_Htable == NULL) {
646 return (CRYPTO_HOST_MEMORY);
647 }
648 }
649 /* Avx and non avx context initialization differs from here on. */
650 if (gcm_ctx->gcm_use_avx == B_FALSE) {
651 #endif /* ifdef CAN_USE_GCM_ASM */
652 if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
653 gcm_param->pAAD, gcm_param->ulAADLen, block_size,
654 encrypt_block, copy_block, xor_block) != 0) {
655 rv = CRYPTO_MECHANISM_PARAM_INVALID;
656 }
657 #ifdef CAN_USE_GCM_ASM
658 } else {
659 if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
660 gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) {
661 rv = CRYPTO_MECHANISM_PARAM_INVALID;
662 }
663 }
664 #endif /* ifdef CAN_USE_GCM_ASM */
665
666 return (rv);
667 }
668
669 int
670 gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
671 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
672 void (*copy_block)(uint8_t *, uint8_t *),
673 void (*xor_block)(uint8_t *, uint8_t *))
674 {
675 int rv;
676 CK_AES_GMAC_PARAMS *gmac_param;
677
678 if (param != NULL) {
679 gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;
680
681 gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
682 gcm_ctx->gcm_processed_data_len = 0;
683
684 /* these values are in bits */
685 gcm_ctx->gcm_len_a_len_c[0]
686 = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));
687
688 rv = CRYPTO_SUCCESS;
689 gcm_ctx->gcm_flags |= GMAC_MODE;
690 } else {
691 return (CRYPTO_MECHANISM_PARAM_INVALID);
692 }
693
694 #ifdef CAN_USE_GCM_ASM
695 /*
696 * Handle the "cycle" implementation by creating avx and non avx
697 * contexts alternately.
698 */
699 if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
700 gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
701 } else {
702 gcm_ctx->gcm_use_avx = gcm_toggle_avx();
703 }
704 /* We don't handle byte swapped key schedules in the avx code path. */
705 aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
706 if (ks->ops->needs_byteswap == B_TRUE) {
707 gcm_ctx->gcm_use_avx = B_FALSE;
708 }
709 /* Allocate Htab memory as needed. */
710 if (gcm_ctx->gcm_use_avx == B_TRUE) {
711 size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);
712
713 if (htab_len == 0) {
714 return (CRYPTO_MECHANISM_PARAM_INVALID);
715 }
716 gcm_ctx->gcm_htab_len = htab_len;
717 gcm_ctx->gcm_Htable =
718 (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag);
719
720 if (gcm_ctx->gcm_Htable == NULL) {
721 return (CRYPTO_HOST_MEMORY);
722 }
723 }
724
725 /* Avx and non avx context initialization differs from here on. */
726 if (gcm_ctx->gcm_use_avx == B_FALSE) {
727 #endif /* ifdef CAN_USE_GCM_ASM */
728 if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
729 gmac_param->pAAD, gmac_param->ulAADLen, block_size,
730 encrypt_block, copy_block, xor_block) != 0) {
731 rv = CRYPTO_MECHANISM_PARAM_INVALID;
732 }
733 #ifdef CAN_USE_GCM_ASM
734 } else {
735 if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
736 gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) {
737 rv = CRYPTO_MECHANISM_PARAM_INVALID;
738 }
739 }
740 #endif /* ifdef CAN_USE_GCM_ASM */
741
742 return (rv);
743 }
744
745 void *
746 gcm_alloc_ctx(int kmflag)
747 {
748 gcm_ctx_t *gcm_ctx;
749
750 if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
751 return (NULL);
752
753 gcm_ctx->gcm_flags = GCM_MODE;
754 return (gcm_ctx);
755 }
756
757 void *
758 gmac_alloc_ctx(int kmflag)
759 {
760 gcm_ctx_t *gcm_ctx;
761
762 if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
763 return (NULL);
764
765 gcm_ctx->gcm_flags = GMAC_MODE;
766 return (gcm_ctx);
767 }
768
769 void
770 gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag)
771 {
772 ctx->gcm_kmflag = kmflag;
773 }
774
775 /* GCM implementation that contains the fastest methods */
776 static gcm_impl_ops_t gcm_fastest_impl = {
777 .name = "fastest"
778 };
779
780 /* All compiled in implementations */
781 const gcm_impl_ops_t *gcm_all_impl[] = {
782 &gcm_generic_impl,
783 #if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
784 &gcm_pclmulqdq_impl,
785 #endif
786 };
787
788 /* Indicate that benchmark has been completed */
789 static boolean_t gcm_impl_initialized = B_FALSE;
790
791 /* Hold all supported implementations */
792 static size_t gcm_supp_impl_cnt = 0;
793 static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];
794
795 /*
796 * Returns the GCM operations for encrypt/decrypt/key setup. When a
797 * SIMD implementation is not allowed in the current context, fall
798 * back to the generic implementation.
799 */
800 const gcm_impl_ops_t *
801 gcm_impl_get_ops()
802 {
803 if (!kfpu_allowed())
804 return (&gcm_generic_impl);
805
806 const gcm_impl_ops_t *ops = NULL;
807 const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
808
809 switch (impl) {
810 case IMPL_FASTEST:
811 ASSERT(gcm_impl_initialized);
812 ops = &gcm_fastest_impl;
813 break;
814 case IMPL_CYCLE:
815 /* Cycle through supported implementations */
816 ASSERT(gcm_impl_initialized);
817 ASSERT3U(gcm_supp_impl_cnt, >, 0);
818 static size_t cycle_impl_idx = 0;
819 size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
820 ops = gcm_supp_impl[idx];
821 break;
822 #ifdef CAN_USE_GCM_ASM
823 case IMPL_AVX:
824 /*
825 * Make sure that we return a valid implementation while
826 * switching to the avx implementation since there still
827 * may be unfinished non-avx contexts around.
828 */
829 ops = &gcm_generic_impl;
830 break;
831 #endif
832 default:
833 ASSERT3U(impl, <, gcm_supp_impl_cnt);
834 ASSERT3U(gcm_supp_impl_cnt, >, 0);
835 if (impl < ARRAY_SIZE(gcm_all_impl))
836 ops = gcm_supp_impl[impl];
837 break;
838 }
839
840 ASSERT3P(ops, !=, NULL);
841
842 return (ops);
843 }
844
845 /*
846 * Initialize all supported implementations.
847 */
848 void
849 gcm_impl_init(void)
850 {
851 gcm_impl_ops_t *curr_impl;
852 int i, c;
853
854 /* Move supported implementations into gcm_supp_impls */
855 for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
856 curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];
857
858 if (curr_impl->is_supported())
859 gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl;
860 }
861 gcm_supp_impl_cnt = c;
862
863 /*
864 * Set the fastest implementation given the assumption that the
865 * hardware accelerated version is the fastest.
866 */
867 #if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
868 if (gcm_pclmulqdq_impl.is_supported()) {
869 memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
870 sizeof (gcm_fastest_impl));
871 } else
872 #endif
873 {
874 memcpy(&gcm_fastest_impl, &gcm_generic_impl,
875 sizeof (gcm_fastest_impl));
876 }
877
878 strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX);
879
880 #ifdef CAN_USE_GCM_ASM
881 /*
882 * Use the avx implementation if it's available and the implementation
883 * hasn't changed from its default value of fastest on module load.
884 */
885 if (gcm_avx_will_work()) {
886 #ifdef HAVE_MOVBE
887 if (zfs_movbe_available() == B_TRUE) {
888 atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE);
889 }
890 #endif
891 if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
892 gcm_set_avx(B_TRUE);
893 }
894 }
895 #endif
896 /* Finish initialization */
897 atomic_swap_32(&icp_gcm_impl, user_sel_impl);
898 gcm_impl_initialized = B_TRUE;
899 }
900
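/*
 * Selector options accepted by gcm_impl_set() in addition to the names of
 * the supported compiled-in implementations.
 */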
901 static const struct {
902 char *name;
903 uint32_t sel;
904 } gcm_impl_opts[] = {
905 { "cycle", IMPL_CYCLE },
906 { "fastest", IMPL_FASTEST },
907 #ifdef CAN_USE_GCM_ASM
908 { "avx", IMPL_AVX },
909 #endif
910 };
911
912 /*
913 * Set the desired gcm implementation.
914 *
915 * If we are called before init(), the user preference will be saved in
916 * user_sel_impl and applied in a later init() call. This occurs when the
917 * module parameter is specified on module load. Otherwise, icp_gcm_impl
918 * is updated directly.
919 *
920 * @val Name of gcm implementation to use
922 */
923 int
924 gcm_impl_set(const char *val)
925 {
926 int err = -EINVAL;
927 char req_name[GCM_IMPL_NAME_MAX];
928 uint32_t impl = GCM_IMPL_READ(user_sel_impl);
929 size_t i;
930
931 /* sanitize input */
932 i = strnlen(val, GCM_IMPL_NAME_MAX);
933 if (i == 0 || i >= GCM_IMPL_NAME_MAX)
934 return (err);
935
936 strlcpy(req_name, val, GCM_IMPL_NAME_MAX);
937 while (i > 0 && isspace(req_name[i-1]))
938 i--;
939 req_name[i] = '\0';
940
941 /* Check mandatory options */
942 for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
943 #ifdef CAN_USE_GCM_ASM
944 /* Ignore avx implementation if it won't work. */
945 if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
946 continue;
947 }
948 #endif
949 if (strcmp(req_name, gcm_impl_opts[i].name) == 0) {
950 impl = gcm_impl_opts[i].sel;
951 err = 0;
952 break;
953 }
954 }
955
956 /* check all supported impl if init() was already called */
957 if (err != 0 && gcm_impl_initialized) {
958 /* check all supported implementations */
959 for (i = 0; i < gcm_supp_impl_cnt; i++) {
960 if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) {
961 impl = i;
962 err = 0;
963 break;
964 }
965 }
966 }
967 #ifdef CAN_USE_GCM_ASM
968 /*
969 * Use the avx implementation if available and the requested one is
970 * avx or fastest.
971 */
972 if (gcm_avx_will_work() == B_TRUE &&
973 (impl == IMPL_AVX || impl == IMPL_FASTEST)) {
974 gcm_set_avx(B_TRUE);
975 } else {
976 gcm_set_avx(B_FALSE);
977 }
978 #endif
979
980 if (err == 0) {
981 if (gcm_impl_initialized)
982 atomic_swap_32(&icp_gcm_impl, impl);
983 else
984 atomic_swap_32(&user_sel_impl, impl);
985 }
986
987 return (err);
988 }
989
990 #if defined(_KERNEL) && defined(__linux__)
991
992 static int
993 icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp)
994 {
995 return (gcm_impl_set(val));
996 }
997
998 static int
999 icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
1000 {
1001 int i, cnt = 0;
1002 char *fmt;
1003 const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
1004
1005 ASSERT(gcm_impl_initialized);
1006
1007 /* list mandatory options */
1008 for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
1009 #ifdef CAN_USE_GCM_ASM
1010 /* Ignore avx implementation if it won't work. */
1011 if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
1012 continue;
1013 }
1014 #endif
1015 fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s ";
1016 cnt += sprintf(buffer + cnt, fmt, gcm_impl_opts[i].name);
1017 }
1018
1019 /* list all supported implementations */
1020 for (i = 0; i < gcm_supp_impl_cnt; i++) {
1021 fmt = (i == impl) ? "[%s] " : "%s ";
1022 cnt += sprintf(buffer + cnt, fmt, gcm_supp_impl[i]->name);
1023 }
1024
1025 return (cnt);
1026 }
1027
1028 module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
1029 NULL, 0644);
1030 MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
1031 #endif /* defined(_KERNEL) && defined(__linux__) */
1032
1033 #ifdef CAN_USE_GCM_ASM
1034 #define GCM_BLOCK_LEN 16
1035 /*
1036 * The OpenSSL asm routines are 6x aggregated and need at least that
1037 * many bytes of input.
1038 */
1039 #define GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6)
1040 #define GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3)
1041 /*
1042 * Ensure the chunk size is reasonable since we are allocating a buffer of
1043 * up to GCM_AVX_MAX_CHUNK_SIZE bytes and disabling preemption and interrupts.
1044 */
1045 #define GCM_AVX_MAX_CHUNK_SIZE \
1046 (((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES)
1047
1048 /* Get the chunk size module parameter. */
1049 #define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size
1050
1051 /* Clear the FPU registers since they hold sensitive internal state. */
1052 #define clear_fpu_regs() clear_fpu_regs_avx()
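/* GHASH 'len' bytes of 'in' into (ctx)->gcm_ghash using the avx routine. */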
1053 #define GHASH_AVX(ctx, in, len) \
1054 gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t *)(ctx)->gcm_Htable, \
1055 in, len)
1056
1057 #define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)
1058
1059 /*
1060 * Module parameter: number of bytes to process at once while owning the FPU.
1061 * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and is
1062 * ensured to be greater or equal than GCM_AVX_MIN_DECRYPT_BYTES.
1063 */
1064 static uint32_t gcm_avx_chunk_size =
1065 ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
1066
1067 extern void clear_fpu_regs_avx(void);
1068 extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst);
1069 extern void aes_encrypt_intel(const uint32_t rk[], int nr,
1070 const uint32_t pt[4], uint32_t ct[4]);
1071
1072 extern void gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
1073 extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
1074 const uint8_t *in, size_t len);
1075
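/*
 * OpenSSL 6x aggregated AES-NI/AVX bulk routines: process 'len' bytes of
 * contiguous data from 'in' to 'out', updating the counter block and the
 * running ghash, and return the number of bytes actually processed.
 */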
1076 extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
1077 const void *, uint64_t *, uint64_t *);
1078
1079 extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
1080 const void *, uint64_t *, uint64_t *);
1081
1082 static inline boolean_t
1083 gcm_avx_will_work(void)
1084 {
1085 /* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */
1086 return (kfpu_allowed() &&
1087 zfs_avx_available() && zfs_aes_available() &&
1088 zfs_pclmulqdq_available());
1089 }
1090
1091 static inline void
1092 gcm_set_avx(boolean_t val)
1093 {
1094 if (gcm_avx_will_work() == B_TRUE) {
1095 atomic_swap_32(&gcm_use_avx, val);
1096 }
1097 }
1098
1099 static inline boolean_t
1100 gcm_toggle_avx(void)
1101 {
1102 if (gcm_avx_will_work() == B_TRUE) {
1103 return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX));
1104 } else {
1105 return (B_FALSE);
1106 }
1107 }
1108
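/*
 * Return the size of the hash sub key table the SIMD (avx) implementation
 * needs: room for 2 * 6 entries of two 64-bit words each (192 bytes), or
 * zero if no SIMD implementation is in use.
 */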
1109 static inline size_t
1110 gcm_simd_get_htab_size(boolean_t simd_mode)
1111 {
1112 switch (simd_mode) {
1113 case B_TRUE:
1114 return (2 * 6 * 2 * sizeof (uint64_t));
1115
1116 default:
1117 return (0);
1118 }
1119 }
1120
1121 /*
1122 * Clear sensitive data in the context.
1123 *
1124 * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and
1125 * ctx->gcm_Htable contain the hash sub key which protects authentication.
1126 *
1127 * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for
1128 * a known plaintext attack; they consist of the IV and the first and last
1129 * counter block, respectively. Whether they should be cleared is debatable.
1130 */
1131 static inline void
1132 gcm_clear_ctx(gcm_ctx_t *ctx)
1133 {
1134 bzero(ctx->gcm_remainder, sizeof (ctx->gcm_remainder));
1135 bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
1136 bzero(ctx->gcm_J0, sizeof (ctx->gcm_J0));
1137 bzero(ctx->gcm_tmp, sizeof (ctx->gcm_tmp));
1138 }
1139
1140 /* Increment the GCM counter block by n. */
1141 static inline void
1142 gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n)
1143 {
1144 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
1145 uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask);
1146
1147 counter = htonll(counter + n);
1148 counter &= counter_mask;
1149 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
1150 }
1151
1152 /*
1153 * Encrypt multiple blocks of data in GCM mode.
1154 * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines
1155 * if possible. While processing a chunk the FPU is "locked".
1156 */
1157 static int
1158 gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
1159 size_t length, crypto_data_t *out, size_t block_size)
1160 {
1161 size_t bleft = length;
1162 size_t need = 0;
1163 size_t done = 0;
1164 uint8_t *datap = (uint8_t *)data;
1165 size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
1166 const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
1167 uint64_t *ghash = ctx->gcm_ghash;
1168 uint64_t *cb = ctx->gcm_cb;
1169 uint8_t *ct_buf = NULL;
1170 uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
1171 int rv = CRYPTO_SUCCESS;
1172
1173 ASSERT(block_size == GCM_BLOCK_LEN);
1174 /*
1175 * If the last call left an incomplete block, try to fill
1176 * it first.
1177 */
1178 if (ctx->gcm_remainder_len > 0) {
1179 need = block_size - ctx->gcm_remainder_len;
1180 if (length < need) {
1181 /* Accumulate bytes here and return. */
1182 bcopy(datap, (uint8_t *)ctx->gcm_remainder +
1183 ctx->gcm_remainder_len, length);
1184
1185 ctx->gcm_remainder_len += length;
1186 if (ctx->gcm_copy_to == NULL) {
1187 ctx->gcm_copy_to = datap;
1188 }
1189 return (CRYPTO_SUCCESS);
1190 } else {
1191 /* Complete incomplete block. */
1192 bcopy(datap, (uint8_t *)ctx->gcm_remainder +
1193 ctx->gcm_remainder_len, need);
1194
1195 ctx->gcm_copy_to = NULL;
1196 }
1197 }
1198
1199 /* Allocate a buffer to encrypt to if there is enough input. */
1200 if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
1201 ct_buf = vmem_alloc(chunk_size, ctx->gcm_kmflag);
1202 if (ct_buf == NULL) {
1203 return (CRYPTO_HOST_MEMORY);
1204 }
1205 }
1206
1207 /* If we completed an incomplete block, encrypt and write it out. */
1208 if (ctx->gcm_remainder_len > 0) {
1209 kfpu_begin();
1210 aes_encrypt_intel(key->encr_ks.ks32, key->nr,
1211 (const uint32_t *)cb, (uint32_t *)tmp);
1212
1213 gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp);
1214 GHASH_AVX(ctx, tmp, block_size);
1215 clear_fpu_regs();
1216 kfpu_end();
1217 rv = crypto_put_output_data(tmp, out, block_size);
1218 out->cd_offset += block_size;
1219 gcm_incr_counter_block(ctx);
1220 ctx->gcm_processed_data_len += block_size;
1221 bleft -= need;
1222 datap += need;
1223 ctx->gcm_remainder_len = 0;
1224 }
1225
1226 /* Do the bulk encryption in chunk_size blocks. */
1227 for (; bleft >= chunk_size; bleft -= chunk_size) {
1228 kfpu_begin();
1229 done = aesni_gcm_encrypt(
1230 datap, ct_buf, chunk_size, key, cb, ghash);
1231
1232 clear_fpu_regs();
1233 kfpu_end();
1234 if (done != chunk_size) {
1235 rv = CRYPTO_FAILED;
1236 goto out_nofpu;
1237 }
1238 rv = crypto_put_output_data(ct_buf, out, chunk_size);
1239 if (rv != CRYPTO_SUCCESS) {
1240 goto out_nofpu;
1241 }
1242 out->cd_offset += chunk_size;
1243 datap += chunk_size;
1244 ctx->gcm_processed_data_len += chunk_size;
1245 }
1246 /* Check if we are already done. */
1247 if (bleft == 0) {
1248 goto out_nofpu;
1249 }
1250 /* Bulk encrypt the remaining data. */
1251 kfpu_begin();
1252 if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
1253 done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash);
1254 if (done == 0) {
1255 rv = CRYPTO_FAILED;
1256 goto out;
1257 }
1258 rv = crypto_put_output_data(ct_buf, out, done);
1259 if (rv != CRYPTO_SUCCESS) {
1260 goto out;
1261 }
1262 out->cd_offset += done;
1263 ctx->gcm_processed_data_len += done;
1264 datap += done;
1265 bleft -= done;
1266
1267 }
1268 /* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */
1269 while (bleft > 0) {
1270 if (bleft < block_size) {
1271 bcopy(datap, ctx->gcm_remainder, bleft);
1272 ctx->gcm_remainder_len = bleft;
1273 ctx->gcm_copy_to = datap;
1274 goto out;
1275 }
1276 /* Encrypt, hash and write out. */
1277 aes_encrypt_intel(key->encr_ks.ks32, key->nr,
1278 (const uint32_t *)cb, (uint32_t *)tmp);
1279
1280 gcm_xor_avx(datap, tmp);
1281 GHASH_AVX(ctx, tmp, block_size);
1282 rv = crypto_put_output_data(tmp, out, block_size);
1283 if (rv != CRYPTO_SUCCESS) {
1284 goto out;
1285 }
1286 out->cd_offset += block_size;
1287 gcm_incr_counter_block(ctx);
1288 ctx->gcm_processed_data_len += block_size;
1289 datap += block_size;
1290 bleft -= block_size;
1291 }
1292 out:
1293 clear_fpu_regs();
1294 kfpu_end();
1295 out_nofpu:
1296 if (ct_buf != NULL) {
1297 vmem_free(ct_buf, chunk_size);
1298 }
1299 return (rv);
1300 }
1301
1302 /*
1303 * Finalize the encryption: Zero-fill, encrypt, hash and write out any
1304 * incomplete last block. Encrypt the ICB. Calculate the tag and write it out.
1305 */
1306 static int
1307 gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
1308 {
1309 uint8_t *ghash = (uint8_t *)ctx->gcm_ghash;
1310 uint32_t *J0 = (uint32_t *)ctx->gcm_J0;
1311 uint8_t *remainder = (uint8_t *)ctx->gcm_remainder;
1312 size_t rem_len = ctx->gcm_remainder_len;
1313 const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
1314 int aes_rounds = ((aes_key_t *)keysched)->nr;
1315 int rv;
1316
1317 ASSERT(block_size == GCM_BLOCK_LEN);
1318
1319 if (out->cd_length < (rem_len + ctx->gcm_tag_len)) {
1320 return (CRYPTO_DATA_LEN_RANGE);
1321 }
1322
1323 kfpu_begin();
1324 /* Pad last incomplete block with zeros, encrypt and hash. */
1325 if (rem_len > 0) {
1326 uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
1327 const uint32_t *cb = (uint32_t *)ctx->gcm_cb;
1328
1329 aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp);
1330 bzero(remainder + rem_len, block_size - rem_len);
1331 for (int i = 0; i < rem_len; i++) {
1332 remainder[i] ^= tmp[i];
1333 }
1334 GHASH_AVX(ctx, remainder, block_size);
1335 ctx->gcm_processed_data_len += rem_len;
1336 /* No need to increment counter_block, it's the last block. */
1337 }
1338 /* Finish tag. */
1339 ctx->gcm_len_a_len_c[1] =
1340 htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
1341 GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size);
1342 aes_encrypt_intel(keysched, aes_rounds, J0, J0);
1343
1344 gcm_xor_avx((uint8_t *)J0, ghash);
1345 clear_fpu_regs();
1346 kfpu_end();
1347
1348 /* Output remainder. */
1349 if (rem_len > 0) {
1350 rv = crypto_put_output_data(remainder, out, rem_len);
1351 if (rv != CRYPTO_SUCCESS)
1352 return (rv);
1353 }
1354 out->cd_offset += rem_len;
1355 ctx->gcm_remainder_len = 0;
1356 rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
1357 if (rv != CRYPTO_SUCCESS)
1358 return (rv);
1359
1360 out->cd_offset += ctx->gcm_tag_len;
1361 /* Clear sensitive data in the context before returning. */
1362 gcm_clear_ctx(ctx);
1363 return (CRYPTO_SUCCESS);
1364 }
1365
1366 /*
1367 * Finalize decryption: So far we have only accumulated ciphertext, so now
1368 * we decrypt it here in place.
1369 */
1370 static int
1371 gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
1372 {
1373 ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len);
1374 ASSERT3U(block_size, ==, 16);
1375
1376 size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
1377 size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
1378 uint8_t *datap = ctx->gcm_pt_buf;
1379 const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
1380 uint32_t *cb = (uint32_t *)ctx->gcm_cb;
1381 uint64_t *ghash = ctx->gcm_ghash;
1382 uint32_t *tmp = (uint32_t *)ctx->gcm_tmp;
1383 int rv = CRYPTO_SUCCESS;
1384 size_t bleft, done;
1385
1386 /*
1387 * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be
1388 * greater than or equal to GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple of
1389 * GCM_AVX_MIN_DECRYPT_BYTES.
1390 */
1391 for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) {
1392 kfpu_begin();
1393 done = aesni_gcm_decrypt(datap, datap, chunk_size,
1394 (const void *)key, ctx->gcm_cb, ghash);
1395 clear_fpu_regs();
1396 kfpu_end();
1397 if (done != chunk_size) {
1398 return (CRYPTO_FAILED);
1399 }
1400 datap += done;
1401 }
1402 /* Decrypt the remainder, which is less than the chunk size, in one go. */
1403 kfpu_begin();
1404 if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) {
1405 done = aesni_gcm_decrypt(datap, datap, bleft,
1406 (const void *)key, ctx->gcm_cb, ghash);
1407 if (done == 0) {
1408 clear_fpu_regs();
1409 kfpu_end();
1410 return (CRYPTO_FAILED);
1411 }
1412 datap += done;
1413 bleft -= done;
1414 }
1415 ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES);
1416
1417 /*
1418 * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain,
1419 * decrypt them block by block.
1420 */
1421 while (bleft > 0) {
1422 /* Incomplete last block. */
1423 if (bleft < block_size) {
1424 uint8_t *lastb = (uint8_t *)ctx->gcm_remainder;
1425
1426 bzero(lastb, block_size);
1427 bcopy(datap, lastb, bleft);
1428 /* The GCM processing. */
1429 GHASH_AVX(ctx, lastb, block_size);
1430 aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
1431 for (size_t i = 0; i < bleft; i++) {
1432 datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i];
1433 }
1434 break;
1435 }
1436 /* The GCM processing. */
1437 GHASH_AVX(ctx, datap, block_size);
1438 aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
1439 gcm_xor_avx((uint8_t *)tmp, datap);
1440 gcm_incr_counter_block(ctx);
1441
1442 datap += block_size;
1443 bleft -= block_size;
1444 }
1445 if (rv != CRYPTO_SUCCESS) {
1446 clear_fpu_regs();
1447 kfpu_end();
1448 return (rv);
1449 }
1450 /* Decryption done, finish the tag. */
1451 ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
1452 GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size);
1453 aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0,
1454 (uint32_t *)ctx->gcm_J0);
1455
1456 gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash);
1457
1458 /* We are done with the FPU, restore its state. */
1459 clear_fpu_regs();
1460 kfpu_end();
1461
1462 /* Compare the input authentication tag with what we calculated. */
1463 if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
1464 /* They don't match. */
1465 return (CRYPTO_INVALID_MAC);
1466 }
1467 rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
1468 if (rv != CRYPTO_SUCCESS) {
1469 return (rv);
1470 }
1471 out->cd_offset += pt_len;
1472 gcm_clear_ctx(ctx);
1473 return (CRYPTO_SUCCESS);
1474 }
1475
1476 /*
1477 * Initialize the GCM parameters H, Htable and the counter block. Save the
1478 * initial counter block.
1479 */
1480 static int
1481 gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
1482 unsigned char *auth_data, size_t auth_data_len, size_t block_size)
1483 {
1484 uint8_t *cb = (uint8_t *)ctx->gcm_cb;
1485 uint64_t *H = ctx->gcm_H;
1486 const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
1487 int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr;
1488 uint8_t *datap = auth_data;
1489 size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
1490 size_t bleft;
1491
1492 ASSERT(block_size == GCM_BLOCK_LEN);
1493
1494 /* Init H (encrypt zero block) and create the initial counter block. */
1495 bzero(ctx->gcm_ghash, sizeof (ctx->gcm_ghash));
1496 bzero(H, sizeof (ctx->gcm_H));
1497 kfpu_begin();
1498 aes_encrypt_intel(keysched, aes_rounds,
1499 (const uint32_t *)H, (uint32_t *)H);
1500
1501 gcm_init_htab_avx(ctx->gcm_Htable, H);
1502
1503 if (iv_len == 12) {
1504 bcopy(iv, cb, 12);
1505 cb[12] = 0;
1506 cb[13] = 0;
1507 cb[14] = 0;
1508 cb[15] = 1;
1509 /* We need the ICB later. */
1510 bcopy(cb, ctx->gcm_J0, sizeof (ctx->gcm_J0));
1511 } else {
1512 /*
1513 * Most consumers use 12 byte IVs, so it's OK to use the
1514 * original routines for other IV sizes, just avoid nesting
1515 * kfpu_begin calls.
1516 */
1517 clear_fpu_regs();
1518 kfpu_end();
1519 gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
1520 aes_copy_block, aes_xor_block);
1521 kfpu_begin();
1522 }
1523
1524 /* OpenSSL post-increments the counter; adjust for that. */
1525 gcm_incr_counter_block(ctx);
1526
1527 /* Ghash AAD in chunk_size blocks. */
1528 for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) {
1529 GHASH_AVX(ctx, datap, chunk_size);
1530 datap += chunk_size;
1531 clear_fpu_regs();
1532 kfpu_end();
1533 kfpu_begin();
1534 }
1535 /* Ghash the remainder and handle a possible incomplete GCM block. */
1536 if (bleft > 0) {
1537 size_t incomp = bleft % block_size;
1538
1539 bleft -= incomp;
1540 if (bleft > 0) {
1541 GHASH_AVX(ctx, datap, bleft);
1542 datap += bleft;
1543 }
1544 if (incomp > 0) {
1545 /* Zero pad and hash incomplete last block. */
1546 uint8_t *authp = (uint8_t *)ctx->gcm_tmp;
1547
1548 bzero(authp, block_size);
1549 bcopy(datap, authp, incomp);
1550 GHASH_AVX(ctx, authp, block_size);
1551 }
1552 }
1553 clear_fpu_regs();
1554 kfpu_end();
1555 return (CRYPTO_SUCCESS);
1556 }
1557
1558 #if defined(_KERNEL)
1559 static int
1560 icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp)
1561 {
1562 unsigned long val;
1563 char val_rounded[16];
1564 int error = 0;
1565
1566 error = kstrtoul(buf, 0, &val);
1567 if (error)
1568 return (error);
1569
1570 val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
1571
1572 if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE)
1573 return (-EINVAL);
1574
1575 snprintf(val_rounded, 16, "%u", (uint32_t)val);
1576 error = param_set_uint(val_rounded, kp);
1577 return (error);
1578 }
1579
1580 module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size,
1581 param_get_uint, &gcm_avx_chunk_size, 0644);
1582
1583 MODULE_PARM_DESC(icp_gcm_avx_chunk_size,
1584 "How many bytes to process while owning the FPU");
1585
1586 #endif /* defined(_KERNEL) */
1587 #endif /* ifdef CAN_USE_GCM_ASM */
1588