9 NAMESPACE_BEGIN(CryptoPP)
11 #if defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
15 #define VMAC_BOOL_WORD128 (defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE))
17 #define const // Turbo C++ 2006 workaround
19 static const word64 p64 = W64LIT(0xfffffffffffffeff);
20 static const word64 m62 = W64LIT(0x3fffffffffffffff);
21 static const word64 m63 = W64LIT(0x7fffffffffffffff);
22 static const word64 m64 = W64LIT(0xffffffffffffffff);
23 static const word64 mpoly = W64LIT(0x1fffffff1fffffff);
30 #define m126 ((word128(m62)<<64)|m64)
32 static const word128 m126 = (word128(m62)<<64)|m64;
// Keys the underlying block cipher and derives the VMAC sub-keys from it.
//
// userKey / keylength: raw cipher key and its length, forwarded to cipher.SetKey.
// params: named parameters; forwarded to cipher.SetKey and used to fetch the
//         nonce via GetIVAndThrowIfInvalid.
// Throws InvalidArgument when the L1 key length is not a positive multiple of 128.
//
// NOTE(review): this listing omits several lines of the function (braces, local
// declarations such as digestLength / in / out / i, and some statements).
36 void VMAC_Base::UncheckedSetKey(
const byte *userKey,
unsigned int keylength,
const NameValuePairs &params)
// Only digest lengths of 8 and 16 pass this check; 16 turns on m_is128.
39 if (digestLength != 8 && digestLength != 16)
41 m_is128 = digestLength == 16;
// The L1 key length must be a positive multiple of 128.
44 if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
45 throw InvalidArgument(
"VMAC: L1KeyLength must be a positive multiple of 128");
// Key the cipher, then express its block size in 64-bit words.
50 cipher.
SetKey(userKey, keylength, params);
51 unsigned int blockSize = cipher.
BlockSize();
52 unsigned int blockSizeInWords = blockSize /
sizeof(word64);
// Fill the NH key buffer with cipher output (input block treated as a counter),
// then byte-reverse its words where needed relative to BIG_ENDIAN_ORDER.
60 cipher.
AdvancedProcessBlocks(in, NULL, (byte *)m_nhKey(), m_nhKeySize()*
sizeof(word64), cipher.BT_InBlockIsCounter);
61 ConditionalByteReverse<word64>(BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*
sizeof(word64));
// Polynomial keys: one iteration when m_is128 is false, two when it is true.
// Each key word is read big-endian from `out` and masked with mpoly.
66 for (i = 0; i <= (size_t)m_is128; i++)
69 m_polyState()[i*4+2] = GetWord<word64>(
true, BIG_ENDIAN_ORDER, out.BytePtr()) & mpoly;
70 m_polyState()[i*4+3] = GetWord<word64>(
true, BIG_ENDIAN_ORDER, out.BytePtr()+8) & mpoly;
// L3 keys: same iteration count; the do/while (its opening is not shown)
// repeats until both key words are below p64.
77 word64 *l3Key = m_l3Key();
78 for (i = 0; i <= (size_t)m_is128; i++)
82 l3Key[i*2+0] = GetWord<word64>(
true, BIG_ENDIAN_ORDER, out.BytePtr());
83 l3Key[i*2+1] = GetWord<word64>(
true, BIG_ENDIAN_ORDER, out.BytePtr()+8);
85 }
while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));
// Fetch the nonce (and its length) from the supplied parameters.
89 const byte *nonce = GetIVAndThrowIfInvalid(params, nonceLength);
// NOTE(review): fragment of a nonce-handling routine whose signature is not
// shown - presumably VMAC_Base::Resynchronize(nonce, len); confirm against the
// full file. The definition of `s` and the if/else structure around these
// statements are also missing from this listing.
101 size_t length = ThrowIfInvalidIVLength(len);
103 byte *storedNonce = m_nonce();
// Right-align the nonce in the stored buffer, zero-filling the first s-length bytes.
107 memset(storedNonce, 0, s-length);
108 memcpy(storedNonce+s-length, nonce, length);
// Pad-cache check: the last bytes must match ignoring their low bit, the
// preceding length-1 nonce bytes must match, and the leading bytes must be zero.
113 if (m_padCached && (storedNonce[s-1] | 1) == (nonce[length-1] | 1))
115 m_padCached = VerifyBufsEqual(storedNonce+s-length, nonce, length-1);
116 for (
size_t i=0; m_padCached && i<s-length; i++)
117 m_padCached = (storedNonce[i] == 0);
// Store the nonce with the low bit of its last byte cleared.
121 memset(storedNonce, 0, s-length);
122 memcpy(storedNonce+s-length, nonce, length-1);
123 storedNonce[s-1] = nonce[length-1] & 0xfe;
// Store the last nonce byte unmodified.
127 storedNonce[s-1] = nonce[length-1];
// Reset the first-block flag.
129 m_isFirstBlock =
true;
// NOTE(review): only the signature of HashEndianCorrectedBlock is visible in
// this listing; its body is not shown.
133 void VMAC_Base::HashEndianCorrectedBlock(
const word64 *data)
// Inline-assembly VHASH update, compiled only when both
// CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE and CRYPTOPP_BOOL_X86 are set. It works on
// the mm0-mm7 registers and uses [esp], [esp+4] and [esp+8] as scratch slots.
//
// data: input words.
// blocksRemainingInWord64: amount of input, loaded into ecx.
// tagPart: selects the key/state slice used (nhK+tagPart*2, polyS+tagPart*4).
//
// NOTE(review): this listing omits most instructions of the routine, so the
// comments below describe only what the visible lines show.
139 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
140 #pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
143 __attribute__ ((noinline))
145 VMAC_Base::VHASH_Update_SSE2(
const word64 *data,
size_t blocksRemainingInWord64,
int tagPart)
147 const word64 *nhK = m_nhKey();
148 word64 *polyS = m_polyState();
// GNU-style asm: switch to Intel syntax for the body (restored to AT&T at the end).
157 ".intel_syntax noprefix;"
159 #
if _MSC_VER < 1300 || defined(__INTEL_COMPILER)
// Two ways of loading the L1 key length and first-block flag are visible: via
// local copies, or as members addressed relative to ecx. Presumably the second
// sits in an #else branch that is not shown.
160 char isFirstBlock = m_isFirstBlock;
161 AS2( mov ebx, [L1KeyLength])
162 AS2( mov dl, [isFirstBlock])
165 AS2( mov ebx, [ecx+m_L1KeyLength])
166 AS2( mov dl, [ecx+m_isFirstBlock])
168 AS2( mov eax, tagPart)
176 AS2( mov ecx, blocksRemainingInWord64)
187 AS2( lea ebp, [edi+8*ebp])
188 AS2( movq mm6, [esi])
189 AS2( paddq mm6, [edi])
190 AS2( movq mm5, [esi+8])
191 AS2( paddq mm5, [edi+8])
195 ASS( pshufw mm2, mm6, 1, 0, 3, 2)
196 AS2( pmuludq mm6, mm5)
197 ASS( pshufw mm3, mm5, 1, 0, 3, 2)
198 AS2( pmuludq mm5, mm2)
199 AS2( pmuludq mm2, mm3)
200 AS2( pmuludq mm3, mm4)
202 AS2( movd [esp], mm6)
204 AS2( movd [esp+4], mm5)
209 AS2( movq mm0, [esi])
210 AS2( paddq mm0, [edi])
211 AS2( movq mm1, [esi+8])
212 AS2( paddq mm1, [edi+8])
217 ASS( pshufw mm2, mm0, 1, 0, 3, 2)
218 AS2( pmuludq mm0, mm1)
219 AS2( movd [esp+8], mm3)
222 ASS( pshufw mm3, mm1, 1, 0, 3, 2)
223 AS2( pmuludq mm1, mm2)
224 AS2( pmuludq mm2, mm3)
225 AS2( pmuludq mm3, mm4)
226 AS2( movd mm4, [esp])
228 AS2( movd mm4, [esp+4])
230 AS2( movd mm4, [esp+8])
232 AS2( movd [esp], mm0)
235 AS2( movd [esp+4], mm1)
242 AS2( movd [esp+8], mm3)
245 AS2( movd mm4, [esp])
247 AS2( movd mm4, [esp+4])
249 AS2( movd mm4, [esp+8])
251 AS2( lea ebp, [8*ebx])
254 AS2( movd [esp], mm7)
257 AS2( movd [esp+4], mm6)
// k0..k3 name the four 32-bit words at byte offsets 16..31 from eax.
267 #define k0 [eax+2*8+2*4]
268 #define k1 [eax+2*8+3*4]
269 #define k2 [eax+2*8+0*4]
270 #define k3 [eax+2*8+1*4]
274 AS2( movd mm0, [esp])
279 AS2( movd mm2, [esp+4])
292 AS2( pmuludq mm0, k3)
294 AS2( pmuludq mm1, k2)
297 AS2( pmuludq mm2, mm6)
303 AS2( pmuludq mm3, mm7)
304 AS2( pmuludq mm4, mm7)
305 AS2( pmuludq mm5, mm6)
310 AS2( pmuludq mm1, k2)
315 AS2( pmuludq mm2, k3)
316 AS2( pmuludq mm3, mm7)
317 AS2( movd [esp+8], mm0)
319 AS2( pmuludq mm7, mm5)
320 AS2( pmuludq mm5, k3)
323 AS2( pmuludq mm1, k2)
328 AS2( pmuludq mm2, mm6)
329 AS2( pmuludq mm6, a0)
332 AS2( movd mm3, [esp])
335 AS2( pmuludq mm3, k3)
338 AS2( pmuludq mm1, k2)
340 AS2( movd mm2, [esp+4])
347 AS2( movd mm7, [esp+8])
376 ".att_syntax prefix;"
// Operand list of the GNU asm statement: the L1 key length in memory, the count
// in ecx, data in esi, the NH key slice in edi, the first-block flag in edx and
// the polynomial state slice in eax.
379 :
"m" (L1KeyLength),
"c" (blocksRemainingInWord64),
"S" (data),
"D" (nhK+tagPart*2),
"d" (m_isFirstBlock),
"a" (polyS+tagPart*4)
// Arithmetic primitives used by the hashing code, with one definition set per
// platform capability:
//   DeclareNH(a)            declares zero-initialised NH accumulator(s) named from `a`
//   MUL64(rh,rl,i1,i2)      64x64 -> 128-bit product, high word in rh, low word in rl
//   AccumulateNH(a,b,c)     adds the product b*c into accumulator a
//   ADD128(rh,rl,ih,il)     adds (ih:il) into (rh:rl), carrying from low to high
// NOTE(review): the #else/#endif lines separating the variants, and some macro
// body lines, are missing from this listing.
386 #if VMAC_BOOL_WORD128
// Variant built on the word128 type.
387 #define DeclareNH(a) word128 a=0
388 #define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
389 #define AccumulateNH(a, b, c) a += word128(b)*(c)
390 #define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
// MUL32: 32x32 -> 64-bit product; uses the __emulu intrinsic on MSVC >= 1400
// (non-Intel compiler), otherwise a plain widening multiply.
392 #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
393 #define MUL32(a, b) __emulu(word32(a), word32(b))
395 #define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
// Variant built on x86-64 GNU inline asm (mulq / addq / adcq).
397 #if defined(CRYPTOPP_X64_ASM_AVAILABLE)
398 #define DeclareNH(a) word64 a##0=0, a##1=0
399 #define MUL64(rh,rl,i1,i2) asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
400 #define AccumulateNH(a, b, c) asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
401 #define ADD128(rh,rl,ih,il) asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
// Variant built on the MSVC _umul128 intrinsic.
402 #elif defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
403 #define DeclareNH(a) word64 a##0=0, a##1=0
404 #define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
405 #define AccumulateNH(a, b, c) {\
407 pl = _umul128(b,c,&ph);\
409 a##1 += ph + (a##0 < pl);}
// Fallback variant built from 32x32 multiplies; it sets VMAC_BOOL_32BIT to 1 and
// keeps the NH sum in three word64 accumulators.
411 #define VMAC_BOOL_32BIT 1
412 #define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
413 #define MUL64(rh,rl,i1,i2) \
414 { word64 _i1 = (i1), _i2 = (i2); \
415 word64 m1= MUL32(_i1,_i2>>32); \
416 word64 m2= MUL32(_i1>>32,_i2); \
417 rh = MUL32(_i1>>32,_i2>>32); \
418 rl = MUL32(_i1,_i2); \
419 ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
420 ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
422 #define AccumulateNH(a, b, c) {\
423 word64 p = MUL32(b, c);\
424 a##1 += word32((p)>>32);\
426 p = MUL32((b)>>32, c);\
427 a##2 += word32((p)>>32);\
429 p = MUL32((b)>>32, (c)>>32);\
431 p = MUL32(b, (c)>>32);\
433 a##2 += word32(p>>32);}
// Default VMAC_BOOL_32BIT to 0 when the fallback variant above was not selected.
436 #ifndef VMAC_BOOL_32BIT
437 #define VMAC_BOOL_32BIT 0
// Portable ADD128: the carry out of the low word is detected with (rl) < (_il).
440 #define ADD128(rh,rl,ih,il) \
441 { word64 _il = (il); \
443 (rh) += (ih) + ((rl) < (_il)); \
447 #if !(defined(_MSC_VER) && _MSC_VER < 1300)
// Portable (non-assembly) VHASH update.
//
// T_128BitTag: when true, the loops over i run twice, so the second ("B" / index
//              1) accumulator and state slice are processed as well. On MSVC
//              older than 1300 it is a local initialised from m_is128 instead
//              of a template parameter.
// data: input words; advanced by innerLoopEnd after each pass.
// blocksRemainingInWord64: number of 64-bit words left; the outer do/while
//              runs until it reaches zero.
//
// NOTE(review): this listing omits many lines of the function (braces,
// declarations, #else/#endif lines and parts of the macro bodies).
448 template <
bool T_128BitTag>
450 void VMAC_Base::VHASH_Update_Template(
const word64 *data,
size_t blocksRemainingInWord64)
// INNER_LOOP_ITERATION(j): reads two data words (byte-reversed where needed
// relative to LITTLE_ENDIAN_ORDER) and accumulates the NH product into nhA, and
// into nhB using the key shifted by two words.
452 #define INNER_LOOP_ITERATION(j) {\
453 word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
454 word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
455 AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
457 AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
460 #if (defined(_MSC_VER) && _MSC_VER < 1300)
461 bool T_128BitTag = m_is128;
// L1 key length converted to a count of 64-bit words (assumes m_L1KeyLength is
// in bytes - TODO confirm).
463 size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
464 size_t innerLoopEnd = L1KeyLengthInWord64;
465 const word64 *nhK = m_nhKey();
466 word64 *polyS = m_polyState();
467 bool isFirstBlock =
true;
// Polynomial accumulators (ah/al) and keys (kh/kl); the keys are read from
// polyS at word offsets 2..3 (slice 0) and 6..7 (slice 1).
471 #if VMAC_BOOL_WORD128
474 word64 ah1, al1, ah2, al2;
476 word64 kh1, kl1, kh2, kl2;
477 kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
480 kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
// Short final pass: when fewer words remain than one full L1 block, first
// consume the count modulo 8 two words at a time, then the rest in the
// 8-words-per-iteration loop below.
490 if (blocksRemainingInWord64 < L1KeyLengthInWord64)
492 if (blocksRemainingInWord64 % 8)
494 innerLoopEnd = blocksRemainingInWord64 % 8;
495 for (; i<innerLoopEnd; i+=2)
496 INNER_LOOP_ITERATION(0);
498 innerLoopEnd = blocksRemainingInWord64;
500 for (; i<innerLoopEnd; i+=8)
502 INNER_LOOP_ITERATION(0);
503 INNER_LOOP_ITERATION(1);
504 INNER_LOOP_ITERATION(2);
505 INNER_LOOP_ITERATION(3);
// Advance past the words consumed in this pass.
507 blocksRemainingInWord64 -= innerLoopEnd;
508 data += innerLoopEnd;
// 32-bit path: fold the three NH accumulators into limbs nh0/nh1/nh2, masking
// the top limb with m62 (nh2 is declared on a line not shown).
511 word32 nh0[2], nh1[2];
514 nh0[0] = word32(nhA0);
515 nhA1 += (nhA0 >> 32);
516 nh1[0] = word32(nhA1);
517 nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;
521 nh0[1] = word32(nhB0);
522 nhB1 += (nhB0 >> 32);
523 nh1[1] = word32(nhB1);
524 nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
// Accessors for 32-bit pieces of the polynomial accumulator (a0..a3, aHi) and
// key (k0..k3, kHi) of slice i in polyS; indices are adjusted by the native
// byte order.
527 #define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
528 #define a1 (*(((word32 *)(polyS+i*4))+3-NativeByteOrder::ToEnum())) // workaround for GCC 3.2
529 #define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
530 #define a3 (*(((word32 *)(polyS+i*4))+1-NativeByteOrder::ToEnum()))
531 #define aHi ((polyS+i*4)[0])
532 #define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
533 #define k1 (*(((word32 *)(polyS+i*4+2))+3-NativeByteOrder::ToEnum()))
534 #define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
535 #define k3 (*(((word32 *)(polyS+i*4+2))+1-NativeByteOrder::ToEnum()))
536 #define kHi ((polyS+i*4+2)[0])
// First-block handling (the controlling conditions are not shown): both the
// local and member flags are cleared and the accumulator is formed as NH + key.
540 isFirstBlock =
false;
543 m_isFirstBlock =
false;
544 for (i=0; i<=(size_t)T_128BitTag; i++)
546 word64 t = (word64)nh0[i] + k0;
548 t = (t >> 32) + nh1[i] + k1;
550 aHi = (t >> 32) + nh2[i] + kHi;
// Later blocks: polynomial step computed from 32x32 multiplies.
555 for (i=0; i<=(size_t)T_128BitTag; i++)
571 t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
575 p += MUL32(a1, 2*k3);
576 p += MUL32(a2, 2*k2);
577 p += MUL32(a3, 2*k1);
583 p += MUL32(a2, 2*k3);
584 p += MUL32(a3, 2*k2);
// From here on: the paths used when VMAC_BOOL_32BIT is 0.
600 #else // #if VMAC_BOOL_32BIT
603 isFirstBlock =
false;
606 m_isFirstBlock =
false;
607 #if VMAC_BOOL_WORD128
608 #define first_poly_step(a, kh, kl, m) a = (m & m126) + ((word128(kh) << 64) | kl)
610 first_poly_step(a1, kh1, kl1, nhA);
612 first_poly_step(a2, kh2, kl2, nhB);
614 #define first_poly_step(ah, al, kh, kl, mh, ml) {\
616 ADD128(mh, ml, kh, kl); \
619 first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
621 first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
// Load the running polynomial accumulators from polyS (words 0..1 of each slice).
627 #if VMAC_BOOL_WORD128
628 a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
630 ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
634 #if VMAC_BOOL_WORD128
635 a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
637 ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
// poly_step, word128 form: polynomial step on accumulator a with key (kh:kl)
// and message m, built from four Multiply128 partial products.
643 #if VMAC_BOOL_WORD128
644 #define poly_step(a, kh, kl, m) \
645 { word128 t1, t2, t3, t4;\
646 Multiply128(t2, a>>64, kl);\
647 Multiply128(t3, a, kh);\
648 Multiply128(t1, a, kl);\
649 Multiply128(t4, a>>64, 2*kh);\
653 a = (word128(word64(t2)&m63) << 64) | word64(t4);\
658 poly_step(a1, kh1, kl1, nhA);
660 poly_step(a2, kh2, kl2, nhB);
662 #define poly_step(ah, al, kh, kl, mh, ml) \
663 { word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0; \
665 MUL64(t2h,t2l,ah,kl); \
666 MUL64(t3h,t3l,al,kh); \
667 MUL64(t1h,t1l,ah,2*kh); \
668 MUL64(ah,al,al,kl); \
670 ADD128(t2h,t2l,t3h,t3l); \
672 ADD128(ah,al,t1h,t1l); \
675 ADD128(t2h,ah,z,t2l); \
677 t2h += t2h + (ah >> 63); \
681 ADD128(ah,al,mh,ml); \
682 ADD128(ah,al,z,t2h); \
685 poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
687 poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
689 #endif // #if VMAC_BOOL_32BIT
690 }
while (blocksRemainingInWord64);
// Write the polynomial accumulators back into polyS.
692 #if VMAC_BOOL_WORD128
693 (polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
696 (polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
698 #elif !VMAC_BOOL_32BIT
699 (polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
702 (polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
// Dispatches a VHASH update to one of the implementations.
//
// data: input words.
// blocksRemainingInWord64: amount of input, forwarded unchanged.
//
// Under CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86 the SSE2 routine
// is called for tag part 0 and for tag part 1, and the first-block flag is
// cleared here. Otherwise VHASH_Update_Template is called: untemplated on MSVC
// older than 1300, else as <true> or <false>.
// NOTE(review): the conditions choosing between these calls (presumably an
// SSE2 capability check and m_is128) are on lines not shown - confirm.
707 inline void VMAC_Base::VHASH_Update(
const word64 *data,
size_t blocksRemainingInWord64)
709 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
712 VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
714 VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
715 m_isFirstBlock =
false;
720 #if defined(_MSC_VER) && _MSC_VER < 1300
721 VHASH_Update_Template(data, blocksRemainingInWord64);
724 VHASH_Update_Template<true>(data, blocksRemainingInWord64);
726 VHASH_Update_Template<false>(data, blocksRemainingInWord64);
// Hashes the leading part of the input and leaves the tail for the caller.
//
// data: input words.
// length: input length; (length - remaining) is divided by 8 to obtain the
//         64-bit word count handed to VHASH_Update.
// `remaining` is ModPowerOf2(length, m_L1KeyLength), presumably length modulo
// the L1 key length.
// NOTE(review): the return statement is not shown; presumably it returns
// `remaining` - confirm.
731 size_t VMAC_Base::HashMultipleBlocks(
const word64 *data,
size_t length)
733 size_t remaining = ModPowerOf2(length, m_L1KeyLength);
734 VHASH_Update(data, (length-remaining)/8);
// File-local final hashing stage.
//
// input: two words (input[0], input[1]) forming the 128-bit value p1:p2.
// l3Key: two key words k1, k2.
// len:   added into p1:p2 through ADD128 before the reductions.
// Returns a word64.
// NOTE(review): several statements, including the return, are missing from
// this listing; presumably the result is `rl` - confirm.
738 static word64 L3Hash(
const word64 *input,
const word64 *l3Key,
size_t len)
740 word64 rh, rl, t, z=0;
741 word64 p1 = input[0], p2 = input[1];
742 word64 k1 = l3Key[0], k2 = l3Key[1];
747 ADD128(p1, p2, len, t);
// t is 1 when p1:p2 is at least m63:m64 (p1 above m63, or p1 == m63 with p2 all
// ones), else 0; it is then added back into p1:p2.
749 t = (p1 > m63) + ((p1 == m63) & (p2 == m64));
750 ADD128(p1, p2, z, t);
756 t += (word32)t > 0xfffffffeU;
// Branch-free corrections: (0 - cond) & 257 adds 257 exactly when cond holds.
762 p1 += (0 - (p1 < k1)) & 257;
764 p2 += (0 - (p2 < k2)) & 257;
767 MUL64(rh, rl, p1, p2);
769 ADD128(t, rl, z, rh);
771 ADD128(t, rl, z, rh);
// Last two corrections; the second adds 257 when rl >= p64 (p64 = 2^64 - 257).
774 rl += (0 - (rl < t)) & 257;
775 rl += (0 - (rl > p64-1)) & 257;
// NOTE(review): fragment of the finalisation routine whose signature is not
// shown - presumably VMAC_Base::TruncatedFinal(byte *mac, size_t size); confirm
// against the full file. The if/else structure, the declaration of t[] and the
// size checks choosing between PutWord and memcpy are also missing.
781 size_t len = ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);
// Zero-fill the buffered data up to the next multiple of 16 bytes and hash it
// as ceil(len/16)*2 64-bit words.
785 memset(m_data()+len, 0, (0-len)%16);
786 VHASH_Update(DataBuf(), ((len+15)/16)*2);
// Nothing hashed yet: copy words 2..3 of the polynomial state into words 0..1
// (and 6..7 into 4..5 for the second slice).
789 else if (m_isFirstBlock)
792 m_polyState()[0] = m_polyState()[2];
793 m_polyState()[1] = m_polyState()[3];
796 m_polyState()[4] = m_polyState()[6];
797 m_polyState()[5] = m_polyState()[7];
// Two-word tag: one L3 hash per state slice, each added to one 8-byte half of
// the pad read big-endian.
804 t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(
true, BIG_ENDIAN_ORDER, m_pad());
805 t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(
true, BIG_ENDIAN_ORDER, m_pad()+8);
// Output either word by word in big-endian order, or via a byte-reversed copy
// of `size` bytes.
808 PutWord(
false, BIG_ENDIAN_ORDER, mac, t[0]);
809 PutWord(
false, BIG_ENDIAN_ORDER, mac+8, t[1]);
813 t[0] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[0]);
814 t[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[1]);
815 memcpy(mac, t, size);
// One-word tag: a single L3 hash; the pad half is chosen by the low bit of the
// last nonce byte.
820 word64 t = L3Hash(m_polyState(), m_l3Key(), len);
821 t += GetWord<word64>(
true, BIG_ENDIAN_ORDER, m_pad() + (m_nonce()[IVSize()-1]&1) * 8);
823 PutWord(
false, BIG_ENDIAN_ORDER, mac, t);
826 t = ConditionalByteReverse(BIG_ENDIAN_ORDER, t);
827 memcpy(mac, &t, size);
const char * DigestSize()
int, in bytes
exception thrown when an invalid argument is detected
virtual void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
set or reset the key of this object
void CleanNew(size_type newSize)
change size and set contents to 0
interface for random number generators
interface for one direction (encryption or decryption) of a block cipher
int GetIntValueWithDefault(const char *name, int defaultValue) const
get a named value with type int, with default
virtual void GetNextIV(RandomNumberGenerator &rng, byte *IV)
get a secure IV for the next message
void TruncatedFinal(byte *mac, size_t size)
truncated version of Final()
const char * L1KeyLength()
int, in bytes
void Resynchronize(const byte *nonce, int length=-1)
resynchronize with an IV. ivLength=-1 means use IVSize()
void GetNextIV(RandomNumberGenerator &rng, byte *IV)
get a secure IV for the next message
interface for retrieving values given their names