7 #ifndef CRYPTOPP_GENERATE_X64_MASM
15 NAMESPACE_BEGIN(CryptoPP)
19 Serpent_KeySchedule(m_key, 24, userKey, keylen);
22 void SosemanukPolicy::CipherResynchronize(byte *keystreamBuffer,
const byte *iv,
size_t length)
29 Block::Get(iv)(a)(b)(c)(d);
31 const word32 *k = m_key;
36 beforeS0(KX); beforeS0(S0); afterS0(LT);
37 afterS0(KX); afterS0(S1); afterS1(LT);
45 afterS1(KX); afterS1(S2); afterS2(LT);
46 afterS2(KX); afterS2(S3); afterS3(LT);
54 afterS3(KX); afterS3(S4); afterS4(LT);
55 afterS4(KX); afterS4(S5); afterS5(LT);
56 afterS5(KX); afterS5(S6); afterS6(LT);
57 afterS6(KX); afterS6(S7); afterS7(LT);
// XMUX(c, x, y): branch-free select. Evaluates to x when the low bit of c is
// 0 and to x ^ y when it is 1: (0 - (c & 1)) is either all-zero or all-one
// bits for unsigned operands, and that value masks y before the XOR.
// Every argument is parenthesized so that expression arguments such as
// `a | b` keep their intended grouping after substitution.
#define XMUX(c, x, y) ((x) ^ ((y) & (0 - ((c) & 1))))
80 m_state[11] += XMUX(m_state[10], m_state[1], m_state[8]);
81 m_state[10] = rotlFixed(m_state[10] * 0x54655307, 7);
85 word32 s_sosemanukMulTables[512] = {
86 #if CRYPTOPP_BOOL_X86 | CRYPTOPP_BOOL_X64
87 0x00000000, 0xE19FCF12, 0x6B973724, 0x8A08F836,
88 0xD6876E48, 0x3718A15A, 0xBD10596C, 0x5C8F967E,
89 0x05A7DC90, 0xE4381382, 0x6E30EBB4, 0x8FAF24A6,
90 0xD320B2D8, 0x32BF7DCA, 0xB8B785FC, 0x59284AEE,
91 0x0AE71189, 0xEB78DE9B, 0x617026AD, 0x80EFE9BF,
92 0xDC607FC1, 0x3DFFB0D3, 0xB7F748E5, 0x566887F7,
93 0x0F40CD19, 0xEEDF020B, 0x64D7FA3D, 0x8548352F,
94 0xD9C7A351, 0x38586C43, 0xB2509475, 0x53CF5B67,
95 0x146722BB, 0xF5F8EDA9, 0x7FF0159F, 0x9E6FDA8D,
96 0xC2E04CF3, 0x237F83E1, 0xA9777BD7, 0x48E8B4C5,
97 0x11C0FE2B, 0xF05F3139, 0x7A57C90F, 0x9BC8061D,
98 0xC7479063, 0x26D85F71, 0xACD0A747, 0x4D4F6855,
99 0x1E803332, 0xFF1FFC20, 0x75170416, 0x9488CB04,
100 0xC8075D7A, 0x29989268, 0xA3906A5E, 0x420FA54C,
101 0x1B27EFA2, 0xFAB820B0, 0x70B0D886, 0x912F1794,
102 0xCDA081EA, 0x2C3F4EF8, 0xA637B6CE, 0x47A879DC,
103 0x28CE44DF, 0xC9518BCD, 0x435973FB, 0xA2C6BCE9,
104 0xFE492A97, 0x1FD6E585, 0x95DE1DB3, 0x7441D2A1,
105 0x2D69984F, 0xCCF6575D, 0x46FEAF6B, 0xA7616079,
106 0xFBEEF607, 0x1A713915, 0x9079C123, 0x71E60E31,
107 0x22295556, 0xC3B69A44, 0x49BE6272, 0xA821AD60,
108 0xF4AE3B1E, 0x1531F40C, 0x9F390C3A, 0x7EA6C328,
109 0x278E89C6, 0xC61146D4, 0x4C19BEE2, 0xAD8671F0,
110 0xF109E78E, 0x1096289C, 0x9A9ED0AA, 0x7B011FB8,
111 0x3CA96664, 0xDD36A976, 0x573E5140, 0xB6A19E52,
112 0xEA2E082C, 0x0BB1C73E, 0x81B93F08, 0x6026F01A,
113 0x390EBAF4, 0xD89175E6, 0x52998DD0, 0xB30642C2,
114 0xEF89D4BC, 0x0E161BAE, 0x841EE398, 0x65812C8A,
115 0x364E77ED, 0xD7D1B8FF, 0x5DD940C9, 0xBC468FDB,
116 0xE0C919A5, 0x0156D6B7, 0x8B5E2E81, 0x6AC1E193,
117 0x33E9AB7D, 0xD276646F, 0x587E9C59, 0xB9E1534B,
118 0xE56EC535, 0x04F10A27, 0x8EF9F211, 0x6F663D03,
119 0x50358817, 0xB1AA4705, 0x3BA2BF33, 0xDA3D7021,
120 0x86B2E65F, 0x672D294D, 0xED25D17B, 0x0CBA1E69,
121 0x55925487, 0xB40D9B95, 0x3E0563A3, 0xDF9AACB1,
122 0x83153ACF, 0x628AF5DD, 0xE8820DEB, 0x091DC2F9,
123 0x5AD2999E, 0xBB4D568C, 0x3145AEBA, 0xD0DA61A8,
124 0x8C55F7D6, 0x6DCA38C4, 0xE7C2C0F2, 0x065D0FE0,
125 0x5F75450E, 0xBEEA8A1C, 0x34E2722A, 0xD57DBD38,
126 0x89F22B46, 0x686DE454, 0xE2651C62, 0x03FAD370,
127 0x4452AAAC, 0xA5CD65BE, 0x2FC59D88, 0xCE5A529A,
128 0x92D5C4E4, 0x734A0BF6, 0xF942F3C0, 0x18DD3CD2,
129 0x41F5763C, 0xA06AB92E, 0x2A624118, 0xCBFD8E0A,
130 0x97721874, 0x76EDD766, 0xFCE52F50, 0x1D7AE042,
131 0x4EB5BB25, 0xAF2A7437, 0x25228C01, 0xC4BD4313,
132 0x9832D56D, 0x79AD1A7F, 0xF3A5E249, 0x123A2D5B,
133 0x4B1267B5, 0xAA8DA8A7, 0x20855091, 0xC11A9F83,
134 0x9D9509FD, 0x7C0AC6EF, 0xF6023ED9, 0x179DF1CB,
135 0x78FBCCC8, 0x996403DA, 0x136CFBEC, 0xF2F334FE,
136 0xAE7CA280, 0x4FE36D92, 0xC5EB95A4, 0x24745AB6,
137 0x7D5C1058, 0x9CC3DF4A, 0x16CB277C, 0xF754E86E,
138 0xABDB7E10, 0x4A44B102, 0xC04C4934, 0x21D38626,
139 0x721CDD41, 0x93831253, 0x198BEA65, 0xF8142577,
140 0xA49BB309, 0x45047C1B, 0xCF0C842D, 0x2E934B3F,
141 0x77BB01D1, 0x9624CEC3, 0x1C2C36F5, 0xFDB3F9E7,
142 0xA13C6F99, 0x40A3A08B, 0xCAAB58BD, 0x2B3497AF,
143 0x6C9CEE73, 0x8D032161, 0x070BD957, 0xE6941645,
144 0xBA1B803B, 0x5B844F29, 0xD18CB71F, 0x3013780D,
145 0x693B32E3, 0x88A4FDF1, 0x02AC05C7, 0xE333CAD5,
146 0xBFBC5CAB, 0x5E2393B9, 0xD42B6B8F, 0x35B4A49D,
147 0x667BFFFA, 0x87E430E8, 0x0DECC8DE, 0xEC7307CC,
148 0xB0FC91B2, 0x51635EA0, 0xDB6BA696, 0x3AF46984,
149 0x63DC236A, 0x8243EC78, 0x084B144E, 0xE9D4DB5C,
150 0xB55B4D22, 0x54C48230, 0xDECC7A06, 0x3F53B514,
152 0x00000000, 0xE19FCF13, 0x6B973726, 0x8A08F835,
153 0xD6876E4C, 0x3718A15F, 0xBD10596A, 0x5C8F9679,
154 0x05A7DC98, 0xE438138B, 0x6E30EBBE, 0x8FAF24AD,
155 0xD320B2D4, 0x32BF7DC7, 0xB8B785F2, 0x59284AE1,
156 0x0AE71199, 0xEB78DE8A, 0x617026BF, 0x80EFE9AC,
157 0xDC607FD5, 0x3DFFB0C6, 0xB7F748F3, 0x566887E0,
158 0x0F40CD01, 0xEEDF0212, 0x64D7FA27, 0x85483534,
159 0xD9C7A34D, 0x38586C5E, 0xB250946B, 0x53CF5B78,
160 0x1467229B, 0xF5F8ED88, 0x7FF015BD, 0x9E6FDAAE,
161 0xC2E04CD7, 0x237F83C4, 0xA9777BF1, 0x48E8B4E2,
162 0x11C0FE03, 0xF05F3110, 0x7A57C925, 0x9BC80636,
163 0xC747904F, 0x26D85F5C, 0xACD0A769, 0x4D4F687A,
164 0x1E803302, 0xFF1FFC11, 0x75170424, 0x9488CB37,
165 0xC8075D4E, 0x2998925D, 0xA3906A68, 0x420FA57B,
166 0x1B27EF9A, 0xFAB82089, 0x70B0D8BC, 0x912F17AF,
167 0xCDA081D6, 0x2C3F4EC5, 0xA637B6F0, 0x47A879E3,
168 0x28CE449F, 0xC9518B8C, 0x435973B9, 0xA2C6BCAA,
169 0xFE492AD3, 0x1FD6E5C0, 0x95DE1DF5, 0x7441D2E6,
170 0x2D699807, 0xCCF65714, 0x46FEAF21, 0xA7616032,
171 0xFBEEF64B, 0x1A713958, 0x9079C16D, 0x71E60E7E,
172 0x22295506, 0xC3B69A15, 0x49BE6220, 0xA821AD33,
173 0xF4AE3B4A, 0x1531F459, 0x9F390C6C, 0x7EA6C37F,
174 0x278E899E, 0xC611468D, 0x4C19BEB8, 0xAD8671AB,
175 0xF109E7D2, 0x109628C1, 0x9A9ED0F4, 0x7B011FE7,
176 0x3CA96604, 0xDD36A917, 0x573E5122, 0xB6A19E31,
177 0xEA2E0848, 0x0BB1C75B, 0x81B93F6E, 0x6026F07D,
178 0x390EBA9C, 0xD891758F, 0x52998DBA, 0xB30642A9,
179 0xEF89D4D0, 0x0E161BC3, 0x841EE3F6, 0x65812CE5,
180 0x364E779D, 0xD7D1B88E, 0x5DD940BB, 0xBC468FA8,
181 0xE0C919D1, 0x0156D6C2, 0x8B5E2EF7, 0x6AC1E1E4,
182 0x33E9AB05, 0xD2766416, 0x587E9C23, 0xB9E15330,
183 0xE56EC549, 0x04F10A5A, 0x8EF9F26F, 0x6F663D7C,
184 0x50358897, 0xB1AA4784, 0x3BA2BFB1, 0xDA3D70A2,
185 0x86B2E6DB, 0x672D29C8, 0xED25D1FD, 0x0CBA1EEE,
186 0x5592540F, 0xB40D9B1C, 0x3E056329, 0xDF9AAC3A,
187 0x83153A43, 0x628AF550, 0xE8820D65, 0x091DC276,
188 0x5AD2990E, 0xBB4D561D, 0x3145AE28, 0xD0DA613B,
189 0x8C55F742, 0x6DCA3851, 0xE7C2C064, 0x065D0F77,
190 0x5F754596, 0xBEEA8A85, 0x34E272B0, 0xD57DBDA3,
191 0x89F22BDA, 0x686DE4C9, 0xE2651CFC, 0x03FAD3EF,
192 0x4452AA0C, 0xA5CD651F, 0x2FC59D2A, 0xCE5A5239,
193 0x92D5C440, 0x734A0B53, 0xF942F366, 0x18DD3C75,
194 0x41F57694, 0xA06AB987, 0x2A6241B2, 0xCBFD8EA1,
195 0x977218D8, 0x76EDD7CB, 0xFCE52FFE, 0x1D7AE0ED,
196 0x4EB5BB95, 0xAF2A7486, 0x25228CB3, 0xC4BD43A0,
197 0x9832D5D9, 0x79AD1ACA, 0xF3A5E2FF, 0x123A2DEC,
198 0x4B12670D, 0xAA8DA81E, 0x2085502B, 0xC11A9F38,
199 0x9D950941, 0x7C0AC652, 0xF6023E67, 0x179DF174,
200 0x78FBCC08, 0x9964031B, 0x136CFB2E, 0xF2F3343D,
201 0xAE7CA244, 0x4FE36D57, 0xC5EB9562, 0x24745A71,
202 0x7D5C1090, 0x9CC3DF83, 0x16CB27B6, 0xF754E8A5,
203 0xABDB7EDC, 0x4A44B1CF, 0xC04C49FA, 0x21D386E9,
204 0x721CDD91, 0x93831282, 0x198BEAB7, 0xF81425A4,
205 0xA49BB3DD, 0x45047CCE, 0xCF0C84FB, 0x2E934BE8,
206 0x77BB0109, 0x9624CE1A, 0x1C2C362F, 0xFDB3F93C,
207 0xA13C6F45, 0x40A3A056, 0xCAAB5863, 0x2B349770,
208 0x6C9CEE93, 0x8D032180, 0x070BD9B5, 0xE69416A6,
209 0xBA1B80DF, 0x5B844FCC, 0xD18CB7F9, 0x301378EA,
210 0x693B320B, 0x88A4FD18, 0x02AC052D, 0xE333CA3E,
211 0xBFBC5C47, 0x5E239354, 0xD42B6B61, 0x35B4A472,
212 0x667BFF0A, 0x87E43019, 0x0DECC82C, 0xEC73073F,
213 0xB0FC9146, 0x51635E55, 0xDB6BA660, 0x3AF46973,
214 0x63DC2392, 0x8243EC81, 0x084B14B4, 0xE9D4DBA7,
215 0xB55B4DDE, 0x54C482CD, 0xDECC7AF8, 0x3F53B5EB,
217 0x00000000, 0x180F40CD, 0x301E8033, 0x2811C0FE,
218 0x603CA966, 0x7833E9AB, 0x50222955, 0x482D6998,
219 0xC078FBCC, 0xD877BB01, 0xF0667BFF, 0xE8693B32,
220 0xA04452AA, 0xB84B1267, 0x905AD299, 0x88559254,
221 0x29F05F31, 0x31FF1FFC, 0x19EEDF02, 0x01E19FCF,
222 0x49CCF657, 0x51C3B69A, 0x79D27664, 0x61DD36A9,
223 0xE988A4FD, 0xF187E430, 0xD99624CE, 0xC1996403,
224 0x89B40D9B, 0x91BB4D56, 0xB9AA8DA8, 0xA1A5CD65,
225 0x5249BE62, 0x4A46FEAF, 0x62573E51, 0x7A587E9C,
226 0x32751704, 0x2A7A57C9, 0x026B9737, 0x1A64D7FA,
227 0x923145AE, 0x8A3E0563, 0xA22FC59D, 0xBA208550,
228 0xF20DECC8, 0xEA02AC05, 0xC2136CFB, 0xDA1C2C36,
229 0x7BB9E153, 0x63B6A19E, 0x4BA76160, 0x53A821AD,
230 0x1B854835, 0x038A08F8, 0x2B9BC806, 0x339488CB,
231 0xBBC11A9F, 0xA3CE5A52, 0x8BDF9AAC, 0x93D0DA61,
232 0xDBFDB3F9, 0xC3F2F334, 0xEBE333CA, 0xF3EC7307,
233 0xA492D5C4, 0xBC9D9509, 0x948C55F7, 0x8C83153A,
234 0xC4AE7CA2, 0xDCA13C6F, 0xF4B0FC91, 0xECBFBC5C,
235 0x64EA2E08, 0x7CE56EC5, 0x54F4AE3B, 0x4CFBEEF6,
236 0x04D6876E, 0x1CD9C7A3, 0x34C8075D, 0x2CC74790,
237 0x8D628AF5, 0x956DCA38, 0xBD7C0AC6, 0xA5734A0B,
238 0xED5E2393, 0xF551635E, 0xDD40A3A0, 0xC54FE36D,
239 0x4D1A7139, 0x551531F4, 0x7D04F10A, 0x650BB1C7,
240 0x2D26D85F, 0x35299892, 0x1D38586C, 0x053718A1,
241 0xF6DB6BA6, 0xEED42B6B, 0xC6C5EB95, 0xDECAAB58,
242 0x96E7C2C0, 0x8EE8820D, 0xA6F942F3, 0xBEF6023E,
243 0x36A3906A, 0x2EACD0A7, 0x06BD1059, 0x1EB25094,
244 0x569F390C, 0x4E9079C1, 0x6681B93F, 0x7E8EF9F2,
245 0xDF2B3497, 0xC724745A, 0xEF35B4A4, 0xF73AF469,
246 0xBF179DF1, 0xA718DD3C, 0x8F091DC2, 0x97065D0F,
247 0x1F53CF5B, 0x075C8F96, 0x2F4D4F68, 0x37420FA5,
248 0x7F6F663D, 0x676026F0, 0x4F71E60E, 0x577EA6C3,
249 0xE18D0321, 0xF98243EC, 0xD1938312, 0xC99CC3DF,
250 0x81B1AA47, 0x99BEEA8A, 0xB1AF2A74, 0xA9A06AB9,
251 0x21F5F8ED, 0x39FAB820, 0x11EB78DE, 0x09E43813,
252 0x41C9518B, 0x59C61146, 0x71D7D1B8, 0x69D89175,
253 0xC87D5C10, 0xD0721CDD, 0xF863DC23, 0xE06C9CEE,
254 0xA841F576, 0xB04EB5BB, 0x985F7545, 0x80503588,
255 0x0805A7DC, 0x100AE711, 0x381B27EF, 0x20146722,
256 0x68390EBA, 0x70364E77, 0x58278E89, 0x4028CE44,
257 0xB3C4BD43, 0xABCBFD8E, 0x83DA3D70, 0x9BD57DBD,
258 0xD3F81425, 0xCBF754E8, 0xE3E69416, 0xFBE9D4DB,
259 0x73BC468F, 0x6BB30642, 0x43A2C6BC, 0x5BAD8671,
260 0x1380EFE9, 0x0B8FAF24, 0x239E6FDA, 0x3B912F17,
261 0x9A34E272, 0x823BA2BF, 0xAA2A6241, 0xB225228C,
262 0xFA084B14, 0xE2070BD9, 0xCA16CB27, 0xD2198BEA,
263 0x5A4C19BE, 0x42435973, 0x6A52998D, 0x725DD940,
264 0x3A70B0D8, 0x227FF015, 0x0A6E30EB, 0x12617026,
265 0x451FD6E5, 0x5D109628, 0x750156D6, 0x6D0E161B,
266 0x25237F83, 0x3D2C3F4E, 0x153DFFB0, 0x0D32BF7D,
267 0x85672D29, 0x9D686DE4, 0xB579AD1A, 0xAD76EDD7,
268 0xE55B844F, 0xFD54C482, 0xD545047C, 0xCD4A44B1,
269 0x6CEF89D4, 0x74E0C919, 0x5CF109E7, 0x44FE492A,
270 0x0CD320B2, 0x14DC607F, 0x3CCDA081, 0x24C2E04C,
271 0xAC977218, 0xB49832D5, 0x9C89F22B, 0x8486B2E6,
272 0xCCABDB7E, 0xD4A49BB3, 0xFCB55B4D, 0xE4BA1B80,
273 0x17566887, 0x0F59284A, 0x2748E8B4, 0x3F47A879,
274 0x776AC1E1, 0x6F65812C, 0x477441D2, 0x5F7B011F,
275 0xD72E934B, 0xCF21D386, 0xE7301378, 0xFF3F53B5,
276 0xB7123A2D, 0xAF1D7AE0, 0x870CBA1E, 0x9F03FAD3,
277 0x3EA637B6, 0x26A9777B, 0x0EB8B785, 0x16B7F748,
278 0x5E9A9ED0, 0x4695DE1D, 0x6E841EE3, 0x768B5E2E,
279 0xFEDECC7A, 0xE6D18CB7, 0xCEC04C49, 0xD6CF0C84,
280 0x9EE2651C, 0x86ED25D1, 0xAEFCE52F, 0xB6F3A5E2
284 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
285 unsigned int SosemanukPolicy::GetAlignment()
const
287 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
288 #ifdef __INTEL_COMPILER
289 if (HasSSE2() && !IsP4())
296 return GetAlignmentOf<word32>();
299 unsigned int SosemanukPolicy::GetOptimalBlockSize()
const
301 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
302 #ifdef __INTEL_COMPILER
303 if (HasSSE2() && !IsP4())
307 return 4*BYTES_PER_ITERATION;
310 return BYTES_PER_ITERATION;
314 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
// Declaration of the keystream routine used when CRYPTOPP_X64_MASM_AVAILABLE
// is defined. SosemanukPolicy::OperateKeystream calls it with
// (iterationCount, input, output, m_state.data()).
316 void Sosemanuk_OperateKeystream(
size_t iterationCount,
const byte *input, byte *output, word32 *state);
320 #pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
322 void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *output,
const byte *input,
size_t iterationCount)
324 #endif // #ifdef CRYPTOPP_GENERATE_X64_MASM
326 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
327 Sosemanuk_OperateKeystream(iterationCount, input, output, m_state.data());
331 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
332 #ifdef CRYPTOPP_GENERATE_X64_MASM
334 Sosemanuk_OperateKeystream PROC FRAME
337 alloc_stack(80*4*2+12*4+8*WORD_SZ + 2*16+8)
338 save_xmm128 xmm6, 02f0h
339 save_xmm128 xmm7, 0300h
344 #ifdef __INTEL_COMPILER
345 if (HasSSE2() && !IsP4())
351 #if CRYPTOPP_BOOL_X64
356 ".intel_syntax noprefix;"
359 word32 *state = m_state;
360 AS2( mov WORD_REG(ax), state)
361 AS2( mov WORD_REG(di), output)
362 AS2( mov WORD_REG(dx), input)
363 AS2( mov WORD_REG(cx), iterationCount)
365 #endif // #ifdef CRYPTOPP_GENERATE_X64_MASM
367 #if defined(__GNUC__) && CRYPTOPP_BOOL_X64
368 #define SSE2_workspace %5
370 #define SSE2_workspace WORD_REG(sp)
// Named slots of the assembly workspace, addressed relative to SSE2_workspace.
// Slots 1..7 are one WORD_SZ each; slot 0 is not given a name here.
// SSE2_stateCopy begins at offset 8*WORD_SZ and SSE2_uvStart 12*4 bytes after it.
// NOTE: SSE2_stateCopy and SSE2_uvStart expand to bare address expressions
// (no brackets); the code that uses them writes [SSE2_stateCopy+...] and
// [SSE2_uvStart].
373 #define SSE2_output WORD_PTR [SSE2_workspace+1*WORD_SZ]
374 #define SSE2_input WORD_PTR [SSE2_workspace+2*WORD_SZ]
375 #define SSE2_wordsLeft WORD_PTR [SSE2_workspace+3*WORD_SZ]
376 #define SSE2_diEnd WORD_PTR [SSE2_workspace+4*WORD_SZ]
377 #define SSE2_pMulTables WORD_PTR [SSE2_workspace+5*WORD_SZ]
378 #define SSE2_state WORD_PTR [SSE2_workspace+6*WORD_SZ]
379 #define SSE2_wordsLeft2 WORD_PTR [SSE2_workspace+7*WORD_SZ]
380 #define SSE2_stateCopy SSE2_workspace + 8*WORD_SZ
381 #define SSE2_uvStart SSE2_stateCopy + 12*4
383 #if CRYPTOPP_BOOL_X86
385 AS2( mov AS_REG_6, esp)
387 AS2( sub esp, 80*4*2+12*4+8*WORD_SZ)
388 AS2( mov [esp], AS_REG_6)
390 AS2( mov SSE2_output, WORD_REG(di))
391 AS2( mov SSE2_input, WORD_REG(dx))
392 AS2( mov SSE2_state, WORD_REG(ax))
394 AS2( mov SSE2_pMulTables, WORD_REG(si))
396 AS2( lea WORD_REG(cx), [4*WORD_REG(cx)+WORD_REG(cx)])
397 AS2( lea WORD_REG(si), [4*WORD_REG(cx)])
398 AS2( mov SSE2_wordsLeft, WORD_REG(si))
// Copy 40 bytes of state (two 16-byte moves plus one 8-byte move) from the
// address in WORD_REG(ax) into SSE2_stateCopy, going through xmm0.
399 AS2( movdqa xmm0, [WORD_REG(ax)+0*16])
400 AS2( movdqa [SSE2_stateCopy+0*16], xmm0)
401 AS2( movdqa xmm0, [WORD_REG(ax)+1*16])
402 AS2( movdqa [SSE2_stateCopy+1*16], xmm0)
403 AS2( movq xmm0, QWORD PTR [WORD_REG(ax)+2*16])
404 AS2( movq QWORD PTR [SSE2_stateCopy+2*16], xmm0)
// Low dword of xmm0 goes to AS_REG_6d.
406 AS2( movd AS_REG_6d, xmm0)
// The words at offsets 10*4 and 11*4 are kept in ecx and edx; the same two
// registers are stored back to those offsets at the end of the routine.
407 AS2( mov ecx, [WORD_REG(ax)+10*4])
408 AS2( mov edx, [WORD_REG(ax)+11*4])
// Comparing a register with itself for equality sets every bit: xmm7 = all ones.
409 AS2( pcmpeqb xmm7, xmm7)
// Address expressions used by SSE2_STEP (bracketed at the point of use):
//   s(i): word (i mod 10) of the state copy, 4 bytes per word.
//   u(j): offset from WORD_REG(di); j mod 4 selects one of four 20-word rows
//         and j/4 the position inside that row.
//   v(j): same layout as u(j), displaced by a further 80*4 bytes.
// ASM_MOD is assumed to be a modulo helper usable in assembler operands -- TODO confirm.
411 #define s(i) SSE2_stateCopy + ASM_MOD(i,10)*4
412 #define u(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4
413 #define v(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4 + 80*4
420 #define R20r WORD_REG(dx)
421 #define R21r WORD_REG(cx)
423 #define SSE2_STEP(i, j) \
424 AS2( mov eax, [s(i+0)])\
425 AS2( mov [v(i)], eax)\
427 AS2( lea AS_REG_7, [AS_REG_6 + R2##j##r])\
428 AS2( xor AS_REG_7d, R1##j)\
429 AS2( mov [u(i)], AS_REG_7d)\
430 AS2( mov AS_REG_7d, 1)\
431 AS2( and AS_REG_7d, R2##j)\
433 AS2( and AS_REG_7d, AS_REG_6d)\
434 AS2( xor AS_REG_6d, eax)\
436 AS2( xor AS_REG_6d, [WORD_REG(si)+WORD_REG(ax)*4])\
437 AS2( mov eax, [s(i+3)])\
438 AS2( xor AS_REG_7d, [s(i+2)])\
439 AS2( add R1##j, AS_REG_7d)\
440 AS2( movzx AS_REG_7d, al)\
442 AS2( xor AS_REG_6d, [WORD_REG(si)+1024+AS_REG_7*4])\
443 AS2( xor AS_REG_6d, eax)\
444 AS2( imul R2##j, AS_HEX(54655307))\
446 AS2( mov [s(i+0)], AS_REG_6d)\
449 AS2( lea WORD_REG(di), [SSE2_uvStart])
450 AS2( mov WORD_REG(ax), 80)
451 AS2( cmp WORD_REG(si), 80)
452 AS2( cmovg WORD_REG(si), WORD_REG(ax))
453 AS2( mov SSE2_wordsLeft2, WORD_REG(si))
454 AS2( lea WORD_REG(si), [WORD_REG(di)+WORD_REG(si)])
455 AS2( mov SSE2_diEnd, WORD_REG(si))
457 AS2( lea WORD_REG(si), s_sosemanukMulTables)
459 AS2( mov WORD_REG(si), SSE2_pMulTables)
484 AS2( add WORD_REG(di), 5*4)
485 AS2( cmp WORD_REG(di), SSE2_diEnd)
488 AS2( mov WORD_REG(ax), SSE2_input)
489 AS2( mov AS_REG_7, SSE2_output)
490 AS2( lea WORD_REG(di), [SSE2_uvStart])
491 AS2( mov WORD_REG(si), SSE2_wordsLeft2)
494 AS2( movdqa xmm0, [WORD_REG(di)+0*20*4])
495 AS2( movdqa xmm2, [WORD_REG(di)+2*20*4])
496 AS2( movdqa xmm3, [WORD_REG(di)+3*20*4])
497 AS2( movdqa xmm1, [WORD_REG(di)+1*20*4])
499 AS2( movdqa xmm4, xmm0)
500 AS2( pand xmm0, xmm2)
501 AS2( pxor xmm0, xmm3)
502 AS2( pxor xmm2, xmm1)
503 AS2( pxor xmm2, xmm0)
505 AS2( pxor xmm3, xmm1)
506 AS2( pxor xmm4, xmm2)
507 AS2( movdqa xmm1, xmm3)
509 AS2( pxor xmm3, xmm0)
510 AS2( pand xmm0, xmm1)
511 AS2( pxor xmm4, xmm0)
512 AS2( pxor xmm1, xmm3)
513 AS2( pxor xmm1, xmm4)
514 AS2( pxor xmm4, xmm7)
516 AS2( pxor xmm2, [WORD_REG(di)+80*4])
517 AS2( pxor xmm3, [WORD_REG(di)+80*5])
518 AS2( pxor xmm1, [WORD_REG(di)+80*6])
519 AS2( pxor xmm4, [WORD_REG(di)+80*7])
522 AS2( cmp WORD_REG(si), 16)
525 AS2( movdqa xmm6, xmm2)
526 AS2( punpckldq xmm2, xmm3)
527 AS2( movdqa xmm5, xmm1)
528 AS2( punpckldq xmm1, xmm4)
529 AS2( movdqa xmm0, xmm2)
530 AS2( punpcklqdq xmm2, xmm1)
531 AS2( punpckhqdq xmm0, xmm1)
532 AS2( punpckhdq xmm6, xmm3)
533 AS2( punpckhdq xmm5, xmm4)
534 AS2( movdqa xmm3, xmm6)
535 AS2( punpcklqdq xmm6, xmm5)
536 AS2( punpckhqdq xmm3, xmm5)
538 AS_XMM_OUTPUT4(SSE2_Sosemanuk_Output, WORD_REG(ax), AS_REG_7, 2,0,6,3, 1, 0,1,2,3, 4)
541 AS2( add WORD_REG(di), 4*4)
542 AS2( sub WORD_REG(si), 16)
546 AS2( mov WORD_REG(si), SSE2_wordsLeft)
547 AS2( sub WORD_REG(si), 80)
549 AS2( mov SSE2_wordsLeft, WORD_REG(si))
550 AS2( mov SSE2_input, WORD_REG(ax))
551 AS2( mov SSE2_output, AS_REG_7)
555 AS2( test WORD_REG(ax), WORD_REG(ax))
557 AS2( movd xmm0, dword ptr [WORD_REG(ax)+0*4])
558 AS2( pxor xmm2, xmm0)
559 AS2( movd xmm0, dword ptr [WORD_REG(ax)+1*4])
560 AS2( pxor xmm3, xmm0)
561 AS2( movd xmm0, dword ptr [WORD_REG(ax)+2*4])
562 AS2( pxor xmm1, xmm0)
563 AS2( movd xmm0, dword ptr [WORD_REG(ax)+3*4])
564 AS2( pxor xmm4, xmm0)
565 AS2( add WORD_REG(ax), 16)
567 AS2( movd dword ptr [AS_REG_7+0*4], xmm2)
568 AS2( movd dword ptr [AS_REG_7+1*4], xmm3)
569 AS2( movd dword ptr [AS_REG_7+2*4], xmm1)
570 AS2( movd dword ptr [AS_REG_7+3*4], xmm4)
571 AS2( sub WORD_REG(si), 4)
573 AS2( add AS_REG_7, 16)
// Write the working copy back to the caller's state: reload the state pointer
// from the SSE2_state slot, copy 40 bytes from SSE2_stateCopy (16 + 16 + 8),
// then store ecx and edx to offsets 10*4 and 11*4.
581 AS2( mov AS_REG_6, SSE2_state)
582 AS2( movdqa xmm0, [SSE2_stateCopy+0*16])
583 AS2( movdqa [AS_REG_6+0*16], xmm0)
584 AS2( movdqa xmm0, [SSE2_stateCopy+1*16])
585 AS2( movdqa [AS_REG_6+1*16], xmm0)
586 AS2( movq xmm0, QWORD PTR [SSE2_stateCopy+2*16])
587 AS2( movq QWORD PTR [AS_REG_6+2*16], xmm0)
588 AS2( mov [AS_REG_6+10*4], ecx)
589 AS2( mov [AS_REG_6+11*4], edx)
596 ".att_syntax prefix;"
598 :
"a" (m_state.m_ptr),
"c" (iterationCount),
"S" (s_sosemanukMulTables),
"D" (output),
"d" (input)
599 #
if CRYPTOPP_BOOL_X64
600 ,
"r" (workspace.m_ptr)
601 :
"memory",
"cc",
"%r9",
"%r10",
"%xmm0",
"%xmm1",
"%xmm2",
"%xmm3",
"%xmm4",
"%xmm5",
"%xmm6",
"%xmm7"
607 #ifdef CRYPTOPP_GENERATE_X64_MASM
608 movdqa xmm6, [rsp + 02f0h]
609 movdqa xmm7, [rsp + 0300h]
610 add rsp, 80*4*2+12*4+8*WORD_SZ + 2*16+8
614 Sosemanuk_OperateKeystream ENDP
620 #ifndef CRYPTOPP_GENERATE_X64_MASM
// MUL_A / DIV_A: table-driven helpers used by STEP. MUL_A indexes
// s_sosemanukMulTables directly; DIV_A indexes it from offset 256.
622 #if CRYPTOPP_BOOL_X86 | CRYPTOPP_BOOL_X64
// x86/x64 form: overwrites x with rotlFixed(x, 8), then XORs in the table
// entry selected by byte(x). NOTE: x is assigned to, so the argument must be
// a modifiable lvalue.
623 #define MUL_A(x) (x = rotlFixed(x, 8), x ^ s_sosemanukMulTables[byte(x)])
// Alternative form (the directive separating it from the definition above is
// not visible in this excerpt): leaves x unmodified, shifts it left by 8 and
// XORs in the table entry indexed by x >> 24.
625 #define MUL_A(x) (((x) << 8) ^ s_sosemanukMulTables[(x) >> 24])
// DIV_A: x shifted right by 8, XORed with the table entry at 256 + byte(x).
628 #define DIV_A(x) (((x) >> 8) ^ s_sosemanukMulTables[256 + byte(x)])
// r1(i) / r2(i): choose which of the locals reg1 / reg2 plays each register
// role for step i; the two roles swap on every odd i. Both macros expand to
// an lvalue, so they may appear on the left-hand side of an assignment.
// The argument is parenthesized so that an expression argument (e.g. `k+1`)
// is reduced modulo 2 as a whole rather than only its last operand.
#define r1(i) (((i)%2) ? reg2 : reg1)
#define r2(i) (((i)%2) ? reg1 : reg2)
// STEP: one update step over the locals s0..s9 (named by pasting the digit
// arguments onto `s`) and the registers selected by r1()/r2():
//   u     receives (s<x9> + r2) ^ r1, computed before anything is updated;
//   s<x0> is replaced by MUL_A(s<x0>) ^ DIV_A(s<x3>) ^ s<x9>;
//   r1    is incremented by XMUX(r2, s<x2>, s<x9>);
//   r2    becomes rotlFixed(r2 * 0x54655307, 7).
// Only x0, x2, x3, x9 and u are referenced in the lines visible here; the
// remaining parameters (including v) are unused in this excerpt.
//
// SOSEMANUK_OUTPUT(x): emits four keystream words through
// CRYPTOPP_KEYSTREAM_OUTPUT_WORD in LITTLE_ENDIAN_ORDER:
//   word 0 = u2 ^ v0, word 1 = u3 ^ v1, word 2 = u1 ^ v2, word 3 = u4 ^ v3.
// x is forwarded unchanged as the first argument of each call.
633 #define STEP(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, v, u) \
634 u = (s##x9 + r2(x0)) ^ r1(x0);\
636 s##x0 = MUL_A(s##x0) ^ DIV_A(s##x3) ^ s##x9;\
637 r1(x0) += XMUX(r2(x0), s##x2, s##x9);\
638 r2(x0) = rotlFixed(r2(x0) * 0x54655307, 7);\
640 #define SOSEMANUK_OUTPUT(x) \
641 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, u2 ^ v0);\
642 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, u3 ^ v1);\
643 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, u1 ^ v2);\
644 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, u4 ^ v3);
647 S2(0, u0, u1, u2, u3, u4);\
648 CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SOSEMANUK_OUTPUT, 4*4);
// Copy the twelve state words into locals. The names s0..s9, reg1 and reg2
// must stay exactly as written: STEP forms s0..s9 by token pasting (s##x)
// and reaches reg1/reg2 through r1()/r2().
650 word32 s0 = m_state[0];
651 word32 s1 = m_state[1];
652 word32 s2 = m_state[2];
653 word32 s3 = m_state[3];
654 word32 s4 = m_state[4];
655 word32 s5 = m_state[5];
656 word32 s6 = m_state[6];
657 word32 s7 = m_state[7];
658 word32 s8 = m_state[8];
659 word32 s9 = m_state[9];
660 word32 reg1 = m_state[10];
661 word32 reg2 = m_state[11];
// Scratch words: passed to STEP as its u/v arguments and read by SOSEMANUK_OUTPUT.
662 word32 u0, u1, u2, u3, u4, v0, v1, v2, v3;
666 STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v0, u0)
667 STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v1, u1)
668 STEP(2, 3, 4, 5, 6, 7, 8, 9, 0, 1, v2, u2)
669 STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v3, u3)
671 STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v0, u0)
672 STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v1, u1)
673 STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v2, u2)
674 STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v3, u3)
676 STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v0, u0)
677 STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v1, u1)
678 STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v2, u2)
679 STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v3, u3)
681 STEP(2, 3, 4, 5, 6, 7, 8, 9, 0, 1, v0, u0)
682 STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v1, u1)
683 STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v2, u2)
684 STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v3, u3)
686 STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v0, u0)
687 STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v1, u1)
688 STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v2, u2)
689 STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v3, u3)
692 while (--iterationCount);
711 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
interface for retrieving values given their names