1 // SPDX-License-Identifier: BSD-2-Clause
2 /* LibTomCrypt, modular cryptographic library -- Tom St Denis
3 *
4 * LibTomCrypt is a library that provides various cryptographic
5 * algorithms in a highly modular and flexible manner.
6 *
7 * The library is free for all purposes without any express
8 * guarantee it works.
9 */
10
11 /*
12 * This LTC implementation was adapted from:
13 * http://www.ecrypt.eu.org/stream/e2-sosemanuk.html
14 */
15
16 /*
17 * SOSEMANUK reference implementation.
18 *
19 * This code is supposed to run on any conforming C implementation (C90
20 * or later).
21 *
22 * (c) 2005 X-CRYPT project. This software is provided 'as-is', without
23 * any express or implied warranty. In no event will the authors be held
24 * liable for any damages arising from the use of this software.
25 *
26 * Permission is granted to anyone to use this software for any purpose,
27 * including commercial applications, and to alter it and redistribute it
28 * freely, subject to no restriction.
29 *
30 * Technical remarks and questions can be addressed to
31 * <thomas.pornin@cryptolog.com>
32 */
33
34 #include "tomcrypt_private.h"
35
36 #ifdef LTC_SOSEMANUK
37
38 /* ======================================================================== */
39
/*
 * We want (and sometimes need) to perform explicit truncations to 32 bits.
 * T32(x) keeps the low 32 bits of x and clears everything above them; in
 * this file it is applied after left shifts, additions and multiplications.
 * NOTE(review): this only changes a value if ulong32 can be wider than
 * 32 bits -- confirm against the ulong32 typedef in the library headers.
 */
#define T32(x) ((x) & (ulong32)0xFFFFFFFF)
44
/*
 * Some of our functions will be tagged as "inline" to help the compiler
 * optimize things. We use "inline" only if the compiler is advanced
 * enough to understand it; C99 compilers, and pre-C99 versions of gcc,
 * understand enough "inline" for our purposes. In this file the tag is
 * spelled LTC_INLINE.
 */
51
52 /* ======================================================================== */
53
54 /*
55 * Serpent S-boxes, implemented in bitslice mode. These circuits have
56 * been published by Dag Arne Osvik ("Speeding up Serpent", published in
57 * the 3rd AES Candidate Conference) and work on five 32-bit registers:
 * the four inputs, and a fifth scratch register. They are meant to be
 * quite fast on Pentium-class processors. These are not the fastest
60 * published, but they are "fast enough" and they are unencumbered as
61 * far as intellectual property is concerned (note: these are rewritten
62 * from the article itself, and hence are not covered by the GPL on
63 * Dag's code, which was not used here).
64 *
 * The output bits are permuted. Here is the correspondence:
66 * S0: 1420
67 * S1: 2031
68 * S2: 2314
69 * S3: 1234
70 * S4: 1403
71 * S5: 1302
72 * S6: 0142
73 * S7: 4310
74 * (for instance, the output of S0 is in "r1, r4, r2, r0").
75 */
76
/*
 * Serpent S-box 0 as a bitsliced circuit. r0..r3 carry the four input
 * words, r4 is scratch (its incoming value is ignored). All five arguments
 * are modified; the four output words are found, in order, in
 * r1, r4, r2, r0.
 */
#define S0(r0, r1, r2, r3, r4) do { \
        r3 ^= r0; \
        r4  = r1; \
        r1 &= r3; \
        r4 ^= r2; \
        r1 ^= r0; \
        r0 |= r3; \
        r0 ^= r4; \
        r4 ^= r3; \
        r3 ^= r2; \
        r2 |= r1; \
        r2 ^= r4; \
        r4  = ~r4; \
        r4 |= r1; \
        r1 ^= r3; \
        r1 ^= r4; \
        r3 |= r0; \
        r1 ^= r3; \
        r4 ^= r3; \
    } while (0)
88
/*
 * Serpent S-box 1 as a bitsliced circuit. r0..r3 carry the four input
 * words, r4 is scratch (its incoming value is ignored). All five arguments
 * are modified; the four output words are found, in order, in
 * r2, r0, r3, r1.
 */
#define S1(r0, r1, r2, r3, r4) do { \
        r0  = ~r0; \
        r2  = ~r2; \
        r4  = r0; \
        r0 &= r1; \
        r2 ^= r0; \
        r0 |= r3; \
        r3 ^= r2; \
        r1 ^= r0; \
        r0 ^= r4; \
        r4 |= r1; \
        r1 ^= r3; \
        r2 |= r0; \
        r2 &= r4; \
        r0 ^= r1; \
        r1 &= r2; \
        r1 ^= r0; \
        r0 &= r2; \
        r0 ^= r4; \
    } while (0)
101
/*
 * Serpent S-box 2 as a bitsliced circuit. r0..r3 carry the four input
 * words, r4 is scratch (its incoming value is ignored). All five arguments
 * are modified; the four output words are found, in order, in
 * r2, r3, r1, r4.
 */
#define S2(r0, r1, r2, r3, r4) do { \
        r4  = r0; \
        r0 &= r2; \
        r0 ^= r3; \
        r2 ^= r1; \
        r2 ^= r0; \
        r3 |= r4; \
        r3 ^= r1; \
        r4 ^= r2; \
        r1  = r3; \
        r3 |= r4; \
        r3 ^= r0; \
        r0 &= r1; \
        r4 ^= r0; \
        r1 ^= r3; \
        r1 ^= r4; \
        r4  = ~r4; \
    } while (0)
112
/*
 * Serpent S-box 3 as a bitsliced circuit. r0..r3 carry the four input
 * words, r4 is scratch (its incoming value is ignored). All five arguments
 * are modified; the four output words are found, in order, in
 * r1, r2, r3, r4.
 */
#define S3(r0, r1, r2, r3, r4) do { \
        r4  = r0; \
        r0 |= r3; \
        r3 ^= r1; \
        r1 &= r4; \
        r4 ^= r2; \
        r2 ^= r3; \
        r3 &= r0; \
        r4 |= r1; \
        r3 ^= r4; \
        r0 ^= r1; \
        r4 &= r0; \
        r1 ^= r3; \
        r4 ^= r2; \
        r1 |= r0; \
        r1 ^= r2; \
        r0 ^= r3; \
        r2  = r1; \
        r1 |= r3; \
        r1 ^= r0; \
    } while (0)
125
/*
 * Serpent S-box 4 as a bitsliced circuit. r0..r3 carry the four input
 * words, r4 is scratch (its incoming value is ignored). All five arguments
 * are modified; the four output words are found, in order, in
 * r1, r4, r0, r3.
 */
#define S4(r0, r1, r2, r3, r4) do { \
        r1 ^= r3; \
        r3  = ~r3; \
        r2 ^= r3; \
        r3 ^= r0; \
        r4  = r1; \
        r1 &= r3; \
        r1 ^= r2; \
        r4 ^= r3; \
        r0 ^= r4; \
        r2 &= r4; \
        r2 ^= r0; \
        r0 &= r1; \
        r3 ^= r0; \
        r4 |= r1; \
        r4 ^= r0; \
        r0 |= r3; \
        r0 ^= r2; \
        r2 &= r3; \
        r0  = ~r0; \
        r4 ^= r2; \
    } while (0)
138
/*
 * Serpent S-box 5 as a bitsliced circuit. r0..r3 carry the four input
 * words, r4 is scratch (its incoming value is ignored). All five arguments
 * are modified; the four output words are found, in order, in
 * r1, r3, r0, r2.
 */
#define S5(r0, r1, r2, r3, r4) do { \
        r0 ^= r1; \
        r1 ^= r3; \
        r3  = ~r3; \
        r4  = r1; \
        r1 &= r0; \
        r2 ^= r3; \
        r1 ^= r2; \
        r2 |= r4; \
        r4 ^= r3; \
        r3 &= r1; \
        r3 ^= r0; \
        r4 ^= r1; \
        r4 ^= r2; \
        r2 ^= r0; \
        r0 &= r3; \
        r2  = ~r2; \
        r0 ^= r4; \
        r4 |= r3; \
        r2 ^= r4; \
    } while (0)
151
/*
 * Serpent S-box 6 as a bitsliced circuit. r0..r3 carry the four input
 * words, r4 is scratch (its incoming value is ignored). All five arguments
 * are modified; the four output words are found, in order, in
 * r0, r1, r4, r2.
 */
#define S6(r0, r1, r2, r3, r4) do { \
        r2  = ~r2; \
        r4  = r3; \
        r3 &= r0; \
        r0 ^= r4; \
        r3 ^= r2; \
        r2 |= r4; \
        r1 ^= r3; \
        r2 ^= r0; \
        r0 |= r1; \
        r2 ^= r1; \
        r4 ^= r0; \
        r0 |= r3; \
        r0 ^= r2; \
        r4 ^= r3; \
        r4 ^= r0; \
        r3  = ~r3; \
        r2 &= r4; \
        r2 ^= r3; \
    } while (0)
164
/*
 * Serpent S-box 7 as a bitsliced circuit. r0..r3 carry the four input
 * words, r4 is scratch (its incoming value is ignored). All five arguments
 * are modified; the four output words are found, in order, in
 * r4, r3, r1, r0.
 */
#define S7(r0, r1, r2, r3, r4) do { \
        r4  = r1; \
        r1 |= r2; \
        r1 ^= r3; \
        r4 ^= r2; \
        r2 ^= r1; \
        r3 |= r4; \
        r3 &= r0; \
        r4 ^= r2; \
        r3 ^= r1; \
        r1 |= r4; \
        r1 ^= r0; \
        r0 |= r4; \
        r0 ^= r2; \
        r1 ^= r4; \
        r2 ^= r1; \
        r1 &= r0; \
        r1 ^= r4; \
        r2  = ~r2; \
        r2 |= r0; \
        r4 ^= r2; \
    } while (0)
178
/*
 * The Serpent linear transform, applied in place to the four words
 * x0..x3. Rotations go through ROLc; the two plain left shifts are
 * truncated back to 32 bits with T32. The arguments are expanded without
 * parentheses, so they must be plain lvalues.
 */
#define SERPENT_LT(x0, x1, x2, x3) do { \
        x0  = ROLc(x0, 13); \
        x2  = ROLc(x2, 3); \
        x1 ^= x0 ^ x2; \
        x3 ^= x2 ^ T32(x0 << 3); \
        x1  = ROLc(x1, 1); \
        x3  = ROLc(x3, 7); \
        x0 ^= x1 ^ x3; \
        x2 ^= x3 ^ T32(x1 << 7); \
        x0  = ROLc(x0, 5); \
        x2  = ROLc(x2, 22); \
    } while (0)
194
195 /* ======================================================================== */
196
197 /*
198 * Initialize Sosemanuk's state by providing a key. The key is an array of
199 * 1 to 32 bytes.
200 * @param st The Sosemanuk state
201 * @param key Key
202 * @param keylen Length of key in bytes
203 * @return CRYPT_OK on success
204 */
sosemanuk_setup(sosemanuk_state * st,const unsigned char * key,unsigned long keylen)205 int sosemanuk_setup(sosemanuk_state *st, const unsigned char *key, unsigned long keylen)
206 {
207 /*
208 * This key schedule is actually a truncated Serpent key schedule.
209 * The key-derived words (w_i) are computed within the eight
210 * local variables w0 to w7, which are reused again and again.
211 */
212
213 #define SKS(S, o0, o1, o2, o3, d0, d1, d2, d3) do { \
214 ulong32 r0, r1, r2, r3, r4; \
215 r0 = w ## o0; \
216 r1 = w ## o1; \
217 r2 = w ## o2; \
218 r3 = w ## o3; \
219 S(r0, r1, r2, r3, r4); \
220 st->kc[i ++] = r ## d0; \
221 st->kc[i ++] = r ## d1; \
222 st->kc[i ++] = r ## d2; \
223 st->kc[i ++] = r ## d3; \
224 } while (0)
225
226 #define SKS0 SKS(S0, 4, 5, 6, 7, 1, 4, 2, 0)
227 #define SKS1 SKS(S1, 0, 1, 2, 3, 2, 0, 3, 1)
228 #define SKS2 SKS(S2, 4, 5, 6, 7, 2, 3, 1, 4)
229 #define SKS3 SKS(S3, 0, 1, 2, 3, 1, 2, 3, 4)
230 #define SKS4 SKS(S4, 4, 5, 6, 7, 1, 4, 0, 3)
231 #define SKS5 SKS(S5, 0, 1, 2, 3, 1, 3, 0, 2)
232 #define SKS6 SKS(S6, 4, 5, 6, 7, 0, 1, 4, 2)
233 #define SKS7 SKS(S7, 0, 1, 2, 3, 4, 3, 1, 0)
234
235 #define WUP(wi, wi5, wi3, wi1, cc) do { \
236 ulong32 tt = (wi) ^ (wi5) ^ (wi3) \
237 ^ (wi1) ^ (0x9E3779B9 ^ (ulong32)(cc)); \
238 (wi) = ROLc(tt, 11); \
239 } while (0)
240
241 #define WUP0(cc) do { \
242 WUP(w0, w3, w5, w7, cc); \
243 WUP(w1, w4, w6, w0, cc + 1); \
244 WUP(w2, w5, w7, w1, cc + 2); \
245 WUP(w3, w6, w0, w2, cc + 3); \
246 } while (0)
247
248 #define WUP1(cc) do { \
249 WUP(w4, w7, w1, w3, cc); \
250 WUP(w5, w0, w2, w4, cc + 1); \
251 WUP(w6, w1, w3, w5, cc + 2); \
252 WUP(w7, w2, w4, w6, cc + 3); \
253 } while (0)
254
255 unsigned char wbuf[32];
256 ulong32 w0, w1, w2, w3, w4, w5, w6, w7;
257 int i = 0;
258
259 LTC_ARGCHK(st != NULL);
260 LTC_ARGCHK(key != NULL);
261 LTC_ARGCHK(keylen > 0 && keylen <= 32);
262
263 /*
264 * The key is copied into the wbuf[] buffer and padded to 256 bits
265 * as described in the Serpent specification.
266 */
267 XMEMCPY(wbuf, key, keylen);
268 if (keylen < 32) {
269 wbuf[keylen] = 0x01;
270 if (keylen < 31) {
271 XMEMSET(wbuf + keylen + 1, 0, 31 - keylen);
272 }
273 }
274
275 LOAD32L(w0, wbuf);
276 LOAD32L(w1, wbuf + 4);
277 LOAD32L(w2, wbuf + 8);
278 LOAD32L(w3, wbuf + 12);
279 LOAD32L(w4, wbuf + 16);
280 LOAD32L(w5, wbuf + 20);
281 LOAD32L(w6, wbuf + 24);
282 LOAD32L(w7, wbuf + 28);
283
284 WUP0(0); SKS3;
285 WUP1(4); SKS2;
286 WUP0(8); SKS1;
287 WUP1(12); SKS0;
288 WUP0(16); SKS7;
289 WUP1(20); SKS6;
290 WUP0(24); SKS5;
291 WUP1(28); SKS4;
292 WUP0(32); SKS3;
293 WUP1(36); SKS2;
294 WUP0(40); SKS1;
295 WUP1(44); SKS0;
296 WUP0(48); SKS7;
297 WUP1(52); SKS6;
298 WUP0(56); SKS5;
299 WUP1(60); SKS4;
300 WUP0(64); SKS3;
301 WUP1(68); SKS2;
302 WUP0(72); SKS1;
303 WUP1(76); SKS0;
304 WUP0(80); SKS7;
305 WUP1(84); SKS6;
306 WUP0(88); SKS5;
307 WUP1(92); SKS4;
308 WUP0(96); SKS3;
309
310 #undef SKS
311 #undef SKS0
312 #undef SKS1
313 #undef SKS2
314 #undef SKS3
315 #undef SKS4
316 #undef SKS5
317 #undef SKS6
318 #undef SKS7
319 #undef WUP
320 #undef WUP0
321 #undef WUP1
322
323 return CRYPT_OK;
324 }
325
326
327 /*
328 * Initialization continues by setting the IV. The IV length is up to 16 bytes.
329 * If "ivlen" is 0 (no IV), then the "iv" parameter can be NULL. If multiple
330 * encryptions/decryptions are to be performed with the same key and
331 * sosemanuk_done() has not been called, only sosemanuk_setiv() need be called
332 * to set the state.
333 * @param st The Sosemanuk state
334 * @param iv Initialization vector
335 * @param ivlen Length of iv in bytes
336 * @return CRYPT_OK on success
337 */
sosemanuk_setiv(sosemanuk_state * st,const unsigned char * iv,unsigned long ivlen)338 int sosemanuk_setiv(sosemanuk_state *st, const unsigned char *iv, unsigned long ivlen)
339 {
340
341 /*
342 * The Serpent key addition step.
343 */
344 #define KA(zc, x0, x1, x2, x3) do { \
345 x0 ^= st->kc[(zc)]; \
346 x1 ^= st->kc[(zc) + 1]; \
347 x2 ^= st->kc[(zc) + 2]; \
348 x3 ^= st->kc[(zc) + 3]; \
349 } while (0)
350
351 /*
352 * One Serpent round.
353 * zc = current subkey counter
354 * S = S-box macro for this round
355 * i0 to i4 = input register numbers (the fifth is a scratch register)
356 * o0 to o3 = output register numbers
357 */
358 #define FSS(zc, S, i0, i1, i2, i3, i4, o0, o1, o2, o3) do { \
359 KA(zc, r ## i0, r ## i1, r ## i2, r ## i3); \
360 S(r ## i0, r ## i1, r ## i2, r ## i3, r ## i4); \
361 SERPENT_LT(r ## o0, r ## o1, r ## o2, r ## o3); \
362 } while (0)
363
364 /*
365 * Last Serpent round. Contrary to the "true" Serpent, we keep
366 * the linear transformation for that last round.
367 */
368 #define FSF(zc, S, i0, i1, i2, i3, i4, o0, o1, o2, o3) do { \
369 KA(zc, r ## i0, r ## i1, r ## i2, r ## i3); \
370 S(r ## i0, r ## i1, r ## i2, r ## i3, r ## i4); \
371 SERPENT_LT(r ## o0, r ## o1, r ## o2, r ## o3); \
372 KA(zc + 4, r ## o0, r ## o1, r ## o2, r ## o3); \
373 } while (0)
374
375 ulong32 r0, r1, r2, r3, r4;
376 unsigned char ivtmp[16] = {0};
377
378 LTC_ARGCHK(st != NULL);
379 LTC_ARGCHK(ivlen <= 16);
380 LTC_ARGCHK(iv != NULL || ivlen == 0);
381
382 if (ivlen > 0) XMEMCPY(ivtmp, iv, ivlen);
383
384 /*
385 * Decode IV into four 32-bit words (little-endian).
386 */
387 LOAD32L(r0, ivtmp);
388 LOAD32L(r1, ivtmp + 4);
389 LOAD32L(r2, ivtmp + 8);
390 LOAD32L(r3, ivtmp + 12);
391
392 /*
393 * Encrypt IV with Serpent24. Some values are extracted from the
394 * output of the twelfth, eighteenth and twenty-fourth rounds.
395 */
396 FSS(0, S0, 0, 1, 2, 3, 4, 1, 4, 2, 0);
397 FSS(4, S1, 1, 4, 2, 0, 3, 2, 1, 0, 4);
398 FSS(8, S2, 2, 1, 0, 4, 3, 0, 4, 1, 3);
399 FSS(12, S3, 0, 4, 1, 3, 2, 4, 1, 3, 2);
400 FSS(16, S4, 4, 1, 3, 2, 0, 1, 0, 4, 2);
401 FSS(20, S5, 1, 0, 4, 2, 3, 0, 2, 1, 4);
402 FSS(24, S6, 0, 2, 1, 4, 3, 0, 2, 3, 1);
403 FSS(28, S7, 0, 2, 3, 1, 4, 4, 1, 2, 0);
404 FSS(32, S0, 4, 1, 2, 0, 3, 1, 3, 2, 4);
405 FSS(36, S1, 1, 3, 2, 4, 0, 2, 1, 4, 3);
406 FSS(40, S2, 2, 1, 4, 3, 0, 4, 3, 1, 0);
407 FSS(44, S3, 4, 3, 1, 0, 2, 3, 1, 0, 2);
408 st->s09 = r3;
409 st->s08 = r1;
410 st->s07 = r0;
411 st->s06 = r2;
412
413 FSS(48, S4, 3, 1, 0, 2, 4, 1, 4, 3, 2);
414 FSS(52, S5, 1, 4, 3, 2, 0, 4, 2, 1, 3);
415 FSS(56, S6, 4, 2, 1, 3, 0, 4, 2, 0, 1);
416 FSS(60, S7, 4, 2, 0, 1, 3, 3, 1, 2, 4);
417 FSS(64, S0, 3, 1, 2, 4, 0, 1, 0, 2, 3);
418 FSS(68, S1, 1, 0, 2, 3, 4, 2, 1, 3, 0);
419 st->r1 = r2;
420 st->s04 = r1;
421 st->r2 = r3;
422 st->s05 = r0;
423
424 FSS(72, S2, 2, 1, 3, 0, 4, 3, 0, 1, 4);
425 FSS(76, S3, 3, 0, 1, 4, 2, 0, 1, 4, 2);
426 FSS(80, S4, 0, 1, 4, 2, 3, 1, 3, 0, 2);
427 FSS(84, S5, 1, 3, 0, 2, 4, 3, 2, 1, 0);
428 FSS(88, S6, 3, 2, 1, 0, 4, 3, 2, 4, 1);
429 FSF(92, S7, 3, 2, 4, 1, 0, 0, 1, 2, 3);
430 st->s03 = r0;
431 st->s02 = r1;
432 st->s01 = r2;
433 st->s00 = r3;
434
435 st->ptr = sizeof(st->buf);
436
437 #undef KA
438 #undef FSS
439 #undef FSF
440
441 return CRYPT_OK;
442 }
443
/*
 * Multiplication by alpha: alpha * x = T32(x << 8) ^ mul_a[x >> 24]
 *
 * 256-entry lookup table (64 rows of 4), indexed by the most significant
 * byte of a 32-bit word. It is only read through the MUL_A() macro.
 */
static const ulong32 mul_a[] = {
    0x00000000, 0xE19FCF13, 0x6B973726, 0x8A08F835,
    0xD6876E4C, 0x3718A15F, 0xBD10596A, 0x5C8F9679,
    0x05A7DC98, 0xE438138B, 0x6E30EBBE, 0x8FAF24AD,
    0xD320B2D4, 0x32BF7DC7, 0xB8B785F2, 0x59284AE1,
    0x0AE71199, 0xEB78DE8A, 0x617026BF, 0x80EFE9AC,
    0xDC607FD5, 0x3DFFB0C6, 0xB7F748F3, 0x566887E0,
    0x0F40CD01, 0xEEDF0212, 0x64D7FA27, 0x85483534,
    0xD9C7A34D, 0x38586C5E, 0xB250946B, 0x53CF5B78,
    0x1467229B, 0xF5F8ED88, 0x7FF015BD, 0x9E6FDAAE,
    0xC2E04CD7, 0x237F83C4, 0xA9777BF1, 0x48E8B4E2,
    0x11C0FE03, 0xF05F3110, 0x7A57C925, 0x9BC80636,
    0xC747904F, 0x26D85F5C, 0xACD0A769, 0x4D4F687A,
    0x1E803302, 0xFF1FFC11, 0x75170424, 0x9488CB37,
    0xC8075D4E, 0x2998925D, 0xA3906A68, 0x420FA57B,
    0x1B27EF9A, 0xFAB82089, 0x70B0D8BC, 0x912F17AF,
    0xCDA081D6, 0x2C3F4EC5, 0xA637B6F0, 0x47A879E3,
    0x28CE449F, 0xC9518B8C, 0x435973B9, 0xA2C6BCAA,
    0xFE492AD3, 0x1FD6E5C0, 0x95DE1DF5, 0x7441D2E6,
    0x2D699807, 0xCCF65714, 0x46FEAF21, 0xA7616032,
    0xFBEEF64B, 0x1A713958, 0x9079C16D, 0x71E60E7E,
    0x22295506, 0xC3B69A15, 0x49BE6220, 0xA821AD33,
    0xF4AE3B4A, 0x1531F459, 0x9F390C6C, 0x7EA6C37F,
    0x278E899E, 0xC611468D, 0x4C19BEB8, 0xAD8671AB,
    0xF109E7D2, 0x109628C1, 0x9A9ED0F4, 0x7B011FE7,
    0x3CA96604, 0xDD36A917, 0x573E5122, 0xB6A19E31,
    0xEA2E0848, 0x0BB1C75B, 0x81B93F6E, 0x6026F07D,
    0x390EBA9C, 0xD891758F, 0x52998DBA, 0xB30642A9,
    0xEF89D4D0, 0x0E161BC3, 0x841EE3F6, 0x65812CE5,
    0x364E779D, 0xD7D1B88E, 0x5DD940BB, 0xBC468FA8,
    0xE0C919D1, 0x0156D6C2, 0x8B5E2EF7, 0x6AC1E1E4,
    0x33E9AB05, 0xD2766416, 0x587E9C23, 0xB9E15330,
    0xE56EC549, 0x04F10A5A, 0x8EF9F26F, 0x6F663D7C,
    0x50358897, 0xB1AA4784, 0x3BA2BFB1, 0xDA3D70A2,
    0x86B2E6DB, 0x672D29C8, 0xED25D1FD, 0x0CBA1EEE,
    0x5592540F, 0xB40D9B1C, 0x3E056329, 0xDF9AAC3A,
    0x83153A43, 0x628AF550, 0xE8820D65, 0x091DC276,
    0x5AD2990E, 0xBB4D561D, 0x3145AE28, 0xD0DA613B,
    0x8C55F742, 0x6DCA3851, 0xE7C2C064, 0x065D0F77,
    0x5F754596, 0xBEEA8A85, 0x34E272B0, 0xD57DBDA3,
    0x89F22BDA, 0x686DE4C9, 0xE2651CFC, 0x03FAD3EF,
    0x4452AA0C, 0xA5CD651F, 0x2FC59D2A, 0xCE5A5239,
    0x92D5C440, 0x734A0B53, 0xF942F366, 0x18DD3C75,
    0x41F57694, 0xA06AB987, 0x2A6241B2, 0xCBFD8EA1,
    0x977218D8, 0x76EDD7CB, 0xFCE52FFE, 0x1D7AE0ED,
    0x4EB5BB95, 0xAF2A7486, 0x25228CB3, 0xC4BD43A0,
    0x9832D5D9, 0x79AD1ACA, 0xF3A5E2FF, 0x123A2DEC,
    0x4B12670D, 0xAA8DA81E, 0x2085502B, 0xC11A9F38,
    0x9D950941, 0x7C0AC652, 0xF6023E67, 0x179DF174,
    0x78FBCC08, 0x9964031B, 0x136CFB2E, 0xF2F3343D,
    0xAE7CA244, 0x4FE36D57, 0xC5EB9562, 0x24745A71,
    0x7D5C1090, 0x9CC3DF83, 0x16CB27B6, 0xF754E8A5,
    0xABDB7EDC, 0x4A44B1CF, 0xC04C49FA, 0x21D386E9,
    0x721CDD91, 0x93831282, 0x198BEAB7, 0xF81425A4,
    0xA49BB3DD, 0x45047CCE, 0xCF0C84FB, 0x2E934BE8,
    0x77BB0109, 0x9624CE1A, 0x1C2C362F, 0xFDB3F93C,
    0xA13C6F45, 0x40A3A056, 0xCAAB5863, 0x2B349770,
    0x6C9CEE93, 0x8D032180, 0x070BD9B5, 0xE69416A6,
    0xBA1B80DF, 0x5B844FCC, 0xD18CB7F9, 0x301378EA,
    0x693B320B, 0x88A4FD18, 0x02AC052D, 0xE333CA3E,
    0xBFBC5C47, 0x5E239354, 0xD42B6B61, 0x35B4A472,
    0x667BFF0A, 0x87E43019, 0x0DECC82C, 0xEC73073F,
    0xB0FC9146, 0x51635E55, 0xDB6BA660, 0x3AF46973,
    0x63DC2392, 0x8243EC81, 0x084B14B4, 0xE9D4DBA7,
    0xB55B4DDE, 0x54C482CD, 0xDECC7AF8, 0x3F53B5EB
};
513
/*
 * Multiplication by 1/alpha: 1/alpha * x = (x >> 8) ^ mul_ia[x & 0xFF]
 *
 * 256-entry lookup table (64 rows of 4), indexed by the least significant
 * byte of a 32-bit word. It is only read through the MUL_G() macro.
 */
static const ulong32 mul_ia[] = {
    0x00000000, 0x180F40CD, 0x301E8033, 0x2811C0FE,
    0x603CA966, 0x7833E9AB, 0x50222955, 0x482D6998,
    0xC078FBCC, 0xD877BB01, 0xF0667BFF, 0xE8693B32,
    0xA04452AA, 0xB84B1267, 0x905AD299, 0x88559254,
    0x29F05F31, 0x31FF1FFC, 0x19EEDF02, 0x01E19FCF,
    0x49CCF657, 0x51C3B69A, 0x79D27664, 0x61DD36A9,
    0xE988A4FD, 0xF187E430, 0xD99624CE, 0xC1996403,
    0x89B40D9B, 0x91BB4D56, 0xB9AA8DA8, 0xA1A5CD65,
    0x5249BE62, 0x4A46FEAF, 0x62573E51, 0x7A587E9C,
    0x32751704, 0x2A7A57C9, 0x026B9737, 0x1A64D7FA,
    0x923145AE, 0x8A3E0563, 0xA22FC59D, 0xBA208550,
    0xF20DECC8, 0xEA02AC05, 0xC2136CFB, 0xDA1C2C36,
    0x7BB9E153, 0x63B6A19E, 0x4BA76160, 0x53A821AD,
    0x1B854835, 0x038A08F8, 0x2B9BC806, 0x339488CB,
    0xBBC11A9F, 0xA3CE5A52, 0x8BDF9AAC, 0x93D0DA61,
    0xDBFDB3F9, 0xC3F2F334, 0xEBE333CA, 0xF3EC7307,
    0xA492D5C4, 0xBC9D9509, 0x948C55F7, 0x8C83153A,
    0xC4AE7CA2, 0xDCA13C6F, 0xF4B0FC91, 0xECBFBC5C,
    0x64EA2E08, 0x7CE56EC5, 0x54F4AE3B, 0x4CFBEEF6,
    0x04D6876E, 0x1CD9C7A3, 0x34C8075D, 0x2CC74790,
    0x8D628AF5, 0x956DCA38, 0xBD7C0AC6, 0xA5734A0B,
    0xED5E2393, 0xF551635E, 0xDD40A3A0, 0xC54FE36D,
    0x4D1A7139, 0x551531F4, 0x7D04F10A, 0x650BB1C7,
    0x2D26D85F, 0x35299892, 0x1D38586C, 0x053718A1,
    0xF6DB6BA6, 0xEED42B6B, 0xC6C5EB95, 0xDECAAB58,
    0x96E7C2C0, 0x8EE8820D, 0xA6F942F3, 0xBEF6023E,
    0x36A3906A, 0x2EACD0A7, 0x06BD1059, 0x1EB25094,
    0x569F390C, 0x4E9079C1, 0x6681B93F, 0x7E8EF9F2,
    0xDF2B3497, 0xC724745A, 0xEF35B4A4, 0xF73AF469,
    0xBF179DF1, 0xA718DD3C, 0x8F091DC2, 0x97065D0F,
    0x1F53CF5B, 0x075C8F96, 0x2F4D4F68, 0x37420FA5,
    0x7F6F663D, 0x676026F0, 0x4F71E60E, 0x577EA6C3,
    0xE18D0321, 0xF98243EC, 0xD1938312, 0xC99CC3DF,
    0x81B1AA47, 0x99BEEA8A, 0xB1AF2A74, 0xA9A06AB9,
    0x21F5F8ED, 0x39FAB820, 0x11EB78DE, 0x09E43813,
    0x41C9518B, 0x59C61146, 0x71D7D1B8, 0x69D89175,
    0xC87D5C10, 0xD0721CDD, 0xF863DC23, 0xE06C9CEE,
    0xA841F576, 0xB04EB5BB, 0x985F7545, 0x80503588,
    0x0805A7DC, 0x100AE711, 0x381B27EF, 0x20146722,
    0x68390EBA, 0x70364E77, 0x58278E89, 0x4028CE44,
    0xB3C4BD43, 0xABCBFD8E, 0x83DA3D70, 0x9BD57DBD,
    0xD3F81425, 0xCBF754E8, 0xE3E69416, 0xFBE9D4DB,
    0x73BC468F, 0x6BB30642, 0x43A2C6BC, 0x5BAD8671,
    0x1380EFE9, 0x0B8FAF24, 0x239E6FDA, 0x3B912F17,
    0x9A34E272, 0x823BA2BF, 0xAA2A6241, 0xB225228C,
    0xFA084B14, 0xE2070BD9, 0xCA16CB27, 0xD2198BEA,
    0x5A4C19BE, 0x42435973, 0x6A52998D, 0x725DD940,
    0x3A70B0D8, 0x227FF015, 0x0A6E30EB, 0x12617026,
    0x451FD6E5, 0x5D109628, 0x750156D6, 0x6D0E161B,
    0x25237F83, 0x3D2C3F4E, 0x153DFFB0, 0x0D32BF7D,
    0x85672D29, 0x9D686DE4, 0xB579AD1A, 0xAD76EDD7,
    0xE55B844F, 0xFD54C482, 0xD545047C, 0xCD4A44B1,
    0x6CEF89D4, 0x74E0C919, 0x5CF109E7, 0x44FE492A,
    0x0CD320B2, 0x14DC607F, 0x3CCDA081, 0x24C2E04C,
    0xAC977218, 0xB49832D5, 0x9C89F22B, 0x8486B2E6,
    0xCCABDB7E, 0xD4A49BB3, 0xFCB55B4D, 0xE4BA1B80,
    0x17566887, 0x0F59284A, 0x2748E8B4, 0x3F47A879,
    0x776AC1E1, 0x6F65812C, 0x477441D2, 0x5F7B011F,
    0xD72E934B, 0xCF21D386, 0xE7301378, 0xFF3F53B5,
    0xB7123A2D, 0xAF1D7AE0, 0x870CBA1E, 0x9F03FAD3,
    0x3EA637B6, 0x26A9777B, 0x0EB8B785, 0x16B7F748,
    0x5E9A9ED0, 0x4695DE1D, 0x6E841EE3, 0x768B5E2E,
    0xFEDECC7A, 0xE6D18CB7, 0xCEC04C49, 0xD6CF0C84,
    0x9EE2651C, 0x86ED25D1, 0xAEFCE52F, 0xB6F3A5E2
};
583
584
585 /*
586 * Compute the next block of bits of output stream. This is equivalent
587 * to one full rotation of the shift register.
588 */
_sosemanuk_internal(sosemanuk_state * st)589 static LTC_INLINE void _sosemanuk_internal(sosemanuk_state *st)
590 {
591 /*
592 * MUL_A(x) computes alpha * x (in F_{2^32}).
593 * MUL_G(x) computes 1/alpha * x (in F_{2^32}).
594 */
595 #define MUL_A(x) (T32((x) << 8) ^ mul_a[(x) >> 24])
596 #define MUL_G(x) (((x) >> 8) ^ mul_ia[(x) & 0xFF])
597
598 /*
599 * This macro computes the special multiplexer, which chooses
600 * between "x" and "x xor y", depending on the least significant
601 * bit of the control word. We use the C "?:" selection operator
602 * (which most compilers know how to optimise) except for Alpha,
603 * where the manual sign extension seems to perform equally well
604 * with DEC/Compaq/HP compiler, and much better with gcc.
605 */
606 #ifdef __alpha
607 #define XMUX(c, x, y) ((((signed int)((c) << 31) >> 31) & (y)) ^ (x))
608 #else
609 #define XMUX(c, x, y) (((c) & 0x1) ? ((x) ^ (y)) : (x))
610 #endif
611
612 /*
613 * FSM() updates the finite state machine.
614 */
615 #define FSM(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9) do { \
616 ulong32 tt, or1; \
617 tt = XMUX(r1, s ## x1, s ## x8); \
618 or1 = r1; \
619 r1 = T32(r2 + tt); \
620 tt = T32(or1 * 0x54655307); \
621 r2 = ROLc(tt, 7); \
622 } while (0)
623
624 /*
625 * LRU updates the shift register; the dropped value is stored
626 * in variable "dd".
627 */
628 #define LRU(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, dd) do { \
629 dd = s ## x0; \
630 s ## x0 = MUL_A(s ## x0) ^ MUL_G(s ## x3) ^ s ## x9; \
631 } while (0)
632
633 /*
634 * CC1 stores into variable "ee" the next intermediate word
635 * (combination of the new states of the LFSR and the FSM).
636 */
637 #define CC1(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, ee) do { \
638 ee = T32(s ## x9 + r1) ^ r2; \
639 } while (0)
640
641 /*
642 * STEP computes one internal round. "dd" receives the "s_t"
643 * value (dropped from the LFSR) and "ee" gets the value computed
644 * from the LFSR and FSM.
645 */
646 #define STEP(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, dd, ee) do { \
647 FSM(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9); \
648 LRU(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, dd); \
649 CC1(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, ee); \
650 } while (0)
651
652 /*
653 * Apply one Serpent round (with the provided S-box macro), XOR
654 * the result with the "v" values, and encode the result into
655 * the destination buffer, at the provided offset. The "x*"
656 * arguments encode the output permutation of the "S" macro.
657 */
658 #define SRD(S, x0, x1, x2, x3, ooff) do { \
659 S(u0, u1, u2, u3, u4); \
660 STORE32L(u ## x0 ^ v0, st->buf + ooff); \
661 STORE32L(u ## x1 ^ v1, st->buf + ooff + 4); \
662 STORE32L(u ## x2 ^ v2, st->buf + ooff + 8); \
663 STORE32L(u ## x3 ^ v3, st->buf + ooff + 12); \
664 } while (0)
665
666 ulong32 s00 = st->s00;
667 ulong32 s01 = st->s01;
668 ulong32 s02 = st->s02;
669 ulong32 s03 = st->s03;
670 ulong32 s04 = st->s04;
671 ulong32 s05 = st->s05;
672 ulong32 s06 = st->s06;
673 ulong32 s07 = st->s07;
674 ulong32 s08 = st->s08;
675 ulong32 s09 = st->s09;
676 ulong32 r1 = st->r1;
677 ulong32 r2 = st->r2;
678 ulong32 u0, u1, u2, u3, u4;
679 ulong32 v0, v1, v2, v3;
680
681 STEP(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, v0, u0);
682 STEP(01, 02, 03, 04, 05, 06, 07, 08, 09, 00, v1, u1);
683 STEP(02, 03, 04, 05, 06, 07, 08, 09, 00, 01, v2, u2);
684 STEP(03, 04, 05, 06, 07, 08, 09, 00, 01, 02, v3, u3);
685 SRD(S2, 2, 3, 1, 4, 0);
686 STEP(04, 05, 06, 07, 08, 09, 00, 01, 02, 03, v0, u0);
687 STEP(05, 06, 07, 08, 09, 00, 01, 02, 03, 04, v1, u1);
688 STEP(06, 07, 08, 09, 00, 01, 02, 03, 04, 05, v2, u2);
689 STEP(07, 08, 09, 00, 01, 02, 03, 04, 05, 06, v3, u3);
690 SRD(S2, 2, 3, 1, 4, 16);
691 STEP(08, 09, 00, 01, 02, 03, 04, 05, 06, 07, v0, u0);
692 STEP(09, 00, 01, 02, 03, 04, 05, 06, 07, 08, v1, u1);
693 STEP(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, v2, u2);
694 STEP(01, 02, 03, 04, 05, 06, 07, 08, 09, 00, v3, u3);
695 SRD(S2, 2, 3, 1, 4, 32);
696 STEP(02, 03, 04, 05, 06, 07, 08, 09, 00, 01, v0, u0);
697 STEP(03, 04, 05, 06, 07, 08, 09, 00, 01, 02, v1, u1);
698 STEP(04, 05, 06, 07, 08, 09, 00, 01, 02, 03, v2, u2);
699 STEP(05, 06, 07, 08, 09, 00, 01, 02, 03, 04, v3, u3);
700 SRD(S2, 2, 3, 1, 4, 48);
701 STEP(06, 07, 08, 09, 00, 01, 02, 03, 04, 05, v0, u0);
702 STEP(07, 08, 09, 00, 01, 02, 03, 04, 05, 06, v1, u1);
703 STEP(08, 09, 00, 01, 02, 03, 04, 05, 06, 07, v2, u2);
704 STEP(09, 00, 01, 02, 03, 04, 05, 06, 07, 08, v3, u3);
705 SRD(S2, 2, 3, 1, 4, 64);
706
707 st->s00 = s00;
708 st->s01 = s01;
709 st->s02 = s02;
710 st->s03 = s03;
711 st->s04 = s04;
712 st->s05 = s05;
713 st->s06 = s06;
714 st->s07 = s07;
715 st->s08 = s08;
716 st->s09 = s09;
717 st->r1 = r1;
718 st->r2 = r2;
719 }
720
721 /*
722 * Combine buffers in1[] and in2[] by XOR, result in out[]. The length
723 * is "datalen" (in bytes). Partial overlap of out[] with either in1[]
724 * or in2[] is not allowed. Total overlap (out == in1 and/or out == in2)
725 * is allowed.
726 */
_xorbuf(const unsigned char * in1,const unsigned char * in2,unsigned char * out,unsigned long datalen)727 static LTC_INLINE void _xorbuf(const unsigned char *in1, const unsigned char *in2,
728 unsigned char *out, unsigned long datalen)
729 {
730 while (datalen -- > 0) {
731 *out ++ = *in1 ++ ^ *in2 ++;
732 }
733 }
734
735
736 /*
737 * Cipher operation, as a stream cipher: data is read from the "in"
738 * buffer, combined by XOR with the stream, and the result is written
739 * in the "out" buffer. "in" and "out" must be either equal, or
740 * reference distinct buffers (no partial overlap is allowed).
741 * @param st The Sosemanuk state
742 * @param in Data in
743 * @param inlen Length of data in bytes
744 * @param out Data out
745 * @return CRYPT_OK on success
746 */
sosemanuk_crypt(sosemanuk_state * st,const unsigned char * in,unsigned long inlen,unsigned char * out)747 int sosemanuk_crypt(sosemanuk_state *st,
748 const unsigned char *in, unsigned long inlen, unsigned char *out)
749 {
750 LTC_ARGCHK(st != NULL);
751 LTC_ARGCHK(in != NULL);
752 LTC_ARGCHK(out != NULL);
753
754 if (st->ptr < (sizeof(st->buf))) {
755 unsigned long rlen = (sizeof(st->buf)) - st->ptr;
756
757 if (rlen > inlen) {
758 rlen = inlen;
759 }
760 _xorbuf(st->buf + st->ptr, in, out, rlen);
761 in += rlen;
762 out += rlen;
763 inlen -= rlen;
764 st->ptr += rlen;
765 }
766 while (inlen > 0) {
767 _sosemanuk_internal(st);
768 if (inlen >= sizeof(st->buf)) {
769 _xorbuf(st->buf, in, out, sizeof(st->buf));
770 in += sizeof(st->buf);
771 out += sizeof(st->buf);
772 inlen -= sizeof(st->buf);
773 } else {
774 _xorbuf(st->buf, in, out, inlen);
775 st->ptr = inlen;
776 inlen = 0;
777 }
778 }
779 return CRYPT_OK;
780 }
781
782
783
784 /*
785 * Cipher operation, as a PRNG: the provided output buffer is filled with
786 * pseudo-random bytes as output from the stream cipher.
787 * @param st The Sosemanuk state
788 * @param out Data out
789 * @param outlen Length of output in bytes
790 * @return CRYPT_OK on success
791 */
sosemanuk_keystream(sosemanuk_state * st,unsigned char * out,unsigned long outlen)792 int sosemanuk_keystream(sosemanuk_state *st, unsigned char *out, unsigned long outlen)
793 {
794 if (outlen == 0) return CRYPT_OK; /* nothing to do */
795 LTC_ARGCHK(out != NULL);
796 XMEMSET(out, 0, outlen);
797 return sosemanuk_crypt(st, out, outlen, out);
798 }
799
800
801 /*
802 * Terminate and clear Sosemanuk key context
803 * @param st The Sosemanuk state
804 * @return CRYPT_OK on success
805 */
sosemanuk_done(sosemanuk_state * st)806 int sosemanuk_done(sosemanuk_state *st)
807 {
808 LTC_ARGCHK(st != NULL);
809 XMEMSET(st, 0, sizeof(sosemanuk_state));
810 return CRYPT_OK;
811 }
812
813
814 #endif
815
816 /* ref: $Format:%D$ */
817 /* git commit: $Format:%H$ */
818 /* commit time: $Format:%ai$ */
819