1 // SPDX-License-Identifier: BSD-2-Clause
2 /* LibTomCrypt, modular cryptographic library -- Tom St Denis
3  *
4  * LibTomCrypt is a library that provides various cryptographic
5  * algorithms in a highly modular and flexible manner.
6  *
7  * The library is free for all purposes without any express
8  * guarantee it works.
9  */
10 
11 /*
12  * This LTC implementation was adapted from:
13  *    http://www.ecrypt.eu.org/stream/e2-sosemanuk.html
14  */
15 
16 /*
17  * SOSEMANUK reference implementation.
18  *
19  * This code is supposed to run on any conforming C implementation (C90
20  * or later).
21  *
22  * (c) 2005 X-CRYPT project. This software is provided 'as-is', without
23  * any express or implied warranty. In no event will the authors be held
24  * liable for any damages arising from the use of this software.
25  *
26  * Permission is granted to anyone to use this software for any purpose,
27  * including commercial applications, and to alter it and redistribute it
28  * freely, subject to no restriction.
29  *
30  * Technical remarks and questions can be addressed to
31  * <thomas.pornin@cryptolog.com>
32  */
33 
34 #include "tomcrypt_private.h"
35 
36 #ifdef LTC_SOSEMANUK
37 
38 /* ======================================================================== */
39 
/*
 * We want (and sometimes need) to perform explicit truncations to 32 bits.
 *
 * T32(x) keeps only the low 32 bits of x by masking with 0xFFFFFFFF. In
 * this file it is applied to the results of left shifts, additions and
 * multiplications, i.e. operations whose mathematical result may need
 * more than 32 bits.
 * NOTE(review): the mask only changes the value if ulong32 can hold more
 * than 32 bits on the target -- confirm against the ulong32 typedef.
 */
#define T32(x)   ((x) & (ulong32)0xFFFFFFFF)
44 
/*
 * Some of our functions will be tagged as "inline" to help the compiler
 * optimize things. We use "inline" only if the compiler is advanced
 * enough to understand it; C99 compilers, and pre-C99 versions of gcc,
 * understand enough "inline" for our purposes. In this file the tag is
 * spelled LTC_INLINE, a macro that is not defined here.
 */
51 
52 /* ======================================================================== */
53 
54 /*
55  * Serpent S-boxes, implemented in bitslice mode. These circuits have
56  * been published by Dag Arne Osvik ("Speeding up Serpent", published in
57  * the 3rd AES Candidate Conference) and work on five 32-bit registers:
 * the four inputs, and a fifth scratch register. They are meant to be
 * quite fast on Pentium-class processors. These are not the fastest
60  * published, but they are "fast enough" and they are unencumbered as
61  * far as intellectual property is concerned (note: these are rewritten
62  * from the article itself, and hence are not covered by the GPL on
63  * Dag's code, which was not used here).
64  *
 * The output bits are permuted. Here is the correspondence:
66  *   S0:  1420
67  *   S1:  2031
68  *   S2:  2314
69  *   S3:  1234
70  *   S4:  1403
71  *   S5:  1302
72  *   S6:  0142
73  *   S7:  4310
74  * (for instance, the output of S0 is in "r1, r4, r2, r0").
75  */
76 
/*
 * Serpent S-box 0 as a straight-line sequence of bitwise word operations.
 *   r0..r3  input words; all four are overwritten
 *   r4      scratch word; it is assigned before it is first read
 * On exit the four output words are in r1, r4, r2, r0 (in that order).
 * Every argument must be a distinct modifiable lvalue without side
 * effects, because each one appears several times in the expansion.
 */
#define S0(r0, r1, r2, r3, r4)   do { \
        r3 ^= r0;  r4  = r1; \
        r1 &= r3;  r4 ^= r2; \
        r1 ^= r0;  r0 |= r3; \
        r0 ^= r4;  r4 ^= r3; \
        r3 ^= r2;  r2 |= r1; \
        r2 ^= r4;  r4 = ~r4; \
        r4 |= r1;  r1 ^= r3; \
        r1 ^= r4;  r3 |= r0; \
        r1 ^= r3;  r4 ^= r3; \
    } while (0)
88 
/*
 * Serpent S-box 1 as a straight-line sequence of bitwise word operations.
 *   r0..r3  input words; all four are overwritten
 *   r4      scratch word; it is assigned before it is first read
 * On exit the four output words are in r2, r0, r3, r1 (in that order).
 * Every argument must be a distinct modifiable lvalue without side
 * effects, because each one appears several times in the expansion.
 */
#define S1(r0, r1, r2, r3, r4)   do { \
        r0 = ~r0;  r2 = ~r2; \
        r4  = r0;  r0 &= r1; \
        r2 ^= r0;  r0 |= r3; \
        r3 ^= r2;  r1 ^= r0; \
        r0 ^= r4;  r4 |= r1; \
        r1 ^= r3;  r2 |= r0; \
        r2 &= r4;  r0 ^= r1; \
        r1 &= r2; \
        r1 ^= r0;  r0 &= r2; \
        r0 ^= r4; \
    } while (0)
101 
/*
 * Serpent S-box 2 as a straight-line sequence of bitwise word operations.
 *   r0..r3  input words; all four are overwritten
 *   r4      scratch word; it is assigned before it is first read
 * On exit the four output words are in r2, r3, r1, r4 (in that order).
 * Every argument must be a distinct modifiable lvalue without side
 * effects, because each one appears several times in the expansion.
 */
#define S2(r0, r1, r2, r3, r4)   do { \
        r4  = r0;  r0 &= r2; \
        r0 ^= r3;  r2 ^= r1; \
        r2 ^= r0;  r3 |= r4; \
        r3 ^= r1;  r4 ^= r2; \
        r1  = r3;  r3 |= r4; \
        r3 ^= r0;  r0 &= r1; \
        r4 ^= r0;  r1 ^= r3; \
        r1 ^= r4;  r4 = ~r4; \
    } while (0)
112 
/*
 * Serpent S-box 3 as a straight-line sequence of bitwise word operations.
 *   r0..r3  input words; all four are overwritten
 *   r4      scratch word; it is assigned before it is first read
 * On exit the four output words are in r1, r2, r3, r4 (in that order).
 * Every argument must be a distinct modifiable lvalue without side
 * effects, because each one appears several times in the expansion.
 */
#define S3(r0, r1, r2, r3, r4)   do { \
        r4  = r0;  r0 |= r3; \
        r3 ^= r1;  r1 &= r4; \
        r4 ^= r2;  r2 ^= r3; \
        r3 &= r0;  r4 |= r1; \
        r3 ^= r4;  r0 ^= r1; \
        r4 &= r0;  r1 ^= r3; \
        r4 ^= r2;  r1 |= r0; \
        r1 ^= r2;  r0 ^= r3; \
        r2  = r1;  r1 |= r3; \
        r1 ^= r0; \
    } while (0)
125 
/*
 * Serpent S-box 4 as a straight-line sequence of bitwise word operations.
 *   r0..r3  input words; all four are overwritten
 *   r4      scratch word; it is assigned before it is first read
 * On exit the four output words are in r1, r4, r0, r3 (in that order).
 * Every argument must be a distinct modifiable lvalue without side
 * effects, because each one appears several times in the expansion.
 */
#define S4(r0, r1, r2, r3, r4)   do { \
        r1 ^= r3;  r3 = ~r3; \
        r2 ^= r3;  r3 ^= r0; \
        r4  = r1;  r1 &= r3; \
        r1 ^= r2;  r4 ^= r3; \
        r0 ^= r4;  r2 &= r4; \
        r2 ^= r0;  r0 &= r1; \
        r3 ^= r0;  r4 |= r1; \
        r4 ^= r0;  r0 |= r3; \
        r0 ^= r2;  r2 &= r3; \
        r0 = ~r0;  r4 ^= r2; \
    } while (0)
138 
/*
 * Serpent S-box 5 as a straight-line sequence of bitwise word operations.
 *   r0..r3  input words; all four are overwritten
 *   r4      scratch word; it is assigned before it is first read
 * On exit the four output words are in r1, r3, r0, r2 (in that order).
 * Every argument must be a distinct modifiable lvalue without side
 * effects, because each one appears several times in the expansion.
 */
#define S5(r0, r1, r2, r3, r4)   do { \
        r0 ^= r1;  r1 ^= r3; \
        r3 = ~r3;  r4  = r1; \
        r1 &= r0;  r2 ^= r3; \
        r1 ^= r2;  r2 |= r4; \
        r4 ^= r3;  r3 &= r1; \
        r3 ^= r0;  r4 ^= r1; \
        r4 ^= r2;  r2 ^= r0; \
        r0 &= r3;  r2 = ~r2; \
        r0 ^= r4;  r4 |= r3; \
        r2 ^= r4; \
    } while (0)
151 
/*
 * Serpent S-box 6 as a straight-line sequence of bitwise word operations.
 *   r0..r3  input words; all four are overwritten
 *   r4      scratch word; it is assigned before it is first read
 * On exit the four output words are in r0, r1, r4, r2 (in that order).
 * Every argument must be a distinct modifiable lvalue without side
 * effects, because each one appears several times in the expansion.
 */
#define S6(r0, r1, r2, r3, r4)   do { \
        r2 = ~r2;  r4  = r3; \
        r3 &= r0;  r0 ^= r4; \
        r3 ^= r2;  r2 |= r4; \
        r1 ^= r3;  r2 ^= r0; \
        r0 |= r1;  r2 ^= r1; \
        r4 ^= r0;  r0 |= r3; \
        r0 ^= r2;  r4 ^= r3; \
        r4 ^= r0;  r3 = ~r3; \
        r2 &= r4; \
        r2 ^= r3; \
    } while (0)
164 
/*
 * Serpent S-box 7 as a straight-line sequence of bitwise word operations.
 *   r0..r3  input words; all four are overwritten
 *   r4      scratch word; it is assigned before it is first read
 * On exit the four output words are in r4, r3, r1, r0 (in that order).
 * Every argument must be a distinct modifiable lvalue without side
 * effects, because each one appears several times in the expansion.
 */
#define S7(r0, r1, r2, r3, r4)   do { \
        r4  = r1;  r1 |= r2; \
        r1 ^= r3;  r4 ^= r2; \
        r2 ^= r1;  r3 |= r4; \
        r3 &= r0;  r4 ^= r2; \
        r3 ^= r1;  r1 |= r4; \
        r1 ^= r0;  r0 |= r4; \
        r0 ^= r2;  r1 ^= r4; \
        r2 ^= r1;  r1 &= r0; \
        r1 ^= r4;  r2 = ~r2; \
        r2 |= r0; \
        r4 ^= r2; \
    } while (0)
178 
/*
 * The Serpent linear transform.
 *
 * Updates the four words x0..x3 in place using rotations (ROLc), XORs and
 * two left shifts. The shifted values go through T32() so that only their
 * low 32 bits take part in the XOR. Statement order matters: each line
 * consumes values produced by the lines before it. The arguments must be
 * modifiable lvalues without side effects, since each one is expanded
 * several times.
 */
#define SERPENT_LT(x0, x1, x2, x3)  do { \
        x0 = ROLc(x0, 13); \
        x2 = ROLc(x2, 3); \
        x1 = x1 ^ x0 ^ x2; \
        x3 = x3 ^ x2 ^ T32(x0 << 3); \
        x1 = ROLc(x1, 1); \
        x3 = ROLc(x3, 7); \
        x0 = x0 ^ x1 ^ x3; \
        x2 = x2 ^ x3 ^ T32(x1 << 7); \
        x0 = ROLc(x0, 5); \
        x2 = ROLc(x2, 22); \
    } while (0)
194 
195 /* ======================================================================== */
196 
/*
 * Initialize Sosemanuk's state by providing a key. The key is an array of
 * 1 to 32 bytes.
 *
 * The function fills st->kc[0..99] with 100 subkey words (25 groups of
 * four). It does not touch the other members of the state; those are set
 * by sosemanuk_setiv().
 *
 * @param st       The Sosemanuk state
 * @param key      Key
 * @param keylen   Length of key in bytes (1 to 32)
 * @return CRYPT_OK on success. Invalid arguments are handled by
 *         LTC_ARGCHK, whose failure behaviour is defined outside this file.
 */
int sosemanuk_setup(sosemanuk_state *st, const unsigned char *key, unsigned long keylen)
{
    /*
     * This key schedule is actually a truncated Serpent key schedule.
     * The key-derived words (w_i) are computed within the eight
     * local variables w0 to w7, which are reused again and again.
     */

    /*
     * SKS: produce four subkey words.
     *   S        = S-box macro to apply
     *   o0 to o3 = digits selecting which of w0..w7 feed the S-box
     *   d0 to d3 = digits selecting which of r0..r4 hold the S-box output
     *              (the output permutation of that S-box)
     * The four results are appended to st->kc[] at the running index i.
     */
#define SKS(S, o0, o1, o2, o3, d0, d1, d2, d3)   do { \
        ulong32 r0, r1, r2, r3, r4; \
        r0 = w ## o0; \
        r1 = w ## o1; \
        r2 = w ## o2; \
        r3 = w ## o3; \
        S(r0, r1, r2, r3, r4); \
        st->kc[i ++] = r ## d0; \
        st->kc[i ++] = r ## d1; \
        st->kc[i ++] = r ## d2; \
        st->kc[i ++] = r ## d3; \
    } while (0)

    /*
     * One shorthand per S-box. Even-numbered S-boxes read w4..w7,
     * odd-numbered ones read w0..w3; the last four digits repeat the
     * output permutation of the respective S-box.
     */
#define SKS0    SKS(S0, 4, 5, 6, 7, 1, 4, 2, 0)
#define SKS1    SKS(S1, 0, 1, 2, 3, 2, 0, 3, 1)
#define SKS2    SKS(S2, 4, 5, 6, 7, 2, 3, 1, 4)
#define SKS3    SKS(S3, 0, 1, 2, 3, 1, 2, 3, 4)
#define SKS4    SKS(S4, 4, 5, 6, 7, 1, 4, 0, 3)
#define SKS5    SKS(S5, 0, 1, 2, 3, 1, 3, 0, 2)
#define SKS6    SKS(S6, 4, 5, 6, 7, 0, 1, 4, 2)
#define SKS7    SKS(S7, 0, 1, 2, 3, 4, 3, 1, 0)

    /*
     * WUP: update one word in place. The new value of wi is the XOR of
     * wi, wi5, wi3, wi1, the constant 0x9E3779B9 and the counter cc,
     * rotated left by 11 bits.
     */
#define WUP(wi, wi5, wi3, wi1, cc)   do { \
        ulong32 tt = (wi) ^ (wi5) ^ (wi3) \
            ^ (wi1) ^ (0x9E3779B9 ^ (ulong32)(cc)); \
        (wi) = ROLc(tt, 11); \
    } while (0)

    /*
     * WUP0 updates w0..w3 with counters cc..cc+3. Each update already
     * sees the words updated just before it.
     */
#define WUP0(cc)   do { \
        WUP(w0, w3, w5, w7, cc); \
        WUP(w1, w4, w6, w0, cc + 1); \
        WUP(w2, w5, w7, w1, cc + 2); \
        WUP(w3, w6, w0, w2, cc + 3); \
    } while (0)

    /*
     * WUP1 updates w4..w7 with counters cc..cc+3.
     */
#define WUP1(cc)   do { \
        WUP(w4, w7, w1, w3, cc); \
        WUP(w5, w0, w2, w4, cc + 1); \
        WUP(w6, w1, w3, w5, cc + 2); \
        WUP(w7, w2, w4, w6, cc + 3); \
    } while (0)

    /*
     * NOTE(review): wbuf and w0..w7 hold key material and are not wiped
     * before returning.
     */
    unsigned char wbuf[32];
    ulong32 w0, w1, w2, w3, w4, w5, w6, w7;
    int i = 0;   /* next free slot in st->kc[], advanced by SKS */

   LTC_ARGCHK(st  != NULL);
   LTC_ARGCHK(key != NULL);
   LTC_ARGCHK(keylen > 0 && keylen <= 32);

    /*
     * The key is copied into the wbuf[] buffer and padded to 256 bits
     * as described in the Serpent specification.
     * Concretely: a single 0x01 byte follows the key, then zero bytes
     * up to the end of the 32-byte buffer. A 32-byte key is used as is.
     */
    XMEMCPY(wbuf, key, keylen);
    if (keylen < 32) {
        wbuf[keylen] = 0x01;
        if (keylen < 31) {
            XMEMSET(wbuf + keylen + 1, 0, 31 - keylen);
        }
    }

    /* Split the padded key into the eight working words. */
    LOAD32L(w0, wbuf);
    LOAD32L(w1, wbuf + 4);
    LOAD32L(w2, wbuf + 8);
    LOAD32L(w3, wbuf + 12);
    LOAD32L(w4, wbuf + 16);
    LOAD32L(w5, wbuf + 20);
    LOAD32L(w6, wbuf + 24);
    LOAD32L(w7, wbuf + 28);

    /*
     * 25 rounds of "update four words, then derive four subkey words".
     * The WUP argument is the counter of the first word updated in that
     * round; the S-boxes are used in the order 3, 2, 1, 0, 7, 6, 5, 4.
     */
    WUP0(0);   SKS3;
    WUP1(4);   SKS2;
    WUP0(8);   SKS1;
    WUP1(12);  SKS0;
    WUP0(16);  SKS7;
    WUP1(20);  SKS6;
    WUP0(24);  SKS5;
    WUP1(28);  SKS4;
    WUP0(32);  SKS3;
    WUP1(36);  SKS2;
    WUP0(40);  SKS1;
    WUP1(44);  SKS0;
    WUP0(48);  SKS7;
    WUP1(52);  SKS6;
    WUP0(56);  SKS5;
    WUP1(60);  SKS4;
    WUP0(64);  SKS3;
    WUP1(68);  SKS2;
    WUP0(72);  SKS1;
    WUP1(76);  SKS0;
    WUP0(80);  SKS7;
    WUP1(84);  SKS6;
    WUP0(88);  SKS5;
    WUP1(92);  SKS4;
    WUP0(96);  SKS3;

    /* The helper macros are local to this function. */
#undef SKS
#undef SKS0
#undef SKS1
#undef SKS2
#undef SKS3
#undef SKS4
#undef SKS5
#undef SKS6
#undef SKS7
#undef WUP
#undef WUP0
#undef WUP1

    return CRYPT_OK;
}
325 
326 
/*
 * Initialization continues by setting the IV. The IV length is up to 16 bytes.
 * If "ivlen" is 0 (no IV), then the "iv" parameter can be NULL.  If multiple
 * encryptions/decryptions are to be performed with the same key and
 * sosemanuk_done() has not been called, only sosemanuk_setiv() need be called
 * to set the state.
 *
 * An IV shorter than 16 bytes is extended with zero bytes. The function
 * reads the subkeys st->kc[0..99] written by sosemanuk_setup() and sets
 * s00..s09, r1, r2 and ptr in the state.
 *
 * @param st       The Sosemanuk state
 * @param iv       Initialization vector
 * @param ivlen    Length of iv in bytes (0 to 16)
 * @return CRYPT_OK on success. Invalid arguments are handled by
 *         LTC_ARGCHK, whose failure behaviour is defined outside this file.
 */
int sosemanuk_setiv(sosemanuk_state *st, const unsigned char *iv, unsigned long ivlen)
{

    /*
     * The Serpent key addition step.
     * XORs the four subkey words st->kc[zc] .. st->kc[zc + 3] into
     * x0..x3.
     */
#define KA(zc, x0, x1, x2, x3)  do { \
        x0 ^= st->kc[(zc)]; \
        x1 ^= st->kc[(zc) + 1]; \
        x2 ^= st->kc[(zc) + 2]; \
        x3 ^= st->kc[(zc) + 3]; \
    } while (0)

    /*
     * One Serpent round.
     *   zc = current subkey counter
     *   S = S-box macro for this round
     *   i0 to i4 = input register numbers (the fifth is a scratch register)
     *   o0 to o3 = output register numbers
     * The output numbers of one round are the first four input numbers
     * of the next round.
     */
#define FSS(zc, S, i0, i1, i2, i3, i4, o0, o1, o2, o3)  do { \
        KA(zc, r ## i0, r ## i1, r ## i2, r ## i3); \
        S(r ## i0, r ## i1, r ## i2, r ## i3, r ## i4); \
        SERPENT_LT(r ## o0, r ## o1, r ## o2, r ## o3); \
    } while (0)

    /*
     * Last Serpent round. Contrary to the "true" Serpent, we keep
     * the linear transformation for that last round.
     * A final key addition with the subkey words at zc + 4 follows it.
     */
#define FSF(zc, S, i0, i1, i2, i3, i4, o0, o1, o2, o3)  do { \
        KA(zc, r ## i0, r ## i1, r ## i2, r ## i3); \
        S(r ## i0, r ## i1, r ## i2, r ## i3, r ## i4); \
        SERPENT_LT(r ## o0, r ## o1, r ## o2, r ## o3); \
        KA(zc + 4, r ## o0, r ## o1, r ## o2, r ## o3); \
    } while (0)

    ulong32 r0, r1, r2, r3, r4;
    unsigned char ivtmp[16] = {0};   /* zero padding for short IVs */

    LTC_ARGCHK(st != NULL);
    LTC_ARGCHK(ivlen <= 16);
    LTC_ARGCHK(iv != NULL || ivlen == 0);

    if (ivlen > 0) XMEMCPY(ivtmp, iv, ivlen);

    /*
     * Decode IV into four 32-bit words (little-endian).
     */
    LOAD32L(r0, ivtmp);
    LOAD32L(r1, ivtmp + 4);
    LOAD32L(r2, ivtmp + 8);
    LOAD32L(r3, ivtmp + 12);

    /*
     * Encrypt IV with Serpent24. Some values are extracted from the
     * output of the twelfth, eighteenth and twenty-fourth rounds.
     */
    FSS(0, S0, 0, 1, 2, 3, 4, 1, 4, 2, 0);
    FSS(4, S1, 1, 4, 2, 0, 3, 2, 1, 0, 4);
    FSS(8, S2, 2, 1, 0, 4, 3, 0, 4, 1, 3);
    FSS(12, S3, 0, 4, 1, 3, 2, 4, 1, 3, 2);
    FSS(16, S4, 4, 1, 3, 2, 0, 1, 0, 4, 2);
    FSS(20, S5, 1, 0, 4, 2, 3, 0, 2, 1, 4);
    FSS(24, S6, 0, 2, 1, 4, 3, 0, 2, 3, 1);
    FSS(28, S7, 0, 2, 3, 1, 4, 4, 1, 2, 0);
    FSS(32, S0, 4, 1, 2, 0, 3, 1, 3, 2, 4);
    FSS(36, S1, 1, 3, 2, 4, 0, 2, 1, 4, 3);
    FSS(40, S2, 2, 1, 4, 3, 0, 4, 3, 1, 0);
    FSS(44, S3, 4, 3, 1, 0, 2, 3, 1, 0, 2);
    /* Output of round 12: shift register words s06..s09. */
    st->s09 = r3;
    st->s08 = r1;
    st->s07 = r0;
    st->s06 = r2;

    FSS(48, S4, 3, 1, 0, 2, 4, 1, 4, 3, 2);
    FSS(52, S5, 1, 4, 3, 2, 0, 4, 2, 1, 3);
    FSS(56, S6, 4, 2, 1, 3, 0, 4, 2, 0, 1);
    FSS(60, S7, 4, 2, 0, 1, 3, 3, 1, 2, 4);
    FSS(64, S0, 3, 1, 2, 4, 0, 1, 0, 2, 3);
    FSS(68, S1, 1, 0, 2, 3, 4, 2, 1, 3, 0);
    /* Output of round 18: registers r1, r2 and words s04, s05. */
    st->r1  = r2;
    st->s04 = r1;
    st->r2  = r3;
    st->s05 = r0;

    FSS(72, S2, 2, 1, 3, 0, 4, 3, 0, 1, 4);
    FSS(76, S3, 3, 0, 1, 4, 2, 0, 1, 4, 2);
    FSS(80, S4, 0, 1, 4, 2, 3, 1, 3, 0, 2);
    FSS(84, S5, 1, 3, 0, 2, 4, 3, 2, 1, 0);
    FSS(88, S6, 3, 2, 1, 0, 4, 3, 2, 4, 1);
    FSF(92, S7, 3, 2, 4, 1, 0, 0, 1, 2, 3);
    /* Output of round 24: shift register words s00..s03. */
    st->s03 = r0;
    st->s02 = r1;
    st->s01 = r2;
    st->s00 = r3;

    /*
     * Mark the keystream buffer as fully consumed, so the next call to
     * sosemanuk_crypt() generates a fresh block before using st->buf.
     */
    st->ptr = sizeof(st->buf);

    /* The helper macros are local to this function. */
#undef KA
#undef FSS
#undef FSF

    return CRYPT_OK;
}
443 
/*
 * Multiplication by alpha: alpha * x = T32(x << 8) ^ mul_a[x >> 24]
 *
 * Lookup table with 256 entries (64 rows of 4), one for every possible
 * value of the most significant byte of a 32-bit word. In this file it is
 * read only through the MUL_A() macro in _sosemanuk_internal().
 */
static const ulong32 mul_a[] = {
    0x00000000, 0xE19FCF13, 0x6B973726, 0x8A08F835,
    0xD6876E4C, 0x3718A15F, 0xBD10596A, 0x5C8F9679,
    0x05A7DC98, 0xE438138B, 0x6E30EBBE, 0x8FAF24AD,
    0xD320B2D4, 0x32BF7DC7, 0xB8B785F2, 0x59284AE1,
    0x0AE71199, 0xEB78DE8A, 0x617026BF, 0x80EFE9AC,
    0xDC607FD5, 0x3DFFB0C6, 0xB7F748F3, 0x566887E0,
    0x0F40CD01, 0xEEDF0212, 0x64D7FA27, 0x85483534,
    0xD9C7A34D, 0x38586C5E, 0xB250946B, 0x53CF5B78,
    0x1467229B, 0xF5F8ED88, 0x7FF015BD, 0x9E6FDAAE,
    0xC2E04CD7, 0x237F83C4, 0xA9777BF1, 0x48E8B4E2,
    0x11C0FE03, 0xF05F3110, 0x7A57C925, 0x9BC80636,
    0xC747904F, 0x26D85F5C, 0xACD0A769, 0x4D4F687A,
    0x1E803302, 0xFF1FFC11, 0x75170424, 0x9488CB37,
    0xC8075D4E, 0x2998925D, 0xA3906A68, 0x420FA57B,
    0x1B27EF9A, 0xFAB82089, 0x70B0D8BC, 0x912F17AF,
    0xCDA081D6, 0x2C3F4EC5, 0xA637B6F0, 0x47A879E3,
    0x28CE449F, 0xC9518B8C, 0x435973B9, 0xA2C6BCAA,
    0xFE492AD3, 0x1FD6E5C0, 0x95DE1DF5, 0x7441D2E6,
    0x2D699807, 0xCCF65714, 0x46FEAF21, 0xA7616032,
    0xFBEEF64B, 0x1A713958, 0x9079C16D, 0x71E60E7E,
    0x22295506, 0xC3B69A15, 0x49BE6220, 0xA821AD33,
    0xF4AE3B4A, 0x1531F459, 0x9F390C6C, 0x7EA6C37F,
    0x278E899E, 0xC611468D, 0x4C19BEB8, 0xAD8671AB,
    0xF109E7D2, 0x109628C1, 0x9A9ED0F4, 0x7B011FE7,
    0x3CA96604, 0xDD36A917, 0x573E5122, 0xB6A19E31,
    0xEA2E0848, 0x0BB1C75B, 0x81B93F6E, 0x6026F07D,
    0x390EBA9C, 0xD891758F, 0x52998DBA, 0xB30642A9,
    0xEF89D4D0, 0x0E161BC3, 0x841EE3F6, 0x65812CE5,
    0x364E779D, 0xD7D1B88E, 0x5DD940BB, 0xBC468FA8,
    0xE0C919D1, 0x0156D6C2, 0x8B5E2EF7, 0x6AC1E1E4,
    0x33E9AB05, 0xD2766416, 0x587E9C23, 0xB9E15330,
    0xE56EC549, 0x04F10A5A, 0x8EF9F26F, 0x6F663D7C,
    0x50358897, 0xB1AA4784, 0x3BA2BFB1, 0xDA3D70A2,
    0x86B2E6DB, 0x672D29C8, 0xED25D1FD, 0x0CBA1EEE,
    0x5592540F, 0xB40D9B1C, 0x3E056329, 0xDF9AAC3A,
    0x83153A43, 0x628AF550, 0xE8820D65, 0x091DC276,
    0x5AD2990E, 0xBB4D561D, 0x3145AE28, 0xD0DA613B,
    0x8C55F742, 0x6DCA3851, 0xE7C2C064, 0x065D0F77,
    0x5F754596, 0xBEEA8A85, 0x34E272B0, 0xD57DBDA3,
    0x89F22BDA, 0x686DE4C9, 0xE2651CFC, 0x03FAD3EF,
    0x4452AA0C, 0xA5CD651F, 0x2FC59D2A, 0xCE5A5239,
    0x92D5C440, 0x734A0B53, 0xF942F366, 0x18DD3C75,
    0x41F57694, 0xA06AB987, 0x2A6241B2, 0xCBFD8EA1,
    0x977218D8, 0x76EDD7CB, 0xFCE52FFE, 0x1D7AE0ED,
    0x4EB5BB95, 0xAF2A7486, 0x25228CB3, 0xC4BD43A0,
    0x9832D5D9, 0x79AD1ACA, 0xF3A5E2FF, 0x123A2DEC,
    0x4B12670D, 0xAA8DA81E, 0x2085502B, 0xC11A9F38,
    0x9D950941, 0x7C0AC652, 0xF6023E67, 0x179DF174,
    0x78FBCC08, 0x9964031B, 0x136CFB2E, 0xF2F3343D,
    0xAE7CA244, 0x4FE36D57, 0xC5EB9562, 0x24745A71,
    0x7D5C1090, 0x9CC3DF83, 0x16CB27B6, 0xF754E8A5,
    0xABDB7EDC, 0x4A44B1CF, 0xC04C49FA, 0x21D386E9,
    0x721CDD91, 0x93831282, 0x198BEAB7, 0xF81425A4,
    0xA49BB3DD, 0x45047CCE, 0xCF0C84FB, 0x2E934BE8,
    0x77BB0109, 0x9624CE1A, 0x1C2C362F, 0xFDB3F93C,
    0xA13C6F45, 0x40A3A056, 0xCAAB5863, 0x2B349770,
    0x6C9CEE93, 0x8D032180, 0x070BD9B5, 0xE69416A6,
    0xBA1B80DF, 0x5B844FCC, 0xD18CB7F9, 0x301378EA,
    0x693B320B, 0x88A4FD18, 0x02AC052D, 0xE333CA3E,
    0xBFBC5C47, 0x5E239354, 0xD42B6B61, 0x35B4A472,
    0x667BFF0A, 0x87E43019, 0x0DECC82C, 0xEC73073F,
    0xB0FC9146, 0x51635E55, 0xDB6BA660, 0x3AF46973,
    0x63DC2392, 0x8243EC81, 0x084B14B4, 0xE9D4DBA7,
    0xB55B4DDE, 0x54C482CD, 0xDECC7AF8, 0x3F53B5EB
};
513 
/*
 * Multiplication by 1/alpha: 1/alpha * x = (x >> 8) ^ mul_ia[x & 0xFF]
 *
 * Lookup table with 256 entries (64 rows of 4), one for every possible
 * value of the least significant byte of a 32-bit word. In this file it
 * is read only through the MUL_G() macro in _sosemanuk_internal().
 */
static const ulong32 mul_ia[] = {
    0x00000000, 0x180F40CD, 0x301E8033, 0x2811C0FE,
    0x603CA966, 0x7833E9AB, 0x50222955, 0x482D6998,
    0xC078FBCC, 0xD877BB01, 0xF0667BFF, 0xE8693B32,
    0xA04452AA, 0xB84B1267, 0x905AD299, 0x88559254,
    0x29F05F31, 0x31FF1FFC, 0x19EEDF02, 0x01E19FCF,
    0x49CCF657, 0x51C3B69A, 0x79D27664, 0x61DD36A9,
    0xE988A4FD, 0xF187E430, 0xD99624CE, 0xC1996403,
    0x89B40D9B, 0x91BB4D56, 0xB9AA8DA8, 0xA1A5CD65,
    0x5249BE62, 0x4A46FEAF, 0x62573E51, 0x7A587E9C,
    0x32751704, 0x2A7A57C9, 0x026B9737, 0x1A64D7FA,
    0x923145AE, 0x8A3E0563, 0xA22FC59D, 0xBA208550,
    0xF20DECC8, 0xEA02AC05, 0xC2136CFB, 0xDA1C2C36,
    0x7BB9E153, 0x63B6A19E, 0x4BA76160, 0x53A821AD,
    0x1B854835, 0x038A08F8, 0x2B9BC806, 0x339488CB,
    0xBBC11A9F, 0xA3CE5A52, 0x8BDF9AAC, 0x93D0DA61,
    0xDBFDB3F9, 0xC3F2F334, 0xEBE333CA, 0xF3EC7307,
    0xA492D5C4, 0xBC9D9509, 0x948C55F7, 0x8C83153A,
    0xC4AE7CA2, 0xDCA13C6F, 0xF4B0FC91, 0xECBFBC5C,
    0x64EA2E08, 0x7CE56EC5, 0x54F4AE3B, 0x4CFBEEF6,
    0x04D6876E, 0x1CD9C7A3, 0x34C8075D, 0x2CC74790,
    0x8D628AF5, 0x956DCA38, 0xBD7C0AC6, 0xA5734A0B,
    0xED5E2393, 0xF551635E, 0xDD40A3A0, 0xC54FE36D,
    0x4D1A7139, 0x551531F4, 0x7D04F10A, 0x650BB1C7,
    0x2D26D85F, 0x35299892, 0x1D38586C, 0x053718A1,
    0xF6DB6BA6, 0xEED42B6B, 0xC6C5EB95, 0xDECAAB58,
    0x96E7C2C0, 0x8EE8820D, 0xA6F942F3, 0xBEF6023E,
    0x36A3906A, 0x2EACD0A7, 0x06BD1059, 0x1EB25094,
    0x569F390C, 0x4E9079C1, 0x6681B93F, 0x7E8EF9F2,
    0xDF2B3497, 0xC724745A, 0xEF35B4A4, 0xF73AF469,
    0xBF179DF1, 0xA718DD3C, 0x8F091DC2, 0x97065D0F,
    0x1F53CF5B, 0x075C8F96, 0x2F4D4F68, 0x37420FA5,
    0x7F6F663D, 0x676026F0, 0x4F71E60E, 0x577EA6C3,
    0xE18D0321, 0xF98243EC, 0xD1938312, 0xC99CC3DF,
    0x81B1AA47, 0x99BEEA8A, 0xB1AF2A74, 0xA9A06AB9,
    0x21F5F8ED, 0x39FAB820, 0x11EB78DE, 0x09E43813,
    0x41C9518B, 0x59C61146, 0x71D7D1B8, 0x69D89175,
    0xC87D5C10, 0xD0721CDD, 0xF863DC23, 0xE06C9CEE,
    0xA841F576, 0xB04EB5BB, 0x985F7545, 0x80503588,
    0x0805A7DC, 0x100AE711, 0x381B27EF, 0x20146722,
    0x68390EBA, 0x70364E77, 0x58278E89, 0x4028CE44,
    0xB3C4BD43, 0xABCBFD8E, 0x83DA3D70, 0x9BD57DBD,
    0xD3F81425, 0xCBF754E8, 0xE3E69416, 0xFBE9D4DB,
    0x73BC468F, 0x6BB30642, 0x43A2C6BC, 0x5BAD8671,
    0x1380EFE9, 0x0B8FAF24, 0x239E6FDA, 0x3B912F17,
    0x9A34E272, 0x823BA2BF, 0xAA2A6241, 0xB225228C,
    0xFA084B14, 0xE2070BD9, 0xCA16CB27, 0xD2198BEA,
    0x5A4C19BE, 0x42435973, 0x6A52998D, 0x725DD940,
    0x3A70B0D8, 0x227FF015, 0x0A6E30EB, 0x12617026,
    0x451FD6E5, 0x5D109628, 0x750156D6, 0x6D0E161B,
    0x25237F83, 0x3D2C3F4E, 0x153DFFB0, 0x0D32BF7D,
    0x85672D29, 0x9D686DE4, 0xB579AD1A, 0xAD76EDD7,
    0xE55B844F, 0xFD54C482, 0xD545047C, 0xCD4A44B1,
    0x6CEF89D4, 0x74E0C919, 0x5CF109E7, 0x44FE492A,
    0x0CD320B2, 0x14DC607F, 0x3CCDA081, 0x24C2E04C,
    0xAC977218, 0xB49832D5, 0x9C89F22B, 0x8486B2E6,
    0xCCABDB7E, 0xD4A49BB3, 0xFCB55B4D, 0xE4BA1B80,
    0x17566887, 0x0F59284A, 0x2748E8B4, 0x3F47A879,
    0x776AC1E1, 0x6F65812C, 0x477441D2, 0x5F7B011F,
    0xD72E934B, 0xCF21D386, 0xE7301378, 0xFF3F53B5,
    0xB7123A2D, 0xAF1D7AE0, 0x870CBA1E, 0x9F03FAD3,
    0x3EA637B6, 0x26A9777B, 0x0EB8B785, 0x16B7F748,
    0x5E9A9ED0, 0x4695DE1D, 0x6E841EE3, 0x768B5E2E,
    0xFEDECC7A, 0xE6D18CB7, 0xCEC04C49, 0xD6CF0C84,
    0x9EE2651C, 0x86ED25D1, 0xAEFCE52F, 0xB6F3A5E2
};
583 
584 
585 /*
586  * Compute the next block of bits of output stream. This is equivalent
587  * to one full rotation of the shift register.
588  */
_sosemanuk_internal(sosemanuk_state * st)589 static LTC_INLINE void _sosemanuk_internal(sosemanuk_state *st)
590 {
591     /*
592      * MUL_A(x) computes alpha * x (in F_{2^32}).
593      * MUL_G(x) computes 1/alpha * x (in F_{2^32}).
594      */
595 #define MUL_A(x)    (T32((x) << 8) ^ mul_a[(x) >> 24])
596 #define MUL_G(x)    (((x) >> 8) ^ mul_ia[(x) & 0xFF])
597 
598     /*
599      * This macro computes the special multiplexer, which chooses
600      * between "x" and "x xor y", depending on the least significant
601      * bit of the control word. We use the C "?:" selection operator
602      * (which most compilers know how to optimise) except for Alpha,
603      * where the manual sign extension seems to perform equally well
604      * with DEC/Compaq/HP compiler, and much better with gcc.
605      */
606 #ifdef __alpha
607 #define XMUX(c, x, y)   ((((signed int)((c) << 31) >> 31) & (y)) ^ (x))
608 #else
609 #define XMUX(c, x, y)   (((c) & 0x1) ? ((x) ^ (y)) : (x))
610 #endif
611 
612     /*
613      * FSM() updates the finite state machine.
614      */
615 #define FSM(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9)   do { \
616         ulong32 tt, or1; \
617         tt = XMUX(r1, s ## x1, s ## x8); \
618         or1 = r1; \
619         r1 = T32(r2 + tt); \
620         tt = T32(or1 * 0x54655307); \
621         r2 = ROLc(tt, 7); \
622     } while (0)
623 
624     /*
625      * LRU updates the shift register; the dropped value is stored
626      * in variable "dd".
627      */
628 #define LRU(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, dd)   do { \
629         dd = s ## x0; \
630         s ## x0 = MUL_A(s ## x0) ^ MUL_G(s ## x3) ^ s ## x9; \
631     } while (0)
632 
633     /*
634      * CC1 stores into variable "ee" the next intermediate word
635      * (combination of the new states of the LFSR and the FSM).
636      */
637 #define CC1(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, ee)   do { \
638         ee = T32(s ## x9 + r1) ^ r2; \
639     } while (0)
640 
641     /*
642      * STEP computes one internal round. "dd" receives the "s_t"
643      * value (dropped from the LFSR) and "ee" gets the value computed
644      * from the LFSR and FSM.
645      */
646 #define STEP(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, dd, ee)   do { \
647         FSM(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9); \
648         LRU(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, dd); \
649         CC1(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, ee); \
650     } while (0)
651 
652     /*
653      * Apply one Serpent round (with the provided S-box macro), XOR
654      * the result with the "v" values, and encode the result into
655      * the destination buffer, at the provided offset. The "x*"
656      * arguments encode the output permutation of the "S" macro.
657      */
658 #define SRD(S, x0, x1, x2, x3, ooff)   do { \
659         S(u0, u1, u2, u3, u4); \
660         STORE32L(u ## x0 ^ v0, st->buf + ooff); \
661         STORE32L(u ## x1 ^ v1, st->buf + ooff +  4); \
662         STORE32L(u ## x2 ^ v2, st->buf + ooff +  8); \
663         STORE32L(u ## x3 ^ v3, st->buf + ooff + 12); \
664     } while (0)
665 
666     ulong32 s00 = st->s00;
667     ulong32 s01 = st->s01;
668     ulong32 s02 = st->s02;
669     ulong32 s03 = st->s03;
670     ulong32 s04 = st->s04;
671     ulong32 s05 = st->s05;
672     ulong32 s06 = st->s06;
673     ulong32 s07 = st->s07;
674     ulong32 s08 = st->s08;
675     ulong32 s09 = st->s09;
676     ulong32 r1 = st->r1;
677     ulong32 r2 = st->r2;
678     ulong32 u0, u1, u2, u3, u4;
679     ulong32 v0, v1, v2, v3;
680 
681     STEP(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, v0, u0);
682     STEP(01, 02, 03, 04, 05, 06, 07, 08, 09, 00, v1, u1);
683     STEP(02, 03, 04, 05, 06, 07, 08, 09, 00, 01, v2, u2);
684     STEP(03, 04, 05, 06, 07, 08, 09, 00, 01, 02, v3, u3);
685     SRD(S2, 2, 3, 1, 4, 0);
686     STEP(04, 05, 06, 07, 08, 09, 00, 01, 02, 03, v0, u0);
687     STEP(05, 06, 07, 08, 09, 00, 01, 02, 03, 04, v1, u1);
688     STEP(06, 07, 08, 09, 00, 01, 02, 03, 04, 05, v2, u2);
689     STEP(07, 08, 09, 00, 01, 02, 03, 04, 05, 06, v3, u3);
690     SRD(S2, 2, 3, 1, 4, 16);
691     STEP(08, 09, 00, 01, 02, 03, 04, 05, 06, 07, v0, u0);
692     STEP(09, 00, 01, 02, 03, 04, 05, 06, 07, 08, v1, u1);
693     STEP(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, v2, u2);
694     STEP(01, 02, 03, 04, 05, 06, 07, 08, 09, 00, v3, u3);
695     SRD(S2, 2, 3, 1, 4, 32);
696     STEP(02, 03, 04, 05, 06, 07, 08, 09, 00, 01, v0, u0);
697     STEP(03, 04, 05, 06, 07, 08, 09, 00, 01, 02, v1, u1);
698     STEP(04, 05, 06, 07, 08, 09, 00, 01, 02, 03, v2, u2);
699     STEP(05, 06, 07, 08, 09, 00, 01, 02, 03, 04, v3, u3);
700     SRD(S2, 2, 3, 1, 4, 48);
701     STEP(06, 07, 08, 09, 00, 01, 02, 03, 04, 05, v0, u0);
702     STEP(07, 08, 09, 00, 01, 02, 03, 04, 05, 06, v1, u1);
703     STEP(08, 09, 00, 01, 02, 03, 04, 05, 06, 07, v2, u2);
704     STEP(09, 00, 01, 02, 03, 04, 05, 06, 07, 08, v3, u3);
705     SRD(S2, 2, 3, 1, 4, 64);
706 
707     st->s00 = s00;
708     st->s01 = s01;
709     st->s02 = s02;
710     st->s03 = s03;
711     st->s04 = s04;
712     st->s05 = s05;
713     st->s06 = s06;
714     st->s07 = s07;
715     st->s08 = s08;
716     st->s09 = s09;
717     st->r1 = r1;
718     st->r2 = r2;
719 }
720 
/*
 * XOR two byte buffers: out[k] = in1[k] ^ in2[k] for every k below
 * "datalen". out[] may be exactly in1[] and/or exactly in2[] (total
 * overlap); a partial overlap of out[] with either input is not allowed.
 * A "datalen" of 0 leaves out[] untouched.
 */
static LTC_INLINE void _xorbuf(const unsigned char *in1, const unsigned char *in2,
    unsigned char *out, unsigned long datalen)
{
    unsigned long k;

    for (k = 0; k < datalen; k++) {
        out[k] = in1[k] ^ in2[k];
    }
}
734 
735 
736 /*
737  * Cipher operation, as a stream cipher: data is read from the "in"
738  * buffer, combined by XOR with the stream, and the result is written
739  * in the "out" buffer. "in" and "out" must be either equal, or
740  * reference distinct buffers (no partial overlap is allowed).
741  * @param st       The Sosemanuk state
742  * @param in       Data in
743  * @param inlen    Length of data in bytes
744  * @param out      Data out
745  * @return CRYPT_OK on success
746  */
sosemanuk_crypt(sosemanuk_state * st,const unsigned char * in,unsigned long inlen,unsigned char * out)747 int sosemanuk_crypt(sosemanuk_state *st,
748                         const unsigned char *in, unsigned long inlen, unsigned char *out)
749 {
750     LTC_ARGCHK(st  != NULL);
751     LTC_ARGCHK(in  != NULL);
752     LTC_ARGCHK(out != NULL);
753 
754     if (st->ptr < (sizeof(st->buf))) {
755         unsigned long rlen = (sizeof(st->buf)) - st->ptr;
756 
757         if (rlen > inlen) {
758             rlen = inlen;
759         }
760         _xorbuf(st->buf + st->ptr, in, out, rlen);
761         in += rlen;
762         out += rlen;
763         inlen -= rlen;
764         st->ptr += rlen;
765     }
766     while (inlen > 0) {
767         _sosemanuk_internal(st);
768         if (inlen >= sizeof(st->buf)) {
769             _xorbuf(st->buf, in, out, sizeof(st->buf));
770             in += sizeof(st->buf);
771             out += sizeof(st->buf);
772             inlen -= sizeof(st->buf);
773         } else {
774             _xorbuf(st->buf, in, out, inlen);
775             st->ptr = inlen;
776             inlen = 0;
777         }
778     }
779     return CRYPT_OK;
780 }
781 
782 
783 
784 /*
785  * Cipher operation, as a PRNG: the provided output buffer is filled with
786  * pseudo-random bytes as output from the stream cipher.
787  * @param st       The Sosemanuk state
788  * @param out      Data out
789  * @param outlen   Length of output in bytes
790  * @return CRYPT_OK on success
791  */
sosemanuk_keystream(sosemanuk_state * st,unsigned char * out,unsigned long outlen)792 int sosemanuk_keystream(sosemanuk_state *st, unsigned char *out, unsigned long outlen)
793 {
794    if (outlen == 0) return CRYPT_OK; /* nothing to do */
795    LTC_ARGCHK(out != NULL);
796    XMEMSET(out, 0, outlen);
797    return sosemanuk_crypt(st, out, outlen, out);
798 }
799 
800 
801 /*
802  * Terminate and clear Sosemanuk key context
803  * @param st      The Sosemanuk state
804  * @return CRYPT_OK on success
805  */
sosemanuk_done(sosemanuk_state * st)806 int sosemanuk_done(sosemanuk_state *st)
807 {
808    LTC_ARGCHK(st != NULL);
809    XMEMSET(st, 0, sizeof(sosemanuk_state));
810    return CRYPT_OK;
811 }
812 
813 
814 #endif
815 
816 /* ref:         $Format:%D$ */
817 /* git commit:  $Format:%H$ */
818 /* commit time: $Format:%ai$ */
819