Issue #16113: integrate SHA-3 (Keccak) patch from http://hg.python.org/sandbox/cheimes

This commit is contained in:
Christian Heimes 2012-10-06 02:23:36 +02:00
parent 8c6db45d3e
commit 4a0270d82b
30 changed files with 7971 additions and 28 deletions

View File

@ -51,9 +51,13 @@ concatenation of the data fed to it so far using the :meth:`digest` or
.. index:: single: OpenSSL; (use in module hashlib)
Constructors for hash algorithms that are always present in this module are
:func:`md5`, :func:`sha1`, :func:`sha224`, :func:`sha256`, :func:`sha384`, and
:func:`sha512`. Additional algorithms may also be available depending upon the
OpenSSL library that Python uses on your platform.
:func:`md5`, :func:`sha1`, :func:`sha224`, :func:`sha256`, :func:`sha384`,
:func:`sha512`, :func:`sha3_224`, :func:`sha3_256`, :func:`sha3_384`, and
:func:`sha3_512`. Additional algorithms may also be available depending upon
the OpenSSL library that Python uses on your platform.
.. versionchanged:: 3.4
Added the SHA-3 family of hash algorithms.
For example, to obtain the digest of the byte string ``b'Nobody inspects the
spammish repetition'``::

View File

@ -658,6 +658,25 @@ The :mod:`select` and contains the following notice for the kqueue interface::
SUCH DAMAGE.
SHA-3
-----
The modules :mod:`_sha3` and :mod:`hashlib` use the reference
implementation of Keccak. The files at :file:`Modules/_sha3/keccak/` contain
the following note::
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
strtod and dtoa
---------------

View File

@ -101,7 +101,7 @@ Implementation improvements:
Significantly Improved Library Modules:
* None yet.
* SHA-3 (Keccak) support for :mod:`hashlib`.
Security improvements:

View File

@ -54,7 +54,8 @@ More condensed:
# This tuple and __get_builtin_constructor() must be modified if a new
# always available algorithm is added.
__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512')
__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512',
'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512')
algorithms_guaranteed = set(__always_supported)
algorithms_available = set(__always_supported)
@ -85,6 +86,18 @@ def __get_builtin_constructor(name):
return _sha512.sha512
elif bs == '384':
return _sha512.sha384
elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
'SHA3_224', 'SHA3_256', 'SHA3_384', 'SHA3_512'}:
import _sha3
bs = name[5:]
if bs == '224':
return _sha3.sha3_224
elif bs == '256':
return _sha3.sha3_256
elif bs == '384':
return _sha3.sha3_384
elif bs == '512':
return _sha3.sha3_512
except ImportError:
pass # no extension module, this hash is unsupported.

View File

@ -36,7 +36,10 @@ def hexstr(s):
class HashLibTestCase(unittest.TestCase):
supported_hash_names = ( 'md5', 'MD5', 'sha1', 'SHA1',
'sha224', 'SHA224', 'sha256', 'SHA256',
'sha384', 'SHA384', 'sha512', 'SHA512' )
'sha384', 'SHA384', 'sha512', 'SHA512',
'sha3_224', 'sha3_256', 'sha3_384',
'sha3_512', 'SHA3_224', 'SHA3_256',
'SHA3_384', 'SHA3_512' )
# Issue #14693: fallback modules are always compiled under POSIX
_warn_on_extension_import = os.name == 'posix' or COMPILED_WITH_PYDEBUG
@ -93,6 +96,12 @@ class HashLibTestCase(unittest.TestCase):
if _sha512:
self.constructors_to_test['sha384'].add(_sha512.sha384)
self.constructors_to_test['sha512'].add(_sha512.sha512)
_sha3 = self._conditional_import_module('_sha3')
if _sha3:
self.constructors_to_test['sha3_224'].add(_sha3.sha3_224)
self.constructors_to_test['sha3_256'].add(_sha3.sha3_256)
self.constructors_to_test['sha3_384'].add(_sha3.sha3_384)
self.constructors_to_test['sha3_512'].add(_sha3.sha3_512)
super(HashLibTestCase, self).__init__(*args, **kwargs)
@ -158,6 +167,7 @@ class HashLibTestCase(unittest.TestCase):
self.assertEqual(m1.digest(), m2.digest())
def check(self, name, data, digest):
digest = digest.lower()
constructors = self.constructors_to_test[name]
# 2 is for hashlib.name(...) and hashlib.new(name, ...)
self.assertGreaterEqual(len(constructors), 2)
@ -183,6 +193,10 @@ class HashLibTestCase(unittest.TestCase):
self.check_no_unicode('sha256')
self.check_no_unicode('sha384')
self.check_no_unicode('sha512')
self.check_no_unicode('sha3_224')
self.check_no_unicode('sha3_256')
self.check_no_unicode('sha3_384')
self.check_no_unicode('sha3_512')
def test_case_md5_0(self):
self.check('md5', b'', 'd41d8cd98f00b204e9800998ecf8427e')
@ -318,11 +332,122 @@ class HashLibTestCase(unittest.TestCase):
"e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973eb"+
"de0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b")
# SHA-3 family
def test_case_sha3_224_0(self):
self.check('sha3_224', b"",
"F71837502BA8E10837BDD8D365ADB85591895602FC552B48B7390ABD")
def test_case_sha3_224_1(self):
self.check('sha3_224', bytes.fromhex("CC"),
"A9CAB59EB40A10B246290F2D6086E32E3689FAF1D26B470C899F2802")
def test_case_sha3_224_2(self):
self.check('sha3_224', bytes.fromhex("41FB"),
"615BA367AFDC35AAC397BC7EB5D58D106A734B24986D5D978FEFD62C")
def test_case_sha3_224_3(self):
self.check('sha3_224', bytes.fromhex(
"433C5303131624C0021D868A30825475E8D0BD3052A022180398F4CA4423B9"+
"8214B6BEAAC21C8807A2C33F8C93BD42B092CC1B06CEDF3224D5ED1EC29784"+
"444F22E08A55AA58542B524B02CD3D5D5F6907AFE71C5D7462224A3F9D9E53"+
"E7E0846DCBB4CE"),
"62B10F1B6236EBC2DA72957742A8D4E48E213B5F8934604BFD4D2C3A")
@bigmemtest(size=_4G + 5, memuse=1)
def test_case_sha3_224_huge(self, size):
if size == _4G + 5:
try:
self.check('sha3_224', b'A'*size,
'58ef60057c9dddb6a87477e9ace5a26f0d9db01881cf9b10a9f8c224')
except OverflowError:
pass # 32-bit arch
def test_case_sha3_256_0(self):
self.check('sha3_256', b"",
"C5D2460186F7233C927E7DB2DCC703C0E500B653CA82273B7BFAD8045D85A470")
def test_case_sha3_256_1(self):
self.check('sha3_256', bytes.fromhex("CC"),
"EEAD6DBFC7340A56CAEDC044696A168870549A6A7F6F56961E84A54BD9970B8A")
def test_case_sha3_256_2(self):
self.check('sha3_256', bytes.fromhex("41FB"),
"A8EACEDA4D47B3281A795AD9E1EA2122B407BAF9AABCB9E18B5717B7873537D2")
def test_case_sha3_256_3(self):
self.check('sha3_256', bytes.fromhex(
"433C5303131624C0021D868A30825475E8D0BD3052A022180398F4CA4423B9"+
"8214B6BEAAC21C8807A2C33F8C93BD42B092CC1B06CEDF3224D5ED1EC29784"+
"444F22E08A55AA58542B524B02CD3D5D5F6907AFE71C5D7462224A3F9D9E53"+
"E7E0846DCBB4CE"),
"CE87A5173BFFD92399221658F801D45C294D9006EE9F3F9D419C8D427748DC41")
def test_case_sha3_384_0(self):
self.check('sha3_384', b"",
"2C23146A63A29ACF99E73B88F8C24EAA7DC60AA771780CCC006AFBFA8FE2479B"+
"2DD2B21362337441AC12B515911957FF")
def test_case_sha3_384_1(self):
self.check('sha3_384', bytes.fromhex("CC"),
"1B84E62A46E5A201861754AF5DC95C4A1A69CAF4A796AE405680161E29572641"+
"F5FA1E8641D7958336EE7B11C58F73E9")
def test_case_sha3_384_2(self):
self.check('sha3_384', bytes.fromhex("41FB"),
"495CCE2714CD72C8C53C3363D22C58B55960FE26BE0BF3BBC7A3316DD563AD1D"+
"B8410E75EEFEA655E39D4670EC0B1792")
def test_case_sha3_384_3(self):
self.check('sha3_384', bytes.fromhex(
"433C5303131624C0021D868A30825475E8D0BD3052A022180398F4CA4423B9"+
"8214B6BEAAC21C8807A2C33F8C93BD42B092CC1B06CEDF3224D5ED1EC29784"+
"444F22E08A55AA58542B524B02CD3D5D5F6907AFE71C5D7462224A3F9D9E53"+
"E7E0846DCBB4CE"),
"135114508DD63E279E709C26F7817C0482766CDE49132E3EDF2EEDD8996F4E35"+
"96D184100B384868249F1D8B8FDAA2C9")
def test_case_sha3_512_0(self):
self.check('sha3_512', b"",
"0EAB42DE4C3CEB9235FC91ACFFE746B29C29A8C366B7C60E4E67C466F36A4304"+
"C00FA9CAF9D87976BA469BCBE06713B435F091EF2769FB160CDAB33D3670680E")
def test_case_sha3_512_1(self):
self.check('sha3_512', bytes.fromhex("CC"),
"8630C13CBD066EA74BBE7FE468FEC1DEE10EDC1254FB4C1B7C5FD69B646E4416"+
"0B8CE01D05A0908CA790DFB080F4B513BC3B6225ECE7A810371441A5AC666EB9")
def test_case_sha3_512_2(self):
self.check('sha3_512', bytes.fromhex("41FB"),
"551DA6236F8B96FCE9F97F1190E901324F0B45E06DBBB5CDB8355D6ED1DC34B3"+
"F0EAE7DCB68622FF232FA3CECE0D4616CDEB3931F93803662A28DF1CD535B731")
def test_case_sha3_512_3(self):
self.check('sha3_512', bytes.fromhex(
"433C5303131624C0021D868A30825475E8D0BD3052A022180398F4CA4423B9"+
"8214B6BEAAC21C8807A2C33F8C93BD42B092CC1B06CEDF3224D5ED1EC29784"+
"444F22E08A55AA58542B524B02CD3D5D5F6907AFE71C5D7462224A3F9D9E53"+
"E7E0846DCBB4CE"),
"527D28E341E6B14F4684ADB4B824C496C6482E51149565D3D17226828884306B"+
"51D6148A72622C2B75F5D3510B799D8BDC03EAEDE453676A6EC8FE03A1AD0EAB")
def test_gil(self):
# Check things work fine with an input larger than the size required
# for multithreaded operation (which is hardwired to 2048).
gil_minsize = 2048
for name in self.supported_hash_names:
m = hashlib.new(name)
m.update(b'1')
m.update(b'#' * gil_minsize)
m.update(b'1')
m = hashlib.new(name, b'x' * gil_minsize)
m.update(b'1')
m = hashlib.md5()
m.update(b'1')
m.update(b'#' * gil_minsize)

View File

@ -17,24 +17,6 @@
#include "structmember.h"
#include "hashlib.h"
#ifdef WITH_THREAD
#include "pythread.h"
#define ENTER_HASHLIB(obj) \
if ((obj)->lock) { \
if (!PyThread_acquire_lock((obj)->lock, 0)) { \
Py_BEGIN_ALLOW_THREADS \
PyThread_acquire_lock((obj)->lock, 1); \
Py_END_ALLOW_THREADS \
} \
}
#define LEAVE_HASHLIB(obj) \
if ((obj)->lock) { \
PyThread_release_lock((obj)->lock); \
}
#else
#define ENTER_HASHLIB(obj)
#define LEAVE_HASHLIB(obj)
#endif
/* EVP is the preferred interface to hashing in OpenSSL */
#include <openssl/evp.h>
@ -43,10 +25,6 @@
#define MUNCH_SIZE INT_MAX
/* TODO(gps): We should probably make this a module or EVPobject attribute
* to allow the user to optimize based on the platform they're using. */
#define HASHLIB_GIL_MINSIZE 2048
#ifndef HASH_OBJ_CONSTRUCTOR
#define HASH_OBJ_CONSTRUCTOR 0
#endif

49
Modules/_sha3/cleanup.py Executable file
View File

@ -0,0 +1,49 @@
#!/usr/bin/env python
# Copyright (C) 2012 Christian Heimes (christian@python.org)
# Licensed to PSF under a Contributor Agreement.
#
# cleanup Keccak sources
import os
import re
# Matches a line that begins with a C++ "//" comment; group 1 is the text
# after the slashes.
CPP1 = re.compile(r"^//(.*)")
# Matches a C++ comment introduced by " //" anywhere in a line; group 1 is the
# text after the slashes.  Written as a raw string because "\ " is not a valid
# escape sequence in an ordinary string literal.
CPP2 = re.compile(r"\ //(.*)")
# Line prefixes that cleanup() marks as "static".
STATICS = ("void ", "int ", "HashReturn ", "const UINT64 ", "UINT16 ")
# Directory holding this script, and the "keccak" source directory next to it.
HERE = os.path.dirname(os.path.abspath(__file__))
KECCAK = os.path.join(HERE, "keccak")
def getfiles():
    """Yield the full path of each regular file directly inside KECCAK.

    Entries that are not regular files (for example subdirectories) are
    skipped; the directory is not walked recursively.
    """
    for entry in os.listdir(KECCAK):
        path = os.path.join(KECCAK, entry)
        if not os.path.isfile(path):
            continue
        yield path
def cleanup(f):
    """Return the cleaned-up source text for the lines yielded by *f*.

    Each line is handled by the first rule that applies:

    * lines starting with one of the STATICS prefixes get "static " prepended
      (marks functions and global data as static);
    * the "typedef unsigned long long int" line and any line mentioning
      "brg_endian.h" are wrapped in a C comment (we have our own UINT64 and
      do not use that header);
    * on every other line C++ "//" comments are rewritten as ANSI C comments.
    """
    out = []
    for line in f:
        if line.startswith(STATICS):
            line = "static " + line
        elif (line.startswith("typedef unsigned long long int")
                or "brg_endian.h" in line):
            line = "/* %s */\n" % line.strip()
        else:
            # leading comments first, then comments that follow code
            line = CPP2.sub(r" /* \1 */", CPP1.sub(r"/* \1 */", line))
        out.append(line)
    return "".join(out)
# Rewrite every Keccak source file in place: read and clean it completely
# before the same path is reopened for writing.
for path in getfiles():
    with open(path) as src:
        cleaned = cleanup(src)
    with open(path, "w") as dst:
        dst.write(cleaned)

View File

@ -0,0 +1,555 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by Ronny Van Keer,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/* Round constant table for the `rounds` macro: 2*24 32-bit words.
 * The macro walks the table in order through pRoundConstants and consumes one
 * word each time it finalizes Eba0, Eba1, Aba0 and Aba1, i.e. four words per
 * loop iteration over 12 iterations.
 * NOTE(review): presumably each pair of words is one 64-bit Keccak-f[1600]
 * round constant split for the bit-interleaved 32-bit representation --
 * confirm against the Keccak reference before editing any value. */
static const UINT32 KeccakF1600RoundConstants_int2[2*24] =
{
0x00000001UL, 0x00000000UL,
0x00000000UL, 0x00000089UL,
0x00000000UL, 0x8000008bUL,
0x00000000UL, 0x80008080UL,
0x00000001UL, 0x0000008bUL,
0x00000001UL, 0x00008000UL,
0x00000001UL, 0x80008088UL,
0x00000001UL, 0x80000082UL,
0x00000000UL, 0x0000000bUL,
0x00000000UL, 0x0000000aUL,
0x00000001UL, 0x00008082UL,
0x00000000UL, 0x00008003UL,
0x00000001UL, 0x0000808bUL,
0x00000001UL, 0x8000000bUL,
0x00000001UL, 0x8000008aUL,
0x00000001UL, 0x80000081UL,
0x00000000UL, 0x80000081UL,
0x00000000UL, 0x80000008UL,
0x00000000UL, 0x00000083UL,
0x00000000UL, 0x80008003UL,
0x00000001UL, 0x80008088UL,
0x00000000UL, 0x80000088UL,
0x00000001UL, 0x00008000UL,
0x00000000UL, 0x80008082UL
};
#undef rounds
#define rounds \
{ \
UINT32 Da0, De0, Di0, Do0, Du0; \
UINT32 Da1, De1, Di1, Do1, Du1; \
UINT32 Ba, Be, Bi, Bo, Bu; \
UINT32 Aba0, Abe0, Abi0, Abo0, Abu0; \
UINT32 Aba1, Abe1, Abi1, Abo1, Abu1; \
UINT32 Aga0, Age0, Agi0, Ago0, Agu0; \
UINT32 Aga1, Age1, Agi1, Ago1, Agu1; \
UINT32 Aka0, Ake0, Aki0, Ako0, Aku0; \
UINT32 Aka1, Ake1, Aki1, Ako1, Aku1; \
UINT32 Ama0, Ame0, Ami0, Amo0, Amu0; \
UINT32 Ama1, Ame1, Ami1, Amo1, Amu1; \
UINT32 Asa0, Ase0, Asi0, Aso0, Asu0; \
UINT32 Asa1, Ase1, Asi1, Aso1, Asu1; \
UINT32 Cw, Cx, Cy, Cz; \
UINT32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0; \
UINT32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1; \
UINT32 Ega0, Ege0, Egi0, Ego0, Egu0; \
UINT32 Ega1, Ege1, Egi1, Ego1, Egu1; \
UINT32 Eka0, Eke0, Eki0, Eko0, Eku0; \
UINT32 Eka1, Eke1, Eki1, Eko1, Eku1; \
UINT32 Ema0, Eme0, Emi0, Emo0, Emu0; \
UINT32 Ema1, Eme1, Emi1, Emo1, Emu1; \
UINT32 Esa0, Ese0, Esi0, Eso0, Esu0; \
UINT32 Esa1, Ese1, Esi1, Eso1, Esu1; \
const UINT32 * pRoundConstants = KeccakF1600RoundConstants_int2; \
UINT32 i; \
\
copyFromState(A, state) \
\
for( i = 12; i != 0; --i ) { \
Cx = Abu0^Agu0^Aku0^Amu0^Asu0; \
Du1 = Abe1^Age1^Ake1^Ame1^Ase1; \
Da0 = Cx^ROL32(Du1, 1); \
Cz = Abu1^Agu1^Aku1^Amu1^Asu1; \
Du0 = Abe0^Age0^Ake0^Ame0^Ase0; \
Da1 = Cz^Du0; \
\
Cw = Abi0^Agi0^Aki0^Ami0^Asi0; \
Do0 = Cw^ROL32(Cz, 1); \
Cy = Abi1^Agi1^Aki1^Ami1^Asi1; \
Do1 = Cy^Cx; \
\
Cx = Aba0^Aga0^Aka0^Ama0^Asa0; \
De0 = Cx^ROL32(Cy, 1); \
Cz = Aba1^Aga1^Aka1^Ama1^Asa1; \
De1 = Cz^Cw; \
\
Cy = Abo1^Ago1^Ako1^Amo1^Aso1; \
Di0 = Du0^ROL32(Cy, 1); \
Cw = Abo0^Ago0^Ako0^Amo0^Aso0; \
Di1 = Du1^Cw; \
\
Du0 = Cw^ROL32(Cz, 1); \
Du1 = Cy^Cx; \
\
Aba0 ^= Da0; \
Ba = Aba0; \
Age0 ^= De0; \
Be = ROL32(Age0, 22); \
Aki1 ^= Di1; \
Bi = ROL32(Aki1, 22); \
Amo1 ^= Do1; \
Bo = ROL32(Amo1, 11); \
Asu0 ^= Du0; \
Bu = ROL32(Asu0, 7); \
Eba0 = Ba ^((~Be)& Bi ) ^ *(pRoundConstants++); \
Ebe0 = Be ^((~Bi)& Bo ); \
Ebi0 = Bi ^((~Bo)& Bu ); \
Ebo0 = Bo ^((~Bu)& Ba ); \
Ebu0 = Bu ^((~Ba)& Be ); \
\
Abo0 ^= Do0; \
Ba = ROL32(Abo0, 14); \
Agu0 ^= Du0; \
Be = ROL32(Agu0, 10); \
Aka1 ^= Da1; \
Bi = ROL32(Aka1, 2); \
Ame1 ^= De1; \
Bo = ROL32(Ame1, 23); \
Asi1 ^= Di1; \
Bu = ROL32(Asi1, 31); \
Ega0 = Ba ^((~Be)& Bi ); \
Ege0 = Be ^((~Bi)& Bo ); \
Egi0 = Bi ^((~Bo)& Bu ); \
Ego0 = Bo ^((~Bu)& Ba ); \
Egu0 = Bu ^((~Ba)& Be ); \
\
Abe1 ^= De1; \
Ba = ROL32(Abe1, 1); \
Agi0 ^= Di0; \
Be = ROL32(Agi0, 3); \
Ako1 ^= Do1; \
Bi = ROL32(Ako1, 13); \
Amu0 ^= Du0; \
Bo = ROL32(Amu0, 4); \
Asa0 ^= Da0; \
Bu = ROL32(Asa0, 9); \
Eka0 = Ba ^((~Be)& Bi ); \
Eke0 = Be ^((~Bi)& Bo ); \
Eki0 = Bi ^((~Bo)& Bu ); \
Eko0 = Bo ^((~Bu)& Ba ); \
Eku0 = Bu ^((~Ba)& Be ); \
\
Abu1 ^= Du1; \
Ba = ROL32(Abu1, 14); \
Aga0 ^= Da0; \
Be = ROL32(Aga0, 18); \
Ake0 ^= De0; \
Bi = ROL32(Ake0, 5); \
Ami1 ^= Di1; \
Bo = ROL32(Ami1, 8); \
Aso0 ^= Do0; \
Bu = ROL32(Aso0, 28); \
Ema0 = Ba ^((~Be)& Bi ); \
Eme0 = Be ^((~Bi)& Bo ); \
Emi0 = Bi ^((~Bo)& Bu ); \
Emo0 = Bo ^((~Bu)& Ba ); \
Emu0 = Bu ^((~Ba)& Be ); \
\
Abi0 ^= Di0; \
Ba = ROL32(Abi0, 31); \
Ago1 ^= Do1; \
Be = ROL32(Ago1, 28); \
Aku1 ^= Du1; \
Bi = ROL32(Aku1, 20); \
Ama1 ^= Da1; \
Bo = ROL32(Ama1, 21); \
Ase0 ^= De0; \
Bu = ROL32(Ase0, 1); \
Esa0 = Ba ^((~Be)& Bi ); \
Ese0 = Be ^((~Bi)& Bo ); \
Esi0 = Bi ^((~Bo)& Bu ); \
Eso0 = Bo ^((~Bu)& Ba ); \
Esu0 = Bu ^((~Ba)& Be ); \
\
Aba1 ^= Da1; \
Ba = Aba1; \
Age1 ^= De1; \
Be = ROL32(Age1, 22); \
Aki0 ^= Di0; \
Bi = ROL32(Aki0, 21); \
Amo0 ^= Do0; \
Bo = ROL32(Amo0, 10); \
Asu1 ^= Du1; \
Bu = ROL32(Asu1, 7); \
Eba1 = Ba ^((~Be)& Bi ); \
Eba1 ^= *(pRoundConstants++); \
Ebe1 = Be ^((~Bi)& Bo ); \
Ebi1 = Bi ^((~Bo)& Bu ); \
Ebo1 = Bo ^((~Bu)& Ba ); \
Ebu1 = Bu ^((~Ba)& Be ); \
\
Abo1 ^= Do1; \
Ba = ROL32(Abo1, 14); \
Agu1 ^= Du1; \
Be = ROL32(Agu1, 10); \
Aka0 ^= Da0; \
Bi = ROL32(Aka0, 1); \
Ame0 ^= De0; \
Bo = ROL32(Ame0, 22); \
Asi0 ^= Di0; \
Bu = ROL32(Asi0, 30); \
Ega1 = Ba ^((~Be)& Bi ); \
Ege1 = Be ^((~Bi)& Bo ); \
Egi1 = Bi ^((~Bo)& Bu ); \
Ego1 = Bo ^((~Bu)& Ba ); \
Egu1 = Bu ^((~Ba)& Be ); \
\
Abe0 ^= De0; \
Ba = Abe0; \
Agi1 ^= Di1; \
Be = ROL32(Agi1, 3); \
Ako0 ^= Do0; \
Bi = ROL32(Ako0, 12); \
Amu1 ^= Du1; \
Bo = ROL32(Amu1, 4); \
Asa1 ^= Da1; \
Bu = ROL32(Asa1, 9); \
Eka1 = Ba ^((~Be)& Bi ); \
Eke1 = Be ^((~Bi)& Bo ); \
Eki1 = Bi ^((~Bo)& Bu ); \
Eko1 = Bo ^((~Bu)& Ba ); \
Eku1 = Bu ^((~Ba)& Be ); \
\
Abu0 ^= Du0; \
Ba = ROL32(Abu0, 13); \
Aga1 ^= Da1; \
Be = ROL32(Aga1, 18); \
Ake1 ^= De1; \
Bi = ROL32(Ake1, 5); \
Ami0 ^= Di0; \
Bo = ROL32(Ami0, 7); \
Aso1 ^= Do1; \
Bu = ROL32(Aso1, 28); \
Ema1 = Ba ^((~Be)& Bi ); \
Eme1 = Be ^((~Bi)& Bo ); \
Emi1 = Bi ^((~Bo)& Bu ); \
Emo1 = Bo ^((~Bu)& Ba ); \
Emu1 = Bu ^((~Ba)& Be ); \
\
Abi1 ^= Di1; \
Ba = ROL32(Abi1, 31); \
Ago0 ^= Do0; \
Be = ROL32(Ago0, 27); \
Aku0 ^= Du0; \
Bi = ROL32(Aku0, 19); \
Ama0 ^= Da0; \
Bo = ROL32(Ama0, 20); \
Ase1 ^= De1; \
Bu = ROL32(Ase1, 1); \
Esa1 = Ba ^((~Be)& Bi ); \
Ese1 = Be ^((~Bi)& Bo ); \
Esi1 = Bi ^((~Bo)& Bu ); \
Eso1 = Bo ^((~Bu)& Ba ); \
Esu1 = Bu ^((~Ba)& Be ); \
\
Cx = Ebu0^Egu0^Eku0^Emu0^Esu0; \
Du1 = Ebe1^Ege1^Eke1^Eme1^Ese1; \
Da0 = Cx^ROL32(Du1, 1); \
Cz = Ebu1^Egu1^Eku1^Emu1^Esu1; \
Du0 = Ebe0^Ege0^Eke0^Eme0^Ese0; \
Da1 = Cz^Du0; \
\
Cw = Ebi0^Egi0^Eki0^Emi0^Esi0; \
Do0 = Cw^ROL32(Cz, 1); \
Cy = Ebi1^Egi1^Eki1^Emi1^Esi1; \
Do1 = Cy^Cx; \
\
Cx = Eba0^Ega0^Eka0^Ema0^Esa0; \
De0 = Cx^ROL32(Cy, 1); \
Cz = Eba1^Ega1^Eka1^Ema1^Esa1; \
De1 = Cz^Cw; \
\
Cy = Ebo1^Ego1^Eko1^Emo1^Eso1; \
Di0 = Du0^ROL32(Cy, 1); \
Cw = Ebo0^Ego0^Eko0^Emo0^Eso0; \
Di1 = Du1^Cw; \
\
Du0 = Cw^ROL32(Cz, 1); \
Du1 = Cy^Cx; \
\
Eba0 ^= Da0; \
Ba = Eba0; \
Ege0 ^= De0; \
Be = ROL32(Ege0, 22); \
Eki1 ^= Di1; \
Bi = ROL32(Eki1, 22); \
Emo1 ^= Do1; \
Bo = ROL32(Emo1, 11); \
Esu0 ^= Du0; \
Bu = ROL32(Esu0, 7); \
Aba0 = Ba ^((~Be)& Bi ); \
Aba0 ^= *(pRoundConstants++); \
Abe0 = Be ^((~Bi)& Bo ); \
Abi0 = Bi ^((~Bo)& Bu ); \
Abo0 = Bo ^((~Bu)& Ba ); \
Abu0 = Bu ^((~Ba)& Be ); \
\
Ebo0 ^= Do0; \
Ba = ROL32(Ebo0, 14); \
Egu0 ^= Du0; \
Be = ROL32(Egu0, 10); \
Eka1 ^= Da1; \
Bi = ROL32(Eka1, 2); \
Eme1 ^= De1; \
Bo = ROL32(Eme1, 23); \
Esi1 ^= Di1; \
Bu = ROL32(Esi1, 31); \
Aga0 = Ba ^((~Be)& Bi ); \
Age0 = Be ^((~Bi)& Bo ); \
Agi0 = Bi ^((~Bo)& Bu ); \
Ago0 = Bo ^((~Bu)& Ba ); \
Agu0 = Bu ^((~Ba)& Be ); \
\
Ebe1 ^= De1; \
Ba = ROL32(Ebe1, 1); \
Egi0 ^= Di0; \
Be = ROL32(Egi0, 3); \
Eko1 ^= Do1; \
Bi = ROL32(Eko1, 13); \
Emu0 ^= Du0; \
Bo = ROL32(Emu0, 4); \
Esa0 ^= Da0; \
Bu = ROL32(Esa0, 9); \
Aka0 = Ba ^((~Be)& Bi ); \
Ake0 = Be ^((~Bi)& Bo ); \
Aki0 = Bi ^((~Bo)& Bu ); \
Ako0 = Bo ^((~Bu)& Ba ); \
Aku0 = Bu ^((~Ba)& Be ); \
\
Ebu1 ^= Du1; \
Ba = ROL32(Ebu1, 14); \
Ega0 ^= Da0; \
Be = ROL32(Ega0, 18); \
Eke0 ^= De0; \
Bi = ROL32(Eke0, 5); \
Emi1 ^= Di1; \
Bo = ROL32(Emi1, 8); \
Eso0 ^= Do0; \
Bu = ROL32(Eso0, 28); \
Ama0 = Ba ^((~Be)& Bi ); \
Ame0 = Be ^((~Bi)& Bo ); \
Ami0 = Bi ^((~Bo)& Bu ); \
Amo0 = Bo ^((~Bu)& Ba ); \
Amu0 = Bu ^((~Ba)& Be ); \
\
Ebi0 ^= Di0; \
Ba = ROL32(Ebi0, 31); \
Ego1 ^= Do1; \
Be = ROL32(Ego1, 28); \
Eku1 ^= Du1; \
Bi = ROL32(Eku1, 20); \
Ema1 ^= Da1; \
Bo = ROL32(Ema1, 21); \
Ese0 ^= De0; \
Bu = ROL32(Ese0, 1); \
Asa0 = Ba ^((~Be)& Bi ); \
Ase0 = Be ^((~Bi)& Bo ); \
Asi0 = Bi ^((~Bo)& Bu ); \
Aso0 = Bo ^((~Bu)& Ba ); \
Asu0 = Bu ^((~Ba)& Be ); \
\
Eba1 ^= Da1; \
Ba = Eba1; \
Ege1 ^= De1; \
Be = ROL32(Ege1, 22); \
Eki0 ^= Di0; \
Bi = ROL32(Eki0, 21); \
Emo0 ^= Do0; \
Bo = ROL32(Emo0, 10); \
Esu1 ^= Du1; \
Bu = ROL32(Esu1, 7); \
Aba1 = Ba ^((~Be)& Bi ); \
Aba1 ^= *(pRoundConstants++); \
Abe1 = Be ^((~Bi)& Bo ); \
Abi1 = Bi ^((~Bo)& Bu ); \
Abo1 = Bo ^((~Bu)& Ba ); \
Abu1 = Bu ^((~Ba)& Be ); \
\
Ebo1 ^= Do1; \
Ba = ROL32(Ebo1, 14); \
Egu1 ^= Du1; \
Be = ROL32(Egu1, 10); \
Eka0 ^= Da0; \
Bi = ROL32(Eka0, 1); \
Eme0 ^= De0; \
Bo = ROL32(Eme0, 22); \
Esi0 ^= Di0; \
Bu = ROL32(Esi0, 30); \
Aga1 = Ba ^((~Be)& Bi ); \
Age1 = Be ^((~Bi)& Bo ); \
Agi1 = Bi ^((~Bo)& Bu ); \
Ago1 = Bo ^((~Bu)& Ba ); \
Agu1 = Bu ^((~Ba)& Be ); \
\
Ebe0 ^= De0; \
Ba = Ebe0; \
Egi1 ^= Di1; \
Be = ROL32(Egi1, 3); \
Eko0 ^= Do0; \
Bi = ROL32(Eko0, 12); \
Emu1 ^= Du1; \
Bo = ROL32(Emu1, 4); \
Esa1 ^= Da1; \
Bu = ROL32(Esa1, 9); \
Aka1 = Ba ^((~Be)& Bi ); \
Ake1 = Be ^((~Bi)& Bo ); \
Aki1 = Bi ^((~Bo)& Bu ); \
Ako1 = Bo ^((~Bu)& Ba ); \
Aku1 = Bu ^((~Ba)& Be ); \
\
Ebu0 ^= Du0; \
Ba = ROL32(Ebu0, 13); \
Ega1 ^= Da1; \
Be = ROL32(Ega1, 18); \
Eke1 ^= De1; \
Bi = ROL32(Eke1, 5); \
Emi0 ^= Di0; \
Bo = ROL32(Emi0, 7); \
Eso1 ^= Do1; \
Bu = ROL32(Eso1, 28); \
Ama1 = Ba ^((~Be)& Bi ); \
Ame1 = Be ^((~Bi)& Bo ); \
Ami1 = Bi ^((~Bo)& Bu ); \
Amo1 = Bo ^((~Bu)& Ba ); \
Amu1 = Bu ^((~Ba)& Be ); \
\
Ebi1 ^= Di1; \
Ba = ROL32(Ebi1, 31); \
Ego0 ^= Do0; \
Be = ROL32(Ego0, 27); \
Eku0 ^= Du0; \
Bi = ROL32(Eku0, 19); \
Ema0 ^= Da0; \
Bo = ROL32(Ema0, 20); \
Ese1 ^= De1; \
Bu = ROL32(Ese1, 1); \
Asa1 = Ba ^((~Be)& Bi ); \
Ase1 = Be ^((~Bi)& Bo ); \
Asi1 = Bi ^((~Bo)& Bu ); \
Aso1 = Bo ^((~Bu)& Ba ); \
Asu1 = Bu ^((~Ba)& Be ); \
} \
copyToState(state, A) \
}
/* copyFromState(X, state): load the 50 words state[0..49] into the variables
 * X##ba0, X##ba1, X##be0, ... X##su1.  Each two-letter name owns two
 * consecutive words: suffix 0 takes the even index, suffix 1 the odd one. */
#define copyFromState(X, state) \
X##ba0 = state[ 0]; \
X##ba1 = state[ 1]; \
X##be0 = state[ 2]; \
X##be1 = state[ 3]; \
X##bi0 = state[ 4]; \
X##bi1 = state[ 5]; \
X##bo0 = state[ 6]; \
X##bo1 = state[ 7]; \
X##bu0 = state[ 8]; \
X##bu1 = state[ 9]; \
X##ga0 = state[10]; \
X##ga1 = state[11]; \
X##ge0 = state[12]; \
X##ge1 = state[13]; \
X##gi0 = state[14]; \
X##gi1 = state[15]; \
X##go0 = state[16]; \
X##go1 = state[17]; \
X##gu0 = state[18]; \
X##gu1 = state[19]; \
X##ka0 = state[20]; \
X##ka1 = state[21]; \
X##ke0 = state[22]; \
X##ke1 = state[23]; \
X##ki0 = state[24]; \
X##ki1 = state[25]; \
X##ko0 = state[26]; \
X##ko1 = state[27]; \
X##ku0 = state[28]; \
X##ku1 = state[29]; \
X##ma0 = state[30]; \
X##ma1 = state[31]; \
X##me0 = state[32]; \
X##me1 = state[33]; \
X##mi0 = state[34]; \
X##mi1 = state[35]; \
X##mo0 = state[36]; \
X##mo1 = state[37]; \
X##mu0 = state[38]; \
X##mu1 = state[39]; \
X##sa0 = state[40]; \
X##sa1 = state[41]; \
X##se0 = state[42]; \
X##se1 = state[43]; \
X##si0 = state[44]; \
X##si1 = state[45]; \
X##so0 = state[46]; \
X##so1 = state[47]; \
X##su0 = state[48]; \
X##su1 = state[49]; \
/* copyToState(state, X): store the variables X##ba0, X##ba1, X##be0, ...
 * X##su1 into state[0..49], using the same name-to-index layout that
 * copyFromState reads (suffix 0 at the even index, suffix 1 at the odd). */
#define copyToState(state, X) \
state[ 0] = X##ba0; \
state[ 1] = X##ba1; \
state[ 2] = X##be0; \
state[ 3] = X##be1; \
state[ 4] = X##bi0; \
state[ 5] = X##bi1; \
state[ 6] = X##bo0; \
state[ 7] = X##bo1; \
state[ 8] = X##bu0; \
state[ 9] = X##bu1; \
state[10] = X##ga0; \
state[11] = X##ga1; \
state[12] = X##ge0; \
state[13] = X##ge1; \
state[14] = X##gi0; \
state[15] = X##gi1; \
state[16] = X##go0; \
state[17] = X##go1; \
state[18] = X##gu0; \
state[19] = X##gu1; \
state[20] = X##ka0; \
state[21] = X##ka1; \
state[22] = X##ke0; \
state[23] = X##ke1; \
state[24] = X##ki0; \
state[25] = X##ki1; \
state[26] = X##ko0; \
state[27] = X##ko1; \
state[28] = X##ku0; \
state[29] = X##ku1; \
state[30] = X##ma0; \
state[31] = X##ma1; \
state[32] = X##me0; \
state[33] = X##me1; \
state[34] = X##mi0; \
state[35] = X##mi1; \
state[36] = X##mo0; \
state[37] = X##mo1; \
state[38] = X##mu0; \
state[39] = X##mu1; \
state[40] = X##sa0; \
state[41] = X##sa1; \
state[42] = X##se0; \
state[43] = X##se1; \
state[44] = X##si0; \
state[45] = X##si1; \
state[46] = X##so0; \
state[47] = X##so1; \
state[48] = X##su0; \
state[49] = X##su1; \

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,26 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/* Select the macro file that supplies the 32-bit round implementation.
 * UseSchedule == 1, 2 or 3 includes the s1, s2 or rvk variant respectively;
 * any other defined value stops compilation with an #error.  When
 * UseSchedule is not defined at all, the s1 variant is included. */
#ifdef UseSchedule
#if (UseSchedule == 1)
#include "KeccakF-1600-32-s1.macros"
#elif (UseSchedule == 2)
#include "KeccakF-1600-32-s2.macros"
#elif (UseSchedule == 3)
#include "KeccakF-1600-32-rvk.macros"
#else
#error "This schedule is not supported."
#endif
#else
/* UseSchedule not defined: fall back to schedule 1 */
#include "KeccakF-1600-32-s1.macros"
#endif

View File

@ -0,0 +1,728 @@
/*
Code automatically generated by KeccakTools!
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/* declareABCDE: declare the UINT64 working variables used by the round
 * macros in this file:
 *   A?? and E?? - 25 words each, passed to the round macros as the A and E
 *                 name prefixes;
 *   B??         - 25 temporaries holding the rotated words of a round;
 *   C? and D?   - 5 words each (see prepareTheta and the Da..Du assignments
 *                 at the top of each round macro). */
#define declareABCDE \
UINT64 Aba, Abe, Abi, Abo, Abu; \
UINT64 Aga, Age, Agi, Ago, Agu; \
UINT64 Aka, Ake, Aki, Ako, Aku; \
UINT64 Ama, Ame, Ami, Amo, Amu; \
UINT64 Asa, Ase, Asi, Aso, Asu; \
UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \
UINT64 Bga, Bge, Bgi, Bgo, Bgu; \
UINT64 Bka, Bke, Bki, Bko, Bku; \
UINT64 Bma, Bme, Bmi, Bmo, Bmu; \
UINT64 Bsa, Bse, Bsi, Bso, Bsu; \
UINT64 Ca, Ce, Ci, Co, Cu; \
UINT64 Da, De, Di, Do, Du; \
UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \
UINT64 Ega, Ege, Egi, Ego, Egu; \
UINT64 Eka, Eke, Eki, Eko, Eku; \
UINT64 Ema, Eme, Emi, Emo, Emu; \
UINT64 Esa, Ese, Esi, Eso, Esu; \
/* prepareTheta: set each of Ca, Ce, Ci, Co, Cu to the XOR of the five A words whose names end in the same letter (e.g. Ca = Aba^Aga^Aka^Ama^Asa).  The round macros read these C values when computing Da..Du.  Note that the A prefix is fixed here, not a macro parameter. */
#define prepareTheta \
Ca = Aba^Aga^Aka^Ama^Asa; \
Ce = Abe^Age^Ake^Ame^Ase; \
Ci = Abi^Agi^Aki^Ami^Asi; \
Co = Abo^Ago^Ako^Amo^Aso; \
Cu = Abu^Agu^Aku^Amu^Asu; \
#ifdef UseBebigokimisa
/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */
/* --- 64-bit lanes mapped to 64-bit words */
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
Da = Cu^ROL64(Ce, 1); \
De = Ca^ROL64(Ci, 1); \
Di = Ce^ROL64(Co, 1); \
Do = Ci^ROL64(Cu, 1); \
Du = Co^ROL64(Ca, 1); \
\
A##ba ^= Da; \
Bba = A##ba; \
A##ge ^= De; \
Bbe = ROL64(A##ge, 44); \
A##ki ^= Di; \
Bbi = ROL64(A##ki, 43); \
A##mo ^= Do; \
Bbo = ROL64(A##mo, 21); \
A##su ^= Du; \
Bbu = ROL64(A##su, 14); \
E##ba = Bba ^( Bbe | Bbi ); \
E##ba ^= KeccakF1600RoundConstants[i]; \
Ca = E##ba; \
E##be = Bbe ^((~Bbi)| Bbo ); \
Ce = E##be; \
E##bi = Bbi ^( Bbo & Bbu ); \
Ci = E##bi; \
E##bo = Bbo ^( Bbu | Bba ); \
Co = E##bo; \
E##bu = Bbu ^( Bba & Bbe ); \
Cu = E##bu; \
\
A##bo ^= Do; \
Bga = ROL64(A##bo, 28); \
A##gu ^= Du; \
Bge = ROL64(A##gu, 20); \
A##ka ^= Da; \
Bgi = ROL64(A##ka, 3); \
A##me ^= De; \
Bgo = ROL64(A##me, 45); \
A##si ^= Di; \
Bgu = ROL64(A##si, 61); \
E##ga = Bga ^( Bge | Bgi ); \
Ca ^= E##ga; \
E##ge = Bge ^( Bgi & Bgo ); \
Ce ^= E##ge; \
E##gi = Bgi ^( Bgo |(~Bgu)); \
Ci ^= E##gi; \
E##go = Bgo ^( Bgu | Bga ); \
Co ^= E##go; \
E##gu = Bgu ^( Bga & Bge ); \
Cu ^= E##gu; \
\
A##be ^= De; \
Bka = ROL64(A##be, 1); \
A##gi ^= Di; \
Bke = ROL64(A##gi, 6); \
A##ko ^= Do; \
Bki = ROL64(A##ko, 25); \
A##mu ^= Du; \
Bko = ROL64(A##mu, 8); \
A##sa ^= Da; \
Bku = ROL64(A##sa, 18); \
E##ka = Bka ^( Bke | Bki ); \
Ca ^= E##ka; \
E##ke = Bke ^( Bki & Bko ); \
Ce ^= E##ke; \
E##ki = Bki ^((~Bko)& Bku ); \
Ci ^= E##ki; \
E##ko = (~Bko)^( Bku | Bka ); \
Co ^= E##ko; \
E##ku = Bku ^( Bka & Bke ); \
Cu ^= E##ku; \
\
A##bu ^= Du; \
Bma = ROL64(A##bu, 27); \
A##ga ^= Da; \
Bme = ROL64(A##ga, 36); \
A##ke ^= De; \
Bmi = ROL64(A##ke, 10); \
A##mi ^= Di; \
Bmo = ROL64(A##mi, 15); \
A##so ^= Do; \
Bmu = ROL64(A##so, 56); \
E##ma = Bma ^( Bme & Bmi ); \
Ca ^= E##ma; \
E##me = Bme ^( Bmi | Bmo ); \
Ce ^= E##me; \
E##mi = Bmi ^((~Bmo)| Bmu ); \
Ci ^= E##mi; \
E##mo = (~Bmo)^( Bmu & Bma ); \
Co ^= E##mo; \
E##mu = Bmu ^( Bma | Bme ); \
Cu ^= E##mu; \
\
A##bi ^= Di; \
Bsa = ROL64(A##bi, 62); \
A##go ^= Do; \
Bse = ROL64(A##go, 55); \
A##ku ^= Du; \
Bsi = ROL64(A##ku, 39); \
A##ma ^= Da; \
Bso = ROL64(A##ma, 41); \
A##se ^= De; \
Bsu = ROL64(A##se, 2); \
E##sa = Bsa ^((~Bse)& Bsi ); \
Ca ^= E##sa; \
E##se = (~Bse)^( Bsi | Bso ); \
Ce ^= E##se; \
E##si = Bsi ^( Bso & Bsu ); \
Ci ^= E##si; \
E##so = Bso ^( Bsu | Bsa ); \
Co ^= E##so; \
E##su = Bsu ^( Bsa & Bse ); \
Cu ^= E##su; \
\
/* --- Code for round (lane complementing pattern 'bebigokimisa') */
/* --- 64-bit lanes mapped to 64-bit words */
#define thetaRhoPiChiIota(i, A, E) \
Da = Cu^ROL64(Ce, 1); \
De = Ca^ROL64(Ci, 1); \
Di = Ce^ROL64(Co, 1); \
Do = Ci^ROL64(Cu, 1); \
Du = Co^ROL64(Ca, 1); \
\
A##ba ^= Da; \
Bba = A##ba; \
A##ge ^= De; \
Bbe = ROL64(A##ge, 44); \
A##ki ^= Di; \
Bbi = ROL64(A##ki, 43); \
A##mo ^= Do; \
Bbo = ROL64(A##mo, 21); \
A##su ^= Du; \
Bbu = ROL64(A##su, 14); \
E##ba = Bba ^( Bbe | Bbi ); \
E##ba ^= KeccakF1600RoundConstants[i]; \
E##be = Bbe ^((~Bbi)| Bbo ); \
E##bi = Bbi ^( Bbo & Bbu ); \
E##bo = Bbo ^( Bbu | Bba ); \
E##bu = Bbu ^( Bba & Bbe ); \
\
A##bo ^= Do; \
Bga = ROL64(A##bo, 28); \
A##gu ^= Du; \
Bge = ROL64(A##gu, 20); \
A##ka ^= Da; \
Bgi = ROL64(A##ka, 3); \
A##me ^= De; \
Bgo = ROL64(A##me, 45); \
A##si ^= Di; \
Bgu = ROL64(A##si, 61); \
E##ga = Bga ^( Bge | Bgi ); \
E##ge = Bge ^( Bgi & Bgo ); \
E##gi = Bgi ^( Bgo |(~Bgu)); \
E##go = Bgo ^( Bgu | Bga ); \
E##gu = Bgu ^( Bga & Bge ); \
\
A##be ^= De; \
Bka = ROL64(A##be, 1); \
A##gi ^= Di; \
Bke = ROL64(A##gi, 6); \
A##ko ^= Do; \
Bki = ROL64(A##ko, 25); \
A##mu ^= Du; \
Bko = ROL64(A##mu, 8); \
A##sa ^= Da; \
Bku = ROL64(A##sa, 18); \
E##ka = Bka ^( Bke | Bki ); \
E##ke = Bke ^( Bki & Bko ); \
E##ki = Bki ^((~Bko)& Bku ); \
E##ko = (~Bko)^( Bku | Bka ); \
E##ku = Bku ^( Bka & Bke ); \
\
A##bu ^= Du; \
Bma = ROL64(A##bu, 27); \
A##ga ^= Da; \
Bme = ROL64(A##ga, 36); \
A##ke ^= De; \
Bmi = ROL64(A##ke, 10); \
A##mi ^= Di; \
Bmo = ROL64(A##mi, 15); \
A##so ^= Do; \
Bmu = ROL64(A##so, 56); \
E##ma = Bma ^( Bme & Bmi ); \
E##me = Bme ^( Bmi | Bmo ); \
E##mi = Bmi ^((~Bmo)| Bmu ); \
E##mo = (~Bmo)^( Bmu & Bma ); \
E##mu = Bmu ^( Bma | Bme ); \
\
A##bi ^= Di; \
Bsa = ROL64(A##bi, 62); \
A##go ^= Do; \
Bse = ROL64(A##go, 55); \
A##ku ^= Du; \
Bsi = ROL64(A##ku, 39); \
A##ma ^= Da; \
Bso = ROL64(A##ma, 41); \
A##se ^= De; \
Bsu = ROL64(A##se, 2); \
E##sa = Bsa ^((~Bse)& Bsi ); \
E##se = (~Bse)^( Bsi | Bso ); \
E##si = Bsi ^( Bso & Bsu ); \
E##so = Bso ^( Bsu | Bsa ); \
E##su = Bsu ^( Bsa & Bse ); \
\
#else /* UseBebigokimisa */
/* --- Code for round, with prepare-theta */
/* --- 64-bit lanes mapped to 64-bit words */
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
Da = Cu^ROL64(Ce, 1); \
De = Ca^ROL64(Ci, 1); \
Di = Ce^ROL64(Co, 1); \
Do = Ci^ROL64(Cu, 1); \
Du = Co^ROL64(Ca, 1); \
\
A##ba ^= Da; \
Bba = A##ba; \
A##ge ^= De; \
Bbe = ROL64(A##ge, 44); \
A##ki ^= Di; \
Bbi = ROL64(A##ki, 43); \
A##mo ^= Do; \
Bbo = ROL64(A##mo, 21); \
A##su ^= Du; \
Bbu = ROL64(A##su, 14); \
E##ba = Bba ^((~Bbe)& Bbi ); \
E##ba ^= KeccakF1600RoundConstants[i]; \
Ca = E##ba; \
E##be = Bbe ^((~Bbi)& Bbo ); \
Ce = E##be; \
E##bi = Bbi ^((~Bbo)& Bbu ); \
Ci = E##bi; \
E##bo = Bbo ^((~Bbu)& Bba ); \
Co = E##bo; \
E##bu = Bbu ^((~Bba)& Bbe ); \
Cu = E##bu; \
\
A##bo ^= Do; \
Bga = ROL64(A##bo, 28); \
A##gu ^= Du; \
Bge = ROL64(A##gu, 20); \
A##ka ^= Da; \
Bgi = ROL64(A##ka, 3); \
A##me ^= De; \
Bgo = ROL64(A##me, 45); \
A##si ^= Di; \
Bgu = ROL64(A##si, 61); \
E##ga = Bga ^((~Bge)& Bgi ); \
Ca ^= E##ga; \
E##ge = Bge ^((~Bgi)& Bgo ); \
Ce ^= E##ge; \
E##gi = Bgi ^((~Bgo)& Bgu ); \
Ci ^= E##gi; \
E##go = Bgo ^((~Bgu)& Bga ); \
Co ^= E##go; \
E##gu = Bgu ^((~Bga)& Bge ); \
Cu ^= E##gu; \
\
A##be ^= De; \
Bka = ROL64(A##be, 1); \
A##gi ^= Di; \
Bke = ROL64(A##gi, 6); \
A##ko ^= Do; \
Bki = ROL64(A##ko, 25); \
A##mu ^= Du; \
Bko = ROL64(A##mu, 8); \
A##sa ^= Da; \
Bku = ROL64(A##sa, 18); \
E##ka = Bka ^((~Bke)& Bki ); \
Ca ^= E##ka; \
E##ke = Bke ^((~Bki)& Bko ); \
Ce ^= E##ke; \
E##ki = Bki ^((~Bko)& Bku ); \
Ci ^= E##ki; \
E##ko = Bko ^((~Bku)& Bka ); \
Co ^= E##ko; \
E##ku = Bku ^((~Bka)& Bke ); \
Cu ^= E##ku; \
\
A##bu ^= Du; \
Bma = ROL64(A##bu, 27); \
A##ga ^= Da; \
Bme = ROL64(A##ga, 36); \
A##ke ^= De; \
Bmi = ROL64(A##ke, 10); \
A##mi ^= Di; \
Bmo = ROL64(A##mi, 15); \
A##so ^= Do; \
Bmu = ROL64(A##so, 56); \
E##ma = Bma ^((~Bme)& Bmi ); \
Ca ^= E##ma; \
E##me = Bme ^((~Bmi)& Bmo ); \
Ce ^= E##me; \
E##mi = Bmi ^((~Bmo)& Bmu ); \
Ci ^= E##mi; \
E##mo = Bmo ^((~Bmu)& Bma ); \
Co ^= E##mo; \
E##mu = Bmu ^((~Bma)& Bme ); \
Cu ^= E##mu; \
\
A##bi ^= Di; \
Bsa = ROL64(A##bi, 62); \
A##go ^= Do; \
Bse = ROL64(A##go, 55); \
A##ku ^= Du; \
Bsi = ROL64(A##ku, 39); \
A##ma ^= Da; \
Bso = ROL64(A##ma, 41); \
A##se ^= De; \
Bsu = ROL64(A##se, 2); \
E##sa = Bsa ^((~Bse)& Bsi ); \
Ca ^= E##sa; \
E##se = Bse ^((~Bsi)& Bso ); \
Ce ^= E##se; \
E##si = Bsi ^((~Bso)& Bsu ); \
Ci ^= E##si; \
E##so = Bso ^((~Bsu)& Bsa ); \
Co ^= E##so; \
E##su = Bsu ^((~Bsa)& Bse ); \
Cu ^= E##su; \
\
/* --- Code for round */
/* --- 64-bit lanes mapped to 64-bit words */
/*
 * thetaRhoPiChiIota(i, A, E)
 *
 * Expands to one round that reads the lanes A##ba..A##su and writes the
 * lanes E##ba..E##su:
 *   - Da..Du are derived from the column values Ca..Cu, which must already
 *     be set by the caller; this macro does not update Ca..Cu;
 *   - every A lane is XORed in place with its column's D value (A is
 *     modified), rotated with ROL64 and stored in a B temporary;
 *   - each E lane is B ^ (~B' & B'') over its row of five B values;
 *   - KeccakF1600RoundConstants[i] is XORed into E##ba.
 *
 * The expansion is a sequence of complete statements that uses the
 * caller's variables Ba..Bsu, Ca..Cu and Da..Du.  The last line carries no
 * line continuation, so the macro cannot absorb whatever follows it.
 */
#define thetaRhoPiChiIota(i, A, E) \
    Da = Cu^ROL64(Ce, 1); \
    De = Ca^ROL64(Ci, 1); \
    Di = Ce^ROL64(Co, 1); \
    Do = Ci^ROL64(Cu, 1); \
    Du = Co^ROL64(Ca, 1); \
\
    A##ba ^= Da; \
    Bba = A##ba; \
    A##ge ^= De; \
    Bbe = ROL64(A##ge, 44); \
    A##ki ^= Di; \
    Bbi = ROL64(A##ki, 43); \
    A##mo ^= Do; \
    Bbo = ROL64(A##mo, 21); \
    A##su ^= Du; \
    Bbu = ROL64(A##su, 14); \
    E##ba = Bba ^((~Bbe)& Bbi ); \
    E##ba ^= KeccakF1600RoundConstants[i]; \
    E##be = Bbe ^((~Bbi)& Bbo ); \
    E##bi = Bbi ^((~Bbo)& Bbu ); \
    E##bo = Bbo ^((~Bbu)& Bba ); \
    E##bu = Bbu ^((~Bba)& Bbe ); \
\
    A##bo ^= Do; \
    Bga = ROL64(A##bo, 28); \
    A##gu ^= Du; \
    Bge = ROL64(A##gu, 20); \
    A##ka ^= Da; \
    Bgi = ROL64(A##ka, 3); \
    A##me ^= De; \
    Bgo = ROL64(A##me, 45); \
    A##si ^= Di; \
    Bgu = ROL64(A##si, 61); \
    E##ga = Bga ^((~Bge)& Bgi ); \
    E##ge = Bge ^((~Bgi)& Bgo ); \
    E##gi = Bgi ^((~Bgo)& Bgu ); \
    E##go = Bgo ^((~Bgu)& Bga ); \
    E##gu = Bgu ^((~Bga)& Bge ); \
\
    A##be ^= De; \
    Bka = ROL64(A##be, 1); \
    A##gi ^= Di; \
    Bke = ROL64(A##gi, 6); \
    A##ko ^= Do; \
    Bki = ROL64(A##ko, 25); \
    A##mu ^= Du; \
    Bko = ROL64(A##mu, 8); \
    A##sa ^= Da; \
    Bku = ROL64(A##sa, 18); \
    E##ka = Bka ^((~Bke)& Bki ); \
    E##ke = Bke ^((~Bki)& Bko ); \
    E##ki = Bki ^((~Bko)& Bku ); \
    E##ko = Bko ^((~Bku)& Bka ); \
    E##ku = Bku ^((~Bka)& Bke ); \
\
    A##bu ^= Du; \
    Bma = ROL64(A##bu, 27); \
    A##ga ^= Da; \
    Bme = ROL64(A##ga, 36); \
    A##ke ^= De; \
    Bmi = ROL64(A##ke, 10); \
    A##mi ^= Di; \
    Bmo = ROL64(A##mi, 15); \
    A##so ^= Do; \
    Bmu = ROL64(A##so, 56); \
    E##ma = Bma ^((~Bme)& Bmi ); \
    E##me = Bme ^((~Bmi)& Bmo ); \
    E##mi = Bmi ^((~Bmo)& Bmu ); \
    E##mo = Bmo ^((~Bmu)& Bma ); \
    E##mu = Bmu ^((~Bma)& Bme ); \
\
    A##bi ^= Di; \
    Bsa = ROL64(A##bi, 62); \
    A##go ^= Do; \
    Bse = ROL64(A##go, 55); \
    A##ku ^= Du; \
    Bsi = ROL64(A##ku, 39); \
    A##ma ^= Da; \
    Bso = ROL64(A##ma, 41); \
    A##se ^= De; \
    Bsu = ROL64(A##se, 2); \
    E##sa = Bsa ^((~Bse)& Bsi ); \
    E##se = Bse ^((~Bsi)& Bso ); \
    E##si = Bsi ^((~Bso)& Bsu ); \
    E##so = Bso ^((~Bsu)& Bsa ); \
    E##su = Bsu ^((~Bsa)& Bse );
#endif /* UseBebigokimisa */
/*
 * Round constants, indexed by round number.  The round macros XOR entry
 * [i] into lane "ba" of the output state.  There are 24 entries.
 */
static const UINT64 KeccakF1600RoundConstants[24] = {
0x0000000000000001ULL,
0x0000000000008082ULL,
0x800000000000808aULL,
0x8000000080008000ULL,
0x000000000000808bULL,
0x0000000080000001ULL,
0x8000000080008081ULL,
0x8000000000008009ULL,
0x000000000000008aULL,
0x0000000000000088ULL,
0x0000000080008009ULL,
0x000000008000000aULL,
0x000000008000808bULL,
0x800000000000008bULL,
0x8000000000008089ULL,
0x8000000000008003ULL,
0x8000000000008002ULL,
0x8000000000000080ULL,
0x000000000000800aULL,
0x800000008000000aULL,
0x8000000080008081ULL,
0x8000000000008080ULL,
0x0000000080000001ULL,
0x8000000080008008ULL };
/*
 * copyFromStateAndXor576bits(X, state, input)
 *
 * Loads the 25 lanes state[0..24] into the variables X##ba..X##su, XORing
 * input[0..8] (9 lanes = 576 bits) into the first nine on the way.
 * state and input are indexed per lane; neither array is modified.
 * The final line has no line continuation so the macro ends here.
 */
#define copyFromStateAndXor576bits(X, state, input) \
    X##ba = state[ 0]^input[ 0]; \
    X##be = state[ 1]^input[ 1]; \
    X##bi = state[ 2]^input[ 2]; \
    X##bo = state[ 3]^input[ 3]; \
    X##bu = state[ 4]^input[ 4]; \
    X##ga = state[ 5]^input[ 5]; \
    X##ge = state[ 6]^input[ 6]; \
    X##gi = state[ 7]^input[ 7]; \
    X##go = state[ 8]^input[ 8]; \
    X##gu = state[ 9]; \
    X##ka = state[10]; \
    X##ke = state[11]; \
    X##ki = state[12]; \
    X##ko = state[13]; \
    X##ku = state[14]; \
    X##ma = state[15]; \
    X##me = state[16]; \
    X##mi = state[17]; \
    X##mo = state[18]; \
    X##mu = state[19]; \
    X##sa = state[20]; \
    X##se = state[21]; \
    X##si = state[22]; \
    X##so = state[23]; \
    X##su = state[24];
/*
 * copyFromStateAndXor832bits(X, state, input)
 *
 * Loads the 25 lanes state[0..24] into the variables X##ba..X##su, XORing
 * input[0..12] (13 lanes = 832 bits) into the first thirteen on the way.
 * Neither array is modified.  The final line has no line continuation so
 * the macro ends here.
 */
#define copyFromStateAndXor832bits(X, state, input) \
    X##ba = state[ 0]^input[ 0]; \
    X##be = state[ 1]^input[ 1]; \
    X##bi = state[ 2]^input[ 2]; \
    X##bo = state[ 3]^input[ 3]; \
    X##bu = state[ 4]^input[ 4]; \
    X##ga = state[ 5]^input[ 5]; \
    X##ge = state[ 6]^input[ 6]; \
    X##gi = state[ 7]^input[ 7]; \
    X##go = state[ 8]^input[ 8]; \
    X##gu = state[ 9]^input[ 9]; \
    X##ka = state[10]^input[10]; \
    X##ke = state[11]^input[11]; \
    X##ki = state[12]^input[12]; \
    X##ko = state[13]; \
    X##ku = state[14]; \
    X##ma = state[15]; \
    X##me = state[16]; \
    X##mi = state[17]; \
    X##mo = state[18]; \
    X##mu = state[19]; \
    X##sa = state[20]; \
    X##se = state[21]; \
    X##si = state[22]; \
    X##so = state[23]; \
    X##su = state[24];
/*
 * copyFromStateAndXor1024bits(X, state, input)
 *
 * Loads the 25 lanes state[0..24] into the variables X##ba..X##su, XORing
 * input[0..15] (16 lanes = 1024 bits) into the first sixteen on the way.
 * Neither array is modified.  The final line has no line continuation so
 * the macro ends here.
 */
#define copyFromStateAndXor1024bits(X, state, input) \
    X##ba = state[ 0]^input[ 0]; \
    X##be = state[ 1]^input[ 1]; \
    X##bi = state[ 2]^input[ 2]; \
    X##bo = state[ 3]^input[ 3]; \
    X##bu = state[ 4]^input[ 4]; \
    X##ga = state[ 5]^input[ 5]; \
    X##ge = state[ 6]^input[ 6]; \
    X##gi = state[ 7]^input[ 7]; \
    X##go = state[ 8]^input[ 8]; \
    X##gu = state[ 9]^input[ 9]; \
    X##ka = state[10]^input[10]; \
    X##ke = state[11]^input[11]; \
    X##ki = state[12]^input[12]; \
    X##ko = state[13]^input[13]; \
    X##ku = state[14]^input[14]; \
    X##ma = state[15]^input[15]; \
    X##me = state[16]; \
    X##mi = state[17]; \
    X##mo = state[18]; \
    X##mu = state[19]; \
    X##sa = state[20]; \
    X##se = state[21]; \
    X##si = state[22]; \
    X##so = state[23]; \
    X##su = state[24];
/*
 * copyFromStateAndXor1088bits(X, state, input)
 *
 * Loads the 25 lanes state[0..24] into the variables X##ba..X##su, XORing
 * input[0..16] (17 lanes = 1088 bits) into the first seventeen on the way.
 * Neither array is modified.  The final line has no line continuation so
 * the macro ends here.
 */
#define copyFromStateAndXor1088bits(X, state, input) \
    X##ba = state[ 0]^input[ 0]; \
    X##be = state[ 1]^input[ 1]; \
    X##bi = state[ 2]^input[ 2]; \
    X##bo = state[ 3]^input[ 3]; \
    X##bu = state[ 4]^input[ 4]; \
    X##ga = state[ 5]^input[ 5]; \
    X##ge = state[ 6]^input[ 6]; \
    X##gi = state[ 7]^input[ 7]; \
    X##go = state[ 8]^input[ 8]; \
    X##gu = state[ 9]^input[ 9]; \
    X##ka = state[10]^input[10]; \
    X##ke = state[11]^input[11]; \
    X##ki = state[12]^input[12]; \
    X##ko = state[13]^input[13]; \
    X##ku = state[14]^input[14]; \
    X##ma = state[15]^input[15]; \
    X##me = state[16]^input[16]; \
    X##mi = state[17]; \
    X##mo = state[18]; \
    X##mu = state[19]; \
    X##sa = state[20]; \
    X##se = state[21]; \
    X##si = state[22]; \
    X##so = state[23]; \
    X##su = state[24];
/*
 * copyFromStateAndXor1152bits(X, state, input)
 *
 * Loads the 25 lanes state[0..24] into the variables X##ba..X##su, XORing
 * input[0..17] (18 lanes = 1152 bits) into the first eighteen on the way.
 * Neither array is modified.  The final line has no line continuation so
 * the macro ends here.
 */
#define copyFromStateAndXor1152bits(X, state, input) \
    X##ba = state[ 0]^input[ 0]; \
    X##be = state[ 1]^input[ 1]; \
    X##bi = state[ 2]^input[ 2]; \
    X##bo = state[ 3]^input[ 3]; \
    X##bu = state[ 4]^input[ 4]; \
    X##ga = state[ 5]^input[ 5]; \
    X##ge = state[ 6]^input[ 6]; \
    X##gi = state[ 7]^input[ 7]; \
    X##go = state[ 8]^input[ 8]; \
    X##gu = state[ 9]^input[ 9]; \
    X##ka = state[10]^input[10]; \
    X##ke = state[11]^input[11]; \
    X##ki = state[12]^input[12]; \
    X##ko = state[13]^input[13]; \
    X##ku = state[14]^input[14]; \
    X##ma = state[15]^input[15]; \
    X##me = state[16]^input[16]; \
    X##mi = state[17]^input[17]; \
    X##mo = state[18]; \
    X##mu = state[19]; \
    X##sa = state[20]; \
    X##se = state[21]; \
    X##si = state[22]; \
    X##so = state[23]; \
    X##su = state[24];
/*
 * copyFromStateAndXor1344bits(X, state, input)
 *
 * Loads the 25 lanes state[0..24] into the variables X##ba..X##su, XORing
 * input[0..20] (21 lanes = 1344 bits) into the first twenty-one on the way.
 * Neither array is modified.  The final line has no line continuation so
 * the macro ends here.
 */
#define copyFromStateAndXor1344bits(X, state, input) \
    X##ba = state[ 0]^input[ 0]; \
    X##be = state[ 1]^input[ 1]; \
    X##bi = state[ 2]^input[ 2]; \
    X##bo = state[ 3]^input[ 3]; \
    X##bu = state[ 4]^input[ 4]; \
    X##ga = state[ 5]^input[ 5]; \
    X##ge = state[ 6]^input[ 6]; \
    X##gi = state[ 7]^input[ 7]; \
    X##go = state[ 8]^input[ 8]; \
    X##gu = state[ 9]^input[ 9]; \
    X##ka = state[10]^input[10]; \
    X##ke = state[11]^input[11]; \
    X##ki = state[12]^input[12]; \
    X##ko = state[13]^input[13]; \
    X##ku = state[14]^input[14]; \
    X##ma = state[15]^input[15]; \
    X##me = state[16]^input[16]; \
    X##mi = state[17]^input[17]; \
    X##mo = state[18]^input[18]; \
    X##mu = state[19]^input[19]; \
    X##sa = state[20]^input[20]; \
    X##se = state[21]; \
    X##si = state[22]; \
    X##so = state[23]; \
    X##su = state[24];
/*
 * copyFromState(X, state)
 *
 * Loads the 25 lanes state[0..24] into the variables X##ba..X##su, in the
 * order ba, be, bi, bo, bu, ga, ... su.  state is not modified.
 * The final line has no line continuation so the macro ends here.
 */
#define copyFromState(X, state) \
    X##ba = state[ 0]; \
    X##be = state[ 1]; \
    X##bi = state[ 2]; \
    X##bo = state[ 3]; \
    X##bu = state[ 4]; \
    X##ga = state[ 5]; \
    X##ge = state[ 6]; \
    X##gi = state[ 7]; \
    X##go = state[ 8]; \
    X##gu = state[ 9]; \
    X##ka = state[10]; \
    X##ke = state[11]; \
    X##ki = state[12]; \
    X##ko = state[13]; \
    X##ku = state[14]; \
    X##ma = state[15]; \
    X##me = state[16]; \
    X##mi = state[17]; \
    X##mo = state[18]; \
    X##mu = state[19]; \
    X##sa = state[20]; \
    X##se = state[21]; \
    X##si = state[22]; \
    X##so = state[23]; \
    X##su = state[24];
/*
 * copyToState(state, X)
 *
 * Stores the 25 variables X##ba..X##su into state[0..24], in the order
 * ba, be, bi, bo, bu, ga, ... su.
 * The final line has no line continuation so the macro ends here.
 */
#define copyToState(state, X) \
    state[ 0] = X##ba; \
    state[ 1] = X##be; \
    state[ 2] = X##bi; \
    state[ 3] = X##bo; \
    state[ 4] = X##bu; \
    state[ 5] = X##ga; \
    state[ 6] = X##ge; \
    state[ 7] = X##gi; \
    state[ 8] = X##go; \
    state[ 9] = X##gu; \
    state[10] = X##ka; \
    state[11] = X##ke; \
    state[12] = X##ki; \
    state[13] = X##ko; \
    state[14] = X##ku; \
    state[15] = X##ma; \
    state[16] = X##me; \
    state[17] = X##mi; \
    state[18] = X##mo; \
    state[19] = X##mu; \
    state[20] = X##sa; \
    state[21] = X##se; \
    state[22] = X##si; \
    state[23] = X##so; \
    state[24] = X##su;
/*
 * copyStateVariables(X, Y)
 *
 * Assigns each of the 25 variables Y##ba..Y##su to the variable of the
 * same suffix in the X set.
 * The final line has no line continuation so the macro ends here.
 */
#define copyStateVariables(X, Y) \
    X##ba = Y##ba; \
    X##be = Y##be; \
    X##bi = Y##bi; \
    X##bo = Y##bo; \
    X##bu = Y##bu; \
    X##ga = Y##ga; \
    X##ge = Y##ge; \
    X##gi = Y##gi; \
    X##go = Y##go; \
    X##gu = Y##gu; \
    X##ka = Y##ka; \
    X##ke = Y##ke; \
    X##ki = Y##ki; \
    X##ko = Y##ko; \
    X##ku = Y##ku; \
    X##ma = Y##ma; \
    X##me = Y##me; \
    X##mi = Y##mi; \
    X##mo = Y##mo; \
    X##mu = Y##mu; \
    X##sa = Y##sa; \
    X##se = Y##se; \
    X##si = Y##si; \
    X##so = Y##so; \
    X##su = Y##su;

View File

@ -0,0 +1,6 @@
/*
 * Each ProvideFast<N> switch is tested with #ifdef by the interface header
 * and the implementation files; defining it enables the dedicated absorb
 * routine for an N-bit block (and, for 1024, the dedicated extract routine).
 */
#define ProvideFast576
#define ProvideFast832
#define ProvideFast1024
#define ProvideFast1088
#define ProvideFast1152
#define ProvideFast1344

View File

@ -0,0 +1,46 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _KeccakPermutationInterface_h_
#define _KeccakPermutationInterface_h_
#include "KeccakF-1600-int-set.h"
/*
 * All functions are declared static, so each translation unit that includes
 * this header must also contain one of the implementations.
 * `state` points to the 200-byte permutation state (the implementations
 * clear exactly 200 bytes in KeccakInitializeState).
 */
/* One-time setup of the implementation (e.g. building lookup tables). */
static void KeccakInitialize( void );
/* Reset `state` to its initial value. */
static void KeccakInitializeState(unsigned char *state);
/* Apply the permutation to `state` in place. */
static void KeccakPermutation(unsigned char *state);
/* Fixed-size variants: XOR an N-bit block from `data` into `state`, then permute. */
#ifdef ProvideFast576
static void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data);
#endif
#ifdef ProvideFast832
static void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data);
#endif
#ifdef ProvideFast1024
static void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data);
#endif
#ifdef ProvideFast1088
static void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data);
#endif
#ifdef ProvideFast1152
static void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data);
#endif
#ifdef ProvideFast1344
static void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data);
#endif
/* Generic variant: XOR `laneCount` 8-byte lanes from `data` into `state`, then permute. */
static void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount);
/* Copy the first 16 lanes (1024 bits) of `state` out to `data`. */
#ifdef ProvideFast1024
static void KeccakExtract1024bits(const unsigned char *state, unsigned char *data);
#endif
/* Copy the first `laneCount` 8-byte lanes of `state` out to `data`. */
static void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount);
#endif

View File

@ -0,0 +1,6 @@
/*
#define Unrolling 2
#define UseBebigokimisa
#define UseInterleaveTables
#define UseSchedule 3
*/

View File

@ -0,0 +1,524 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <string.h>
/* #include "brg_endian.h" */
#include "KeccakF-1600-opt32-settings.h"
#include "KeccakF-1600-interface.h"
/*
 * Integer aliases used throughout this file.
 * NOTE(review): the names imply exact 8/16/32-bit widths, which these C
 * types do not guarantee on every platform - confirm for the build targets.
 */
typedef unsigned char UINT8;
typedef unsigned short UINT16;
typedef unsigned int UINT32;
/* typedef unsigned long long int UINT64; */
#ifdef UseInterleaveTables
static int interleaveTablesBuilt = 0;
static UINT16 interleaveTable[65536];
static UINT16 deinterleaveTable[65536];
static void buildInterleaveTables()
{
UINT32 i, j;
UINT16 x;
if (!interleaveTablesBuilt) {
for(i=0; i<65536; i++) {
x = 0;
for(j=0; j<16; j++) {
if (i & (1 << j))
x |= (1 << (j/2 + 8*(j%2)));
}
interleaveTable[i] = x;
deinterleaveTable[x] = (UINT16)i;
}
interleaveTablesBuilt = 1;
}
}
/*
 * Helpers that move two source bytes (index j = 0..3 within an 8-byte lane)
 * through the interleave tables.  Both macros assign to a local variable
 * named i<j> / d<j>, which the caller must declare.
 */
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
/* XOR byte j of *even and *odd with the low/high byte of the interleaved
   16-bit value read from source (accessed as an array of UINT16). */
#define xor2bytesIntoInterleavedWords(even, odd, source, j) \
i##j = interleaveTable[((const UINT16*)source)[j]]; \
((UINT8*)even)[j] ^= i##j & 0xFF; \
((UINT8*)odd)[j] ^= i##j >> 8;
/* Rebuild 16 bits from byte j of even and odd and store them as the j-th
   UINT16 of dest. */
#define setInterleavedWordsInto2bytes(dest, even, odd, j) \
d##j = deinterleaveTable[((even >> (j*8)) & 0xFF) ^ (((odd >> (j*8)) & 0xFF) << 8)]; \
((UINT16*)dest)[j] = d##j;
#else /* (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN) */
/* Same operation, assembling the 16-bit value from individual bytes and
   XORing into *even / *odd with shifts instead of byte addressing. */
#define xor2bytesIntoInterleavedWords(even, odd, source, j) \
i##j = interleaveTable[source[2*j] ^ ((UINT16)source[2*j+1] << 8)]; \
*even ^= (i##j & 0xFF) << (j*8); \
*odd ^= ((i##j >> 8) & 0xFF) << (j*8);
/* Same operation, writing the two result bytes individually. */
#define setInterleavedWordsInto2bytes(dest, even, odd, j) \
d##j = deinterleaveTable[((even >> (j*8)) & 0xFF) ^ (((odd >> (j*8)) & 0xFF) << 8)]; \
dest[2*j] = d##j & 0xFF; \
dest[2*j+1] = d##j >> 8;
#endif /* Endianness */
/*
 * XOR one 8-byte lane from source into the word pair (*even, *odd), two
 * bytes at a time via the interleave table.
 */
static void xor8bytesIntoInterleavedWords(UINT32 *even, UINT32 *odd, const UINT8* source)
{
/* i0..i3 are the temporaries the macro below assigns via token pasting */
UINT16 i0, i1, i2, i3;
xor2bytesIntoInterleavedWords(even, odd, source, 0)
xor2bytesIntoInterleavedWords(even, odd, source, 1)
xor2bytesIntoInterleavedWords(even, odd, source, 2)
xor2bytesIntoInterleavedWords(even, odd, source, 3)
}
/*
 * XOR laneCount 8-byte lanes of input into state, where lane i occupies
 * the 32-bit words state[2*i] and state[2*i+1].  Expands to a braced block
 * with its own loop counter.
 */
#define xorLanesIntoState(laneCount, state, input) \
{ \
int i; \
for(i=0; i<(laneCount); i++) \
xor8bytesIntoInterleavedWords(state+i*2, state+i*2+1, input+i*8); \
}
/*
 * Write to dest the 8 bytes obtained by de-interleaving the word pair
 * (even, odd), two bytes at a time via the deinterleave table.
 */
static void setInterleavedWordsInto8bytes(UINT8* dest, UINT32 even, UINT32 odd)
{
/* d0..d3 are the temporaries the macro below assigns via token pasting */
UINT16 d0, d1, d2, d3;
setInterleavedWordsInto2bytes(dest, even, odd, 0)
setInterleavedWordsInto2bytes(dest, even, odd, 1)
setInterleavedWordsInto2bytes(dest, even, odd, 2)
setInterleavedWordsInto2bytes(dest, even, odd, 3)
}
/*
 * De-interleave the first laneCount lanes of state into data (8 bytes per
 * lane).  state is read as an array of UINT32, two words per lane.
 * Expands to a braced block with its own loop counter.
 */
#define extractLanes(laneCount, state, data) \
{ \
int i; \
for(i=0; i<(laneCount); i++) \
setInterleavedWordsInto8bytes(data+i*8, ((UINT32*)state)[i*2], ((UINT32*)state)[i*2+1]); \
}
#else /* No interleaving tables */
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
/* Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
/*
 * XOR rateInLanes lanes of input into state without lookup tables.
 * For each lane, two 32-bit words x0 and x1 are read from input; each goes
 * through four swap steps (masks 0x22222222, 0x0C0C0C0C, 0x00F000F0,
 * 0x0000FF00).  The low halves of x0/x1 are then XORed into the first state
 * word of the lane and the high halves into the second.
 * NOTE(review): input is read through a UINT32 pointer, so this presumably
 * relies on the platform tolerating unaligned 32-bit loads - confirm.
 */
#define xorInterleavedLE(rateInLanes, state, input) \
{ \
const UINT32 * pI = (const UINT32 *)input; \
UINT32 * pS = state; \
UINT32 t, x0, x1; \
int i; \
for (i = (rateInLanes)-1; i >= 0; --i) \
{ \
x0 = *(pI++); \
t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1); \
t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2); \
t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4); \
t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8); \
x1 = *(pI++); \
t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1); \
t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2); \
t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4); \
t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8); \
*(pS++) ^= (UINT16)x0 | (x1 << 16); \
*(pS++) ^= (x0 >> 16) | (x1 & 0xFFFF0000); \
} \
}
/* Little-endian builds absorb lanes with the table-free routine above. */
#define xorLanesIntoState(laneCount, state, input) \
xorInterleavedLE(laneCount, state, input)
#else /* (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN) */
/* Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
/*
 * Return x after five swap steps applied with shift distances 1, 2, 4, 8
 * and 16 in that order; fromInterleaving applies the same steps in the
 * reverse order.
 * NOTE(review): unlike its neighbours this function is not declared static.
 */
UINT64 toInterleaving(UINT64 x)
{
    UINT64 delta;

    delta = (x ^ (x >> 1)) & 0x2222222222222222ULL;
    x ^= delta ^ (delta << 1);
    delta = (x ^ (x >> 2)) & 0x0C0C0C0C0C0C0C0CULL;
    x ^= delta ^ (delta << 2);
    delta = (x ^ (x >> 4)) & 0x00F000F000F000F0ULL;
    x ^= delta ^ (delta << 4);
    delta = (x ^ (x >> 8)) & 0x0000FF000000FF00ULL;
    x ^= delta ^ (delta << 8);
    delta = (x ^ (x >> 16)) & 0x00000000FFFF0000ULL;
    x ^= delta ^ (delta << 16);
    return x;
}
/*
 * XOR one 8-byte lane into the word pair evenAndOdd[0..1].
 * The lane is assembled from source[0] (least significant byte) to
 * source[7], passed through toInterleaving, and its low and high 32 bits
 * are XORed into evenAndOdd[0] and evenAndOdd[1] respectively.
 */
static void xor8bytesIntoInterleavedWords(UINT32* evenAndOdd, const UINT8* source)
{
    /* This can be optimized */
    UINT64 lane = 0;
    UINT64 interleaved;
    unsigned int k;

    for (k = 0; k < 8; k++)
        lane |= ((UINT64)source[k]) << (8*k);
    interleaved = toInterleaving(lane);
    evenAndOdd[0] ^= (UINT32)interleaved;
    evenAndOdd[1] ^= (UINT32)(interleaved >> 32);
}
/*
 * XOR laneCount 8-byte lanes of input into state, one lane per pair of
 * 32-bit state words.  Expands to a braced block with its own loop counter.
 */
#define xorLanesIntoState(laneCount, state, input) \
{ \
int i; \
for(i=0; i<(laneCount); i++) \
xor8bytesIntoInterleavedWords(state+i*2, input+i*8); \
}
#endif /* Endianness */
/* Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
/*
 * Return x after five swap steps applied with shift distances 16, 8, 4, 2
 * and 1 in that order, i.e. the steps of toInterleaving in reverse.
 * NOTE(review): unlike its neighbours this function is not declared static.
 */
UINT64 fromInterleaving(UINT64 x)
{
    UINT64 delta;

    delta = (x ^ (x >> 16)) & 0x00000000FFFF0000ULL;
    x ^= delta ^ (delta << 16);
    delta = (x ^ (x >> 8)) & 0x0000FF000000FF00ULL;
    x ^= delta ^ (delta << 8);
    delta = (x ^ (x >> 4)) & 0x00F000F000F000F0ULL;
    x ^= delta ^ (delta << 4);
    delta = (x ^ (x >> 2)) & 0x0C0C0C0C0C0C0C0CULL;
    x ^= delta ^ (delta << 2);
    delta = (x ^ (x >> 1)) & 0x2222222222222222ULL;
    x ^= delta ^ (delta << 1);
    return x;
}
/*
 * De-interleave the word pair evenAndOdd[0..1] and write the resulting
 * 8-byte lane to dest, least significant byte first.
 *
 * dest is an arbitrary byte pointer and evenAndOdd is only known to be a
 * UINT32 pointer, so the little-endian path copies through memcpy instead
 * of dereferencing them as UINT64 (which would be an unaligned, aliasing
 * access).  The bytes produced are the same.
 */
static void setInterleavedWordsInto8bytes(UINT8* dest, UINT32* evenAndOdd)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    UINT64 evenAndOddWord;
    UINT64 destWord;

    memcpy(&evenAndOddWord, evenAndOdd, sizeof(evenAndOddWord));
    destWord = fromInterleaving(evenAndOddWord);
    memcpy(dest, &destWord, sizeof(destWord));
#else /* (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN) */
    /* This can be optimized */
    UINT64 evenAndOddWord = (UINT64)evenAndOdd[0] ^ ((UINT64)evenAndOdd[1] << 32);
    UINT64 destWord = fromInterleaving(evenAndOddWord);
    dest[0] = destWord & 0xFF;
    dest[1] = (destWord >> 8) & 0xFF;
    dest[2] = (destWord >> 16) & 0xFF;
    dest[3] = (destWord >> 24) & 0xFF;
    dest[4] = (destWord >> 32) & 0xFF;
    dest[5] = (destWord >> 40) & 0xFF;
    dest[6] = (destWord >> 48) & 0xFF;
    dest[7] = (destWord >> 56) & 0xFF;
#endif /* Endianness */
}
/*
 * De-interleave the first laneCount lanes of state into data (8 bytes per
 * lane).  state is addressed as UINT32 words, two per lane.
 * Expands to a braced block with its own loop counter.
 */
#define extractLanes(laneCount, state, data) \
{ \
int i; \
for(i=0; i<(laneCount); i++) \
setInterleavedWordsInto8bytes(data+i*8, (UINT32*)state+i*2); \
}
#endif /* With or without interleaving tables */
/*
 * ROL32(a, offset): rotate the 32-bit value a left by offset bits.
 * Uses a compiler intrinsic where one is known, otherwise shifts.
 * In the generic form both arguments are fully parenthesised so that
 * expression arguments are cast and subtracted as a whole; offset must be
 * in 1..31 there (a shift by 32 is undefined).
 */
#if defined(_MSC_VER)
#define ROL32(a, offset) _rotl(a, offset)
#elif (defined (__arm__) && defined(__ARMCC_VERSION))
#define ROL32(a, offset) __ror(a, 32-(offset))
#else
#define ROL32(a, offset) ((((UINT32)(a)) << (offset)) ^ (((UINT32)(a)) >> (32-(offset))))
#endif
#include "KeccakF-1600-unrolling.macros"
#include "KeccakF-1600-32.macros"
#if (UseSchedule == 3)
#ifdef UseBebigokimisa
#error "No lane complementing with schedule 3."
#endif
#if (Unrolling != 2)
#error "Only unrolling 2 is supported by schedule 3."
#endif
/*
 * Schedule 3: apply the permutation to state in place.  The whole body is
 * the `rounds` macro, which is not defined in this part of the file
 * (presumably it comes from the included .macros files).
 */
static void KeccakPermutationOnWords(UINT32 *state)
{
rounds
}
/*
 * Schedule 3: XOR laneCount 8-byte lanes of input into state, then run the
 * `rounds` macro (defined outside this part of the file).
 */
static void KeccakPermutationOnWordsAfterXoring(UINT32 *state, const UINT8 *input, unsigned int laneCount)
{
xorLanesIntoState(laneCount, state, input)
rounds
}
#ifdef ProvideFast576
/* Schedule 3: XOR 9 lanes (576 bits) of input into state, then run `rounds`. */
static void KeccakPermutationOnWordsAfterXoring576bits(UINT32 *state, const UINT8 *input)
{
xorLanesIntoState(9, state, input)
rounds
}
#endif
#ifdef ProvideFast832
/* Schedule 3: XOR 13 lanes (832 bits) of input into state, then run `rounds`. */
static void KeccakPermutationOnWordsAfterXoring832bits(UINT32 *state, const UINT8 *input)
{
xorLanesIntoState(13, state, input)
rounds
}
#endif
#ifdef ProvideFast1024
/* Schedule 3: XOR 16 lanes (1024 bits) of input into state, then run `rounds`. */
static void KeccakPermutationOnWordsAfterXoring1024bits(UINT32 *state, const UINT8 *input)
{
xorLanesIntoState(16, state, input)
rounds
}
#endif
#ifdef ProvideFast1088
/* Schedule 3: XOR 17 lanes (1088 bits) of input into state, then run `rounds`. */
static void KeccakPermutationOnWordsAfterXoring1088bits(UINT32 *state, const UINT8 *input)
{
xorLanesIntoState(17, state, input)
rounds
}
#endif
#ifdef ProvideFast1152
/* Schedule 3: XOR 18 lanes (1152 bits) of input into state, then run `rounds`. */
static void KeccakPermutationOnWordsAfterXoring1152bits(UINT32 *state, const UINT8 *input)
{
xorLanesIntoState(18, state, input)
rounds
}
#endif
#ifdef ProvideFast1344
/* Schedule 3: XOR 21 lanes (1344 bits) of input into state, then run `rounds`. */
static void KeccakPermutationOnWordsAfterXoring1344bits(UINT32 *state, const UINT8 *input)
{
xorLanesIntoState(21, state, input)
rounds
}
#endif
#else /* (Schedule != 3) */
/*
 * Apply the permutation to state in place.
 * declareABCDE, copyFromState and rounds are macros; declareABCDE and
 * rounds are not defined in this part of the file.  The counter `i` is
 * declared only when the rounds are not fully unrolled.
 */
static void KeccakPermutationOnWords(UINT32 *state)
{
declareABCDE
#if (Unrolling != 24)
unsigned int i;
#endif
copyFromState(A, state)
rounds
}
/*
 * XOR laneCount 8-byte lanes of input into state, load the state into the
 * A variables and run the `rounds` macro (defined outside this part of the
 * file).
 */
static void KeccakPermutationOnWordsAfterXoring(UINT32 *state, const UINT8 *input, unsigned int laneCount)
{
declareABCDE
unsigned int i;
xorLanesIntoState(laneCount, state, input)
copyFromState(A, state)
rounds
}
#ifdef ProvideFast576
/* XOR 9 lanes (576 bits) of input into state, load it into the A
   variables and run `rounds`. */
static void KeccakPermutationOnWordsAfterXoring576bits(UINT32 *state, const UINT8 *input)
{
declareABCDE
unsigned int i;
xorLanesIntoState(9, state, input)
copyFromState(A, state)
rounds
}
#endif
#ifdef ProvideFast832
/* XOR 13 lanes (832 bits) of input into state, load it into the A
   variables and run `rounds`. */
static void KeccakPermutationOnWordsAfterXoring832bits(UINT32 *state, const UINT8 *input)
{
declareABCDE
unsigned int i;
xorLanesIntoState(13, state, input)
copyFromState(A, state)
rounds
}
#endif
#ifdef ProvideFast1024
/* XOR 16 lanes (1024 bits) of input into state, load it into the A
   variables and run `rounds`. */
static void KeccakPermutationOnWordsAfterXoring1024bits(UINT32 *state, const UINT8 *input)
{
declareABCDE
unsigned int i;
xorLanesIntoState(16, state, input)
copyFromState(A, state)
rounds
}
#endif
#ifdef ProvideFast1088
/* XOR 17 lanes (1088 bits) of input into state, load it into the A
   variables and run `rounds`. */
static void KeccakPermutationOnWordsAfterXoring1088bits(UINT32 *state, const UINT8 *input)
{
declareABCDE
unsigned int i;
xorLanesIntoState(17, state, input)
copyFromState(A, state)
rounds
}
#endif
#ifdef ProvideFast1152
/* XOR 18 lanes (1152 bits) of input into state, load it into the A
   variables and run `rounds`. */
static void KeccakPermutationOnWordsAfterXoring1152bits(UINT32 *state, const UINT8 *input)
{
declareABCDE
unsigned int i;
xorLanesIntoState(18, state, input)
copyFromState(A, state)
rounds
}
#endif
#ifdef ProvideFast1344
/* XOR 21 lanes (1344 bits) of input into state, load it into the A
   variables and run `rounds`. */
static void KeccakPermutationOnWordsAfterXoring1344bits(UINT32 *state, const UINT8 *input)
{
declareABCDE
unsigned int i;
xorLanesIntoState(21, state, input)
copyFromState(A, state)
rounds
}
#endif
#endif
/*
 * One-time setup: builds the interleave lookup tables when
 * UseInterleaveTables is defined, otherwise does nothing.
 */
static void KeccakInitialize()
{
#ifdef UseInterleaveTables
buildInterleaveTables();
#endif
}
/*
 * Reset the 200-byte state to all zero bits.  With UseBebigokimisa the
 * twelve 32-bit words listed below are then set to all ones.
 */
static void KeccakInitializeState(unsigned char *state)
{
#ifdef UseBebigokimisa
    static const unsigned char complementedWords[12] = {
        2, 3, 4, 5, 16, 17, 24, 25, 34, 35, 40, 41
    };
    UINT32 *words = (UINT32*)state;
    unsigned int k;
#endif

    memset(state, 0, 200);
#ifdef UseBebigokimisa
    for (k = 0; k < 12; k++)
        words[complementedWords[k]] = ~(UINT32)0;
#endif
}
/* Apply the permutation to the byte-addressed state in place by viewing it
   as an array of 32-bit words. */
static void KeccakPermutation(unsigned char *state)
{
/* We assume the state is always stored as interleaved 32-bit words */
KeccakPermutationOnWords((UINT32*)state);
}
#ifdef ProvideFast576
/* Absorb a 576-bit block from data: forwards to the word-level routine
   with state viewed as 32-bit words. */
static void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationOnWordsAfterXoring576bits((UINT32*)state, data);
}
#endif
#ifdef ProvideFast832
/* Absorb an 832-bit block from data: forwards to the word-level routine
   with state viewed as 32-bit words. */
static void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationOnWordsAfterXoring832bits((UINT32*)state, data);
}
#endif
#ifdef ProvideFast1024
/* Absorb a 1024-bit block from data: forwards to the word-level routine
   with state viewed as 32-bit words. */
static void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationOnWordsAfterXoring1024bits((UINT32*)state, data);
}
#endif
#ifdef ProvideFast1088
/* Absorb a 1088-bit block from data: forwards to the word-level routine
   with state viewed as 32-bit words. */
static void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationOnWordsAfterXoring1088bits((UINT32*)state, data);
}
#endif
#ifdef ProvideFast1152
/* Absorb a 1152-bit block from data: forwards to the word-level routine
   with state viewed as 32-bit words. */
static void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationOnWordsAfterXoring1152bits((UINT32*)state, data);
}
#endif
#ifdef ProvideFast1344
/* Absorb a 1344-bit block from data: forwards to the word-level routine
   with state viewed as 32-bit words. */
static void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationOnWordsAfterXoring1344bits((UINT32*)state, data);
}
#endif
/* Absorb laneCount 8-byte lanes from data: forwards to the generic
   word-level routine with state viewed as 32-bit words. */
static void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount)
{
KeccakPermutationOnWordsAfterXoring((UINT32*)state, data, laneCount);
}
#ifdef ProvideFast1024
/*
 * Copy the first 16 lanes (1024 bits) of state to data in de-interleaved
 * form.  With UseBebigokimisa, the output words at the same indices that
 * KeccakInitializeState sets to all ones (those below 32) are inverted.
 */
static void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
{
extractLanes(16, state, data)
#ifdef UseBebigokimisa
((UINT32*)data)[ 2] = ~((UINT32*)data)[ 2];
((UINT32*)data)[ 3] = ~((UINT32*)data)[ 3];
((UINT32*)data)[ 4] = ~((UINT32*)data)[ 4];
((UINT32*)data)[ 5] = ~((UINT32*)data)[ 5];
((UINT32*)data)[16] = ~((UINT32*)data)[16];
((UINT32*)data)[17] = ~((UINT32*)data)[17];
((UINT32*)data)[24] = ~((UINT32*)data)[24];
((UINT32*)data)[25] = ~((UINT32*)data)[25];
#endif
}
#endif
/*
 * Copy the first laneCount lanes of state to data in de-interleaved form.
 * With UseBebigokimisa, every extracted lane whose index is in
 * {1, 2, 8, 12, 17, 20} has both of its 32-bit output words inverted.
 */
static void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
{
#ifdef UseBebigokimisa
    static const unsigned char complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
    UINT32 *words = (UINT32*)data;
    unsigned int k, lane;
#endif

    extractLanes(laneCount, state, data)
#ifdef UseBebigokimisa
    /* the list is ascending, so stop at the first lane not extracted */
    for (k = 0; k < 6 && laneCount > complementedLanes[k]; k++) {
        lane = complementedLanes[k];
        words[2*lane] = ~words[2*lane];
        words[2*lane+1] = ~words[2*lane+1];
    }
#endif
}

View File

@ -0,0 +1,9 @@
/*
#define Unrolling 24
#define UseBebigokimisa
#define UseSSE
#define UseOnlySIMD64
#define UseMMX
#define UseSHLD
#define UseXOP
*/

View File

@ -0,0 +1,508 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <string.h>
/* #include "brg_endian.h" */
#include "KeccakF-1600-opt64-settings.h"
#include "KeccakF-1600-interface.h"
typedef unsigned char UINT8;
/* typedef unsigned long long int UINT64; */
/* ALIGN: request 32-byte alignment for a declaration where the compiler
   offers a way to ask for it; expands to nothing otherwise. */
#if defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#else
#define ALIGN
#endif
/*
 * SSE variant: lanes are held in __m128i values and the operations used by
 * the round macros are mapped onto _mm_* intrinsics.  Depending on
 * UseOnlySIMD64 either the simd64 or the simd128 round macros are included.
 */
#if defined(UseSSE)
#include <x86intrin.h>
typedef __m128i V64;
typedef __m128i V128;
typedef union {
V128 v128;
UINT64 v64[2];
} V6464;
#define ANDnu64(a, b) _mm_andnot_si128(a, b)
#define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
#define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
#define ROL64(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
#define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
#define XOR64(a, b) _mm_xor_si128(a, b)
#define XOReq64(a, b) a = _mm_xor_si128(a, b)
#define SHUFFLEBYTES128(a, b) _mm_shuffle_epi8(a, b)
#define ANDnu128(a, b) _mm_andnot_si128(a, b)
#define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
#define CONST128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
#define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
#define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
#define XOR128(a, b) _mm_xor_si128(a, b)
#define XOReq128(a, b) a = _mm_xor_si128(a, b)
#define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
#define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
#define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
#define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
#define ZERO128() _mm_setzero_si128()
#ifdef UseOnlySIMD64
#include "KeccakF-1600-simd64.macros"
#else
/* Byte-shuffle control constant; presumably consumed by the simd128 macros
   for the rotations by 8 and 56 - confirm there. */
ALIGN const UINT64 rho8_56[2] = {0x0605040302010007, 0x080F0E0D0C0B0A09};
#include "KeccakF-1600-simd128.macros"
#endif
#ifdef UseBebigokimisa
#error "UseBebigokimisa cannot be used in combination with UseSSE"
#endif
/*
 * XOP variant: as the SSE variant, but rotations use the _mm_roti_epi64 /
 * _mm_rot_epi64 intrinsics, with per-lane rotation pairs supplied by the
 * rot_<r1>_<r2> constants below.
 */
#elif defined(UseXOP)
#include <x86intrin.h>
typedef __m128i V64;
typedef __m128i V128;
#define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
#define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
#define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
#define XOR64(a, b) _mm_xor_si128(a, b)
#define XOReq64(a, b) a = _mm_xor_si128(a, b)
#define ANDnu128(a, b) _mm_andnot_si128(a, b)
#define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
#define CONST128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
#define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
#define XOR128(a, b) _mm_xor_si128(a, b)
#define XOReq128(a, b) a = _mm_xor_si128(a, b)
#define ZERO128() _mm_setzero_si128()
#define SWAP64(a) _mm_shuffle_epi32(a, 0x4E)
#define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
#define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
#define GET64LOHI(a, b) ((__m128i)_mm_blend_pd((__m128d)a, (__m128d)b, 2))
#define GET64HILO(a, b) SWAP64(GET64LOHI(b, a))
#define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
#define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
#define ROL6464same(a, o) _mm_roti_epi64(a, o)
/* Rotate the two 64-bit halves by the pair of amounts stored in rot_r1_r2. */
#define ROL6464(a, r1, r2) _mm_rot_epi64(a, CONST128( rot_##r1##_##r2 ))
ALIGN const UINT64 rot_0_20[2] = { 0, 20};
ALIGN const UINT64 rot_44_3[2] = {44, 3};
ALIGN const UINT64 rot_43_45[2] = {43, 45};
ALIGN const UINT64 rot_21_61[2] = {21, 61};
ALIGN const UINT64 rot_14_28[2] = {14, 28};
ALIGN const UINT64 rot_1_36[2] = { 1, 36};
ALIGN const UINT64 rot_6_10[2] = { 6, 10};
ALIGN const UINT64 rot_25_15[2] = {25, 15};
ALIGN const UINT64 rot_8_56[2] = { 8, 56};
ALIGN const UINT64 rot_18_27[2] = {18, 27};
ALIGN const UINT64 rot_62_55[2] = {62, 55};
ALIGN const UINT64 rot_39_41[2] = {39, 41};
#if defined(UseSimulatedXOP)
/* For debugging purposes, when XOP is not available */
/* NOTE(review): ROL6464 below calls ROL64, which this XOP branch does not
   define before this point - confirm the simulated path still builds. */
#undef ROL6464
#undef ROL6464same
#define ROL6464same(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
V128 ROL6464(V128 a, int r0, int r1)
{
V128 a0 = ROL64(a, r0);
V128 a1 = COPY64HI2LO(ROL64(a, r1));
return GET64LOLO(a0, a1);
}
#endif
#include "KeccakF-1600-xop.macros"
#ifdef UseBebigokimisa
#error "UseBebigokimisa cannot be used in combination with UseXOP"
#endif
/*
 * MMX variant: lanes are held in __m64 values.  The load/store macros use a
 * pointer cast on MSVC / Intel compilers and a value cast elsewhere.
 */
#elif defined(UseMMX)
#include <mmintrin.h>
typedef __m64 V64;
#define ANDnu64(a, b) _mm_andnot_si64(a, b)
#if (defined(_MSC_VER) || defined (__INTEL_COMPILER))
#define LOAD64(a) *(V64*)&(a)
#define CONST64(a) *(V64*)&(a)
#define STORE64(a, b) *(V64*)&(a) = b
#else
#define LOAD64(a) (V64)a
#define CONST64(a) (V64)a
#define STORE64(a, b) a = (UINT64)b
#endif
#define ROL64(a, o) _mm_or_si64(_mm_slli_si64(a, o), _mm_srli_si64(a, 64-(o)))
#define XOR64(a, b) _mm_xor_si64(a, b)
#define XOReq64(a, b) a = _mm_xor_si64(a, b)
#include "KeccakF-1600-simd64.macros"
#ifdef UseBebigokimisa
#error "UseBebigokimisa cannot be used in combination with UseMMX"
#endif
#else
/*
 * ROL64(a, offset): rotate the 64-bit value a left by offset bits.
 *   - MSVC: the _rotl64 intrinsic;
 *   - UseSHLD: an inline-asm "shld" (GCC statement expression; the count
 *     must be a compile-time constant because of the "i" constraint);
 *   - otherwise: two shifts.  Both arguments are fully parenthesised so
 *     that an expression passed as offset is subtracted from 64 as a whole.
 *     offset must be in 1..63 here (a shift by 64 is undefined).
 */
#if defined(_MSC_VER)
#define ROL64(a, offset) _rotl64(a, offset)
#elif defined(UseSHLD)
#define ROL64(x,N) ({ \
    register UINT64 __out; \
    register UINT64 __in = (x); \
    __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
    __out; \
    })
#else
#define ROL64(a, offset) ((((UINT64)(a)) << (offset)) ^ (((UINT64)(a)) >> (64-(offset))))
#endif
#include "KeccakF-1600-64.macros"
#endif
#include "KeccakF-1600-unrolling.macros"
/*
 * Apply the permutation to the 25-lane state in place.
 * declareABCDE and rounds are macros defined outside this part of the file;
 * the counter `i` is declared only when the rounds are not fully unrolled.
 * The MMX build calls _mm_empty() before returning.
 */
static void KeccakPermutationOnWords(UINT64 *state)
{
declareABCDE
#if (Unrolling != 24)
unsigned int i;
#endif
copyFromState(A, state)
rounds
#if defined(UseMMX)
_mm_empty();
#endif
}
/*
 * XOR the first laneCount lanes of input into state, then load the state
 * into the A variables and run the `rounds` macro (defined outside this
 * part of the file).  The MMX build calls _mm_empty() before returning.
 */
static void KeccakPermutationOnWordsAfterXoring(UINT64 *state, const UINT64 *input, unsigned int laneCount)
{
declareABCDE
#if (Unrolling != 24)
unsigned int i;
#endif
unsigned int j;
/* unlike the fixed-size variants, the XOR is applied to state itself */
for(j=0; j<laneCount; j++)
state[j] ^= input[j];
copyFromState(A, state)
rounds
#if defined(UseMMX)
_mm_empty();
#endif
}
#ifdef ProvideFast576
/* Load state XOR the first 9 lanes (576 bits) of input into the A
   variables, then run `rounds`. */
static void KeccakPermutationOnWordsAfterXoring576bits(UINT64 *state, const UINT64 *input)
{
declareABCDE
#if (Unrolling != 24)
unsigned int i;
#endif
copyFromStateAndXor576bits(A, state, input)
rounds
#if defined(UseMMX)
_mm_empty();
#endif
}
#endif
#ifdef ProvideFast832
/* Load state XOR the first 13 lanes (832 bits) of input into the A
   variables, then run `rounds`. */
static void KeccakPermutationOnWordsAfterXoring832bits(UINT64 *state, const UINT64 *input)
{
declareABCDE
#if (Unrolling != 24)
unsigned int i;
#endif
copyFromStateAndXor832bits(A, state, input)
rounds
#if defined(UseMMX)
_mm_empty();
#endif
}
#endif
#ifdef ProvideFast1024
/* Load state XOR the first 16 lanes (1024 bits) of input into the A
   variables, then run `rounds`. */
static void KeccakPermutationOnWordsAfterXoring1024bits(UINT64 *state, const UINT64 *input)
{
declareABCDE
#if (Unrolling != 24)
unsigned int i;
#endif
copyFromStateAndXor1024bits(A, state, input)
rounds
#if defined(UseMMX)
_mm_empty();
#endif
}
#endif
#ifdef ProvideFast1088
/* Load state XOR the first 17 lanes (1088 bits) of input into the A
   variables, then run `rounds`. */
static void KeccakPermutationOnWordsAfterXoring1088bits(UINT64 *state, const UINT64 *input)
{
declareABCDE
#if (Unrolling != 24)
unsigned int i;
#endif
copyFromStateAndXor1088bits(A, state, input)
rounds
#if defined(UseMMX)
_mm_empty();
#endif
}
#endif
#ifdef ProvideFast1152
/* Load state XOR the first 18 lanes (1152 bits) of input into the A
   variables, then run `rounds`. */
static void KeccakPermutationOnWordsAfterXoring1152bits(UINT64 *state, const UINT64 *input)
{
declareABCDE
#if (Unrolling != 24)
unsigned int i;
#endif
copyFromStateAndXor1152bits(A, state, input)
rounds
#if defined(UseMMX)
_mm_empty();
#endif
}
#endif
#ifdef ProvideFast1344
/*
 * Fixed-size variant of KeccakPermutationOnWordsAfterXoring(): the XOR of
 * input into state is done by the copyFromStateAndXor1344bits macro while
 * the state is being loaded, then `rounds` is run.
 *
 * NOTE(review): 1344 bits is 21 64-bit words, so input is expected to hold
 * at least 21 words -- confirm against the macro in use.
 */
static void KeccakPermutationOnWordsAfterXoring1344bits(UINT64 *state, const UINT64 *input)
{
declareABCDE
#if (Unrolling != 24)
/* NOTE(review): presumably the round counter used by `rounds` -- confirm. */
unsigned int i;
#endif
copyFromStateAndXor1344bits(A, state, input)
rounds
#if defined(UseMMX)
/* _mm_empty() is the EMMS intrinsic: clear the MMX state before returning. */
_mm_empty();
#endif
}
#endif
/*
 * Global initialisation hook; this implementation has nothing to set up, so
 * the body is empty.
 *
 * The parameter list is spelled (void) so that the definition is a real
 * prototype: with empty parentheses a C compiler does not check the
 * arguments of calls to the function.
 */
static void KeccakInitialize(void)
{
}
/*
 * Reset a 200-byte state buffer to its initial value.
 *
 * state: buffer of at least 200 bytes; all of them are cleared.
 *
 * When UseBebigokimisa is defined, words 1, 2, 8, 12, 17 and 20 of the
 * buffer are then set to all-ones (the same word indices that KeccakExtract
 * inverts on the way out).
 */
static void KeccakInitializeState(unsigned char *state)
{
#ifdef UseBebigokimisa
    static const unsigned int invertedLanes[6] = { 1, 2, 8, 12, 17, 20 };
    UINT64 *lanes = (UINT64 *)state;
    unsigned int k;
#endif

    memset(state, 0, 200);
#ifdef UseBebigokimisa
    for (k = 0; k < 6; k++)
        lanes[invertedLanes[k]] = ~(UINT64)0;
#endif
}
static void KeccakPermutation(unsigned char *state)
{
/* We assume the state is always stored as words */
KeccakPermutationOnWords((UINT64*)state);
}
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
/*
 * Assemble a 64-bit word from 8 bytes, least significant byte first.
 *
 * word:  receives the assembled value.
 * bytes: the 8 source bytes; bytes[0] ends up in bits 0..7.
 *
 * This helper used to be commented out, although every non-little-endian
 * branch of the KeccakAbsorb* functions calls it, so such builds could not
 * compile.  It is compiled only for those builds, which keeps little-endian
 * builds free of an unused static function.
 */
static void fromBytesToWord(UINT64 *word, const unsigned char *bytes)
{
    unsigned int i;

    *word = 0;
    for (i = 0; i < (64/8); i++)
        *word |= (UINT64)(bytes[i]) << (8*i);
}
#endif
#ifdef ProvideFast576
/*
 * Absorb one 576-bit block (9 words of 8 bytes) of data into state.
 *
 * Little-endian builds reinterpret data as 64-bit words in place; other
 * builds first decode it word by word with fromBytesToWord().  Either way
 * the words go to KeccakPermutationOnWordsAfterXoring576bits().
 */
static void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    UINT64 lanes[9];
    unsigned int lane;

    for (lane = 0; lane < 9; lane++)
        fromBytesToWord(&lanes[lane], &data[lane * 8]);
    KeccakPermutationOnWordsAfterXoring576bits((UINT64 *)state, lanes);
#else
    KeccakPermutationOnWordsAfterXoring576bits((UINT64 *)state, (const UINT64 *)data);
#endif
}
#endif
#ifdef ProvideFast832
/*
 * Absorb one 832-bit block (13 words of 8 bytes) of data into state.
 *
 * Little-endian builds reinterpret data as 64-bit words in place; other
 * builds first decode it word by word with fromBytesToWord().  Either way
 * the words go to KeccakPermutationOnWordsAfterXoring832bits().
 */
static void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    UINT64 lanes[13];
    unsigned int lane;

    for (lane = 0; lane < 13; lane++)
        fromBytesToWord(&lanes[lane], &data[lane * 8]);
    KeccakPermutationOnWordsAfterXoring832bits((UINT64 *)state, lanes);
#else
    KeccakPermutationOnWordsAfterXoring832bits((UINT64 *)state, (const UINT64 *)data);
#endif
}
#endif
#ifdef ProvideFast1024
/*
 * Absorb one 1024-bit block (16 words of 8 bytes) of data into state.
 *
 * Little-endian builds reinterpret data as 64-bit words in place; other
 * builds first decode it word by word with fromBytesToWord().  Either way
 * the words go to KeccakPermutationOnWordsAfterXoring1024bits().
 */
static void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    UINT64 lanes[16];
    unsigned int lane;

    for (lane = 0; lane < 16; lane++)
        fromBytesToWord(&lanes[lane], &data[lane * 8]);
    KeccakPermutationOnWordsAfterXoring1024bits((UINT64 *)state, lanes);
#else
    KeccakPermutationOnWordsAfterXoring1024bits((UINT64 *)state, (const UINT64 *)data);
#endif
}
#endif
#ifdef ProvideFast1088
/*
 * Absorb one 1088-bit block (17 words of 8 bytes) of data into state.
 *
 * Little-endian builds reinterpret data as 64-bit words in place; other
 * builds first decode it word by word with fromBytesToWord().  Either way
 * the words go to KeccakPermutationOnWordsAfterXoring1088bits().
 */
static void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    UINT64 lanes[17];
    unsigned int lane;

    for (lane = 0; lane < 17; lane++)
        fromBytesToWord(&lanes[lane], &data[lane * 8]);
    KeccakPermutationOnWordsAfterXoring1088bits((UINT64 *)state, lanes);
#else
    KeccakPermutationOnWordsAfterXoring1088bits((UINT64 *)state, (const UINT64 *)data);
#endif
}
#endif
#ifdef ProvideFast1152
/*
 * Absorb one 1152-bit block (18 words of 8 bytes) of data into state.
 *
 * Little-endian builds reinterpret data as 64-bit words in place; other
 * builds first decode it word by word with fromBytesToWord().  Either way
 * the words go to KeccakPermutationOnWordsAfterXoring1152bits().
 */
static void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    UINT64 lanes[18];
    unsigned int lane;

    for (lane = 0; lane < 18; lane++)
        fromBytesToWord(&lanes[lane], &data[lane * 8]);
    KeccakPermutationOnWordsAfterXoring1152bits((UINT64 *)state, lanes);
#else
    KeccakPermutationOnWordsAfterXoring1152bits((UINT64 *)state, (const UINT64 *)data);
#endif
}
#endif
#ifdef ProvideFast1344
/*
 * Absorb one 1344-bit block (21 words of 8 bytes) of data into state.
 *
 * Little-endian builds reinterpret data as 64-bit words in place; other
 * builds first decode it word by word with fromBytesToWord().  Either way
 * the words go to KeccakPermutationOnWordsAfterXoring1344bits().
 */
static void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    UINT64 lanes[21];
    unsigned int lane;

    for (lane = 0; lane < 21; lane++)
        fromBytesToWord(&lanes[lane], &data[lane * 8]);
    KeccakPermutationOnWordsAfterXoring1344bits((UINT64 *)state, lanes);
#else
    KeccakPermutationOnWordsAfterXoring1344bits((UINT64 *)state, (const UINT64 *)data);
#endif
}
#endif
/*
 * Absorb laneCount 64-bit words (laneCount * 8 bytes) of data into state.
 *
 * state:     the state buffer, reinterpreted as 64-bit words.
 * data:      the bytes to absorb.
 * laneCount: number of 8-byte words to take from data; the scratch array
 *            used on non-little-endian builds has room for 25.
 *
 * Little-endian builds reinterpret data as 64-bit words in place; other
 * builds first decode it word by word with fromBytesToWord().  Either way
 * the words go to KeccakPermutationOnWordsAfterXoring().
 */
static void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    UINT64 lanes[25];
    unsigned int lane;

    for (lane = 0; lane < laneCount; lane++)
        fromBytesToWord(&lanes[lane], &data[lane * 8]);
    KeccakPermutationOnWordsAfterXoring((UINT64 *)state, lanes, laneCount);
#else
    KeccakPermutationOnWordsAfterXoring((UINT64 *)state, (const UINT64 *)data, laneCount);
#endif
}
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
/*
 * Split a 64-bit word into 8 bytes, least significant byte first.
 *
 * bytes: receives the 8 bytes; bytes[0] gets bits 0..7 of word.
 * word:  the value to serialise.
 *
 * This helper used to be commented out, although every non-little-endian
 * branch of the KeccakExtract* functions calls it, so such builds could not
 * compile.  It is compiled only for those builds, which keeps little-endian
 * builds free of an unused static function.
 */
static void fromWordToBytes(unsigned char *bytes, const UINT64 word)
{
    unsigned int i;

    for (i = 0; i < (64/8); i++)
        bytes[i] = (unsigned char)((word >> (8*i)) & 0xFF);
}
#endif
#ifdef ProvideFast1024
/*
 * Copy the first 1024 bits (128 bytes, 16 words) of state into data.
 *
 * Little-endian builds copy the bytes directly; other builds serialise each
 * 64-bit word with fromWordToBytes().  When UseBebigokimisa is defined,
 * words 1, 2, 8 and 12 of the output are then bit-inverted.
 */
static void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    const UINT64 *words = (const UINT64 *)state;
    unsigned int lane;
#endif
#ifdef UseBebigokimisa
    UINT64 *out = (UINT64 *)data;
#endif

#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    for (lane = 0; lane < 16; lane++)
        fromWordToBytes(&data[lane * 8], words[lane]);
#else
    memcpy(data, state, 128);
#endif
#ifdef UseBebigokimisa
    out[ 1] = ~out[ 1];
    out[ 2] = ~out[ 2];
    out[ 8] = ~out[ 8];
    out[12] = ~out[12];
#endif
}
#endif
/*
 * Copy the first laneCount 64-bit words (laneCount * 8 bytes) of state into
 * data.
 *
 * Little-endian builds copy the bytes directly; other builds serialise each
 * 64-bit word with fromWordToBytes().  When UseBebigokimisa is defined, the
 * output words 1, 2, 8, 12, 17 and 20 are then bit-inverted, but only those
 * whose index is below laneCount.
 */
static void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    const UINT64 *words = (const UINT64 *)state;
    unsigned int lane;
#endif
#ifdef UseBebigokimisa
    static const unsigned int invertedLanes[6] = { 1, 2, 8, 12, 17, 20 };
    UINT64 *out = (UINT64 *)data;
    unsigned int k;
#endif

#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    for (lane = 0; lane < laneCount; lane++)
        fromWordToBytes(&data[lane * 8], words[lane]);
#else
    memcpy(data, state, laneCount*8);
#endif
#ifdef UseBebigokimisa
    /* The indices are increasing, so stop at the first one not extracted. */
    for (k = 0; k < 6; k++) {
        if (laneCount <= invertedLanes[k])
            break;
        out[invertedLanes[k]] = ~out[invertedLanes[k]];
    }
#endif
}

View File

@ -0,0 +1,651 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/*
 * Declare every working variable used by the round and state-copy macros of
 * this file: the A and E state variables (single words plus the two-word
 * V6464 combinations accessed through .v64[] / .v128), the B temporaries,
 * the C and D values and the Zero constant.
 * The types V64, V128 and V6464 are defined elsewhere.
 */
#define declareABCDE \
V6464 Abage, Abegi, Abigo, Abogu, Abuga; \
V6464 Akame, Akemi, Akimo, Akomu, Akuma; \
V6464 Abae, Abio, Agae, Agio, Akae, Akio, Amae, Amio, Asae, Asio; \
V64 Aba, Abe, Abi, Abo, Abu; \
V64 Aga, Age, Agi, Ago, Agu; \
V64 Aka, Ake, Aki, Ako, Aku; \
V64 Ama, Ame, Ami, Amo, Amu; \
V64 Asa, Ase, Asi, Aso, Asu; \
V128 Bbage, Bbegi, Bbigo, Bbogu, Bbuga; \
V128 Bkame, Bkemi, Bkimo, Bkomu, Bkuma; \
V64 Bba, Bbe, Bbi, Bbo, Bbu; \
V64 Bga, Bge, Bgi, Bgo, Bgu; \
V64 Bka, Bke, Bki, Bko, Bku; \
V64 Bma, Bme, Bmi, Bmo, Bmu; \
V64 Bsa, Bse, Bsi, Bso, Bsu; \
V128 Cae, Cei, Cio, Cou, Cua, Dei, Dou; \
V64 Ca, Ce, Ci, Co, Cu; \
V64 Da, De, Di, Do, Du; \
V6464 Ebage, Ebegi, Ebigo, Ebogu, Ebuga; \
V6464 Ekame, Ekemi, Ekimo, Ekomu, Ekuma; \
V64 Eba, Ebe, Ebi, Ebo, Ebu; \
V64 Ega, Ege, Egi, Ego, Egu; \
V64 Eka, Eke, Eki, Eko, Eku; \
V64 Ema, Eme, Emi, Emo, Emu; \
V64 Esa, Ese, Esi, Eso, Esu; \
V128 Zero;
/* Expands to nothing in this file: the copyFromState* macros below already
   accumulate the XOR of the loaded words into Cae, Cio and Cu. */
#define prepareTheta
/*
 * Derive the five D values from Cae, Cio and Cu.
 * Dei and Dou are computed two words at a time; De/Do are taken from them
 * directly and Di/Du from their upper halves via COPY64HI2LO.
 * NOTE(review): assumes ROL64in128 rotates each 64-bit half independently
 * and GET64LOLO(a, b) packs the low halves of a and b -- confirm against the
 * helper definitions, which are not in this file.
 */
#define computeD \
Cua = GET64LOLO(Cu, Cae); \
Dei = XOR128(Cae, ROL64in128(Cio, 1)); \
Dou = XOR128(Cio, ROL64in128(Cua, 1)); \
Da = XOR64(Cu, ROL64in128(COPY64HI2LO(Cae), 1)); \
De = Dei; \
Di = COPY64HI2LO(Dei); \
Do = Dou; \
Du = COPY64HI2LO(Dou);
/* --- Theta Rho Pi Chi Iota Prepare-theta */
/* --- 64-bit lanes mapped to 64-bit and 128-bit words */
/*
 * One round: reads the A-prefixed variables (modifying them in place),
 * writes the E-prefixed ones, XORs KeccakF1600RoundConstants[i] into
 * E##bage, and leaves the column XORs of the new state in Cae, Cio and Cu
 * for the next round's computeD.
 *
 * The macro is a plain statement list and must be invoked where
 * declareABCDE is in scope.  The final line deliberately carries no
 * line-continuation backslash, so the definition can never absorb whatever
 * line follows it.
 */
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
    computeD \
    \
    A##ba = LOAD64(A##bage.v64[0]); \
    XOReq64(A##ba, Da); \
    Bba = A##ba; \
    XOReq64(A##gu, Du); \
    Bge = ROL64(A##gu, 20); \
    Bbage = GET64LOLO(Bba, Bge); \
    A##ge = LOAD64(A##bage.v64[1]); \
    XOReq64(A##ge, De); \
    Bbe = ROL64(A##ge, 44); \
    A##ka = LOAD64(A##kame.v64[0]); \
    XOReq64(A##ka, Da); \
    Bgi = ROL64(A##ka, 3); \
    Bbegi = GET64LOLO(Bbe, Bgi); \
    XOReq64(A##ki, Di); \
    Bbi = ROL64(A##ki, 43); \
    A##me = LOAD64(A##kame.v64[1]); \
    XOReq64(A##me, De); \
    Bgo = ROL64(A##me, 45); \
    Bbigo = GET64LOLO(Bbi, Bgo); \
    E##bage.v128 = XOR128(Bbage, ANDnu128(Bbegi, Bbigo)); \
    XOReq128(E##bage.v128, CONST64(KeccakF1600RoundConstants[i])); \
    Cae = E##bage.v128; \
    XOReq64(A##mo, Do); \
    Bbo = ROL64(A##mo, 21); \
    XOReq64(A##si, Di); \
    Bgu = ROL64(A##si, 61); \
    Bbogu = GET64LOLO(Bbo, Bgu); \
    E##begi.v128 = XOR128(Bbegi, ANDnu128(Bbigo, Bbogu)); \
    Cei = E##begi.v128; \
    XOReq64(A##su, Du); \
    Bbu = ROL64(A##su, 14); \
    XOReq64(A##bo, Do); \
    Bga = ROL64(A##bo, 28); \
    Bbuga = GET64LOLO(Bbu, Bga); \
    E##bigo.v128 = XOR128(Bbigo, ANDnu128(Bbogu, Bbuga)); \
    E##bi = E##bigo.v128; \
    E##go = GET64HIHI(E##bigo.v128, E##bigo.v128); \
    Cio = E##bigo.v128; \
    E##bogu.v128 = XOR128(Bbogu, ANDnu128(Bbuga, Bbage)); \
    E##bo = E##bogu.v128; \
    E##gu = GET64HIHI(E##bogu.v128, E##bogu.v128); \
    Cou = E##bogu.v128; \
    E##buga.v128 = XOR128(Bbuga, ANDnu128(Bbage, Bbegi)); \
    E##bu = E##buga.v128; \
    E##ga = GET64HIHI(E##buga.v128, E##buga.v128); \
    Cua = E##buga.v128; \
    \
    A##be = LOAD64(A##begi.v64[0]); \
    XOReq64(A##be, De); \
    Bka = ROL64(A##be, 1); \
    XOReq64(A##ga, Da); \
    Bme = ROL64(A##ga, 36); \
    Bkame = GET64LOLO(Bka, Bme); \
    A##gi = LOAD64(A##begi.v64[1]); \
    XOReq64(A##gi, Di); \
    Bke = ROL64(A##gi, 6); \
    A##ke = LOAD64(A##kemi.v64[0]); \
    XOReq64(A##ke, De); \
    Bmi = ROL64(A##ke, 10); \
    Bkemi = GET64LOLO(Bke, Bmi); \
    XOReq64(A##ko, Do); \
    Bki = ROL64(A##ko, 25); \
    A##mi = LOAD64(A##kemi.v64[1]); \
    XOReq64(A##mi, Di); \
    Bmo = ROL64(A##mi, 15); \
    Bkimo = GET64LOLO(Bki, Bmo); \
    E##kame.v128 = XOR128(Bkame, ANDnu128(Bkemi, Bkimo)); \
    XOReq128(Cae, E##kame.v128); \
    Bkomu = GET64LOLO(XOR64(A##mu, Du), XOR64(A##so, Do)); \
    Bkomu = SHUFFLEBYTES128(Bkomu, CONST128(rho8_56)); \
    E##kemi.v128 = XOR128(Bkemi, ANDnu128(Bkimo, Bkomu)); \
    XOReq128(Cei, E##kemi.v128); \
    XOReq64(A##sa, Da); \
    Bku = ROL64(A##sa, 18); \
    XOReq64(A##bu, Du); \
    Bma = ROL64(A##bu, 27); \
    Bkuma = GET64LOLO(Bku, Bma); \
    E##kimo.v128 = XOR128(Bkimo, ANDnu128(Bkomu, Bkuma)); \
    E##ki = E##kimo.v128; \
    E##mo = GET64HIHI(E##kimo.v128, E##kimo.v128); \
    XOReq128(Cio, E##kimo.v128); \
    E##komu.v128 = XOR128(Bkomu, ANDnu128(Bkuma, Bkame)); \
    E##ko = E##komu.v128; \
    E##mu = GET64HIHI(E##komu.v128, E##komu.v128); \
    XOReq128(Cou, E##komu.v128); \
    E##kuma.v128 = XOR128(Bkuma, ANDnu128(Bkame, Bkemi)); \
    E##ku = E##kuma.v128; \
    E##ma = GET64HIHI(E##kuma.v128, E##kuma.v128); \
    XOReq128(Cua, E##kuma.v128); \
    \
    XOReq64(A##bi, Di); \
    Bsa = ROL64(A##bi, 62); \
    XOReq64(A##go, Do); \
    Bse = ROL64(A##go, 55); \
    XOReq64(A##ku, Du); \
    Bsi = ROL64(A##ku, 39); \
    E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \
    Ca = E##sa; \
    XOReq64(A##ma, Da); \
    Bso = ROL64(A##ma, 41); \
    E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \
    Ce = E##se; \
    XOReq128(Cae, GET64LOLO(Ca, Ce)); \
    XOReq64(A##se, De); \
    Bsu = ROL64(A##se, 2); \
    E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \
    Ci = E##si; \
    E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \
    Co = E##so; \
    XOReq128(Cio, GET64LOLO(Ci, Co)); \
    E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \
    Cu = E##su; \
    \
    Zero = ZERO128(); \
    XOReq128(Cae, GET64HIHI(Cua, Zero)); \
    XOReq128(Cae, GET64LOLO(Zero, Cei)); \
    XOReq128(Cio, GET64HIHI(Cei, Zero)); \
    XOReq128(Cio, GET64LOLO(Zero, Cou)); \
    XOReq128(Cua, GET64HIHI(Cou, Zero)); \
    XOReq64(Cu, Cua);
/* --- Theta Rho Pi Chi Iota */
/* --- 64-bit lanes mapped to 64-bit and 128-bit words */
/* In this file the plain round is the prepare-theta round: there is no
   separate variant that skips updating the C values. */
#define thetaRhoPiChiIota(i, A, E) thetaRhoPiChiIotaPrepareTheta(i, A, E)
/*
 * The 24 round constants, one per round; the round macros XOR entry [i]
 * into the first state word of round i.
 */
static const UINT64 KeccakF1600RoundConstants[24] = {
0x0000000000000001ULL,
0x0000000000008082ULL,
0x800000000000808aULL,
0x8000000080008000ULL,
0x000000000000808bULL,
0x0000000080000001ULL,
0x8000000080008081ULL,
0x8000000000008009ULL,
0x000000000000008aULL,
0x0000000000000088ULL,
0x0000000080008009ULL,
0x000000008000000aULL,
0x000000008000808bULL,
0x800000000000008bULL,
0x8000000000008089ULL,
0x8000000000008003ULL,
0x8000000000008002ULL,
0x8000000000000080ULL,
0x000000000000800aULL,
0x800000008000000aULL,
0x8000000080008081ULL,
0x8000000000008080ULL,
0x0000000080000001ULL,
0x8000000080008008ULL };
/*
 * Load state[0..24] into the X-prefixed working variables, XORing
 * input[0..8] (576 bits) into state words 0..8 on the way, and accumulate
 * the XOR of the loaded rows into Cae, Cio and Cu.
 *
 * NOTE(review): assumes LOAD128/LOAD128u read two consecutive 64-bit words
 * and LOAD64 one -- confirm against the helper definitions.
 *
 * The macro is a plain statement list.  Its last line carries no
 * line-continuation backslash: with one, the definition would absorb the
 * line that follows it (here the next #define).
 */
#define copyFromStateAndXor576bits(X, state, input) \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = LOAD64(state[ 9]); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = LOAD128(state[10]); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = LOAD128(state[12]); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = LOAD64(state[14]); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = LOAD128u(state[15]); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = LOAD128u(state[17]); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/*
 * Load state[0..24] into the X-prefixed working variables, XORing
 * input[0..12] (832 bits) into state words 0..12 on the way, and accumulate
 * the XOR of the loaded rows into Cae, Cio and Cu.
 *
 * Word 12 is the last one XORed: it is combined with LOAD64(input[12])
 * inside the two-word load of state[12].
 * NOTE(review): assumes LOAD64 leaves the upper half zero so that state[13]
 * is unchanged, and that LOAD128/LOAD128u read two consecutive words --
 * confirm against the helper definitions.
 *
 * The macro is a plain statement list.  Its last line carries no
 * line-continuation backslash: with one, the definition would absorb the
 * line that follows it (here the next #define).
 */
#define copyFromStateAndXor832bits(X, state, input) \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = XOR128(LOAD128(state[12]), LOAD64(input[12])); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = LOAD64(state[14]); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = LOAD128u(state[15]); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = LOAD128u(state[17]); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/*
 * Load state[0..24] into the X-prefixed working variables, XORing
 * input[0..15] (1024 bits) into state words 0..15 on the way, and
 * accumulate the XOR of the loaded rows into Cae, Cio and Cu.
 *
 * Word 15 is the last one XORed: it is combined with LOAD64(input[15])
 * inside the two-word load of state[15].
 * NOTE(review): assumes LOAD64 leaves the upper half zero so that state[16]
 * is unchanged, and that LOAD128/LOAD128u read two consecutive words --
 * confirm against the helper definitions.
 *
 * The macro is a plain statement list.  Its last line carries no
 * line-continuation backslash: with one, the definition would absorb the
 * line that follows it (here the next #define).
 */
#define copyFromStateAndXor1024bits(X, state, input) \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD64(input[15])); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = LOAD128u(state[17]); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/*
 * Load state[0..24] into the X-prefixed working variables, XORing
 * input[0..16] (1088 bits) into state words 0..16 on the way, and
 * accumulate the XOR of the loaded rows into Cae, Cio and Cu.
 *
 * NOTE(review): assumes LOAD128/LOAD128u read two consecutive 64-bit words
 * and LOAD64 one -- confirm against the helper definitions.
 *
 * The macro is a plain statement list.  Its last line carries no
 * line-continuation backslash: with one, the definition would absorb the
 * line that follows it (here the next #define).
 */
#define copyFromStateAndXor1088bits(X, state, input) \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = LOAD128u(state[17]); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/*
 * Load state[0..24] into the X-prefixed working variables, XORing
 * input[0..17] (1152 bits) into state words 0..17 on the way, and
 * accumulate the XOR of the loaded rows into Cae, Cio and Cu.
 *
 * Word 17 is the last one XORed: it is combined with LOAD64(input[17])
 * inside the two-word load of state[17].
 * NOTE(review): assumes LOAD64 leaves the upper half zero so that state[18]
 * is unchanged, and that LOAD128/LOAD128u read two consecutive words --
 * confirm against the helper definitions.
 *
 * The macro is a plain statement list.  Its last line carries no
 * line-continuation backslash: with one, the definition would absorb the
 * line that follows it (here the next #define).
 */
#define copyFromStateAndXor1152bits(X, state, input) \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = XOR128(LOAD128u(state[17]), LOAD64(input[17])); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/*
 * Load state[0..24] into the X-prefixed working variables, XORing
 * input[0..20] (1344 bits) into state words 0..20 on the way, and
 * accumulate the XOR of the loaded rows into Cae, Cio and Cu.
 *
 * Word 20 is the last one XORed: it is combined with LOAD64(input[20])
 * inside the two-word load of state[20].
 * NOTE(review): assumes LOAD64 leaves the upper half zero so that state[21]
 * is unchanged, and that LOAD128/LOAD128u read two consecutive words --
 * confirm against the helper definitions.
 *
 * The macro is a plain statement list.  Its last line carries no
 * line-continuation backslash: with one, the definition would absorb the
 * line that follows it (here the next #define).
 */
#define copyFromStateAndXor1344bits(X, state, input) \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = XOR128(LOAD128u(state[17]), LOAD128u(input[17])); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = XOR128(LOAD128(state[20]), LOAD64(input[20])); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/*
 * Load state[0..24] into the X-prefixed working variables (no input is
 * mixed in) and accumulate the XOR of the loaded rows into Cae, Cio and Cu.
 *
 * NOTE(review): assumes LOAD128/LOAD128u read two consecutive 64-bit words
 * and LOAD64 one -- confirm against the helper definitions.
 *
 * The macro is a plain statement list.  Its last line carries no
 * line-continuation backslash: with one, the definition would absorb the
 * line that follows it (here the next #define).
 */
#define copyFromState(X, state) \
    X##bae.v128 = LOAD128(state[ 0]); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = LOAD128(state[ 2]); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = LOAD64(state[ 4]); \
    Cu = X##bu; \
    X##gae.v128 = LOAD128u(state[ 5]); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = LOAD128u(state[ 7]); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = LOAD64(state[ 9]); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = LOAD128(state[10]); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = LOAD128(state[12]); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = LOAD64(state[14]); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = LOAD128u(state[15]); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = LOAD128u(state[17]); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/*
 * Store the X-prefixed working variables back into state[0..24].
 *
 * Words 0, 1, 6, 7, 10, 11, 16 and 17 live in the two-word variables
 * X##bage, X##begi, X##kame and X##kemi and are read through .v64[]; the
 * other words are written with STORE64.
 *
 * Words 0, 1, 6 and 7 were previously read through A##bage / A##begi, which
 * always expand to Abage / Abegi whatever X is, so the macro was only
 * correct for X == A.  They now follow X like every other word; the
 * expansion for X == A is unchanged.
 *
 * The macro is a plain statement list.  Its last line carries no
 * line-continuation backslash: with one, the definition would absorb the
 * line that follows it (here the next #define).
 */
#define copyToState(state, X) \
    state[ 0] = X##bage.v64[0]; \
    state[ 1] = X##begi.v64[0]; \
    STORE64(state[ 2], X##bi); \
    STORE64(state[ 3], X##bo); \
    STORE64(state[ 4], X##bu); \
    STORE64(state[ 5], X##ga); \
    state[ 6] = X##bage.v64[1]; \
    state[ 7] = X##begi.v64[1]; \
    STORE64(state[ 8], X##go); \
    STORE64(state[ 9], X##gu); \
    state[10] = X##kame.v64[0]; \
    state[11] = X##kemi.v64[0]; \
    STORE64(state[12], X##ki); \
    STORE64(state[13], X##ko); \
    STORE64(state[14], X##ku); \
    STORE64(state[15], X##ma); \
    state[16] = X##kame.v64[1]; \
    state[17] = X##kemi.v64[1]; \
    STORE64(state[18], X##mo); \
    STORE64(state[19], X##mu); \
    STORE64(state[20], X##sa); \
    STORE64(state[21], X##se); \
    STORE64(state[22], X##si); \
    STORE64(state[23], X##so); \
    STORE64(state[24], X##su);
/*
 * Assign every Y-prefixed state variable to the X-prefixed one of the same
 * name: the four two-word variables (bage, begi, kame, kemi) and the 17
 * single-word ones.
 *
 * The macro is a plain statement list.  Its last line carries no
 * line-continuation backslash, so the definition can never absorb whatever
 * line follows it.
 */
#define copyStateVariables(X, Y) \
    X##bage = Y##bage; \
    X##begi = Y##begi; \
    X##bi = Y##bi; \
    X##bo = Y##bo; \
    X##bu = Y##bu; \
    X##ga = Y##ga; \
    X##go = Y##go; \
    X##gu = Y##gu; \
    X##kame = Y##kame; \
    X##kemi = Y##kemi; \
    X##ki = Y##ki; \
    X##ko = Y##ko; \
    X##ku = Y##ku; \
    X##ma = Y##ma; \
    X##mo = Y##mo; \
    X##mu = Y##mu; \
    X##sa = Y##sa; \
    X##se = Y##se; \
    X##si = Y##si; \
    X##so = Y##so; \
    X##su = Y##su;

View File

@ -0,0 +1,517 @@
/*
Code automatically generated by KeccakTools!
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/*
 * Declare every working variable used by the round macros of this file:
 * 25 A and 25 E state words, 25 B temporaries, and the 5 C and 5 D values.
 * The type V64 is defined elsewhere.
 *
 * The last line carries no line-continuation backslash: with one, the
 * definition would absorb the line that follows it (here the next #define).
 */
#define declareABCDE \
    V64 Aba, Abe, Abi, Abo, Abu; \
    V64 Aga, Age, Agi, Ago, Agu; \
    V64 Aka, Ake, Aki, Ako, Aku; \
    V64 Ama, Ame, Ami, Amo, Amu; \
    V64 Asa, Ase, Asi, Aso, Asu; \
    V64 Bba, Bbe, Bbi, Bbo, Bbu; \
    V64 Bga, Bge, Bgi, Bgo, Bgu; \
    V64 Bka, Bke, Bki, Bko, Bku; \
    V64 Bma, Bme, Bmi, Bmo, Bmu; \
    V64 Bsa, Bse, Bsi, Bso, Bsu; \
    V64 Ca, Ce, Ci, Co, Cu; \
    V64 Da, De, Di, Do, Du; \
    V64 Eba, Ebe, Ebi, Ebo, Ebu; \
    V64 Ega, Ege, Egi, Ego, Egu; \
    V64 Eka, Eke, Eki, Eko, Eku; \
    V64 Ema, Eme, Emi, Emo, Emu; \
    V64 Esa, Ese, Esi, Eso, Esu;
/*
 * Compute the five column values: Ca..Cu are each the XOR of the five A
 * words sharing the same final letter.
 *
 * The last line carries no line-continuation backslash, so the definition
 * can never absorb whatever line follows it.
 */
#define prepareTheta \
    Ca = XOR64(Aba, XOR64(Aga, XOR64(Aka, XOR64(Ama, Asa)))); \
    Ce = XOR64(Abe, XOR64(Age, XOR64(Ake, XOR64(Ame, Ase)))); \
    Ci = XOR64(Abi, XOR64(Agi, XOR64(Aki, XOR64(Ami, Asi)))); \
    Co = XOR64(Abo, XOR64(Ago, XOR64(Ako, XOR64(Amo, Aso)))); \
    Cu = XOR64(Abu, XOR64(Agu, XOR64(Aku, XOR64(Amu, Asu))));
/* --- Code for round, with prepare-theta */
/* --- 64-bit lanes mapped to 64-bit words */
/*
 * One round: derives Da..Du from Ca..Cu, reads the A-prefixed words
 * (modifying them in place), writes the E-prefixed ones, XORs
 * KeccakF1600RoundConstants[i] into E##ba, and leaves in Ca..Cu the column
 * XORs of the E words for the next round.
 *
 * The macro is a plain statement list and must be invoked where
 * declareABCDE is in scope.  The final line deliberately carries no
 * line-continuation backslash, so the definition can never absorb whatever
 * line follows it.
 */
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
    Da = XOR64(Cu, ROL64(Ce, 1)); \
    De = XOR64(Ca, ROL64(Ci, 1)); \
    Di = XOR64(Ce, ROL64(Co, 1)); \
    Do = XOR64(Ci, ROL64(Cu, 1)); \
    Du = XOR64(Co, ROL64(Ca, 1)); \
    \
    XOReq64(A##ba, Da); \
    Bba = A##ba; \
    XOReq64(A##ge, De); \
    Bbe = ROL64(A##ge, 44); \
    XOReq64(A##ki, Di); \
    Bbi = ROL64(A##ki, 43); \
    E##ba = XOR64(Bba, ANDnu64(Bbe, Bbi)); \
    XOReq64(E##ba, CONST64(KeccakF1600RoundConstants[i])); \
    Ca = E##ba; \
    XOReq64(A##mo, Do); \
    Bbo = ROL64(A##mo, 21); \
    E##be = XOR64(Bbe, ANDnu64(Bbi, Bbo)); \
    Ce = E##be; \
    XOReq64(A##su, Du); \
    Bbu = ROL64(A##su, 14); \
    E##bi = XOR64(Bbi, ANDnu64(Bbo, Bbu)); \
    Ci = E##bi; \
    E##bo = XOR64(Bbo, ANDnu64(Bbu, Bba)); \
    Co = E##bo; \
    E##bu = XOR64(Bbu, ANDnu64(Bba, Bbe)); \
    Cu = E##bu; \
    \
    XOReq64(A##bo, Do); \
    Bga = ROL64(A##bo, 28); \
    XOReq64(A##gu, Du); \
    Bge = ROL64(A##gu, 20); \
    XOReq64(A##ka, Da); \
    Bgi = ROL64(A##ka, 3); \
    E##ga = XOR64(Bga, ANDnu64(Bge, Bgi)); \
    XOReq64(Ca, E##ga); \
    XOReq64(A##me, De); \
    Bgo = ROL64(A##me, 45); \
    E##ge = XOR64(Bge, ANDnu64(Bgi, Bgo)); \
    XOReq64(Ce, E##ge); \
    XOReq64(A##si, Di); \
    Bgu = ROL64(A##si, 61); \
    E##gi = XOR64(Bgi, ANDnu64(Bgo, Bgu)); \
    XOReq64(Ci, E##gi); \
    E##go = XOR64(Bgo, ANDnu64(Bgu, Bga)); \
    XOReq64(Co, E##go); \
    E##gu = XOR64(Bgu, ANDnu64(Bga, Bge)); \
    XOReq64(Cu, E##gu); \
    \
    XOReq64(A##be, De); \
    Bka = ROL64(A##be, 1); \
    XOReq64(A##gi, Di); \
    Bke = ROL64(A##gi, 6); \
    XOReq64(A##ko, Do); \
    Bki = ROL64(A##ko, 25); \
    E##ka = XOR64(Bka, ANDnu64(Bke, Bki)); \
    XOReq64(Ca, E##ka); \
    XOReq64(A##mu, Du); \
    Bko = ROL64(A##mu, 8); \
    E##ke = XOR64(Bke, ANDnu64(Bki, Bko)); \
    XOReq64(Ce, E##ke); \
    XOReq64(A##sa, Da); \
    Bku = ROL64(A##sa, 18); \
    E##ki = XOR64(Bki, ANDnu64(Bko, Bku)); \
    XOReq64(Ci, E##ki); \
    E##ko = XOR64(Bko, ANDnu64(Bku, Bka)); \
    XOReq64(Co, E##ko); \
    E##ku = XOR64(Bku, ANDnu64(Bka, Bke)); \
    XOReq64(Cu, E##ku); \
    \
    XOReq64(A##bu, Du); \
    Bma = ROL64(A##bu, 27); \
    XOReq64(A##ga, Da); \
    Bme = ROL64(A##ga, 36); \
    XOReq64(A##ke, De); \
    Bmi = ROL64(A##ke, 10); \
    E##ma = XOR64(Bma, ANDnu64(Bme, Bmi)); \
    XOReq64(Ca, E##ma); \
    XOReq64(A##mi, Di); \
    Bmo = ROL64(A##mi, 15); \
    E##me = XOR64(Bme, ANDnu64(Bmi, Bmo)); \
    XOReq64(Ce, E##me); \
    XOReq64(A##so, Do); \
    Bmu = ROL64(A##so, 56); \
    E##mi = XOR64(Bmi, ANDnu64(Bmo, Bmu)); \
    XOReq64(Ci, E##mi); \
    E##mo = XOR64(Bmo, ANDnu64(Bmu, Bma)); \
    XOReq64(Co, E##mo); \
    E##mu = XOR64(Bmu, ANDnu64(Bma, Bme)); \
    XOReq64(Cu, E##mu); \
    \
    XOReq64(A##bi, Di); \
    Bsa = ROL64(A##bi, 62); \
    XOReq64(A##go, Do); \
    Bse = ROL64(A##go, 55); \
    XOReq64(A##ku, Du); \
    Bsi = ROL64(A##ku, 39); \
    E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \
    XOReq64(Ca, E##sa); \
    XOReq64(A##ma, Da); \
    Bso = ROL64(A##ma, 41); \
    E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \
    XOReq64(Ce, E##se); \
    XOReq64(A##se, De); \
    Bsu = ROL64(A##se, 2); \
    E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \
    XOReq64(Ci, E##si); \
    E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \
    XOReq64(Co, E##so); \
    E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \
    XOReq64(Cu, E##su);
/* --- Code for round */
/* --- 64-bit lanes mapped to 64-bit words */
/*
 * thetaRhoPiChiIota(i, A, E): one round that reads the 25 lane variables
 * A##ba .. A##su and writes the round output to E##ba .. E##su.
 *
 *   i    - index into KeccakF1600RoundConstants[]; that constant is XORed
 *          into E##ba only.
 *   A, E - name prefixes of the source and destination lane variables.
 *
 * On entry Ca..Cu must already have been filled in by the expanding code;
 * Da..Du are derived from them first.  Unlike the PrepareTheta variant of
 * this macro, the results are NOT folded back into Ca..Cu, so this form is
 * only suitable when no further round follows.
 *
 * The A lanes are passed to XOReq64 together with the D values, i.e. the
 * source variables are modified as well (assuming XOReq64(a, b) is a ^= b).
 * NOTE(review): ANDnu64(a, b) is presumably (~a) & b -- confirm against the
 * helper definitions.
 *
 * The definition ends on its last statement, without a trailing line
 * continuation, so it cannot absorb whatever line happens to follow it.
 */
#define thetaRhoPiChiIota(i, A, E) \
    Da = XOR64(Cu, ROL64(Ce, 1)); \
    De = XOR64(Ca, ROL64(Ci, 1)); \
    Di = XOR64(Ce, ROL64(Co, 1)); \
    Do = XOR64(Ci, ROL64(Cu, 1)); \
    Du = XOR64(Co, ROL64(Ca, 1)); \
    \
    XOReq64(A##ba, Da); \
    Bba = A##ba; \
    XOReq64(A##ge, De); \
    Bbe = ROL64(A##ge, 44); \
    XOReq64(A##ki, Di); \
    Bbi = ROL64(A##ki, 43); \
    E##ba = XOR64(Bba, ANDnu64(Bbe, Bbi)); \
    XOReq64(E##ba, CONST64(KeccakF1600RoundConstants[i])); \
    XOReq64(A##mo, Do); \
    Bbo = ROL64(A##mo, 21); \
    E##be = XOR64(Bbe, ANDnu64(Bbi, Bbo)); \
    XOReq64(A##su, Du); \
    Bbu = ROL64(A##su, 14); \
    E##bi = XOR64(Bbi, ANDnu64(Bbo, Bbu)); \
    E##bo = XOR64(Bbo, ANDnu64(Bbu, Bba)); \
    E##bu = XOR64(Bbu, ANDnu64(Bba, Bbe)); \
    \
    XOReq64(A##bo, Do); \
    Bga = ROL64(A##bo, 28); \
    XOReq64(A##gu, Du); \
    Bge = ROL64(A##gu, 20); \
    XOReq64(A##ka, Da); \
    Bgi = ROL64(A##ka, 3); \
    E##ga = XOR64(Bga, ANDnu64(Bge, Bgi)); \
    XOReq64(A##me, De); \
    Bgo = ROL64(A##me, 45); \
    E##ge = XOR64(Bge, ANDnu64(Bgi, Bgo)); \
    XOReq64(A##si, Di); \
    Bgu = ROL64(A##si, 61); \
    E##gi = XOR64(Bgi, ANDnu64(Bgo, Bgu)); \
    E##go = XOR64(Bgo, ANDnu64(Bgu, Bga)); \
    E##gu = XOR64(Bgu, ANDnu64(Bga, Bge)); \
    \
    XOReq64(A##be, De); \
    Bka = ROL64(A##be, 1); \
    XOReq64(A##gi, Di); \
    Bke = ROL64(A##gi, 6); \
    XOReq64(A##ko, Do); \
    Bki = ROL64(A##ko, 25); \
    E##ka = XOR64(Bka, ANDnu64(Bke, Bki)); \
    XOReq64(A##mu, Du); \
    Bko = ROL64(A##mu, 8); \
    E##ke = XOR64(Bke, ANDnu64(Bki, Bko)); \
    XOReq64(A##sa, Da); \
    Bku = ROL64(A##sa, 18); \
    E##ki = XOR64(Bki, ANDnu64(Bko, Bku)); \
    E##ko = XOR64(Bko, ANDnu64(Bku, Bka)); \
    E##ku = XOR64(Bku, ANDnu64(Bka, Bke)); \
    \
    XOReq64(A##bu, Du); \
    Bma = ROL64(A##bu, 27); \
    XOReq64(A##ga, Da); \
    Bme = ROL64(A##ga, 36); \
    XOReq64(A##ke, De); \
    Bmi = ROL64(A##ke, 10); \
    E##ma = XOR64(Bma, ANDnu64(Bme, Bmi)); \
    XOReq64(A##mi, Di); \
    Bmo = ROL64(A##mi, 15); \
    E##me = XOR64(Bme, ANDnu64(Bmi, Bmo)); \
    XOReq64(A##so, Do); \
    Bmu = ROL64(A##so, 56); \
    E##mi = XOR64(Bmi, ANDnu64(Bmo, Bmu)); \
    E##mo = XOR64(Bmo, ANDnu64(Bmu, Bma)); \
    E##mu = XOR64(Bmu, ANDnu64(Bma, Bme)); \
    \
    XOReq64(A##bi, Di); \
    Bsa = ROL64(A##bi, 62); \
    XOReq64(A##go, Do); \
    Bse = ROL64(A##go, 55); \
    XOReq64(A##ku, Du); \
    Bsi = ROL64(A##ku, 39); \
    E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \
    XOReq64(A##ma, Da); \
    Bso = ROL64(A##ma, 41); \
    E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \
    XOReq64(A##se, De); \
    Bsu = ROL64(A##se, 2); \
    E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \
    E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \
    E##su = XOR64(Bsu, ANDnu64(Bsa, Bse));
/*
 * Per-round constants, indexed by round number (0..23).  The round macros
 * XOR entry i into lane E##ba when they are expanded with round index i.
 */
static const UINT64 KeccakF1600RoundConstants[24] = {
    0x0000000000000001ULL,  /* round  0 */
    0x0000000000008082ULL,  /* round  1 */
    0x800000000000808aULL,  /* round  2 */
    0x8000000080008000ULL,  /* round  3 */
    0x000000000000808bULL,  /* round  4 */
    0x0000000080000001ULL,  /* round  5 */
    0x8000000080008081ULL,  /* round  6 */
    0x8000000000008009ULL,  /* round  7 */
    0x000000000000008aULL,  /* round  8 */
    0x0000000000000088ULL,  /* round  9 */
    0x0000000080008009ULL,  /* round 10 */
    0x000000008000000aULL,  /* round 11 */
    0x000000008000808bULL,  /* round 12 */
    0x800000000000008bULL,  /* round 13 */
    0x8000000000008089ULL,  /* round 14 */
    0x8000000000008003ULL,  /* round 15 */
    0x8000000000008002ULL,  /* round 16 */
    0x8000000000000080ULL,  /* round 17 */
    0x000000000000800aULL,  /* round 18 */
    0x800000008000000aULL,  /* round 19 */
    0x8000000080008081ULL,  /* round 20 */
    0x8000000000008080ULL,  /* round 21 */
    0x0000000080000001ULL,  /* round 22 */
    0x8000000080008008ULL   /* round 23 */
};
/*
 * copyFromStateAndXor576bits(X, state, input): load the 25 lanes
 * state[0..24] into the variables X##ba .. X##su, XORing the first 9 lanes
 * (9 * 64 = 576 bits) with input[0..8] on the way.  Lanes 9..24 are loaded
 * unchanged.  Lane variable k (in the order ba, be, bi, bo, bu, ga, ...)
 * corresponds to state[k].
 *
 * The definition ends on its last statement, without a trailing line
 * continuation, so it cannot absorb the line that follows it.
 */
#define copyFromStateAndXor576bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    X##gu = LOAD64(state[ 9]); \
    X##ka = LOAD64(state[10]); \
    X##ke = LOAD64(state[11]); \
    X##ki = LOAD64(state[12]); \
    X##ko = LOAD64(state[13]); \
    X##ku = LOAD64(state[14]); \
    X##ma = LOAD64(state[15]); \
    X##me = LOAD64(state[16]); \
    X##mi = LOAD64(state[17]); \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromStateAndXor832bits(X, state, input): load the 25 lanes
 * state[0..24] into X##ba .. X##su, XORing the first 13 lanes
 * (13 * 64 = 832 bits) with input[0..12].  Lanes 13..24 are loaded
 * unchanged.
 *
 * The definition ends on its last statement, without a trailing line
 * continuation, so it cannot absorb the line that follows it.
 */
#define copyFromStateAndXor832bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
    X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
    X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
    X##ko = LOAD64(state[13]); \
    X##ku = LOAD64(state[14]); \
    X##ma = LOAD64(state[15]); \
    X##me = LOAD64(state[16]); \
    X##mi = LOAD64(state[17]); \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromStateAndXor1024bits(X, state, input): load the 25 lanes
 * state[0..24] into X##ba .. X##su, XORing the first 16 lanes
 * (16 * 64 = 1024 bits) with input[0..15].  Lanes 16..24 are loaded
 * unchanged.
 *
 * The definition ends on its last statement, without a trailing line
 * continuation, so it cannot absorb the line that follows it.
 */
#define copyFromStateAndXor1024bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
    X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
    X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
    X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
    X##me = LOAD64(state[16]); \
    X##mi = LOAD64(state[17]); \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromStateAndXor1088bits(X, state, input): load the 25 lanes
 * state[0..24] into X##ba .. X##su, XORing the first 17 lanes
 * (17 * 64 = 1088 bits) with input[0..16].  Lanes 17..24 are loaded
 * unchanged.
 *
 * The definition ends on its last statement, without a trailing line
 * continuation, so it cannot absorb the line that follows it.
 */
#define copyFromStateAndXor1088bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
    X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
    X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
    X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##mi = LOAD64(state[17]); \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromStateAndXor1152bits(X, state, input): load the 25 lanes
 * state[0..24] into X##ba .. X##su, XORing the first 18 lanes
 * (18 * 64 = 1152 bits) with input[0..17].  Lanes 18..24 are loaded
 * unchanged.
 *
 * The definition ends on its last statement, without a trailing line
 * continuation, so it cannot absorb the line that follows it.
 */
#define copyFromStateAndXor1152bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
    X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
    X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
    X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##mi = XOR64(LOAD64(state[17]), LOAD64(input[17])); \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromStateAndXor1344bits(X, state, input): load the 25 lanes
 * state[0..24] into X##ba .. X##su, XORing the first 21 lanes
 * (21 * 64 = 1344 bits) with input[0..20].  Lanes 21..24 are loaded
 * unchanged.
 *
 * The definition ends on its last statement, without a trailing line
 * continuation, so it cannot absorb the line that follows it.
 */
#define copyFromStateAndXor1344bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
    X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
    X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
    X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##mi = XOR64(LOAD64(state[17]), LOAD64(input[17])); \
    X##mo = XOR64(LOAD64(state[18]), LOAD64(input[18])); \
    X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \
    X##sa = XOR64(LOAD64(state[20]), LOAD64(input[20])); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromState(X, state): load the 25 lanes state[0..24] into the local
 * variables X##ba .. X##su without mixing in any input.  Lane variable k
 * (in the order ba, be, bi, bo, bu, ga, ..., su) corresponds to state[k].
 *
 * The definition ends on its last statement, without a trailing line
 * continuation, so it cannot absorb the line that follows it.
 */
#define copyFromState(X, state) \
    X##ba = LOAD64(state[ 0]); \
    X##be = LOAD64(state[ 1]); \
    X##bi = LOAD64(state[ 2]); \
    X##bo = LOAD64(state[ 3]); \
    X##bu = LOAD64(state[ 4]); \
    X##ga = LOAD64(state[ 5]); \
    X##ge = LOAD64(state[ 6]); \
    X##gi = LOAD64(state[ 7]); \
    X##go = LOAD64(state[ 8]); \
    X##gu = LOAD64(state[ 9]); \
    X##ka = LOAD64(state[10]); \
    X##ke = LOAD64(state[11]); \
    X##ki = LOAD64(state[12]); \
    X##ko = LOAD64(state[13]); \
    X##ku = LOAD64(state[14]); \
    X##ma = LOAD64(state[15]); \
    X##me = LOAD64(state[16]); \
    X##mi = LOAD64(state[17]); \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyToState(state, X): store the 25 lane variables X##ba .. X##su back to
 * state[0..24], using the same variable-to-index mapping as copyFromState
 * (ba -> 0, be -> 1, ..., su -> 24).
 *
 * The definition ends on its last statement, without a trailing line
 * continuation, so it cannot absorb the line that follows it.
 */
#define copyToState(state, X) \
    STORE64(state[ 0], X##ba); \
    STORE64(state[ 1], X##be); \
    STORE64(state[ 2], X##bi); \
    STORE64(state[ 3], X##bo); \
    STORE64(state[ 4], X##bu); \
    STORE64(state[ 5], X##ga); \
    STORE64(state[ 6], X##ge); \
    STORE64(state[ 7], X##gi); \
    STORE64(state[ 8], X##go); \
    STORE64(state[ 9], X##gu); \
    STORE64(state[10], X##ka); \
    STORE64(state[11], X##ke); \
    STORE64(state[12], X##ki); \
    STORE64(state[13], X##ko); \
    STORE64(state[14], X##ku); \
    STORE64(state[15], X##ma); \
    STORE64(state[16], X##me); \
    STORE64(state[17], X##mi); \
    STORE64(state[18], X##mo); \
    STORE64(state[19], X##mu); \
    STORE64(state[20], X##sa); \
    STORE64(state[21], X##se); \
    STORE64(state[22], X##si); \
    STORE64(state[23], X##so); \
    STORE64(state[24], X##su);
/*
 * copyStateVariables(X, Y): assign each of the 25 lane variables Y##ba ..
 * Y##su to the variable with the same suffix under prefix X (X <- Y).
 *
 * The definition ends on its last statement, without a trailing line
 * continuation, so it cannot absorb the line that follows it.
 */
#define copyStateVariables(X, Y) \
    X##ba = Y##ba; \
    X##be = Y##be; \
    X##bi = Y##bi; \
    X##bo = Y##bo; \
    X##bu = Y##bu; \
    X##ga = Y##ga; \
    X##ge = Y##ge; \
    X##gi = Y##gi; \
    X##go = Y##go; \
    X##gu = Y##gu; \
    X##ka = Y##ka; \
    X##ke = Y##ke; \
    X##ki = Y##ki; \
    X##ko = Y##ko; \
    X##ku = Y##ku; \
    X##ma = Y##ma; \
    X##me = Y##me; \
    X##mi = Y##mi; \
    X##mo = Y##mo; \
    X##mu = Y##mu; \
    X##sa = Y##sa; \
    X##se = Y##se; \
    X##si = Y##si; \
    X##so = Y##so; \
    X##su = Y##su;

View File

@ -0,0 +1,124 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/*
 * `rounds` expands to all 24 rounds followed by copyToState(state, A).
 * The compile-time setting `Unrolling` selects how many rounds are written
 * out per loop iteration; it must be one of 24, 12, 8, 6, 4, 3, 2 or 1,
 * anything else stops compilation with #error.
 *
 * Successive rounds alternate between (A -> E) and (E -> A) so that, after
 * an even number of rounds, the current state is back in the A variables.
 * For the odd unroll counts (3 and 1) each loop iteration ends in the E
 * variables, so copyStateVariables(A, E) copies them back to A.
 *
 * Every variant except the fully unrolled one uses a loop counter `i` that
 * is not declared here; the code expanding `rounds` has to provide it.
 * All variants expect `state` and the A/E lane variables to be in scope.
 */
#if (Unrolling == 24)
/* Fully unrolled: the last round uses thetaRhoPiChiIota, the others the
   PrepareTheta form. */
#define rounds \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta( 0, A, E) \
thetaRhoPiChiIotaPrepareTheta( 1, E, A) \
thetaRhoPiChiIotaPrepareTheta( 2, A, E) \
thetaRhoPiChiIotaPrepareTheta( 3, E, A) \
thetaRhoPiChiIotaPrepareTheta( 4, A, E) \
thetaRhoPiChiIotaPrepareTheta( 5, E, A) \
thetaRhoPiChiIotaPrepareTheta( 6, A, E) \
thetaRhoPiChiIotaPrepareTheta( 7, E, A) \
thetaRhoPiChiIotaPrepareTheta( 8, A, E) \
thetaRhoPiChiIotaPrepareTheta( 9, E, A) \
thetaRhoPiChiIotaPrepareTheta(10, A, E) \
thetaRhoPiChiIotaPrepareTheta(11, E, A) \
thetaRhoPiChiIotaPrepareTheta(12, A, E) \
thetaRhoPiChiIotaPrepareTheta(13, E, A) \
thetaRhoPiChiIotaPrepareTheta(14, A, E) \
thetaRhoPiChiIotaPrepareTheta(15, E, A) \
thetaRhoPiChiIotaPrepareTheta(16, A, E) \
thetaRhoPiChiIotaPrepareTheta(17, E, A) \
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \
copyToState(state, A)
#elif (Unrolling == 12)
/* 2 loop iterations of 12 rounds each. */
#define rounds \
prepareTheta \
for(i=0; i<24; i+=12) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \
} \
copyToState(state, A)
#elif (Unrolling == 8)
/* 3 loop iterations of 8 rounds each. */
#define rounds \
prepareTheta \
for(i=0; i<24; i+=8) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+6, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+7, E, A) \
} \
copyToState(state, A)
#elif (Unrolling == 6)
/* 4 loop iterations of 6 rounds each. */
#define rounds \
prepareTheta \
for(i=0; i<24; i+=6) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
} \
copyToState(state, A)
#elif (Unrolling == 4)
/* 6 loop iterations of 4 rounds each. */
#define rounds \
prepareTheta \
for(i=0; i<24; i+=4) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
} \
copyToState(state, A)
#elif (Unrolling == 3)
/* 8 loop iterations of 3 rounds each; the third round leaves its result in
   E, hence the copy back into A at the end of every iteration. */
#define rounds \
prepareTheta \
for(i=0; i<24; i+=3) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
copyStateVariables(A, E) \
} \
copyToState(state, A)
#elif (Unrolling == 2)
/* 12 loop iterations of 2 rounds each. */
#define rounds \
prepareTheta \
for(i=0; i<24; i+=2) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
} \
copyToState(state, A)
#elif (Unrolling == 1)
/* No unrolling: one round per iteration, result copied from E back to A
   each time. */
#define rounds \
prepareTheta \
for(i=0; i<24; i++) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
copyStateVariables(A, E) \
} \
copyToState(state, A)
#else
#error "Unrolling is not correctly specified!"
#endif

View File

@ -0,0 +1,573 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/*
 * declareABCDE: declares every local variable used by the macros of this
 * file.  Variables of type V128 and V64 are grouped here by their leading
 * letter (A, B, C, D, E) plus the helper `Zero`.
 * NOTE(review): the multi-letter suffixes (e.g. "bage") appear to name the
 * lanes packed into each 128-bit variable -- confirm against the V128
 * helper definitions.
 */
#define declareABCDE \
    V128 Abage, Abegi, Abigo, Abogu, Abuga; \
    V128 Akame, Akemi, Akimo, Akomu, Akuma; \
    V128 Asase, Asiso; \
    V128 Abae, Abio, Agae, Agio; \
    V128 Akae, Akio, Amae, Amio; \
    V64 Aba, Abe, Abi, Abo, Abu; \
    V64 Aga, Age, Agi, Ago, Agu; \
    V64 Aka, Ake, Aki, Ako, Aku; \
    V64 Ama, Ame, Ami, Amo, Amu; \
    V64 Asu; \
    V128 Bbage, Bbegi, Bbigo, Bbogu, Bbuga; \
    V128 Bkame, Bkemi, Bkimo, Bkomu, Bkuma; \
    V128 Bsase, Bsesi, Bsiso, Bsosu, Bsusa; \
    V128 Cae, Cei, Cio, Cou, Cua; \
    V128 Dau, Dea, Die, Doi, Duo; \
    V128 Dua, Dae, Dei, Dio, Dou; \
    V128 Ebage, Ebegi, Ebigo, Ebogu, Ebuga; \
    V128 Ekame, Ekemi, Ekimo, Ekomu, Ekuma; \
    V128 Esase, Esiso; \
    V64 Esu; \
    V128 Zero;
/* prepareTheta: deliberately empty in this file.  The copyFromState* macros
   below already fill Cae, Cio and Cua while loading the state, which are
   the values computeD starts from. */
#define prepareTheta
/*
 * computeD: derive the D values used by thetaRhoPiChiIotaPrepareTheta
 * (Dau, Dea, Die, Doi, Duo and Dei) from the accumulators Cae, Cio and Cua.
 * Dae and Dou are intermediate results from which the others are built.
 *
 * Statement order matters: Cua is first rebuilt from Cua and Cae, and Cei
 * is overwritten before Dae is computed from it.
 * NOTE(review): the two-letter suffixes appear to name the two 64-bit lanes
 * packed in each 128-bit value, and GET64xxyy / SWAP64 to select or swap
 * those halves -- confirm against the helper definitions.
 */
#define computeD \
Cua = GET64LOLO(Cua, Cae); \
Dei = XOR128(Cae, ROL6464same(Cio, 1)); \
Dou = XOR128(Cio, ROL6464same(Cua, 1)); \
Cei = GET64HILO(Cae, Cio); \
Dae = XOR128(Cua, ROL6464same(Cei, 1)); \
Dau = GET64LOHI(Dae, Dou); \
Dea = SWAP64(Dae); \
Die = SWAP64(Dei); \
Doi = GET64LOLO(Dou, Die); \
Duo = SWAP64(Dou);
/* --- Theta Rho Pi Chi Iota Prepare-theta */
/* --- 64-bit lanes mapped to 64-bit and 128-bit words */
/*
 * thetaRhoPiChiIotaPrepareTheta(i, A, E): one round on the packed state.
 * Reads the A##* variables, writes the round output to the E##* variables
 * and leaves Cae, Cio and Cua prepared for the computeD of the next round.
 *
 *   i    - index into KeccakF1600RoundConstants[]; that constant is XORed
 *          into E##bage.
 *   A, E - name prefixes of the source and destination variables.
 *
 * `Zero` is set at the very start of the expansion.  It is read when E##su
 * is built, which comes before the place where it used to be assigned, so
 * the first expansion in a function would otherwise read it before any
 * value had been stored (declareABCDE declares it without an initialiser).
 *
 * NOTE(review): ANDnu128(a, b) is presumably (~a) & b and ROL6464(v, x, y)
 * presumably rotates the two 64-bit halves by different amounts -- confirm
 * against the helper definitions.
 *
 * The definition ends on its last statement, without a trailing line
 * continuation, so it cannot absorb the line that follows it.
 */
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
    Zero = ZERO128(); \
    computeD \
    \
    Bbage = XOR128(GET64LOHI(A##bage, A##bogu), Dau); \
    Bbage = ROL6464(Bbage, 0, 20); \
    Bbegi = XOR128(GET64HILO(A##bage, A##kame), Dea); \
    Bbegi = ROL6464(Bbegi, 44, 3); \
    Bbigo = XOR128(GET64LOHI(A##kimo, A##kame), Die); \
    Bbigo = ROL6464(Bbigo, 43, 45); \
    E##bage = XOR128(Bbage, ANDnu128(Bbegi, Bbigo)); \
    XOReq128(E##bage, CONST64(KeccakF1600RoundConstants[i])); \
    Cae = E##bage; \
    Bbogu = XOR128(GET64HILO(A##kimo, A##siso), Doi); \
    Bbogu = ROL6464(Bbogu, 21, 61); \
    E##begi = XOR128(Bbegi, ANDnu128(Bbigo, Bbogu)); \
    Cei = E##begi; \
    Bbuga = XOR128(GET64LOLO(A##su, A##bogu), Duo); \
    Bbuga = ROL6464(Bbuga, 14, 28); \
    E##bigo = XOR128(Bbigo, ANDnu128(Bbogu, Bbuga)); \
    Cio = E##bigo; \
    E##bogu = XOR128(Bbogu, ANDnu128(Bbuga, Bbage)); \
    Cou = E##bogu; \
    E##buga = XOR128(Bbuga, ANDnu128(Bbage, Bbegi)); \
    Cua = E##buga; \
    \
    Bkame = XOR128(GET64LOHI(A##begi, A##buga), Dea); \
    Bkame = ROL6464(Bkame, 1, 36); \
    Bkemi = XOR128(GET64HILO(A##begi, A##kemi), Die); \
    Bkemi = ROL6464(Bkemi, 6, 10); \
    Bkimo = XOR128(GET64LOHI(A##komu, A##kemi), Doi); \
    Bkimo = ROL6464(Bkimo, 25, 15); \
    E##kame = XOR128(Bkame, ANDnu128(Bkemi, Bkimo)); \
    XOReq128(Cae, E##kame); \
    Bkomu = XOR128(GET64HIHI(A##komu, A##siso), Duo); \
    Bkomu = ROL6464(Bkomu, 8, 56); \
    E##kemi = XOR128(Bkemi, ANDnu128(Bkimo, Bkomu)); \
    XOReq128(Cei, E##kemi); \
    Bkuma = XOR128(GET64LOLO(A##sase, A##buga), Dau); \
    Bkuma = ROL6464(Bkuma, 18, 27); \
    E##kimo = XOR128(Bkimo, ANDnu128(Bkomu, Bkuma)); \
    XOReq128(Cio, E##kimo); \
    E##komu = XOR128(Bkomu, ANDnu128(Bkuma, Bkame)); \
    XOReq128(Cou, E##komu); \
    E##kuma = XOR128(Bkuma, ANDnu128(Bkame, Bkemi)); \
    XOReq128(Cua, E##kuma); \
    \
    Bsase = XOR128(A##bigo, SWAP64(Doi)); \
    Bsase = ROL6464(Bsase, 62, 55); \
    Bsiso = XOR128(A##kuma, SWAP64(Dau)); \
    Bsiso = ROL6464(Bsiso, 39, 41); \
    Bsusa = XOR64(COPY64HI2LO(A##sase), Dei); \
    Bsusa = ROL6464same(Bsusa, 2); \
    Bsusa = GET64LOLO(Bsusa, Bsase); \
    Bsesi = GET64HILO(Bsase, Bsiso); \
    Bsosu = GET64HILO(Bsiso, Bsusa); \
    E##sase = XOR128(Bsase, ANDnu128(Bsesi, Bsiso)); \
    XOReq128(Cae, E##sase); \
    E##siso = XOR128(Bsiso, ANDnu128(Bsosu, Bsusa)); \
    XOReq128(Cio, E##siso); \
    E##su = GET64LOLO(XOR128(Bsusa, ANDnu128(Bsase, Bsesi)), Zero); \
    XOReq128(Cua, E##su); \
    \
    XOReq128(Cae, GET64HIHI(Cua, Zero)); \
    XOReq128(Cae, GET64LOLO(Zero, Cei)); \
    XOReq128(Cio, GET64HIHI(Cei, Zero)); \
    XOReq128(Cio, GET64LOLO(Zero, Cou)); \
    XOReq128(Cua, GET64HIHI(Cou, Zero));
/* --- Theta Rho Pi Chi Iota */
/* --- 64-bit lanes mapped to 64-bit and 128-bit words */
/* In this file the plain round is simply an alias of the PrepareTheta
   variant: it expands to exactly the same code, including the final
   updates of Cae, Cio and Cua. */
#define thetaRhoPiChiIota(i, A, E) thetaRhoPiChiIotaPrepareTheta(i, A, E)
/*
 * Per-round constants, indexed by round number (0..23).
 * thetaRhoPiChiIotaPrepareTheta XORs entry i (wrapped in CONST64) into
 * E##bage when it is expanded with round index i.
 */
static const UINT64 KeccakF1600RoundConstants[24] = {
    0x0000000000000001ULL,  /* round  0 */
    0x0000000000008082ULL,  /* round  1 */
    0x800000000000808aULL,  /* round  2 */
    0x8000000080008000ULL,  /* round  3 */
    0x000000000000808bULL,  /* round  4 */
    0x0000000080000001ULL,  /* round  5 */
    0x8000000080008081ULL,  /* round  6 */
    0x8000000000008009ULL,  /* round  7 */
    0x000000000000008aULL,  /* round  8 */
    0x0000000000000088ULL,  /* round  9 */
    0x0000000080008009ULL,  /* round 10 */
    0x000000008000000aULL,  /* round 11 */
    0x000000008000808bULL,  /* round 12 */
    0x800000000000008bULL,  /* round 13 */
    0x8000000000008089ULL,  /* round 14 */
    0x8000000000008003ULL,  /* round 15 */
    0x8000000000008002ULL,  /* round 16 */
    0x8000000000000080ULL,  /* round 17 */
    0x000000000000800aULL,  /* round 18 */
    0x800000008000000aULL,  /* round 19 */
    0x8000000080008081ULL,  /* round 20 */
    0x8000000000008080ULL,  /* round 21 */
    0x0000000080000001ULL,  /* round 22 */
    0x8000000080008008ULL   /* round 23 */
};
/*
 * copyFromStateAndXor576bits(X, state, input): load state[0..24] into the
 * X##* variables, XORing input[0..8] (9 * 64 = 576 bits) into lanes 0..8.
 * Besides the per-lane variables it builds the packed variables read by
 * the round macro (X##bage, X##begi, X##bigo, X##bogu, X##buga, X##kame,
 * X##kemi, X##kimo, X##komu, X##kuma, X##sase, X##siso, X##su) and
 * accumulates the loaded values into Cae, Cio and Cua for computeD.
 *
 * NOTE(review): LOAD128u is presumably the unaligned 128-bit load; it is
 * used for every 128-bit access to `input` and for the odd state indices.
 *
 * The definition ends on its last statement, without a trailing line
 * continuation, so it cannot absorb the line that follows it.
 */
#define copyFromStateAndXor576bits(X, state, input) \
    X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cua = X##bu; \
    X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = LOAD64(state[ 9]); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = LOAD128(state[10]); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = LOAD128(state[12]); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = LOAD128(state[14]); \
    XOReq64(Cua, X##kuma); \
    X##me = LOAD64(state[16]); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = LOAD128u(state[17]); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
/*
 * copyFromStateAndXor832bits(X, state, input): load state[0..24] into the
 * X##* variables, XORing input[0..12] (13 * 64 = 832 bits) into lanes
 * 0..12.  Lane 12 is the last input lane, so only 64 bits of input are
 * loaded for the (12, 13) pair.  The packed variables used by the round
 * macro are built and Cae, Cio and Cua are accumulated for computeD.
 *
 * NOTE(review): LOAD128u is presumably the unaligned 128-bit load; it is
 * used for every 128-bit access to `input` and for the odd state indices.
 *
 * The definition ends on its last statement, without a trailing line
 * continuation, so it cannot absorb the line that follows it.
 */
#define copyFromStateAndXor832bits(X, state, input) \
    X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cua = X##bu; \
    X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD64(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = LOAD128(state[14]); \
    XOReq64(Cua, X##kuma); \
    X##me = LOAD64(state[16]); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = LOAD128u(state[17]); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
/*
 * copyFromStateAndXor1024bits(X, state, input): load state[0..24] into the
 * X##* variables, XORing input[0..15] (16 * 64 = 1024 bits) into lanes
 * 0..15.  The packed variables used by the round macro are built and Cae,
 * Cio and Cua are accumulated for computeD.
 *
 * input[14] is read with LOAD128u like every other 128-bit access to
 * `input` in this macro: nothing here guarantees the alignment of `input`
 * (NOTE(review): assumes LOAD128 requires 16-byte alignment and LOAD128u
 * does not -- confirm against the helper definitions).
 *
 * The definition ends on its last statement, without a trailing line
 * continuation, so it cannot absorb the line that follows it.
 */
#define copyFromStateAndXor1024bits(X, state, input) \
    X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cua = X##bu; \
    X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = XOR128(LOAD128(state[14]), LOAD128u(input[14])); \
    XOReq64(Cua, X##kuma); \
    X##me = LOAD64(state[16]); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = LOAD128u(state[17]); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
/*
 * copyFromStateAndXor1088bits(X, state, input): load state[0..24] into the
 * X##* variables, XORing input[0..16] (17 * 64 = 1088 bits) into lanes
 * 0..16.  The packed variables used by the round macro are built and Cae,
 * Cio and Cua are accumulated for computeD.
 *
 * input[14] is read with LOAD128u like every other 128-bit access to
 * `input` in this macro: nothing here guarantees the alignment of `input`
 * (NOTE(review): assumes LOAD128 requires 16-byte alignment and LOAD128u
 * does not -- confirm against the helper definitions).
 *
 * The definition ends on its last statement, without a trailing line
 * continuation, so it cannot absorb the line that follows it.
 */
#define copyFromStateAndXor1088bits(X, state, input) \
    X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cua = X##bu; \
    X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = XOR128(LOAD128(state[14]), LOAD128u(input[14])); \
    XOReq64(Cua, X##kuma); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = LOAD128u(state[17]); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
/*
 * copyFromStateAndXor1152bits(X, state, input): load state[0..24] into the
 * X##* variables, XORing input[0..17] (18 * 64 = 1152 bits) into lanes
 * 0..17.  Lane 17 is the last input lane, so only 64 bits of input are
 * loaded for the (17, 18) pair.  The packed variables used by the round
 * macro are built and Cae, Cio and Cua are accumulated for computeD.
 *
 * input[14] is read with LOAD128u like every other 128-bit access to
 * `input` in this macro: nothing here guarantees the alignment of `input`
 * (NOTE(review): assumes LOAD128 requires 16-byte alignment and LOAD128u
 * does not -- confirm against the helper definitions).
 *
 * The definition ends on its last statement, without a trailing line
 * continuation, so it cannot absorb the line that follows it.
 */
#define copyFromStateAndXor1152bits(X, state, input) \
    X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cua = X##bu; \
    X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = XOR128(LOAD128(state[14]), LOAD128u(input[14])); \
    XOReq64(Cua, X##kuma); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = XOR128(LOAD128u(state[17]), LOAD64(input[17])); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
/*
 * copyFromStateAndXor1344bits(X, state, input): load state[0..24] into the
 * X##* variables, XORing input[0..20] (21 * 64 = 1344 bits) into lanes
 * 0..20.  Lane 20 is the last input lane, so only 64 bits of input are
 * loaded for the (20, 21) pair.  The packed variables used by the round
 * macro are built and Cae, Cio and Cua are accumulated for computeD.
 *
 * input[14] is read with LOAD128u like every other 128-bit access to
 * `input` in this macro: nothing here guarantees the alignment of `input`
 * (NOTE(review): assumes LOAD128 requires 16-byte alignment and LOAD128u
 * does not -- confirm against the helper definitions).
 *
 * The definition ends on its last statement, without a trailing line
 * continuation, so it cannot absorb the line that follows it.
 */
#define copyFromStateAndXor1344bits(X, state, input) \
    X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cua = X##bu; \
    X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = XOR128(LOAD128(state[14]), LOAD128u(input[14])); \
    XOReq64(Cua, X##kuma); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = XOR128(LOAD128u(state[17]), LOAD128u(input[17])); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = XOR128(LOAD128(state[20]), LOAD64(input[20])); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
/*
 * copyFromState(X, state)
 *
 * Load the Keccak state into the X##-prefixed working variables without
 * mixing in any input.  The sequence of loads mirrors the
 * copyFromStateAndXor*bits macros: each row is loaded, split into its
 * per-lane variables (X##ba, X##be, ...), recombined into the paired values
 * (X##buga, X##bage, ..., X##komu) with GET64LOLO, and XORed into the running
 * accumulators Cae, Cio and Cua.
 *
 * NOTE(review): state[5], state[7] and state[17] are read with LOAD128u while
 * the other 128-bit reads use LOAD128; presumably these odd indices are not
 * 16-byte aligned - confirm against the LOAD macro definitions.
 */
#define copyFromState(X, state) \
X##bae = LOAD128(state[ 0]); \
X##ba = X##bae; \
X##be = GET64HIHI(X##bae, X##bae); \
Cae = X##bae; \
X##bio = LOAD128(state[ 2]); \
X##bi = X##bio; \
X##bo = GET64HIHI(X##bio, X##bio); \
Cio = X##bio; \
X##bu = LOAD64(state[ 4]); \
Cua = X##bu; \
X##gae = LOAD128u(state[ 5]); \
X##ga = X##gae; \
X##buga = GET64LOLO(X##bu, X##ga); \
X##ge = GET64HIHI(X##gae, X##gae); \
X##bage = GET64LOLO(X##ba, X##ge); \
XOReq128(Cae, X##gae); \
X##gio = LOAD128u(state[ 7]); \
X##gi = X##gio; \
X##begi = GET64LOLO(X##be, X##gi); \
X##go = GET64HIHI(X##gio, X##gio); \
X##bigo = GET64LOLO(X##bi, X##go); \
XOReq128(Cio, X##gio); \
X##gu = LOAD64(state[ 9]); \
X##bogu = GET64LOLO(X##bo, X##gu); \
XOReq64(Cua, X##gu); \
X##kae = LOAD128(state[10]); \
X##ka = X##kae; \
X##ke = GET64HIHI(X##kae, X##kae); \
XOReq128(Cae, X##kae); \
X##kio = LOAD128(state[12]); \
X##ki = X##kio; \
X##ko = GET64HIHI(X##kio, X##kio); \
XOReq128(Cio, X##kio); \
X##kuma = LOAD128(state[14]); \
XOReq64(Cua, X##kuma); \
X##me = LOAD64(state[16]); \
X##kame = GET64LOLO(X##ka, X##me); \
XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
X##mio = LOAD128u(state[17]); \
X##mi = X##mio; \
X##kemi = GET64LOLO(X##ke, X##mi); \
X##mo = GET64HIHI(X##mio, X##mio); \
X##kimo = GET64LOLO(X##ki, X##mo); \
XOReq128(Cio, X##mio); \
X##mu = LOAD64(state[19]); \
X##komu = GET64LOLO(X##ko, X##mu); \
XOReq64(Cua, X##mu); \
X##sase = LOAD128(state[20]); \
XOReq128(Cae, X##sase); \
X##siso = LOAD128(state[22]); \
XOReq128(Cio, X##siso); \
X##su = LOAD64(state[24]); \
XOReq64(Cua, X##su); \
/*
 * copyToState(state, X)
 *
 * Write the X##-prefixed working variables back to the state array.
 * The paired variables (X##bage, X##begi, X##bigo, X##bogu and X##kame,
 * X##kemi, X##kimo, X##komu) are stored twice: their low half goes to
 * state[0..3] / state[10..13] and, after COPY64HI2LO, their high half goes to
 * state[6..9] / state[16..19].  X##buga, X##kuma, X##sase and X##siso are
 * written with a single 128-bit store each, and X##su with a 64-bit store.
 */
#define copyToState(state, X) \
STORE64(state[ 0], X##bage); \
STORE64(state[ 1], X##begi); \
STORE64(state[ 2], X##bigo); \
STORE64(state[ 3], X##bogu); \
STORE128(state[ 4], X##buga); \
STORE64(state[ 6], COPY64HI2LO(X##bage)); \
STORE64(state[ 7], COPY64HI2LO(X##begi)); \
STORE64(state[ 8], COPY64HI2LO(X##bigo)); \
STORE64(state[ 9], COPY64HI2LO(X##bogu)); \
STORE64(state[10], X##kame); \
STORE64(state[11], X##kemi); \
STORE64(state[12], X##kimo); \
STORE64(state[13], X##komu); \
STORE128(state[14], X##kuma); \
STORE64(state[16], COPY64HI2LO(X##kame)); \
STORE64(state[17], COPY64HI2LO(X##kemi)); \
STORE64(state[18], COPY64HI2LO(X##kimo)); \
STORE64(state[19], COPY64HI2LO(X##komu)); \
STORE128(state[20], X##sase); \
STORE128(state[22], X##siso); \
STORE64(state[24], X##su); \
/*
 * copyStateVariables(X, Y)
 *
 * Assign the thirteen Y##-prefixed state variables (the paired values
 * bage..komu, plus buga, kuma, sase, siso and su) to the X##-prefixed set.
 * These are exactly the variables that copyToState() writes back.
 */
#define copyStateVariables(X, Y) \
X##bage = Y##bage; \
X##begi = Y##begi; \
X##bigo = Y##bigo; \
X##bogu = Y##bogu; \
X##buga = Y##buga; \
X##kame = Y##kame; \
X##kemi = Y##kemi; \
X##kimo = Y##kimo; \
X##komu = Y##komu; \
X##kuma = Y##kuma; \
X##sase = Y##sase; \
X##siso = Y##siso; \
X##su = Y##su; \

View File

@ -0,0 +1,83 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <string.h>
#include "KeccakNISTInterface.h"
#include "KeccakF-1600-interface.h"
/*
 * Initialize a hash state for the requested output size.
 *
 * The rate/capacity pair is chosen from hashbitlen:
 *     0   -> 1024 / 576  (default parameters, arbitrary-length output)
 *     224 -> 1152 / 448
 *     256 -> 1088 / 512
 *     384 ->  832 / 768
 *     512 ->  576 / 1024
 *
 * Returns SUCCESS on success, BAD_HASHLEN for any other hashbitlen, and FAIL
 * if InitSponge() rejects the parameters (its status used to be ignored).
 */
static HashReturn Init(hashState *state, int hashbitlen)
{
    int status;

    switch(hashbitlen) {
        case 0: /* Default parameters, arbitrary length output */
            status = InitSponge((spongeState*)state, 1024, 576);
            break;
        case 224:
            status = InitSponge((spongeState*)state, 1152, 448);
            break;
        case 256:
            status = InitSponge((spongeState*)state, 1088, 512);
            break;
        case 384:
            status = InitSponge((spongeState*)state, 832, 768);
            break;
        case 512:
            status = InitSponge((spongeState*)state, 576, 1024);
            break;
        default:
            return BAD_HASHLEN;
    }
    /* InitSponge() returns non-zero when it refuses the rate/capacity pair;
     * do not hand back a half-initialized state as if it were usable. */
    if (status != 0)
        return FAIL;
    state->fixedOutputLength = hashbitlen;
    return SUCCESS;
}
/*
 * Feed databitlen bits of data into the sponge.
 *
 * Whole bytes are absorbed directly.  If the input ends in a partial byte,
 * the NIST convention (bits in the most significant positions) is converted
 * to the sponge convention (bits in the least significant positions) before
 * the trailing bits are absorbed in a second call.
 */
static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen)
{
    const unsigned int trailingBits = (unsigned int)(databitlen % 8);
    HashReturn status;
    unsigned char alignedTail;

    /* Byte-aligned input: one call does it all. */
    if (trailingBits == 0)
        return Absorb((spongeState*)state, data, databitlen);

    /* Absorb the whole bytes first, then the realigned remainder. */
    status = Absorb((spongeState*)state, data, databitlen - trailingBits);
    if (status != SUCCESS)
        return status;

    /* Align the last partial byte to the least significant bits */
    alignedTail = data[databitlen/8] >> (8 - trailingBits);
    return Absorb((spongeState*)state, &alignedTail, trailingBits);
}
/*
 * Produce the digest: squeeze exactly the number of bits that was recorded
 * in state->fixedOutputLength into hashval.
 */
static HashReturn Final(hashState *state, BitSequence *hashval)
{
    const unsigned long long outputBits = state->fixedOutputLength;

    return Squeeze(state, hashval, outputBits);
}
/*
static HashReturn Hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval)
{
hashState state;
HashReturn result;
if ((hashbitlen != 224) && (hashbitlen != 256) && (hashbitlen != 384) && (hashbitlen != 512))
return BAD_HASHLEN; * Only the four fixed output lengths available through this API *
result = Init(&state, hashbitlen);
if (result != SUCCESS)
return result;
result = Update(&state, data, databitlen);
if (result != SUCCESS)
return result;
result = Final(&state, hashval);
return result;
}
*/

View File

@ -0,0 +1,72 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _KeccakNISTInterface_h_
#define _KeccakNISTInterface_h_
#include "KeccakSponge.h"
/* Raw input/output bytes of the hash functions. */
typedef unsigned char BitSequence;
/* Length of a message, counted in bits (not bytes). */
typedef unsigned long long DataLength;
/* Status codes returned by Init(), Update() and Final(). */
typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2 } HashReturn;
/* The hash state is the sponge state itself; both names refer to one type. */
typedef spongeState hashState;
/**
* Function to initialize the state of the Keccak[r, c] sponge function.
* The rate r and capacity c values are determined from @a hashbitlen:
* 0 selects r=1024/c=576, 224 selects r=1152/c=448, 256 selects r=1088/c=512,
* 384 selects r=832/c=768 and 512 selects r=576/c=1024.
* @param state Pointer to the state of the sponge function to be initialized.
* @param hashbitlen The desired number of output bits,
* or 0 for Keccak[] with default parameters
* and arbitrarily-long output.
* @pre The value of hashbitlen must be one of 0, 224, 256, 384 and 512.
* @return SUCCESS if successful, BAD_HASHLEN if the value of hashbitlen is incorrect.
*/
static HashReturn Init(hashState *state, int hashbitlen);
/**
* Function to give input data for the sponge function to absorb.
* @param state Pointer to the state of the sponge function initialized by Init().
* @param data Pointer to the input data.
* When @a databitlen is not a multiple of 8, the last bits of data must be
* in the most significant bits of the last byte.
* @param databitlen The number of input bits provided in the input data.
* @pre In the previous call to Update(), databitlen was a multiple of 8
* (only the final call may end in a partial byte).
* @return SUCCESS if successful, FAIL otherwise.
*/
static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen);
/**
* Function to squeeze output data from the sponge function.
* If @a hashbitlen was not 0 in the call to Init(), the number of output bits is equal to @a hashbitlen.
* If @a hashbitlen was 0 in the call to Init(), the output bits must be extracted using the Squeeze() function
* (Final() then requests zero bits, since the recorded output length is 0).
* @param state Pointer to the state of the sponge function initialized by Init().
* @param hashval Pointer to the buffer where to store the output data.
* It must have room for hashbitlen/8 bytes.
* @return SUCCESS if successful, FAIL otherwise.
*/
static HashReturn Final(hashState *state, BitSequence *hashval);
/**
* Function to compute a hash using the Keccak[r, c] sponge function.
* The rate r and capacity c values are determined from @a hashbitlen.
* @param hashbitlen The desired number of output bits.
* @param data Pointer to the input data.
* When @a databitLen is not a multiple of 8, the last bits of data must be
* in the most significant bits of the last byte.
* @param databitLen The number of input bits provided in the input data.
* @param hashval Pointer to the buffer where to store the output data.
* @pre The value of hashbitlen must be one of 224, 256, 384 and 512.
* @return SUCCESS if successful, BAD_HASHLEN if the value of hashbitlen is incorrect.
*/
/*
static HashReturn Hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);
*/
#endif

View File

@ -0,0 +1,266 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <string.h>
#include "KeccakSponge.h"
#include "KeccakF-1600-interface.h"
#ifdef KeccakReference
#include "displayIntermediateValues.h"
#endif
/*
 * Put a sponge into its initial absorbing state.
 *
 * rate and capacity are given in bits.  They must add up to 1600 and the
 * rate must be a non-zero multiple of 64 that is smaller than 1600.
 * Returns 0 on success and 1 when the parameters are rejected (in which case
 * the state is left untouched).
 */
static int InitSponge(spongeState *state, unsigned int rate, unsigned int capacity)
{
    const int sizesAddUp = (rate + capacity == 1600);
    const int rateUsable = (rate > 0) && (rate < 1600) && ((rate % 64) == 0);

    if (!sizesAddUp || !rateUsable)
        return 1;

    KeccakInitialize();

    /* Sponge parameters. */
    state->rate = rate;
    state->capacity = capacity;
    state->fixedOutputLength = 0;

    /* Permutation state and an empty input queue. */
    KeccakInitializeState(state->state);
    memset(state->dataQueue, 0, KeccakMaximumRateInBytes);
    state->bitsInQueue = 0;

    /* Start in the absorbing phase with nothing buffered for output. */
    state->squeezing = 0;
    state->bitsAvailableForSqueezing = 0;
    return 0;
}
/*
 * Absorb the full block that is waiting in state->dataQueue and mark the
 * queue as empty.  A rate-specific routine is used when one was compiled in
 * (ProvideFast* macros); every other rate goes through the generic
 * KeccakAbsorb().
 */
static void AbsorbQueue(spongeState *state)
{
    /* Precondition: state->bitsInQueue is assumed to be equal to state->rate */
#ifdef KeccakReference
    displayBytes(1, "Block to be absorbed", state->dataQueue, state->rate/8);
#endif
    switch (state->rate) {
#ifdef ProvideFast576
    case 576:
        KeccakAbsorb576bits(state->state, state->dataQueue);
        break;
#endif
#ifdef ProvideFast832
    case 832:
        KeccakAbsorb832bits(state->state, state->dataQueue);
        break;
#endif
#ifdef ProvideFast1024
    case 1024:
        KeccakAbsorb1024bits(state->state, state->dataQueue);
        break;
#endif
#ifdef ProvideFast1088
    case 1088:
        KeccakAbsorb1088bits(state->state, state->dataQueue);
        break;
#endif
#ifdef ProvideFast1152
    case 1152:
        KeccakAbsorb1152bits(state->state, state->dataQueue);
        break;
#endif
#ifdef ProvideFast1344
    case 1344:
        KeccakAbsorb1344bits(state->state, state->dataQueue);
        break;
#endif
    default:
        /* Generic path: the block length is passed in 64-bit lanes. */
        KeccakAbsorb(state->state, state->dataQueue, state->rate/64);
        break;
    }
    state->bitsInQueue = 0;
}
/*
 * Absorb databitlen bits of data into the sponge.
 *
 * While the queue is empty and at least one full block of input remains,
 * whole blocks are absorbed straight from the caller's buffer.  Otherwise
 * the input is copied into state->dataQueue, which is absorbed as soon as it
 * holds a full block.  A trailing partial byte (databitlen not a multiple of
 * 8) is stored in the queue with its bits in the least significant positions.
 *
 * Returns 0 on success, or 1 if a previous call left a partial byte in the
 * queue or the sponge has already switched to the squeezing phase.
 *
 * The number of bits copied into the queue is clamped in 64-bit arithmetic
 * before it is narrowed to unsigned int.  Narrowing first (as the code used
 * to do) loses the high bits of the remaining length: with a non-empty queue
 * and roughly 512 MiB or more of input the truncated value could be 0
 * (endless loop) or its sum with bitsInQueue could wrap around and slip past
 * the bounds check (memcpy far beyond dataQueue).
 */
static int Absorb(spongeState *state, const unsigned char *data, unsigned long long databitlen)
{
    unsigned long long i, j, wholeBlocks, remainingBits;
    unsigned int partialBlock, partialByte, roomInQueue;
    const unsigned char *curData;

    if ((state->bitsInQueue % 8) != 0)
        return 1; /* Only the last call may contain a partial byte */
    if (state->squeezing)
        return 1; /* Too late for additional input */

    i = 0;
    while(i < databitlen) {
        if ((state->bitsInQueue == 0) && (databitlen >= state->rate) && (i <= (databitlen-state->rate))) {
            /* Fast path: absorb whole blocks without copying them. */
            wholeBlocks = (databitlen-i)/state->rate;
            curData = data+i/8;
#ifdef ProvideFast576
            if (state->rate == 576) {
                for(j=0; j<wholeBlocks; j++, curData+=576/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb576bits(state->state, curData);
                }
            }
            else
#endif
#ifdef ProvideFast832
            if (state->rate == 832) {
                for(j=0; j<wholeBlocks; j++, curData+=832/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb832bits(state->state, curData);
                }
            }
            else
#endif
#ifdef ProvideFast1024
            if (state->rate == 1024) {
                for(j=0; j<wholeBlocks; j++, curData+=1024/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb1024bits(state->state, curData);
                }
            }
            else
#endif
#ifdef ProvideFast1088
            if (state->rate == 1088) {
                for(j=0; j<wholeBlocks; j++, curData+=1088/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb1088bits(state->state, curData);
                }
            }
            else
#endif
#ifdef ProvideFast1152
            if (state->rate == 1152) {
                for(j=0; j<wholeBlocks; j++, curData+=1152/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb1152bits(state->state, curData);
                }
            }
            else
#endif
#ifdef ProvideFast1344
            if (state->rate == 1344) {
                for(j=0; j<wholeBlocks; j++, curData+=1344/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb1344bits(state->state, curData);
                }
            }
            else
#endif
            {
                for(j=0; j<wholeBlocks; j++, curData+=state->rate/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb(state->state, curData, state->rate/64);
                }
            }
            i += wholeBlocks*state->rate;
        }
        else {
            /* Slow path: top up the queue.  Compare in 64 bits first and only
             * then narrow, so that the chunk can never exceed the free room
             * in the queue.  bitsInQueue is below rate here because a full
             * queue is absorbed immediately. */
            remainingBits = databitlen - i;
            roomInQueue = state->rate - state->bitsInQueue;
            if (remainingBits > roomInQueue)
                partialBlock = roomInQueue;
            else
                partialBlock = (unsigned int)remainingBits;
            partialByte = partialBlock % 8;
            partialBlock -= partialByte;
            memcpy(state->dataQueue+state->bitsInQueue/8, data+i/8, partialBlock/8);
            state->bitsInQueue += partialBlock;
            i += partialBlock;
            if (state->bitsInQueue == state->rate)
                AbsorbQueue(state);
            if (partialByte > 0) {
                /* Keep only the valid low bits of the trailing byte. */
                unsigned char mask = (1 << partialByte)-1;
                state->dataQueue[state->bitsInQueue/8] = data[i/8] & mask;
                state->bitsInQueue += partialByte;
                i += partialByte;
            }
        }
    }
    return 0;
}
/*
 * Finish the absorbing phase: pad the queued input, absorb the final
 * block(s), extract the first block of output into state->dataQueue and set
 * state->squeezing.
 *
 * The padding sets one bit directly after the message and one bit in the last
 * position of the block, with zeros in between.
 */
static void PadAndSwitchToSqueezingPhase(spongeState *state)
{
/* Note: the bits are numbered from 0=LSB to 7=MSB */
if (state->bitsInQueue + 1 == state->rate) {
/* Only one free bit is left: the first padding bit fills this block, so the
 * closing bit has to go into an additional, otherwise empty block. */
state->dataQueue[state->bitsInQueue/8 ] |= 1 << (state->bitsInQueue % 8);
AbsorbQueue(state);
memset(state->dataQueue, 0, state->rate/8);
}
else {
/* Zero the unused tail of the queue, then set the first padding bit. */
memset(state->dataQueue + (state->bitsInQueue+7)/8, 0, state->rate/8 - (state->bitsInQueue+7)/8);
state->dataQueue[state->bitsInQueue/8 ] |= 1 << (state->bitsInQueue % 8);
}
/* Closing padding bit in the very last bit of the block. */
state->dataQueue[(state->rate-1)/8] |= 1 << ((state->rate-1) % 8);
AbsorbQueue(state);
#ifdef KeccakReference
displayText(1, "--- Switching to squeezing phase ---");
#endif
/* From here on dataQueue is reused as the output buffer. */
#ifdef ProvideFast1024
if (state->rate == 1024) {
KeccakExtract1024bits(state->state, state->dataQueue);
state->bitsAvailableForSqueezing = 1024;
}
else
#endif
{
KeccakExtract(state->state, state->dataQueue, state->rate/64);
state->bitsAvailableForSqueezing = state->rate;
}
#ifdef KeccakReference
displayBytes(1, "Block available for squeezing", state->dataQueue, state->bitsAvailableForSqueezing/8);
#endif
state->squeezing = 1;
}
/*
 * Copy outputLength bits of sponge output to output.
 *
 * The first call switches the sponge from absorbing to squeezing (this
 * happens even if the call is then rejected).  outputLength must be a
 * multiple of 8; otherwise 1 is returned.  Returns 0 on success.
 */
static int Squeeze(spongeState *state, unsigned char *output, unsigned long long outputLength)
{
    unsigned long long produced;
    unsigned int chunk;

    if (!state->squeezing)
        PadAndSwitchToSqueezingPhase(state);
    if ((outputLength % 8) != 0)
        return 1; /* Only multiple of 8 bits are allowed, truncation can be done at user level */

    for (produced = 0; produced < outputLength; produced += chunk) {
        /* Refill the output buffer once it has been used up. */
        if (state->bitsAvailableForSqueezing == 0) {
            KeccakPermutation(state->state);
#ifdef ProvideFast1024
            if (state->rate == 1024) {
                KeccakExtract1024bits(state->state, state->dataQueue);
                state->bitsAvailableForSqueezing = 1024;
            }
            else
#endif
            {
                KeccakExtract(state->state, state->dataQueue, state->rate/64);
                state->bitsAvailableForSqueezing = state->rate;
            }
#ifdef KeccakReference
            displayBytes(1, "Block available for squeezing", state->dataQueue, state->bitsAvailableForSqueezing/8);
#endif
        }

        /* Hand out whatever is buffered, but no more than still requested. */
        chunk = state->bitsAvailableForSqueezing;
        if ((unsigned long long)chunk > outputLength - produced)
            chunk = (unsigned int)(outputLength - produced);
        memcpy(output + produced/8,
               state->dataQueue + (state->rate - state->bitsAvailableForSqueezing)/8,
               chunk/8);
        state->bitsAvailableForSqueezing -= chunk;
    }
    return 0;
}

View File

@ -0,0 +1,76 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _KeccakSponge_h_
#define _KeccakSponge_h_
/* Width of the Keccak-f permutation, in bits and in bytes. */
#define KeccakPermutationSize 1600
#define KeccakPermutationSizeInBytes (KeccakPermutationSize/8)
/* Largest rate the queue is sized for, in bits and in bytes
 * (1536 is the largest multiple of 64 below 1600). */
#define KeccakMaximumRate 1536
#define KeccakMaximumRateInBytes (KeccakMaximumRate/8)
/* ALIGN requests 32-byte alignment where the compiler offers a way to ask
 * for it; on other compilers it expands to nothing. */
#if defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#else
#define ALIGN
#endif
/* Complete state of one sponge instance. */
ALIGN typedef struct spongeStateStruct {
/* State of the Keccak-f[1600] permutation. */
ALIGN unsigned char state[KeccakPermutationSizeInBytes];
/* While absorbing: input waiting to fill a block.
 * While squeezing: the current block of output. */
ALIGN unsigned char dataQueue[KeccakMaximumRateInBytes];
/* Rate r in bits. */
unsigned int rate;
/* Capacity c in bits (rate + capacity == 1600). */
unsigned int capacity;
/* Number of input bits currently held in dataQueue. */
unsigned int bitsInQueue;
/* Output length in bits requested via Init(); 0 after InitSponge(). */
unsigned int fixedOutputLength;
/* Non-zero once the sponge has switched to the squeezing phase. */
int squeezing;
/* Number of output bits in dataQueue that have not been handed out yet. */
unsigned int bitsAvailableForSqueezing;
} spongeState;
/**
* Function to initialize the state of the Keccak[r, c] sponge function.
* The sponge function is set to the absorbing phase.
* @param state Pointer to the state of the sponge function to be initialized.
* @param rate The value of the rate r, in bits.
* @param capacity The value of the capacity c, in bits.
* @pre One must have r+c=1600 and the rate a non-zero multiple of 64 bits
* smaller than 1600 in this implementation.
* @return Zero if successful, 1 otherwise.
*/
static int InitSponge(spongeState *state, unsigned int rate, unsigned int capacity);
/**
* Function to give input data for the sponge function to absorb.
* @param state Pointer to the state of the sponge function initialized by InitSponge().
* @param data Pointer to the input data.
* When @a databitlen is not a multiple of 8, the last bits of data must be
* in the least significant bits of the last byte.
* @param databitlen The number of input bits provided in the input data.
* @pre In the previous call to Absorb(), databitlen was a multiple of 8.
* @pre The sponge function must be in the absorbing phase,
* i.e., Squeeze() must not have been called before.
* @return Zero if successful, 1 otherwise (a precondition was violated).
*/
static int Absorb(spongeState *state, const unsigned char *data, unsigned long long databitlen);
/**
* Function to squeeze output data from the sponge function.
* If the sponge function was in the absorbing phase, this function
* switches it to the squeezing phase (the input is padded at that point,
* so no further Absorb() calls are accepted afterwards).
* @param state Pointer to the state of the sponge function initialized by InitSponge().
* @param output Pointer to the buffer where to store the output data.
* @param outputLength The number of output bits desired.
* It must be a multiple of 8.
* @return Zero if successful, 1 otherwise.
*/
static int Squeeze(spongeState *state, unsigned char *output, unsigned long long outputLength);
#endif

View File

569
Modules/_sha3/sha3module.c Normal file
View File

@ -0,0 +1,569 @@
/* SHA3 module
*
* This module provides an interface to the SHA3 algorithm
*
* See below for information about the original code this module was
* based upon. Additional work performed by:
*
* Andrew Kuchling (amk@amk.ca)
* Greg Stein (gstein@lyra.org)
* Trevor Perrin (trevp@trevp.net)
* Gregory P. Smith (greg@krypto.org)
*
* Copyright (C) 2012 Christian Heimes (christian@python.org)
* Licensed to PSF under a Contributor Agreement.
*
*/
#include "Python.h"
#include "../hashlib.h"
/* **************************************************************************
* SHA-3 (Keccak)
*
* The code is based on KeccakReferenceAndOptimized-3.2.zip from 29 May 2012.
*
* The reference implementation is altered in these points:
* - C++ comments are converted to ANSI C comments.
* - All functions and globals are declared static.
* - The typedef for UINT64 is commented out.
* - brg_endian.h is removed.
* - KeccakF-1600-opt[32|64]-settings.h are commented out
* - Some unused functions are commented out to silence compiler warnings.
*
* In order to avoid name clashes with other software I have to declare all
* Keccak functions and global data as static. The C code is directly
* included into this file in order to access the static functions.
*
* Keccak can be tuned with several parameters. I try to explain all options
* as far as I understand them. The reference implementation also contains
* assembler code for ARM platforms (NEON instructions).
*
* Common
* ======
*
* Options:
* UseBebigokimisa, Unrolling
*
* - Unrolling: loop unrolling (24, 12, 8, 6, 4, 3, 2, 1)
* - UseBebigokimisa: lane complementing
*
* 64bit platforms
* ===============
*
* Additional options:
* UseSSE, UseOnlySIMD64, UseMMX, UseXOP, UseSHLD
*
* Optimized instructions (disabled by default):
* - UseSSE: use Stream SIMD extensions
* o UseOnlySIMD64: limit to 64bit instructions, otherwise 128bit
* o w/o UseOnlySIMD64: requires compiler argument -mssse3 or -mtune
* - UseMMX: use 64bit MMX instructions
* - UseXOP: use AMD's eXtended Operations (128bit SSE extension)
*
* Other:
* - Unrolling: default 24
* - UseBebigokimisa: default 1
*
* When neither UseSSE, UseMMX nor UseXOP is configured, ROL64 (rotate left
* 64) is implemented as:
* - Windows: _rotl64()
* - UseSHLD: use shld (shift left) asm optimization
* - otherwise: shift and xor
*
* UseBebigokimisa can't be used in combination with UseSSE, UseMMX or
* UseXOP. UseOnlySIMD64 has no effect unless UseSSE is specified.
*
* Tests have shown that UseSSE + UseOnlySIMD64 is about three to four
* times SLOWER than UseBebigokimisa. UseSSE and UseMMX are about two times
* slower. (tested by CH and AP)
*
* 32bit platforms
* ===============
*
* Additional options:
* UseInterleaveTables, UseSchedule
*
* - Unrolling: default 2
* - UseBebigokimisa: default n/a
* - UseSchedule: ???, (1, 2, 3; default 3)
* - UseInterleaveTables: use two 64k lookup tables for (de)interleaving
* default: n/a
*
* schedules:
* - 3: no UseBebigokimisa, Unrolling must be 2
* - 2 + 1: ???
*
* *************************************************************************/
/* Select the Keccak implementation and its tuning options from the pointer
 * size and from whether Python found a 64-bit unsigned integer type.
 * KeccakImplementation decides further down which KeccakF-1600-opt*.c file
 * is included. */
#if SIZEOF_VOID_P == 8 && defined(PY_UINT64_T)
/* 64bit platforms with unsigned int64 */
#define KeccakImplementation 64
#define Unrolling 24
#define UseBebigokimisa
typedef PY_UINT64_T UINT64;
#elif SIZEOF_VOID_P == 4 && defined(PY_UINT64_T)
/* 32bit platforms with unsigned int64 */
#define KeccakImplementation 32
#define Unrolling 2
#define UseSchedule 3
typedef PY_UINT64_T UINT64;
#else
/* 32 or 64bit platforms without unsigned int64 */
/* NOTE(review): #warning is not part of ISO C90/C99 and UINT64 is not
 * defined in this branch - confirm the opt32 code builds without it. */
#warning no uint64_t available, force Keccak opt32 with interleave tables
#define KeccakImplementation 32
#define Unrolling 2
#define UseSchedule 3
#define UseInterleaveTables
#endif
/* replacement for brg_endian.h
 *
 * The Keccak sources compare PLATFORM_BYTE_ORDER against IS_LITTLE_ENDIAN /
 * IS_BIG_ENDIAN in #if directives.  BIG_ENDIAN, LITTLE_ENDIAN and BYTE_ORDER
 * are not defined on every platform; when they are missing all three names
 * evaluate to 0 inside #if and the comparison silently selects the
 * little-endian code.  Use fixed, distinct values and take the byte order
 * from pyconfig.h's WORDS_BIGENDIAN instead.
 */
#define IS_BIG_ENDIAN 4321
#define IS_LITTLE_ENDIAN 1234
#ifdef WORDS_BIGENDIAN
#define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#else
#define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
/* inline all Keccak dependencies */
#include "keccak/KeccakNISTInterface.h"
#include "keccak/KeccakNISTInterface.c"
#include "keccak/KeccakSponge.c"
#if KeccakImplementation == 64
#include "keccak/KeccakF-1600-opt64.c"
#elif KeccakImplementation == 32
#include "keccak/KeccakF-1600-opt32.c"
#endif
/* Value reported as block_size and the size of the largest digest, in bytes. */
#define SHA3_BLOCKSIZE 200 /* 1600 bits */
#define SHA3_MAX_DIGESTSIZE 64 /* 512 bits */
/* Map the module's SHA3_* names onto the Keccak NIST interface. */
#define SHA3_state hashState
#define SHA3_init Init
#define SHA3_process Update
#define SHA3_done Final
/* Both helpers take the state objects themselves (not pointers to them). */
#define SHA3_copystate(dest, src) memcpy(&(dest), &(src), sizeof(SHA3_state))
#define SHA3_clearstate(state) memset(&(state), 0, sizeof(SHA3_state))
/* The structure for storing SHA3 info */
typedef struct {
PyObject_HEAD
/* Digest size in bits: 224, 256, 384 or 512. */
int hashbitlen;
/* Keccak sponge state of this hash object. */
SHA3_state hash_state;
#ifdef WITH_THREAD
/* Guards hash_state while the GIL is released; NULL until it is needed. */
PyThread_type_lock lock;
#endif
} SHA3object;
/* Forward declaration; the type object is defined after the methods. */
static PyTypeObject SHA3type;
/*
 * Allocate a SHA3 object for the given digest size (in bits).
 *
 * Only 224, 256, 384 and 512 are accepted; anything else - including 0, the
 * arbitrarily-long output mode that this module does not support - raises
 * ValueError.  The hash state itself is NOT initialized here.
 * Returns a new reference, or NULL with an exception set.
 */
static SHA3object *
newSHA3object(int hashbitlen)
{
    SHA3object *obj;
    const int supported = (hashbitlen == 224 || hashbitlen == 256 ||
                           hashbitlen == 384 || hashbitlen == 512);

    if (!supported) {
        PyErr_SetString(PyExc_ValueError,
                        "hashbitlen must be one of 224, 256, 384 or 512.");
        return NULL;
    }

    obj = (SHA3object *)PyObject_New(SHA3object, &SHA3type);
    if (obj != NULL) {
        obj->hashbitlen = hashbitlen;
#ifdef WITH_THREAD
        /* The lock is created lazily, on the first large update. */
        obj->lock = NULL;
#endif
    }
    return obj;
}
/* Internal methods for a hash object */
/* Destructor: release the lock (if one was ever allocated), wipe the hash
 * state and free the object. */
static void
SHA3_dealloc(SHA3object *self)
{
#ifdef WITH_THREAD
    if (self->lock != NULL)
        PyThread_free_lock(self->lock);
#endif
    SHA3_clearstate(self->hash_state);
    PyObject_Del(self);
}
/* External methods for a hash object */
PyDoc_STRVAR(SHA3_copy__doc__, "Return a copy of the hash object.");

/* copy(): create a second object of the same digest size and duplicate the
 * current hash state into it while holding the object's lock. */
static PyObject *
SHA3_copy(SHA3object *self, PyObject *unused)
{
    SHA3object *clone = newSHA3object(self->hashbitlen);

    if (clone == NULL)
        return NULL;

    ENTER_HASHLIB(self);
    SHA3_copystate(clone->hash_state, self->hash_state);
    LEAVE_HASHLIB(self);
    return (PyObject *)clone;
}
PyDoc_STRVAR(SHA3_digest__doc__,
"Return the digest value as a string of binary data.");

/* digest(): finalize a private copy of the state, so the object itself can
 * keep receiving updates, and return the digest as bytes. */
static PyObject *
SHA3_digest(SHA3object *self, PyObject *unused)
{
    unsigned char raw[SHA3_MAX_DIGESTSIZE];
    SHA3_state snapshot;
    HashReturn status;

    ENTER_HASHLIB(self);
    SHA3_copystate(snapshot, self->hash_state);
    LEAVE_HASHLIB(self);

    status = SHA3_done(&snapshot, raw);
    /* Do not leave a copy of the internal state on the stack. */
    SHA3_clearstate(snapshot);

    if (status == SUCCESS)
        return PyBytes_FromStringAndSize((const char *)raw,
                                         self->hashbitlen / 8);

    PyErr_SetString(PyExc_RuntimeError, "internal error in SHA3 Final()");
    return NULL;
}
PyDoc_STRVAR(SHA3_hexdigest__doc__,
"Return the digest value as a string of hexadecimal digits.");

/* hexdigest(): like digest(), but the result is a str holding two lowercase
 * hexadecimal digits per digest byte. */
static PyObject *
SHA3_hexdigest(SHA3object *self, PyObject *unused)
{
    unsigned char raw[SHA3_MAX_DIGESTSIZE];
    SHA3_state snapshot;
    HashReturn status;
    PyObject *result;
    Py_UCS1 *hex;
    int nbytes, pos;

    /* Finalize a private copy of the state to obtain the binary digest. */
    ENTER_HASHLIB(self);
    SHA3_copystate(snapshot, self->hash_state);
    LEAVE_HASHLIB(self);
    status = SHA3_done(&snapshot, raw);
    SHA3_clearstate(snapshot);
    if (status != SUCCESS) {
        PyErr_SetString(PyExc_RuntimeError, "internal error in SHA3 Final()");
        return NULL;
    }

    /* Pure ASCII result: two characters per byte. */
    nbytes = self->hashbitlen / 8;
    result = PyUnicode_New(nbytes * 2, 127);
    if (result == NULL)
        return NULL;
    hex = PyUnicode_1BYTE_DATA(result);

    /* High nibble first, then low nibble. */
    for (pos = 0; pos < nbytes; pos++) {
        hex[2 * pos] = Py_hexdigits[(raw[pos] >> 4) & 0xf];
        hex[2 * pos + 1] = Py_hexdigits[raw[pos] & 0xf];
    }
    assert(_PyUnicode_CheckConsistency(result, 1));
    return result;
}
PyDoc_STRVAR(SHA3_update__doc__,
"Update this hash object's state with the provided string.");

/*
 * update(obj): feed the bytes of a buffer-providing object into the hash.
 *
 * For buffers of at least HASHLIB_GIL_MINSIZE bytes a per-object lock is
 * created and the GIL is released while hashing.  Returns None, or NULL with
 * an exception set.
 *
 * Fixes relative to the first version of this function:
 *  - the threaded code was guarded by WITH_THREADS, a macro that does not
 *    exist (Python defines WITH_THREAD), so it was never compiled;
 *  - the trailing LEAVE_HASHLIB() released a lock that was either never taken
 *    or already released inside the ALLOW_THREADS block;
 *  - the byte count is widened to DataLength before it is converted to bits,
 *    so large buffers cannot overflow Py_ssize_t.
 */
static PyObject *
SHA3_update(SHA3object *self, PyObject *args)
{
    PyObject *obj;
    Py_buffer buf;
    HashReturn res;

    if (!PyArg_ParseTuple(args, "O:update", &obj))
        return NULL;

    GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);

    /* add new data, the function takes the length in bits not bytes */
#ifdef WITH_THREAD
    if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) {
        self->lock = PyThread_allocate_lock();
    }
    /* Once a lock exists all code paths must be synchronized. We have to
     * release the GIL even for small buffers as acquiring the lock may take
     * an unlimited amount of time when another thread updates this object
     * with lots of data. */
    if (self->lock) {
        Py_BEGIN_ALLOW_THREADS
        PyThread_acquire_lock(self->lock, 1);
        res = SHA3_process(&self->hash_state, buf.buf,
                           (DataLength)buf.len * 8);
        PyThread_release_lock(self->lock);
        Py_END_ALLOW_THREADS
    }
    else {
        res = SHA3_process(&self->hash_state, buf.buf,
                           (DataLength)buf.len * 8);
    }
#else
    res = SHA3_process(&self->hash_state, buf.buf, (DataLength)buf.len * 8);
#endif

    PyBuffer_Release(&buf);
    if (res != SUCCESS) {
        PyErr_SetString(PyExc_RuntimeError,
                        "internal error in SHA3 Update()");
        return NULL;
    }
    Py_RETURN_NONE;
}
/* Methods of SHA3 hash objects.  copy(), digest() and hexdigest() take no
 * arguments; update() takes one positional argument. */
static PyMethodDef SHA3_methods[] = {
{"copy", (PyCFunction)SHA3_copy, METH_NOARGS,
SHA3_copy__doc__},
{"digest", (PyCFunction)SHA3_digest, METH_NOARGS,
SHA3_digest__doc__},
{"hexdigest", (PyCFunction)SHA3_hexdigest, METH_NOARGS,
SHA3_hexdigest__doc__},
{"update", (PyCFunction)SHA3_update, METH_VARARGS,
SHA3_update__doc__},
{NULL, NULL} /* sentinel */
};
/* Getter for the block_size attribute: always SHA3_BLOCKSIZE (200).
 * NOTE(review): 200 bytes is the width of the whole permutation state, not
 * the rate that Init() selects per digest size - confirm this is the value
 * callers such as hmac should see. */
static PyObject *
SHA3_get_block_size(SHA3object *self, void *closure)
{
return PyLong_FromLong(SHA3_BLOCKSIZE);
}
/* Getter for the name attribute: "sha3_" followed by the digest size in
 * bits, e.g. "sha3_256". */
static PyObject *
SHA3_get_name(SHA3object *self, void *closure)
{
return PyUnicode_FromFormat("sha3_%i", self->hashbitlen);
}
/* Getter for the digest_size attribute: the digest size in bytes. */
static PyObject *
SHA3_get_digest_size(SHA3object *self, void *closure)
{
return PyLong_FromLong(self->hashbitlen / 8);
}
/* Read-only attributes of SHA3 hash objects (no setters are provided). */
static PyGetSetDef SHA3_getseters[] = {
{"block_size", (getter)SHA3_get_block_size, NULL, NULL, NULL},
{"name", (getter)SHA3_get_name, NULL, NULL, NULL},
{"digest_size", (getter)SHA3_get_digest_size, NULL, NULL, NULL},
{NULL} /* Sentinel */
};
/* Type object for SHA3 hash objects.  Only the destructor, the method table
 * and the getset table are filled in; the type has no tp_new, so instances
 * are created through the sha3_*() module functions. */
static PyTypeObject SHA3type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_sha3.SHA3", /* tp_name */
sizeof(SHA3object), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)SHA3_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
0, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
SHA3_methods, /* tp_methods */
NULL, /* tp_members */
SHA3_getseters, /* tp_getset */
};
/* constructor helper
 *
 * Shared implementation of sha3_224() ... sha3_512(): parse the optional
 * "string" argument with the given format, create a hash object for
 * hashbitlen bits and, if data was passed, hash it right away.
 * Returns a new reference, or NULL with an exception set.
 *
 * Fixes relative to the first version of this function:
 *  - the GIL-releasing branch was guarded by the non-existent macro
 *    WITH_THREADS (Python defines WITH_THREAD) and was never compiled;
 *  - the error path only wiped the hash state and leaked the new object;
 *  - a failing Init() was reported as an error in Update();
 *  - the byte count is widened to DataLength before it is converted to bits.
 */
static PyObject *
SHA3_factory(PyObject *args, PyObject *kwdict, const char *fmt,
             int hashbitlen)
{
    SHA3object *newobj = NULL;
    static char *kwlist[] = {"string", NULL};
    PyObject *data_obj = NULL;
    Py_buffer buf;
    HashReturn res;

    if (!PyArg_ParseTupleAndKeywords(args, kwdict, fmt, kwlist,
                                     &data_obj)) {
        return NULL;
    }

    if (data_obj) {
        GET_BUFFER_VIEW_OR_ERROUT(data_obj, &buf);
    }

    if ((newobj = newSHA3object(hashbitlen)) == NULL) {
        goto error;
    }

    if (SHA3_init(&newobj->hash_state, hashbitlen) != SUCCESS) {
        PyErr_SetString(PyExc_RuntimeError,
                        "internal error in SHA3 Init()");
        goto error;
    }

    if (data_obj) {
#ifdef WITH_THREAD
        if (buf.len >= HASHLIB_GIL_MINSIZE) {
            /* invariant: New objects can't be accessed by other code yet,
             * thus it's safe to release the GIL without locking the object.
             */
            Py_BEGIN_ALLOW_THREADS
            res = SHA3_process(&newobj->hash_state, buf.buf,
                               (DataLength)buf.len * 8);
            Py_END_ALLOW_THREADS
        }
        else {
            res = SHA3_process(&newobj->hash_state, buf.buf,
                               (DataLength)buf.len * 8);
        }
#else
        res = SHA3_process(&newobj->hash_state, buf.buf,
                           (DataLength)buf.len * 8);
#endif
        if (res != SUCCESS) {
            PyErr_SetString(PyExc_RuntimeError,
                            "internal error in SHA3 Update()");
            goto error;
        }
        PyBuffer_Release(&buf);
    }

    return (PyObject *)newobj;

  error:
    /* Dropping the only reference runs SHA3_dealloc(), which wipes the hash
     * state and frees the object; its lock is still NULL at this point. */
    Py_XDECREF(newobj);
    if (data_obj) {
        PyBuffer_Release(&buf);
    }
    return NULL;
}
/* Module function sha3_224([string]): 224-bit (28-byte) digest. */
PyDoc_STRVAR(sha3_224__doc__,
"sha3_224([string]) -> SHA3 object\n\
\n\
Return a new SHA3 hash object with a hashbit length of 28 bytes.");
static PyObject *
sha3_224(PyObject *self, PyObject *args, PyObject *kwdict)
{
/* The format string makes the data argument optional and names the
 * function in error messages. */
return SHA3_factory(args, kwdict, "|O:sha3_224", 224);
}
/* Module function sha3_256([string]): 256-bit (32-byte) digest. */
PyDoc_STRVAR(sha3_256__doc__,
"sha3_256([string]) -> SHA3 object\n\
\n\
Return a new SHA3 hash object with a hashbit length of 32 bytes.");
static PyObject *
sha3_256(PyObject *self, PyObject *args, PyObject *kwdict)
{
/* The format string makes the data argument optional and names the
 * function in error messages. */
return SHA3_factory(args, kwdict, "|O:sha3_256", 256);
}
/* Module function sha3_384([string]): 384-bit (48-byte) digest. */
PyDoc_STRVAR(sha3_384__doc__,
"sha3_384([string]) -> SHA3 object\n\
\n\
Return a new SHA3 hash object with a hashbit length of 48 bytes.");
static PyObject *
sha3_384(PyObject *self, PyObject *args, PyObject *kwdict)
{
/* The format string makes the data argument optional and names the
 * function in error messages. */
return SHA3_factory(args, kwdict, "|O:sha3_384", 384);
}
/* Module function sha3_512([string]): 512-bit (64-byte) digest. */
PyDoc_STRVAR(sha3_512__doc__,
"sha3_512([string]) -> SHA3 object\n\
\n\
Return a new SHA3 hash object with a hashbit length of 64 bytes.");
static PyObject *
sha3_512(PyObject *self, PyObject *args, PyObject *kwdict)
{
/* The format string makes the data argument optional and names the
 * function in error messages. */
return SHA3_factory(args, kwdict, "|O:sha3_512", 512);
}
/* List of functions exported by this module: one constructor per supported
 * digest size, each accepting positional and keyword arguments. */
static struct PyMethodDef SHA3_functions[] = {
{"sha3_224", (PyCFunction)sha3_224, METH_VARARGS|METH_KEYWORDS,
sha3_224__doc__},
{"sha3_256", (PyCFunction)sha3_256, METH_VARARGS|METH_KEYWORDS,
sha3_256__doc__},
{"sha3_384", (PyCFunction)sha3_384, METH_VARARGS|METH_KEYWORDS,
sha3_384__doc__},
{"sha3_512", (PyCFunction)sha3_512, METH_VARARGS|METH_KEYWORDS,
sha3_512__doc__},
{NULL, NULL} /* Sentinel */
};
/* Initialize this module. */
/* Module definition: name "_sha3", no docstring, no per-module state
 * (m_size == -1) and SHA3_functions as the method table. */
static struct PyModuleDef _SHA3module = {
PyModuleDef_HEAD_INIT,
"_sha3",
NULL,
-1,
SHA3_functions,
NULL,
NULL,
NULL,
NULL
};
/* Module entry point: finish setting up the SHA3 type, then create the
 * module.  Returns NULL if the type cannot be readied. */
PyMODINIT_FUNC
PyInit__sha3(void)
{
    Py_TYPE(&SHA3type) = &PyType_Type;
    if (PyType_Ready(&SHA3type) >= 0)
        return PyModule_Create(&_SHA3module);
    return NULL;
}

View File

@ -26,3 +26,36 @@
return NULL; \
} \
} while(0);
/*
* Helper code to synchronize access to the hash object when the GIL is
* released around a CPU consuming hashlib operation. All code paths that
* access a mutable part of obj must be enclosed in a ENTER_HASHLIB /
* LEAVE_HASHLIB block or explicitly acquire and release the lock inside
* a Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS block if they wish to
* release the GIL for an operation.
*
* ENTER_HASHLIB first tries to take the lock without blocking and only
* releases the GIL to wait for it when that attempt fails. Both macros do
* nothing when obj has no lock (obj->lock is NULL) or when Python is built
* without thread support.
*
* NOTE(review): both macros expand to a bare if statement rather than a
* do { } while(0) block, so they must not be used as the unbraced body of an
* if that has an else branch.
*/
#ifdef WITH_THREAD
#include "pythread.h"
#define ENTER_HASHLIB(obj) \
if ((obj)->lock) { \
if (!PyThread_acquire_lock((obj)->lock, 0)) { \
Py_BEGIN_ALLOW_THREADS \
PyThread_acquire_lock((obj)->lock, 1); \
Py_END_ALLOW_THREADS \
} \
}
#define LEAVE_HASHLIB(obj) \
if ((obj)->lock) { \
PyThread_release_lock((obj)->lock); \
}
#else
#define ENTER_HASHLIB(obj)
#define LEAVE_HASHLIB(obj)
#endif
/* Buffer size in bytes from which on hash modules release the GIL while
 * hashing (the value is compared against the length of the input buffer).
 *
 * TODO(gps): We should probably make this a module or EVPobject attribute
 * to allow the user to optimize based on the platform they're using. */
#define HASHLIB_GIL_MINSIZE 2048

View File

@ -838,6 +838,15 @@ class PyBuildExt(build_ext):
exts.append( Extension('_sha1', ['sha1module.c'],
depends=['hashlib.h']) )
# SHA-3 (Keccak) module
sha3_depends = ['hashlib.h']
keccak = os.path.join(os.getcwd(), srcdir, 'Modules', '_sha3',
'keccak')
for pattern in ('*.c', '*.h', '*.macros'):
sha3_depends.extend(glob(os.path.join(keccak, pattern)))
exts.append(Extension("_sha3", ["_sha3/sha3module.c"],
depends=sha3_depends))
# Modules that provide persistent dictionary-like semantics. You will
# probably want to arrange for at least one of them to be available on
# your machine, though none are defined by default because of library