From 325e9b8ef400b86fb077aa40d5cb8cec6e4df7bb Mon Sep 17 00:00:00 2001 From: Jonathan Protzenko Date: Tue, 13 Aug 2024 14:42:19 -0700 Subject: [PATCH] gh-99108: Add HACL* Blake2 implementation to hashlib (GH-119316) This replaces the existing hashlib Blake2 module with a single implementation that uses HACL\*'s Blake2b/Blake2s implementations. We added support for all the modes exposed by the Python API, including tree hashing, leaf nodes, and so on. We ported and merged all of these changes upstream in HACL\*, added test vectors based on Python's existing implementation, and exposed everything needed for hashlib. This was joint work done with @R1kM. See the PR for much discussion and benchmarking details. TL;DR: On many systems, 8-50% faster (!) than `libb2`, on some systems it appeared 10-20% slower than `libb2`. --- Lib/test/test_hashlib.py | 11 + Makefile.pre.in | 66 +- ...4-07-19-09-38-01.gh-issue-99108.qzM6gl.rst | 10 + Misc/sbom.spdx.json | 604 ++--- Modules/Setup.stdlib.in | 2 +- Modules/_blake2/blake2b2s.py | 49 - Modules/_blake2/blake2b_impl.c | 417 ---- Modules/_blake2/blake2module.c | 160 -- Modules/_blake2/blake2module.h | 43 - Modules/_blake2/blake2s_impl.c | 417 ---- Modules/_blake2/clinic/blake2s_impl.c.h | 268 --- Modules/_blake2/impl/blake2-config.h | 71 - Modules/_blake2/impl/blake2-impl.h | 162 -- Modules/_blake2/impl/blake2.h | 177 -- Modules/_blake2/impl/blake2b-load-sse2.h | 68 - Modules/_blake2/impl/blake2b-load-sse41.h | 402 ---- Modules/_blake2/impl/blake2b-ref.c | 379 --- Modules/_blake2/impl/blake2b-round.h | 160 -- Modules/_blake2/impl/blake2b.c | 436 ---- Modules/_blake2/impl/blake2s-load-sse2.h | 59 - Modules/_blake2/impl/blake2s-load-sse41.h | 229 -- Modules/_blake2/impl/blake2s-load-xop.h | 189 -- Modules/_blake2/impl/blake2s-ref.c | 368 --- Modules/_blake2/impl/blake2s-round.h | 91 - Modules/_blake2/impl/blake2s.c | 415 ---- Modules/_hacl/Hacl_Hash_Blake2b.c | 1493 ++++++++++++ Modules/_hacl/Hacl_Hash_Blake2b.h | 245 ++ Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c | 1338 +++++++++++ Modules/_hacl/Hacl_Hash_Blake2b_Simd256.h | 231 ++ Modules/_hacl/Hacl_Hash_Blake2s.c | 1444 ++++++++++++ Modules/_hacl/Hacl_Hash_Blake2s.h | 222 ++ Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c | 1294 +++++++++++ Modules/_hacl/Hacl_Hash_Blake2s_Simd128.h | 230 ++ Modules/_hacl/Hacl_Hash_SHA3.c | 2026 +++++++++++++++-- Modules/_hacl/Hacl_Hash_SHA3.h | 95 +- Modules/_hacl/Lib_Memzero0.c | 54 + Modules/_hacl/include/krml/internal/target.h | 69 + Modules/_hacl/internal/Hacl_Hash_Blake2b.h | 78 + .../internal/Hacl_Hash_Blake2b_Simd256.h | 93 + Modules/_hacl/internal/Hacl_Hash_Blake2s.h | 72 + .../internal/Hacl_Hash_Blake2s_Simd128.h | 93 + Modules/_hacl/internal/Hacl_Hash_SHA3.h | 10 +- .../internal/Hacl_Impl_Blake2_Constants.h | 73 + Modules/_hacl/lib_memzero0.h | 5 + Modules/_hacl/libintvector.h | 936 ++++++++ Modules/_hacl/python_hacl_namespaces.h | 125 +- Modules/_hacl/refresh.sh | 36 +- Modules/blake2module.c | 930 ++++++++ .../blake2module.c.h} | 210 +- PCbuild/pythoncore.vcxproj | 18 +- Tools/build/generate_sbom.py | 3 - Tools/c-analyzer/cpython/_parser.py | 3 + configure | 201 +- configure.ac | 44 +- pyconfig.h.in | 9 +- 55 files changed, 11697 insertions(+), 5236 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-07-19-09-38-01.gh-issue-99108.qzM6gl.rst delete mode 100755 Modules/_blake2/blake2b2s.py delete mode 100644 Modules/_blake2/blake2b_impl.c delete mode 100644 Modules/_blake2/blake2module.c delete mode 100644 Modules/_blake2/blake2module.h delete mode 100644 Modules/_blake2/blake2s_impl.c delete mode 100644 Modules/_blake2/clinic/blake2s_impl.c.h delete mode 100644 Modules/_blake2/impl/blake2-config.h delete mode 100644 Modules/_blake2/impl/blake2-impl.h delete mode 100644 Modules/_blake2/impl/blake2.h delete mode 100644 Modules/_blake2/impl/blake2b-load-sse2.h delete mode 100644 Modules/_blake2/impl/blake2b-load-sse41.h delete mode 100644 Modules/_blake2/impl/blake2b-ref.c delete mode 100644 Modules/_blake2/impl/blake2b-round.h delete mode 100644 Modules/_blake2/impl/blake2b.c delete mode 100644 Modules/_blake2/impl/blake2s-load-sse2.h delete mode 100644 Modules/_blake2/impl/blake2s-load-sse41.h delete mode 100644 Modules/_blake2/impl/blake2s-load-xop.h delete mode 100644 Modules/_blake2/impl/blake2s-ref.c delete mode 100644 Modules/_blake2/impl/blake2s-round.h delete mode 100644 Modules/_blake2/impl/blake2s.c create mode 100644 Modules/_hacl/Hacl_Hash_Blake2b.c create mode 100644 Modules/_hacl/Hacl_Hash_Blake2b.h create mode 100644 Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c create mode 100644 Modules/_hacl/Hacl_Hash_Blake2b_Simd256.h create mode 100644 Modules/_hacl/Hacl_Hash_Blake2s.c create mode 100644 Modules/_hacl/Hacl_Hash_Blake2s.h create mode 100644 Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c create mode 100644 Modules/_hacl/Hacl_Hash_Blake2s_Simd128.h create mode 100644 Modules/_hacl/Lib_Memzero0.c create mode 100644 Modules/_hacl/internal/Hacl_Hash_Blake2b.h create mode 100644 Modules/_hacl/internal/Hacl_Hash_Blake2b_Simd256.h create mode 100644 Modules/_hacl/internal/Hacl_Hash_Blake2s.h create mode 100644 Modules/_hacl/internal/Hacl_Hash_Blake2s_Simd128.h create mode 100644 Modules/_hacl/internal/Hacl_Impl_Blake2_Constants.h create mode 100644 Modules/_hacl/lib_memzero0.h create mode 100644 Modules/_hacl/libintvector.h create mode 100644 Modules/blake2module.c rename Modules/{_blake2/clinic/blake2b_impl.c.h => clinic/blake2module.c.h} (54%) diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 73d758a3631..575b2cd0da7 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -368,6 +368,17 @@ class HashLibTestCase(unittest.TestCase): h.update(b"hello world") self.assertEqual(h.hexdigest(), "e2d4535e3b613135c14f2fe4e026d7ad8d569db44901740beffa30d430acb038") + @requires_resource('cpu') + def test_blake2_update_over_4gb(self): + # blake2s or blake2b doesn't matter based on how our C code is structured, this tests the + # common loop macro logic. + zero_1mb = b"\0" * 1024 * 1024 + h = hashlib.blake2s() + for i in range(0, 4096): + h.update(zero_1mb) + h.update(b"hello world") + self.assertEqual(h.hexdigest(), "8a268e83dd30528bc0907fa2008c91de8f090a0b6e0e60a5ff0d999d8485526f") + def check(self, name, data, hexdigest, shake=False, **kwargs): length = len(hexdigest)//2 hexdigest = hexdigest.lower() diff --git a/Makefile.pre.in b/Makefile.pre.in index 6eb9afefada..3a1c1d3d217 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -221,6 +221,11 @@ ENSUREPIP= @ENSUREPIP@ LIBMPDEC_A= Modules/_decimal/libmpdec/libmpdec.a LIBEXPAT_A= Modules/expat/libexpat.a LIBHACL_SHA2_A= Modules/_hacl/libHacl_Hash_SHA2.a +LIBHACL_BLAKE2_A= Modules/_hacl/libHacl_Hash_Blake2.a +LIBHACL_SIMD128_FLAGS=@LIBHACL_SIMD128_FLAGS@ +LIBHACL_SIMD256_FLAGS=@LIBHACL_SIMD256_FLAGS@ +LIBHACL_SIMD128_OBJS=@LIBHACL_SIMD128_OBJS@ +LIBHACL_SIMD256_OBJS=@LIBHACL_SIMD256_OBJS@ # Module state, compiler flags and linker flags # Empty CFLAGS and LDFLAGS are omitted. @@ -646,6 +651,13 @@ LIBEXPAT_HEADERS= \ LIBHACL_SHA2_OBJS= \ Modules/_hacl/Hacl_Hash_SHA2.o +LIBHACL_BLAKE2_OBJS= \ + Modules/_hacl/Hacl_Hash_Blake2s.o \ + Modules/_hacl/Hacl_Hash_Blake2b.o \ + Modules/_hacl/Lib_Memzero0.o \ + $(LIBHACL_SIMD128_OBJS) \ + $(LIBHACL_SIMD256_OBJS) + LIBHACL_HEADERS= \ Modules/_hacl/include/krml/FStar_UInt128_Verified.h \ Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h \ @@ -661,6 +673,18 @@ LIBHACL_SHA2_HEADERS= \ Modules/_hacl/internal/Hacl_Hash_SHA2.h \ $(LIBHACL_HEADERS) +LIBHACL_BLAKE2_HEADERS= \ + Modules/_hacl/Hacl_Hash_Blake2b.h \ + Modules/_hacl/Hacl_Hash_Blake2s.h \ + Modules/_hacl/Hacl_Hash_Blake2s_Simd128.h \ + Modules/_hacl/Hacl_Hash_Blake2b_Simd256.h \ + Modules/_hacl/internal/Hacl_Hash_Blake2b.h \ + Modules/_hacl/internal/Hacl_Hash_Blake2s.h \ + Modules/_hacl/internal/Hacl_Impl_Blake2_Constants.h \ + Modules/_hacl/internal/Hacl_Hash_Blake2s_Simd128.h \ + Modules/_hacl/internal/Hacl_Hash_Blake2b_Simd256.h \ + $(LIBHACL_HEADERS) + ######################################################################### # Rules @@ -840,7 +864,7 @@ coverage-lcov: @ # remove 3rd party modules, system headers and internal files with @ # debug, test or dummy functions. @lcov $(COVERAGE_LCOV_OPTIONS) --remove $(COVERAGE_INFO) \ - '*/Modules/_blake2/impl/*' \ + '*/Modules/_hacl/*' \ '*/Modules/_ctypes/libffi*/*' \ '*/Modules/_decimal/libmpdec/*' \ '*/Modules/expat/*' \ @@ -870,7 +894,7 @@ coverage-report: regen-token regen-frozen # Run "Argument Clinic" over all source files .PHONY: clinic -clinic: check-clean-src $(srcdir)/Modules/_blake2/blake2s_impl.c +clinic: check-clean-src $(PYTHON_FOR_REGEN) $(srcdir)/Tools/clinic/clinic.py --make --exclude Lib/test/clinic.test.c --srcdir $(srcdir) .PHONY: clinic-tests @@ -900,11 +924,6 @@ pybuilddir.txt: $(PYTHON_FOR_BUILD_DEPS) exit 1 ; \ fi -# blake2s is auto-generated from blake2b -$(srcdir)/Modules/_blake2/blake2s_impl.c: $(srcdir)/Modules/_blake2/blake2b_impl.c $(srcdir)/Modules/_blake2/blake2b2s.py - $(PYTHON_FOR_REGEN) $(srcdir)/Modules/_blake2/blake2b2s.py - $(PYTHON_FOR_REGEN) $(srcdir)/Tools/clinic/clinic.py -f $@ - # Build static library $(LIBRARY): $(LIBRARY_OBJS) -rm -f $@ @@ -1346,8 +1365,10 @@ $(LIBEXPAT_A): $(LIBEXPAT_OBJS) $(AR) $(ARFLAGS) $@ $(LIBEXPAT_OBJS) ########################################################################## -# Build HACL* static libraries for hashlib: libHacl_Hash_SHA2.a -LIBHACL_CFLAGS=-I$(srcdir)/Modules/_hacl/include -D_BSD_SOURCE -D_DEFAULT_SOURCE $(PY_STDMODULE_CFLAGS) $(CCSHARED) +# Build HACL* static libraries for hashlib: libHacl_Hash_SHA2.a, and +# libHacl_Blake2.a -- the contents of the latter vary depending on whether we +# have the ability to compile vectorized versions +LIBHACL_CFLAGS=-I$(srcdir)/Modules/_hacl -I$(srcdir)/Modules/_hacl/include -D_BSD_SOURCE -D_DEFAULT_SOURCE $(PY_STDMODULE_CFLAGS) $(CCSHARED) Modules/_hacl/Hacl_Hash_SHA2.o: $(srcdir)/Modules/_hacl/Hacl_Hash_SHA2.c $(LIBHACL_SHA2_HEADERS) $(CC) -c $(LIBHACL_CFLAGS) -o $@ $(srcdir)/Modules/_hacl/Hacl_Hash_SHA2.c @@ -1356,6 +1377,25 @@ $(LIBHACL_SHA2_A): $(LIBHACL_SHA2_OBJS) -rm -f $@ $(AR) $(ARFLAGS) $@ $(LIBHACL_SHA2_OBJS) +Modules/_hacl/Hacl_Hash_Blake2s.o: $(srcdir)/Modules/_hacl/Hacl_Hash_Blake2s.c $(LIBHACL_BLAKE2_HEADERS) + $(CC) -c $(LIBHACL_CFLAGS) -o $@ $(srcdir)/Modules/_hacl/Hacl_Hash_Blake2s.c + +Modules/_hacl/Hacl_Hash_Blake2b.o: $(srcdir)/Modules/_hacl/Hacl_Hash_Blake2b.c $(LIBHACL_BLAKE2_HEADERS) + $(CC) -c $(LIBHACL_CFLAGS) -o $@ $(srcdir)/Modules/_hacl/Hacl_Hash_Blake2b.c + +Modules/_hacl/Hacl_Hash_Blake2s_Simd128.o: $(srcdir)/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c $(LIBHACL_BLAKE2_HEADERS) + $(CC) -c $(LIBHACL_CFLAGS) $(LIBHACL_SIMD128_FLAGS) -DHACL_CAN_COMPILE_VEC128 -o $@ $(srcdir)/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c + +Modules/_hacl/Hacl_Hash_Blake2b_Simd256.o: $(srcdir)/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c $(LIBHACL_BLAKE2_HEADERS) + $(CC) -c $(LIBHACL_CFLAGS) $(LIBHACL_SIMD256_FLAGS) -DHACL_CAN_COMPILE_VEC256 -o $@ $(srcdir)/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c + +Modules/_hacl/Lib_Memzero0.o: $(srcdir)/Modules/_hacl/Lib_Memzero0.c $(LIBHACL_BLAKE2_HEADERS) + $(CC) -c $(LIBHACL_CFLAGS) -o $@ $(srcdir)/Modules/_hacl/Lib_Memzero0.c + +$(LIBHACL_BLAKE2_A): $(LIBHACL_BLAKE2_OBJS) + -rm -f $@ + $(AR) $(ARFLAGS) $@ $(LIBHACL_BLAKE2_OBJS) + # create relative links from build/lib.platform/egg.so to Modules/egg.so # pybuilddir.txt is created too late. We cannot use it in Makefile # targets. ln --relative is not portable. @@ -3136,7 +3176,6 @@ MODULE_CMATH_DEPS=$(srcdir)/Modules/_math.h MODULE_MATH_DEPS=$(srcdir)/Modules/_math.h MODULE_PYEXPAT_DEPS=@LIBEXPAT_INTERNAL@ MODULE_UNICODEDATA_DEPS=$(srcdir)/Modules/unicodedata_db.h $(srcdir)/Modules/unicodename_db.h -MODULE__BLAKE2_DEPS=$(srcdir)/Modules/_blake2/impl/blake2-config.h $(srcdir)/Modules/_blake2/impl/blake2-impl.h $(srcdir)/Modules/_blake2/impl/blake2.h $(srcdir)/Modules/_blake2/impl/blake2b-load-sse2.h $(srcdir)/Modules/_blake2/impl/blake2b-load-sse41.h $(srcdir)/Modules/_blake2/impl/blake2b-ref.c $(srcdir)/Modules/_blake2/impl/blake2b-round.h $(srcdir)/Modules/_blake2/impl/blake2b.c $(srcdir)/Modules/_blake2/impl/blake2s-load-sse2.h $(srcdir)/Modules/_blake2/impl/blake2s-load-sse41.h $(srcdir)/Modules/_blake2/impl/blake2s-load-xop.h $(srcdir)/Modules/_blake2/impl/blake2s-ref.c $(srcdir)/Modules/_blake2/impl/blake2s-round.h $(srcdir)/Modules/_blake2/impl/blake2s.c $(srcdir)/Modules/_blake2/blake2module.h $(srcdir)/Modules/hashlib.h MODULE__CTYPES_DEPS=$(srcdir)/Modules/_ctypes/ctypes.h $(srcdir)/Modules/_complex.h MODULE__CTYPES_TEST_DEPS=$(srcdir)/Modules/_ctypes/_ctypes_test_generated.c.h MODULE__CTYPES_MALLOC_CLOSURE=@MODULE__CTYPES_MALLOC_CLOSURE@ @@ -3144,10 +3183,11 @@ MODULE__DECIMAL_DEPS=$(srcdir)/Modules/_decimal/docstrings.h @LIBMPDEC_INTERNAL@ MODULE__ELEMENTTREE_DEPS=$(srcdir)/Modules/pyexpat.c @LIBEXPAT_INTERNAL@ MODULE__HASHLIB_DEPS=$(srcdir)/Modules/hashlib.h MODULE__IO_DEPS=$(srcdir)/Modules/_io/_iomodule.h -MODULE__MD5_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HEADERS) Modules/_hacl/Hacl_Hash_MD5.h Modules/_hacl/Hacl_Hash_MD5.c -MODULE__SHA1_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HEADERS) Modules/_hacl/Hacl_Hash_SHA1.h Modules/_hacl/Hacl_Hash_SHA1.c +MODULE__MD5_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HEADERS) Modules/_hacl/Hacl_Hash_MD5.h Modules/_hacl/internal/Hacl_Hash_MD5.h Modules/_hacl/Hacl_Hash_MD5.c +MODULE__SHA1_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HEADERS) Modules/_hacl/Hacl_Hash_SHA1.h Modules/_hacl/internal/Hacl_Hash_SHA1.h Modules/_hacl/Hacl_Hash_SHA1.c MODULE__SHA2_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_SHA2_HEADERS) $(LIBHACL_SHA2_A) -MODULE__SHA3_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HEADERS) Modules/_hacl/Hacl_Hash_SHA3.h Modules/_hacl/Hacl_Hash_SHA3.c +MODULE__SHA3_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HEADERS) Modules/_hacl/Hacl_Hash_SHA3.h Modules/_hacl/internal/Hacl_Hash_SHA3.h Modules/_hacl/Hacl_Hash_SHA3.c +MODULE__BLAKE2_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_BLAKE2_HEADERS) $(LIBHACL_BLAKE2_A) MODULE__SOCKET_DEPS=$(srcdir)/Modules/socketmodule.h $(srcdir)/Modules/addrinfo.h $(srcdir)/Modules/getaddrinfo.c $(srcdir)/Modules/getnameinfo.c MODULE__SSL_DEPS=$(srcdir)/Modules/_ssl.h $(srcdir)/Modules/_ssl/cert.c $(srcdir)/Modules/_ssl/debughelpers.c $(srcdir)/Modules/_ssl/misc.c $(srcdir)/Modules/_ssl_data_111.h $(srcdir)/Modules/_ssl_data_300.h $(srcdir)/Modules/socketmodule.h MODULE__TESTCAPI_DEPS=$(srcdir)/Modules/_testcapi/parts.h $(srcdir)/Modules/_testcapi/util.h diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-07-19-09-38-01.gh-issue-99108.qzM6gl.rst b/Misc/NEWS.d/next/Core and Builtins/2024-07-19-09-38-01.gh-issue-99108.qzM6gl.rst new file mode 100644 index 00000000000..125f04a36a1 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-07-19-09-38-01.gh-issue-99108.qzM6gl.rst @@ -0,0 +1,10 @@ +Python's hashlib now unconditionally uses the vendored HACL* library for +Blake2. Python no longer accepts libb2 as an optional dependency for Blake2. + +We refreshed HACL* to the latest version, and now vendor HACL*'s 128-bit and +256-bit wide vector implementations for Blake2, which are used on x86/x64 +toolchains when the required CPU features are available at runtime. + +HACL*'s 128-bit wide vector implementation of Blake2 can also run on ARM +NEON and Power8, but lacking evidence of a performance gain, these are not +enabled (yet). diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index b60adcfd362..dadc07c4f40 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -295,6 +295,118 @@ ], "fileName": "Modules/expat/xmltok_ns.c" }, + { + "SPDXID": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-Blake2b.c", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "c96cba53034348537ac423a220803b06cd9f0a43" + }, + { + "algorithm": "SHA256", + "checksumValue": "9f4fb5c70678638cfd163cc990be1def356cf7b65b75faa4666db8c5f8593530" + } + ], + "fileName": "Modules/_hacl/Hacl_Hash_Blake2b.c" + }, + { + "SPDXID": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-Blake2b.h", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "b0b3ae92d6aee7b52bacfdf02409d8d7e23701ee" + }, + { + "algorithm": "SHA256", + "checksumValue": "95d1dd4097a706b0719610da674297fa253b30d03a6ead4685ed648e20cb51a2" + } + ], + "fileName": "Modules/_hacl/Hacl_Hash_Blake2b.h" + }, + { + "SPDXID": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-Blake2b-Simd256.c", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "e11e2d1771e56c0afbdb0673906898b3a67e0cc3" + }, + { + "algorithm": "SHA256", + "checksumValue": "d5bf29d995f7cb9861841b813aa01206664895a1c5aa166a4796785c02117bf4" + } + ], + "fileName": "Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c" + }, + { + "SPDXID": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-Blake2b-Simd256.h", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "a5011646670c4f51368aca661e458e4c7f1d88e0" + }, + { + "algorithm": "SHA256", + "checksumValue": "f00c1fe8e774c7ec65f6c5a8efa43ce180a17fc80ed6119ada8c4022d058b6e2" + } + ], + "fileName": "Modules/_hacl/Hacl_Hash_Blake2b_Simd256.h" + }, + { + "SPDXID": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-Blake2s.c", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "5422517af799cf74b194821fb2a1f39e3b02c54d" + }, + { + "algorithm": "SHA256", + "checksumValue": "c66adab0259f2c2229e010cd635a982e8c2b8836e59e43e7867992d4148e4d9a" + } + ], + "fileName": "Modules/_hacl/Hacl_Hash_Blake2s.c" + }, + { + "SPDXID": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-Blake2s.h", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "0328172a62507a051cd60ff9603710ed5aea1bc8" + }, + { + "algorithm": "SHA256", + "checksumValue": "9f3c8ef615c9fbc59ef796d0ad2a7a76a7e55dc8939077b44ca538cbf8889a8c" + } + ], + "fileName": "Modules/_hacl/Hacl_Hash_Blake2s.h" + }, + { + "SPDXID": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-Blake2s-Simd128.c", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "7822db8e7c2f60dd64a18e112a1bc369e7f7a0ff" + }, + { + "algorithm": "SHA256", + "checksumValue": "94b0cd3cf1f7385325ee878d2ef06affc8d6412af9302ca47d1aa6d858182050" + } + ], + "fileName": "Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c" + }, + { + "SPDXID": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-Blake2s-Simd128.h", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "32f35c173c10a2c49ac53c839cfbccd8a147274d" + }, + { + "algorithm": "SHA256", + "checksumValue": "8734879b551f0fa860002ae81c0d0cfbade561007d9c26ad18c5a221e239237e" + } + ], + "fileName": "Modules/_hacl/Hacl_Hash_Blake2s_Simd128.h" + }, { "SPDXID": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-MD5.c", "checksums": [ @@ -384,11 +496,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "66644fd3325c414fef7d985536bb477c849c8f9a" + "checksumValue": "fc2c3ef83a71bef42eb3f73b78e4ef6642a4634e" }, { "algorithm": "SHA256", - "checksumValue": "17c0db96d40d1849f02546d5f55428fa89b61b07748d5b5df45cec25c5f29c0f" + "checksumValue": "e4f3ed9d1e8f661482cbd2d04b197e15cc3b698c5ef2ddedf0eb65df320dbbc4" } ], "fileName": "Modules/_hacl/Hacl_Hash_SHA3.c" @@ -398,11 +510,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "580e9a73813281e99a98871380b3726576295a96" + "checksumValue": "7d78e6844dde1f9b5e68f58ca105a4c330461ff6" }, { "algorithm": "SHA256", - "checksumValue": "d8d4d14bbc3a561a4e590d9b18b326e6a8095efb12423edbd949cf3c00953621" + "checksumValue": "231d9bc13190be4b6821acb518194f32f4a3c04f1c034b3118f6db0bab2debe3" } ], "fileName": "Modules/_hacl/Hacl_Hash_SHA3.h" @@ -421,6 +533,20 @@ ], "fileName": "Modules/_hacl/Hacl_Streaming_Types.h" }, + { + "SPDXID": "SPDXRef-FILE-Modules-hacl-Lib-Memzero0.c", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "e52071a35fc1893928804fe91b098ad5682c2508" + }, + { + "algorithm": "SHA256", + "checksumValue": "c4424a4851cd2d4f27633ca19faf5cb1135a680443727a8d1b134737f9a71e62" + } + ], + "fileName": "Modules/_hacl/Lib_Memzero0.c" + }, { "SPDXID": "SPDXRef-FILE-Modules-hacl-include-krml-FStar-UInt128-Verified.h", "checksums": [ @@ -468,11 +594,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "ba64394679643c6d4ceaf6bd2616d48d12f996a7" + "checksumValue": "81872ecdbd39b09cd813dee6e1dbed113a81aa4a" }, { "algorithm": "SHA256", - "checksumValue": "d16a59f37a1d4982626870e370889eb9d332a9ad035661b8062f549fc734d061" + "checksumValue": "1eef18295d412129007816fe65b7f15c0be8ad32840ef5e3dfaa5b67317e1b51" } ], "fileName": "Modules/_hacl/include/krml/internal/target.h" @@ -505,6 +631,62 @@ ], "fileName": "Modules/_hacl/include/krml/types.h" }, + { + "SPDXID": "SPDXRef-FILE-Modules-hacl-internal-Hacl-Hash-Blake2b.h", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "31b329bd39ff72ed25086e2afe7875949003c140" + }, + { + "algorithm": "SHA256", + "checksumValue": "16df6cf240ee99aade0fd11d5cc7573c201c7589d8325a5c95c7670c531e1518" + } + ], + "fileName": "Modules/_hacl/internal/Hacl_Hash_Blake2b.h" + }, + { + "SPDXID": "SPDXRef-FILE-Modules-hacl-internal-Hacl-Hash-Blake2b-Simd256.h", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "3f4fdfdaef97a2cbac5ec091c91ede18d4b33f92" + }, + { + "algorithm": "SHA256", + "checksumValue": "96b1c77860f12bcadad0caca77a5a1649a840ad9989d97984a3b51bb98c80e2f" + } + ], + "fileName": "Modules/_hacl/internal/Hacl_Hash_Blake2b_Simd256.h" + }, + { + "SPDXID": "SPDXRef-FILE-Modules-hacl-internal-Hacl-Hash-Blake2s.h", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "9efd61f6ba8d126e98abd83679a5ed5954278c31" + }, + { + "algorithm": "SHA256", + "checksumValue": "143f58f033786173501a72ac302e435963fdce6c2cc38eef6d6adeb3cdc1bb9c" + } + ], + "fileName": "Modules/_hacl/internal/Hacl_Hash_Blake2s.h" + }, + { + "SPDXID": "SPDXRef-FILE-Modules-hacl-internal-Hacl-Hash-Blake2s-Simd128.h", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "3f984829465285283b03b1111b4918cfb48b8031" + }, + { + "algorithm": "SHA256", + "checksumValue": "cd24038fdd617edc65e472496b0d58f23ff312f81f9244c3e7893fdc9a1b2977" + } + ], + "fileName": "Modules/_hacl/internal/Hacl_Hash_Blake2s_Simd128.h" + }, { "SPDXID": "SPDXRef-FILE-Modules-hacl-internal-Hacl-Hash-MD5.h", "checksums": [ @@ -552,225 +734,71 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "eae8a5226bf993f07584cf4c0d269022328cf3d4" + "checksumValue": "39ba6e8959e44ae956a640d3a1fb3ef60de8a9e5" }, { "algorithm": "SHA256", - "checksumValue": "6853125de10d0f605e9bc3a3dbbd7254713709e9893cc3f69929ea8d3f254934" + "checksumValue": "dbf4b86a04b4d8716976f8c023cccbfe174435dbec3bc00fc1f066fb52c4e341" } ], "fileName": "Modules/_hacl/internal/Hacl_Hash_SHA3.h" }, + { + "SPDXID": "SPDXRef-FILE-Modules-hacl-internal-Hacl-Impl-Blake2-Constants.h", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "c3ae35ed5bf70cf011b2732df011231528b9111c" + }, + { + "algorithm": "SHA256", + "checksumValue": "c381fea7b8b505a7c7ce27231a36751add6b184b204132935c5faaba4fce8ba1" + } + ], + "fileName": "Modules/_hacl/internal/Hacl_Impl_Blake2_Constants.h" + }, + { + "SPDXID": "SPDXRef-FILE-Modules-hacl-lib-memzero0.h", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "3d65f95f6f4bbfe980a89b82c55d02d7694a5a79" + }, + { + "algorithm": "SHA256", + "checksumValue": "0f8d744620cf5f6b8450da187484b418d24dec7d8cf72b757b7080e84cb3ae5e" + } + ], + "fileName": "Modules/_hacl/lib_memzero0.h" + }, + { + "SPDXID": "SPDXRef-FILE-Modules-hacl-libintvector.h", + "checksums": [ + { + "algorithm": "SHA1", + "checksumValue": "d5d85ee8f0bd52781fe470d0bf73ec388ddb3999" + }, + { + "algorithm": "SHA256", + "checksumValue": "9a421b998add98fe366374641c4edb27617ff539a59f0963879f345065d3d39d" + } + ], + "fileName": "Modules/_hacl/libintvector.h" + }, { "SPDXID": "SPDXRef-FILE-Modules-hacl-python-hacl-namespaces.h", "checksums": [ { "algorithm": "SHA1", - "checksumValue": "d8063060cc707a7ac70108a15934d33e7b448db6" + "checksumValue": "37e3eb63c5c6f8ae671748bfde642c180b96d2de" }, { "algorithm": "SHA256", - "checksumValue": "347dfdf856ed1e584d124d6709b51267598ea5b37c1a2e03beeb358c978beada" + "checksumValue": "0b5c7892cc25a2b3467936c1f346a6186d9d0a257d1bd5671beda253b66e0f68" } ], "fileName": "Modules/_hacl/python_hacl_namespaces.h" }, - { - "SPDXID": "SPDXRef-FILE-Modules-blake2-impl-blake2-config.h", - "checksums": [ - { - "algorithm": "SHA1", - "checksumValue": "ff5e3ae2360adf7279a9c54d12a1d32e16a1f223" - }, - { - "algorithm": "SHA256", - "checksumValue": "1eb919e885244e43cdf7b2104ad30dc9271513478c0026f6bfb4bad6e2f0ab42" - } - ], - "fileName": "Modules/_blake2/impl/blake2-config.h" - }, - { - "SPDXID": "SPDXRef-FILE-Modules-blake2-impl-blake2-impl.h", - "checksums": [ - { - "algorithm": "SHA1", - "checksumValue": "28b947b43bdc680b9f4335712bb2a5f2d5d32623" - }, - { - "algorithm": "SHA256", - "checksumValue": "4277092643b289f1d36d32cf0fd2efc30ead8bdd99342e5da3b3609dd8ea7d86" - } - ], - "fileName": "Modules/_blake2/impl/blake2-impl.h" - }, - { - "SPDXID": "SPDXRef-FILE-Modules-blake2-impl-blake2.h", - "checksums": [ - { - "algorithm": "SHA1", - "checksumValue": "caa3da7953109d0d2961e3b686d2d285c484b901" - }, - { - "algorithm": "SHA256", - "checksumValue": "2f6c9d0ecf70be474f2853b52394993625a32960e0a64eae147ef97a3a5c1460" - } - ], - "fileName": "Modules/_blake2/impl/blake2.h" - }, - { - "SPDXID": "SPDXRef-FILE-Modules-blake2-impl-blake2b-load-sse2.h", - "checksums": [ - { - "algorithm": "SHA1", - "checksumValue": "029a98f87a178936d9e5211c7798b3e0fc622f94" - }, - { - "algorithm": "SHA256", - "checksumValue": "b392a6e7b43813a05609e994db5fc3552c5912bd482efc781daa0778eb56ab4e" - } - ], - "fileName": "Modules/_blake2/impl/blake2b-load-sse2.h" - }, - { - "SPDXID": "SPDXRef-FILE-Modules-blake2-impl-blake2b-load-sse41.h", - "checksums": [ - { - "algorithm": "SHA1", - "checksumValue": "fb466dd72344170d09e311e5ea12de99ce071357" - }, - { - "algorithm": "SHA256", - "checksumValue": "cc3072c92164142bf2f9dda4e6c08db61be68ec15a95442415e861090d08f6a2" - } - ], - "fileName": "Modules/_blake2/impl/blake2b-load-sse41.h" - }, - { - "SPDXID": "SPDXRef-FILE-Modules-blake2-impl-blake2b-ref.c", - "checksums": [ - { - "algorithm": "SHA1", - "checksumValue": "4c0d79128cf891a95b1f668031d55c0c6d2e0270" - }, - { - "algorithm": "SHA256", - "checksumValue": "07b257d44e9cc2d95d4911629c92138feafd16d63fef0a5fa7b38914dfd82349" - } - ], - "fileName": "Modules/_blake2/impl/blake2b-ref.c" - }, - { - "SPDXID": "SPDXRef-FILE-Modules-blake2-impl-blake2b-round.h", - "checksums": [ - { - "algorithm": "SHA1", - "checksumValue": "4c7418e2026417c9c6736fcd305a31f23e05a661" - }, - { - "algorithm": "SHA256", - "checksumValue": "fa34a60c2d198a0585033f43fd4003f4ba279c9ebcabdf5d6650def0e6d1e914" - } - ], - "fileName": "Modules/_blake2/impl/blake2b-round.h" - }, - { - "SPDXID": "SPDXRef-FILE-Modules-blake2-impl-blake2b.c", - "checksums": [ - { - "algorithm": "SHA1", - "checksumValue": "f935d64cc633c38e09fc2d89281c95edfbc1fb05" - }, - { - "algorithm": "SHA256", - "checksumValue": "b932aa273b2504606a48895a50ff08c883f7a68a7e4aced5daa909c43348605a" - } - ], - "fileName": "Modules/_blake2/impl/blake2b.c" - }, - { - "SPDXID": "SPDXRef-FILE-Modules-blake2-impl-blake2s-load-sse2.h", - "checksums": [ - { - "algorithm": "SHA1", - "checksumValue": "ad3f79b6cbe3fd812722114a0d5d08064e69e4d0" - }, - { - "algorithm": "SHA256", - "checksumValue": "57f1ac6c09f4a50d95811529062220eab4f29cec3805bc6081dec00426c6df62" - } - ], - "fileName": "Modules/_blake2/impl/blake2s-load-sse2.h" - }, - { - "SPDXID": "SPDXRef-FILE-Modules-blake2-impl-blake2s-load-sse41.h", - "checksums": [ - { - "algorithm": "SHA1", - "checksumValue": "51c32d79f419f3d2eb9875cd9a7f5c0d7892f8a8" - }, - { - "algorithm": "SHA256", - "checksumValue": "ecc9e09adcbe098629eafd305596bed8d7004be1d83f326995def42bbde93b23" - } - ], - "fileName": "Modules/_blake2/impl/blake2s-load-sse41.h" - }, - { - "SPDXID": "SPDXRef-FILE-Modules-blake2-impl-blake2s-load-xop.h", - "checksums": [ - { - "algorithm": "SHA1", - "checksumValue": "2749a7ba0104b765d4f56f13faf70b6eb89cf203" - }, - { - "algorithm": "SHA256", - "checksumValue": "8bc95595cec4c50f5d70f2b330d3798de07cc784e8890791b3328890e602d5c5" - } - ], - "fileName": "Modules/_blake2/impl/blake2s-load-xop.h" - }, - { - "SPDXID": "SPDXRef-FILE-Modules-blake2-impl-blake2s-ref.c", - "checksums": [ - { - "algorithm": "SHA1", - "checksumValue": "883fcfe85f9063819f21b1100296d1f9eb55bac1" - }, - { - "algorithm": "SHA256", - "checksumValue": "9715c00d0f11587a139b07fa26678e6d26e44d3d4910b96158d158da2b022bfb" - } - ], - "fileName": "Modules/_blake2/impl/blake2s-ref.c" - }, - { - "SPDXID": "SPDXRef-FILE-Modules-blake2-impl-blake2s-round.h", - "checksums": [ - { - "algorithm": "SHA1", - "checksumValue": "5d9f69adda40ed163b287b9ed4cedb35b88f2daa" - }, - { - "algorithm": "SHA256", - "checksumValue": "65d90111c89c43bb18a9e1d1a4fdbd9f85bebd1ff00129335b85995d0f30ee8b" - } - ], - "fileName": "Modules/_blake2/impl/blake2s-round.h" - }, - { - "SPDXID": "SPDXRef-FILE-Modules-blake2-impl-blake2s.c", - "checksums": [ - { - "algorithm": "SHA1", - "checksumValue": "13ac5bb93578a7ee8f815b4e247e82c849992bbe" - }, - { - "algorithm": "SHA256", - "checksumValue": "25ec5dd5c79f916307358059fe9f633781f27df1c0e0962c4fcccdda1feb93a7" - } - ], - "fileName": "Modules/_blake2/impl/blake2s.c" - }, { "SPDXID": "SPDXRef-FILE-Lib-ctypes-macholib-init-.py", "checksums": [ @@ -1584,14 +1612,14 @@ "checksums": [ { "algorithm": "SHA256", - "checksumValue": "e31e4ca10da91c585793c0eaf1b98aee3cb43e3a58d3d8d478593e5a6bd82927" + "checksumValue": "988a74f5fbb59baca2d54e41447997ada92f4ebc59888dfb717438013f859117" } ], - "downloadLocation": "https://github.com/hacl-star/hacl-star/archive/bb3d0dc8d9d15a5cd51094d5b69e70aa09005ff0.zip", + "downloadLocation": "https://github.com/hacl-star/hacl-star/archive/a6a09496d9cff652b567d26f2c3ab012321b632a.zip", "externalRefs": [ { "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:hacl-star:hacl-star:bb3d0dc8d9d15a5cd51094d5b69e70aa09005ff0:*:*:*:*:*:*:*", + "referenceLocator": "cpe:2.3:a:hacl-star:hacl-star:a6a09496d9cff652b567d26f2c3ab012321b632a:*:*:*:*:*:*:*", "referenceType": "cpe23Type" } ], @@ -1599,29 +1627,7 @@ "name": "hacl-star", "originator": "Organization: HACL* Developers", "primaryPackagePurpose": "SOURCE", - "versionInfo": "bb3d0dc8d9d15a5cd51094d5b69e70aa09005ff0" - }, - { - "SPDXID": "SPDXRef-PACKAGE-libb2", - "checksums": [ - { - "algorithm": "SHA256", - "checksumValue": "53626fddce753c454a3fea581cbbc7fe9bbcf0bc70416d48fdbbf5d87ef6c72e" - } - ], - "downloadLocation": "https://github.com/BLAKE2/libb2/releases/download/v0.98.1/libb2-0.98.1.tar.gz", - "externalRefs": [ - { - "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:blake2:libb2:0.98.1:*:*:*:*:*:*:*", - "referenceType": "cpe23Type" - } - ], - "licenseConcluded": "NOASSERTION", - "name": "libb2", - "originator": "Organization: BLAKE2 - fast secure hashing", - "primaryPackagePurpose": "SOURCE", - "versionInfo": "0.98.1" + "versionInfo": "a6a09496d9cff652b567d26f2c3ab012321b632a" }, { "SPDXID": "SPDXRef-PACKAGE-macholib", @@ -1774,6 +1780,46 @@ "relationshipType": "CONTAINS", "spdxElementId": "SPDXRef-PACKAGE-expat" }, + { + "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-Blake2b.c", + "relationshipType": "CONTAINS", + "spdxElementId": "SPDXRef-PACKAGE-hacl-star" + }, + { + "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-Blake2b.h", + "relationshipType": "CONTAINS", + "spdxElementId": "SPDXRef-PACKAGE-hacl-star" + }, + { + "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-Blake2b-Simd256.c", + "relationshipType": "CONTAINS", + "spdxElementId": "SPDXRef-PACKAGE-hacl-star" + }, + { + "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-Blake2b-Simd256.h", + "relationshipType": "CONTAINS", + "spdxElementId": "SPDXRef-PACKAGE-hacl-star" + }, + { + "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-Blake2s.c", + "relationshipType": "CONTAINS", + "spdxElementId": "SPDXRef-PACKAGE-hacl-star" + }, + { + "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-Blake2s.h", + "relationshipType": "CONTAINS", + "spdxElementId": "SPDXRef-PACKAGE-hacl-star" + }, + { + "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-Blake2s-Simd128.c", + "relationshipType": "CONTAINS", + "spdxElementId": "SPDXRef-PACKAGE-hacl-star" + }, + { + "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-Blake2s-Simd128.h", + "relationshipType": "CONTAINS", + "spdxElementId": "SPDXRef-PACKAGE-hacl-star" + }, { "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-Hacl-Hash-MD5.c", "relationshipType": "CONTAINS", @@ -1819,6 +1865,11 @@ "relationshipType": "CONTAINS", "spdxElementId": "SPDXRef-PACKAGE-hacl-star" }, + { + "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-Lib-Memzero0.c", + "relationshipType": "CONTAINS", + "spdxElementId": "SPDXRef-PACKAGE-hacl-star" + }, { "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-include-krml-FStar-UInt128-Verified.h", "relationshipType": "CONTAINS", @@ -1849,6 +1900,26 @@ "relationshipType": "CONTAINS", "spdxElementId": "SPDXRef-PACKAGE-hacl-star" }, + { + "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-internal-Hacl-Hash-Blake2b.h", + "relationshipType": "CONTAINS", + "spdxElementId": "SPDXRef-PACKAGE-hacl-star" + }, + { + "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-internal-Hacl-Hash-Blake2b-Simd256.h", + "relationshipType": "CONTAINS", + "spdxElementId": "SPDXRef-PACKAGE-hacl-star" + }, + { + "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-internal-Hacl-Hash-Blake2s.h", + "relationshipType": "CONTAINS", + "spdxElementId": "SPDXRef-PACKAGE-hacl-star" + }, + { + "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-internal-Hacl-Hash-Blake2s-Simd128.h", + "relationshipType": "CONTAINS", + "spdxElementId": "SPDXRef-PACKAGE-hacl-star" + }, { "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-internal-Hacl-Hash-MD5.h", "relationshipType": "CONTAINS", @@ -1870,79 +1941,24 @@ "spdxElementId": "SPDXRef-PACKAGE-hacl-star" }, { - "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-python-hacl-namespaces.h", + "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-internal-Hacl-Impl-Blake2-Constants.h", "relationshipType": "CONTAINS", "spdxElementId": "SPDXRef-PACKAGE-hacl-star" }, { - "relatedSpdxElement": "SPDXRef-FILE-Modules-blake2-impl-blake2-config.h", + "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-lib-memzero0.h", "relationshipType": "CONTAINS", - "spdxElementId": "SPDXRef-PACKAGE-libb2" + "spdxElementId": "SPDXRef-PACKAGE-hacl-star" }, { - "relatedSpdxElement": "SPDXRef-FILE-Modules-blake2-impl-blake2-impl.h", + "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-libintvector.h", "relationshipType": "CONTAINS", - "spdxElementId": "SPDXRef-PACKAGE-libb2" + "spdxElementId": "SPDXRef-PACKAGE-hacl-star" }, { - "relatedSpdxElement": "SPDXRef-FILE-Modules-blake2-impl-blake2.h", + "relatedSpdxElement": "SPDXRef-FILE-Modules-hacl-python-hacl-namespaces.h", "relationshipType": "CONTAINS", - "spdxElementId": "SPDXRef-PACKAGE-libb2" - }, - { - "relatedSpdxElement": "SPDXRef-FILE-Modules-blake2-impl-blake2b-load-sse2.h", - "relationshipType": "CONTAINS", - "spdxElementId": "SPDXRef-PACKAGE-libb2" - }, - { - "relatedSpdxElement": "SPDXRef-FILE-Modules-blake2-impl-blake2b-load-sse41.h", - "relationshipType": "CONTAINS", - "spdxElementId": "SPDXRef-PACKAGE-libb2" - }, - { - "relatedSpdxElement": "SPDXRef-FILE-Modules-blake2-impl-blake2b-ref.c", - "relationshipType": "CONTAINS", - "spdxElementId": "SPDXRef-PACKAGE-libb2" - }, - { - "relatedSpdxElement": "SPDXRef-FILE-Modules-blake2-impl-blake2b-round.h", - "relationshipType": "CONTAINS", - "spdxElementId": "SPDXRef-PACKAGE-libb2" - }, - { - "relatedSpdxElement": "SPDXRef-FILE-Modules-blake2-impl-blake2b.c", - "relationshipType": "CONTAINS", - "spdxElementId": "SPDXRef-PACKAGE-libb2" - }, - { - "relatedSpdxElement": "SPDXRef-FILE-Modules-blake2-impl-blake2s-load-sse2.h", - "relationshipType": "CONTAINS", - "spdxElementId": "SPDXRef-PACKAGE-libb2" - }, - { - "relatedSpdxElement": "SPDXRef-FILE-Modules-blake2-impl-blake2s-load-sse41.h", - "relationshipType": "CONTAINS", - "spdxElementId": "SPDXRef-PACKAGE-libb2" - }, - { - "relatedSpdxElement": "SPDXRef-FILE-Modules-blake2-impl-blake2s-load-xop.h", - "relationshipType": "CONTAINS", - "spdxElementId": "SPDXRef-PACKAGE-libb2" - }, - { - "relatedSpdxElement": "SPDXRef-FILE-Modules-blake2-impl-blake2s-ref.c", - "relationshipType": "CONTAINS", - "spdxElementId": "SPDXRef-PACKAGE-libb2" - }, - { - "relatedSpdxElement": "SPDXRef-FILE-Modules-blake2-impl-blake2s-round.h", - "relationshipType": "CONTAINS", - "spdxElementId": "SPDXRef-PACKAGE-libb2" - }, - { - "relatedSpdxElement": "SPDXRef-FILE-Modules-blake2-impl-blake2s.c", - "relationshipType": "CONTAINS", - "spdxElementId": "SPDXRef-PACKAGE-libb2" + "spdxElementId": "SPDXRef-PACKAGE-hacl-star" }, { "relatedSpdxElement": "SPDXRef-FILE-Lib-ctypes-macholib-init-.py", diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in index 9da4e785804..02b6eed346f 100644 --- a/Modules/Setup.stdlib.in +++ b/Modules/Setup.stdlib.in @@ -82,7 +82,7 @@ @MODULE__SHA1_TRUE@_sha1 sha1module.c -I$(srcdir)/Modules/_hacl/include _hacl/Hacl_Hash_SHA1.c -D_BSD_SOURCE -D_DEFAULT_SOURCE @MODULE__SHA2_TRUE@_sha2 sha2module.c -I$(srcdir)/Modules/_hacl/include Modules/_hacl/libHacl_Hash_SHA2.a @MODULE__SHA3_TRUE@_sha3 sha3module.c -I$(srcdir)/Modules/_hacl/include _hacl/Hacl_Hash_SHA3.c -D_BSD_SOURCE -D_DEFAULT_SOURCE -@MODULE__BLAKE2_TRUE@_blake2 _blake2/blake2module.c _blake2/blake2b_impl.c _blake2/blake2s_impl.c +@MODULE__BLAKE2_TRUE@_blake2 blake2module.c -I$(srcdir)/Modules/_hacl/include Modules/_hacl/libHacl_Hash_Blake2.a ############################################################################ # XML and text diff --git a/Modules/_blake2/blake2b2s.py b/Modules/_blake2/blake2b2s.py deleted file mode 100755 index 01cf26521b3..00000000000 --- a/Modules/_blake2/blake2b2s.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/python3 - -import os -import re - -HERE = os.path.dirname(os.path.abspath(__file__)) -BLAKE2 = os.path.join(HERE, 'impl') - -PUBLIC_SEARCH = re.compile(r'\ int (blake2[bs]p?[a-z_]*)\(') - - -def getfiles(): - for name in os.listdir(BLAKE2): - name = os.path.join(BLAKE2, name) - if os.path.isfile(name): - yield name - - -def find_public(): - public_funcs = set() - for name in getfiles(): - with open(name) as f: - for line in f: - # find public functions - mo = PUBLIC_SEARCH.search(line) - if mo: - public_funcs.add(mo.group(1)) - - for f in sorted(public_funcs): - print('#define {0:<18} PyBlake2_{0}'.format(f)) - - return public_funcs - - -def main(): - lines = [] - with open(os.path.join(HERE, 'blake2b_impl.c')) as f: - for line in f: - line = line.replace('blake2b', 'blake2s') - line = line.replace('BLAKE2b', 'BLAKE2s') - line = line.replace('BLAKE2B', 'BLAKE2S') - lines.append(line) - with open(os.path.join(HERE, 'blake2s_impl.c'), 'w') as f: - f.write(''.join(lines)) - # find_public() - - -if __name__ == '__main__': - main() diff --git a/Modules/_blake2/blake2b_impl.c b/Modules/_blake2/blake2b_impl.c deleted file mode 100644 index 0c3ae5a2fac..00000000000 --- a/Modules/_blake2/blake2b_impl.c +++ /dev/null @@ -1,417 +0,0 @@ -/* - * Written in 2013 by Dmitry Chestnykh - * Modified for CPython by Christian Heimes - * - * To the extent possible under law, the author have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. http://creativecommons.org/publicdomain/zero/1.0/ - */ - -/* WARNING: autogenerated file! - * - * The blake2s_impl.c is autogenerated from blake2b_impl.c. - */ - -#ifndef Py_BUILD_CORE_BUILTIN -# define Py_BUILD_CORE_MODULE 1 -#endif - -#include -#include "Python.h" -#include "pycore_strhex.h" // _Py_strhex() - -#include "../hashlib.h" -#include "blake2module.h" - -#ifndef HAVE_LIBB2 -/* pure SSE2 implementation is very slow, so only use the more optimized SSSE3+ - * https://bugs.python.org/issue31834 */ -#if defined(__SSSE3__) || defined(__SSE4_1__) || defined(__AVX__) || defined(__XOP__) -#include "impl/blake2b.c" -#else -#include "impl/blake2b-ref.c" -#endif -#endif // !HAVE_LIBB2 - -#define HAVE_BLAKE2B 1 - -extern PyType_Spec blake2b_type_spec; - - -typedef struct { - PyObject_HEAD - blake2b_param param; - blake2b_state state; - bool use_mutex; - PyMutex mutex; -} BLAKE2bObject; - -#include "clinic/blake2b_impl.c.h" - -/*[clinic input] -module _blake2 -class _blake2.blake2b "BLAKE2bObject *" "&PyBlake2_BLAKE2bType" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=d47b0527b39c673f]*/ - - -static BLAKE2bObject * -new_BLAKE2bObject(PyTypeObject *type) -{ - BLAKE2bObject *self; - self = (BLAKE2bObject *)type->tp_alloc(type, 0); - if (self == NULL) { - return NULL; - } - HASHLIB_INIT_MUTEX(self); - - return self; -} - -/*[clinic input] -@classmethod -_blake2.blake2b.__new__ as py_blake2b_new - data: object(c_default="NULL") = b'' - / - * - digest_size: int(c_default="BLAKE2B_OUTBYTES") = _blake2.blake2b.MAX_DIGEST_SIZE - key: Py_buffer(c_default="NULL", py_default="b''") = None - salt: Py_buffer(c_default="NULL", py_default="b''") = None - person: Py_buffer(c_default="NULL", py_default="b''") = None - fanout: int = 1 - depth: int = 1 - leaf_size: unsigned_long = 0 - node_offset: unsigned_long_long = 0 - node_depth: int = 0 - inner_size: int = 0 - last_node: bool = False - usedforsecurity: bool = True - -Return a new BLAKE2b hash object. -[clinic start generated code]*/ - -static PyObject * -py_blake2b_new_impl(PyTypeObject *type, PyObject *data, int digest_size, - Py_buffer *key, Py_buffer *salt, Py_buffer *person, - int fanout, int depth, unsigned long leaf_size, - unsigned long long node_offset, int node_depth, - int inner_size, int last_node, int usedforsecurity) -/*[clinic end generated code: output=32bfd8f043c6896f input=b947312abff46977]*/ -{ - BLAKE2bObject *self = NULL; - Py_buffer buf; - - self = new_BLAKE2bObject(type); - if (self == NULL) { - goto error; - } - - /* Zero parameter block. */ - memset(&self->param, 0, sizeof(self->param)); - - /* Set digest size. */ - if (digest_size <= 0 || digest_size > BLAKE2B_OUTBYTES) { - PyErr_Format(PyExc_ValueError, - "digest_size must be between 1 and %d bytes", - BLAKE2B_OUTBYTES); - goto error; - } - self->param.digest_length = digest_size; - - /* Set salt parameter. */ - if ((salt->obj != NULL) && salt->len) { - if (salt->len > BLAKE2B_SALTBYTES) { - PyErr_Format(PyExc_ValueError, - "maximum salt length is %d bytes", - BLAKE2B_SALTBYTES); - goto error; - } - memcpy(self->param.salt, salt->buf, salt->len); - } - - /* Set personalization parameter. */ - if ((person->obj != NULL) && person->len) { - if (person->len > BLAKE2B_PERSONALBYTES) { - PyErr_Format(PyExc_ValueError, - "maximum person length is %d bytes", - BLAKE2B_PERSONALBYTES); - goto error; - } - memcpy(self->param.personal, person->buf, person->len); - } - - /* Set tree parameters. */ - if (fanout < 0 || fanout > 255) { - PyErr_SetString(PyExc_ValueError, - "fanout must be between 0 and 255"); - goto error; - } - self->param.fanout = (uint8_t)fanout; - - if (depth <= 0 || depth > 255) { - PyErr_SetString(PyExc_ValueError, - "depth must be between 1 and 255"); - goto error; - } - self->param.depth = (uint8_t)depth; - - if (leaf_size > 0xFFFFFFFFU) { - PyErr_SetString(PyExc_OverflowError, "leaf_size is too large"); - goto error; - } - // NB: Simple assignment here would be incorrect on big endian platforms. - store32(&(self->param.leaf_length), leaf_size); - -#ifdef HAVE_BLAKE2S - if (node_offset > 0xFFFFFFFFFFFFULL) { - /* maximum 2**48 - 1 */ - PyErr_SetString(PyExc_OverflowError, "node_offset is too large"); - goto error; - } - store48(&(self->param.node_offset), node_offset); -#else - // NB: Simple assignment here would be incorrect on big endian platforms. - store64(&(self->param.node_offset), node_offset); -#endif - - if (node_depth < 0 || node_depth > 255) { - PyErr_SetString(PyExc_ValueError, - "node_depth must be between 0 and 255"); - goto error; - } - self->param.node_depth = node_depth; - - if (inner_size < 0 || inner_size > BLAKE2B_OUTBYTES) { - PyErr_Format(PyExc_ValueError, - "inner_size must be between 0 and is %d", - BLAKE2B_OUTBYTES); - goto error; - } - self->param.inner_length = inner_size; - - /* Set key length. */ - if ((key->obj != NULL) && key->len) { - if (key->len > BLAKE2B_KEYBYTES) { - PyErr_Format(PyExc_ValueError, - "maximum key length is %d bytes", - BLAKE2B_KEYBYTES); - goto error; - } - self->param.key_length = (uint8_t)key->len; - } - - /* Initialize hash state. */ - if (blake2b_init_param(&self->state, &self->param) < 0) { - PyErr_SetString(PyExc_RuntimeError, - "error initializing hash state"); - goto error; - } - - /* Set last node flag (must come after initialization). */ - self->state.last_node = last_node; - - /* Process key block if any. */ - if (self->param.key_length) { - uint8_t block[BLAKE2B_BLOCKBYTES]; - memset(block, 0, sizeof(block)); - memcpy(block, key->buf, key->len); - blake2b_update(&self->state, block, sizeof(block)); - secure_zero_memory(block, sizeof(block)); - } - - /* Process initial data if any. */ - if (data != NULL) { - GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); - - if (buf.len >= HASHLIB_GIL_MINSIZE) { - Py_BEGIN_ALLOW_THREADS - blake2b_update(&self->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } else { - blake2b_update(&self->state, buf.buf, buf.len); - } - PyBuffer_Release(&buf); - } - - return (PyObject *)self; - - error: - if (self != NULL) { - Py_DECREF(self); - } - return NULL; -} - -/*[clinic input] -_blake2.blake2b.copy - -Return a copy of the hash object. -[clinic start generated code]*/ - -static PyObject * -_blake2_blake2b_copy_impl(BLAKE2bObject *self) -/*[clinic end generated code: output=ff6acee5f93656ae input=e383c2d199fd8a2e]*/ -{ - BLAKE2bObject *cpy; - - if ((cpy = new_BLAKE2bObject(Py_TYPE(self))) == NULL) - return NULL; - - ENTER_HASHLIB(self); - cpy->param = self->param; - cpy->state = self->state; - LEAVE_HASHLIB(self); - return (PyObject *)cpy; -} - -/*[clinic input] -_blake2.blake2b.update - - data: object - / - -Update this hash object's state with the provided bytes-like object. -[clinic start generated code]*/ - -static PyObject * -_blake2_blake2b_update(BLAKE2bObject *self, PyObject *data) -/*[clinic end generated code: output=010dfcbe22654359 input=ffc4aa6a6a225d31]*/ -{ - Py_buffer buf; - - GET_BUFFER_VIEW_OR_ERROUT(data, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - blake2b_update(&self->state, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - blake2b_update(&self->state, buf.buf, buf.len); - } - - PyBuffer_Release(&buf); - - Py_RETURN_NONE; -} - -/*[clinic input] -_blake2.blake2b.digest - -Return the digest value as a bytes object. -[clinic start generated code]*/ - -static PyObject * -_blake2_blake2b_digest_impl(BLAKE2bObject *self) -/*[clinic end generated code: output=a5864660f4bfc61a input=7d21659e9c5fff02]*/ -{ - uint8_t digest[BLAKE2B_OUTBYTES]; - blake2b_state state_cpy; - - ENTER_HASHLIB(self); - state_cpy = self->state; - blake2b_final(&state_cpy, digest, self->param.digest_length); - LEAVE_HASHLIB(self); - return PyBytes_FromStringAndSize((const char *)digest, - self->param.digest_length); -} - -/*[clinic input] -_blake2.blake2b.hexdigest - -Return the digest value as a string of hexadecimal digits. -[clinic start generated code]*/ - -static PyObject * -_blake2_blake2b_hexdigest_impl(BLAKE2bObject *self) -/*[clinic end generated code: output=b5598a87d8794a60 input=76930f6946351f56]*/ -{ - uint8_t digest[BLAKE2B_OUTBYTES]; - blake2b_state state_cpy; - - ENTER_HASHLIB(self); - state_cpy = self->state; - blake2b_final(&state_cpy, digest, self->param.digest_length); - LEAVE_HASHLIB(self); - return _Py_strhex((const char *)digest, self->param.digest_length); -} - - -static PyMethodDef py_blake2b_methods[] = { - _BLAKE2_BLAKE2B_COPY_METHODDEF - _BLAKE2_BLAKE2B_DIGEST_METHODDEF - _BLAKE2_BLAKE2B_HEXDIGEST_METHODDEF - _BLAKE2_BLAKE2B_UPDATE_METHODDEF - {NULL, NULL} -}; - - - -static PyObject * -py_blake2b_get_name(BLAKE2bObject *self, void *closure) -{ - return PyUnicode_FromString("blake2b"); -} - - - -static PyObject * -py_blake2b_get_block_size(BLAKE2bObject *self, void *closure) -{ - return PyLong_FromLong(BLAKE2B_BLOCKBYTES); -} - - - -static PyObject * -py_blake2b_get_digest_size(BLAKE2bObject *self, void *closure) -{ - return PyLong_FromLong(self->param.digest_length); -} - - -static PyGetSetDef py_blake2b_getsetters[] = { - {"name", (getter)py_blake2b_get_name, - NULL, NULL, NULL}, - {"block_size", (getter)py_blake2b_get_block_size, - NULL, NULL, NULL}, - {"digest_size", (getter)py_blake2b_get_digest_size, - NULL, NULL, NULL}, - {NULL} -}; - - -static void -py_blake2b_dealloc(PyObject *self) -{ - BLAKE2bObject *obj = (BLAKE2bObject *)self; - - /* Try not to leave state in memory. */ - secure_zero_memory(&obj->param, sizeof(obj->param)); - secure_zero_memory(&obj->state, sizeof(obj->state)); - - PyTypeObject *type = Py_TYPE(self); - PyObject_Free(self); - Py_DECREF(type); -} - -static PyType_Slot blake2b_type_slots[] = { - {Py_tp_dealloc, py_blake2b_dealloc}, - {Py_tp_doc, (char *)py_blake2b_new__doc__}, - {Py_tp_methods, py_blake2b_methods}, - {Py_tp_getset, py_blake2b_getsetters}, - {Py_tp_new, py_blake2b_new}, - {0,0} -}; - -PyType_Spec blake2b_type_spec = { - .name = "_blake2.blake2b", - .basicsize = sizeof(BLAKE2bObject), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE, - .slots = blake2b_type_slots -}; diff --git a/Modules/_blake2/blake2module.c b/Modules/_blake2/blake2module.c deleted file mode 100644 index 78242214764..00000000000 --- a/Modules/_blake2/blake2module.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Written in 2013 by Dmitry Chestnykh - * Modified for CPython by Christian Heimes - * - * To the extent possible under law, the author have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. http://creativecommons.org/publicdomain/zero/1.0/ - */ - -#ifndef Py_BUILD_CORE_BUILTIN -# define Py_BUILD_CORE_MODULE 1 -#endif - -#include "Python.h" -#include "blake2module.h" - -extern PyType_Spec blake2b_type_spec; -extern PyType_Spec blake2s_type_spec; - -PyDoc_STRVAR(blake2mod__doc__, -"_blake2b provides BLAKE2b for hashlib\n" -); - -typedef struct { - PyTypeObject* blake2b_type; - PyTypeObject* blake2s_type; -} Blake2State; - -static inline Blake2State* -blake2_get_state(PyObject *module) -{ - void *state = PyModule_GetState(module); - assert(state != NULL); - return (Blake2State *)state; -} - -static struct PyMethodDef blake2mod_functions[] = { - {NULL, NULL} -}; - -static int -_blake2_traverse(PyObject *module, visitproc visit, void *arg) -{ - Blake2State *state = blake2_get_state(module); - Py_VISIT(state->blake2b_type); - Py_VISIT(state->blake2s_type); - return 0; -} - -static int -_blake2_clear(PyObject *module) -{ - Blake2State *state = blake2_get_state(module); - Py_CLEAR(state->blake2b_type); - Py_CLEAR(state->blake2s_type); - return 0; -} - -static void -_blake2_free(void *module) -{ - _blake2_clear((PyObject *)module); -} - -#define ADD_INT(d, name, value) do { \ - PyObject *x = PyLong_FromLong(value); \ - if (!x) \ - return -1; \ - if (PyDict_SetItemString(d, name, x) < 0) { \ - Py_DECREF(x); \ - return -1; \ - } \ - Py_DECREF(x); \ -} while(0) - -#define ADD_INT_CONST(NAME, VALUE) do { \ - if (PyModule_AddIntConstant(m, NAME, VALUE) < 0) { \ - return -1; \ - } \ -} while (0) - -static int -blake2_exec(PyObject *m) -{ - Blake2State* st = blake2_get_state(m); - - st->blake2b_type = (PyTypeObject *)PyType_FromModuleAndSpec( - m, &blake2b_type_spec, NULL); - - if (NULL == st->blake2b_type) - return -1; - /* BLAKE2b */ - if (PyModule_AddType(m, st->blake2b_type) < 0) { - return -1; - } - - PyObject *d = st->blake2b_type->tp_dict; - ADD_INT(d, "SALT_SIZE", BLAKE2B_SALTBYTES); - ADD_INT(d, "PERSON_SIZE", BLAKE2B_PERSONALBYTES); - ADD_INT(d, "MAX_KEY_SIZE", BLAKE2B_KEYBYTES); - ADD_INT(d, "MAX_DIGEST_SIZE", BLAKE2B_OUTBYTES); - - ADD_INT_CONST("BLAKE2B_SALT_SIZE", BLAKE2B_SALTBYTES); - ADD_INT_CONST("BLAKE2B_PERSON_SIZE", BLAKE2B_PERSONALBYTES); - ADD_INT_CONST("BLAKE2B_MAX_KEY_SIZE", BLAKE2B_KEYBYTES); - ADD_INT_CONST("BLAKE2B_MAX_DIGEST_SIZE", BLAKE2B_OUTBYTES); - - /* BLAKE2s */ - st->blake2s_type = (PyTypeObject *)PyType_FromModuleAndSpec( - m, &blake2s_type_spec, NULL); - - if (NULL == st->blake2s_type) - return -1; - - if (PyModule_AddType(m, st->blake2s_type) < 0) { - return -1; - } - - d = st->blake2s_type->tp_dict; - ADD_INT(d, "SALT_SIZE", BLAKE2S_SALTBYTES); - ADD_INT(d, "PERSON_SIZE", BLAKE2S_PERSONALBYTES); - ADD_INT(d, "MAX_KEY_SIZE", BLAKE2S_KEYBYTES); - ADD_INT(d, "MAX_DIGEST_SIZE", BLAKE2S_OUTBYTES); - - ADD_INT_CONST("BLAKE2S_SALT_SIZE", BLAKE2S_SALTBYTES); - ADD_INT_CONST("BLAKE2S_PERSON_SIZE", BLAKE2S_PERSONALBYTES); - ADD_INT_CONST("BLAKE2S_MAX_KEY_SIZE", BLAKE2S_KEYBYTES); - ADD_INT_CONST("BLAKE2S_MAX_DIGEST_SIZE", BLAKE2S_OUTBYTES); - - return 0; -} - -#undef ADD_INT -#undef ADD_INT_CONST - -static PyModuleDef_Slot _blake2_slots[] = { - {Py_mod_exec, blake2_exec}, - {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, - {Py_mod_gil, Py_MOD_GIL_NOT_USED}, - {0, NULL} -}; - -static struct PyModuleDef blake2_module = { - PyModuleDef_HEAD_INIT, - "_blake2", - .m_doc = blake2mod__doc__, - .m_size = sizeof(Blake2State), - .m_methods = blake2mod_functions, - .m_slots = _blake2_slots, - .m_traverse = _blake2_traverse, - .m_clear = _blake2_clear, - .m_free = _blake2_free, -}; - -PyMODINIT_FUNC -PyInit__blake2(void) -{ - return PyModuleDef_Init(&blake2_module); -} diff --git a/Modules/_blake2/blake2module.h b/Modules/_blake2/blake2module.h deleted file mode 100644 index c8144ec9d48..00000000000 --- a/Modules/_blake2/blake2module.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef Py_BLAKE2MODULE_H -#define Py_BLAKE2MODULE_H - -#ifdef HAVE_LIBB2 -#include - -#else -// use vendored copy of blake2 - -// Prefix all public blake2 symbols with PyBlake2_ -#define blake2b PyBlake2_blake2b -#define blake2b_compress PyBlake2_blake2b_compress -#define blake2b_final PyBlake2_blake2b_final -#define blake2b_init PyBlake2_blake2b_init -#define blake2b_init_key PyBlake2_blake2b_init_key -#define blake2b_init_param PyBlake2_blake2b_init_param -#define blake2b_update PyBlake2_blake2b_update -#define blake2bp PyBlake2_blake2bp -#define blake2bp_final PyBlake2_blake2bp_final -#define blake2bp_init PyBlake2_blake2bp_init -#define blake2bp_init_key PyBlake2_blake2bp_init_key -#define blake2bp_update PyBlake2_blake2bp_update -#define blake2s PyBlake2_blake2s -#define blake2s_compress PyBlake2_blake2s_compress -#define blake2s_final PyBlake2_blake2s_final -#define blake2s_init PyBlake2_blake2s_init -#define blake2s_init_key PyBlake2_blake2s_init_key -#define blake2s_init_param PyBlake2_blake2s_init_param -#define blake2s_update PyBlake2_blake2s_update -#define blake2sp PyBlake2_blake2sp -#define blake2sp_final PyBlake2_blake2sp_final -#define blake2sp_init PyBlake2_blake2sp_init -#define blake2sp_init_key PyBlake2_blake2sp_init_key -#define blake2sp_update PyBlake2_blake2sp_update - -#include "impl/blake2.h" - -#endif // HAVE_LIBB2 - -// for secure_zero_memory(), store32(), store48(), and store64() -#include "impl/blake2-impl.h" - -#endif // Py_BLAKE2MODULE_H diff --git a/Modules/_blake2/blake2s_impl.c b/Modules/_blake2/blake2s_impl.c deleted file mode 100644 index 3014773ab52..00000000000 --- a/Modules/_blake2/blake2s_impl.c +++ /dev/null @@ -1,417 +0,0 @@ -/* - * Written in 2013 by Dmitry Chestnykh - * Modified for CPython by Christian Heimes - * - * To the extent possible under law, the author have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. http://creativecommons.org/publicdomain/zero/1.0/ - */ - -/* WARNING: autogenerated file! - * - * The blake2s_impl.c is autogenerated from blake2s_impl.c. - */ - -#ifndef Py_BUILD_CORE_BUILTIN -# define Py_BUILD_CORE_MODULE 1 -#endif - -#include -#include "Python.h" -#include "pycore_strhex.h" // _Py_strhex() - -#include "../hashlib.h" -#include "blake2module.h" - -#ifndef HAVE_LIBB2 -/* pure SSE2 implementation is very slow, so only use the more optimized SSSE3+ - * https://bugs.python.org/issue31834 */ -#if defined(__SSSE3__) || defined(__SSE4_1__) || defined(__AVX__) || defined(__XOP__) -#include "impl/blake2s.c" -#else -#include "impl/blake2s-ref.c" -#endif -#endif // !HAVE_LIBB2 - -#define HAVE_BLAKE2S 1 - -extern PyType_Spec blake2s_type_spec; - - -typedef struct { - PyObject_HEAD - blake2s_param param; - blake2s_state state; - bool use_mutex; - PyMutex mutex; -} BLAKE2sObject; - -#include "clinic/blake2s_impl.c.h" - -/*[clinic input] -module _blake2 -class _blake2.blake2s "BLAKE2sObject *" "&PyBlake2_BLAKE2sType" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b79d7ffe07286ce]*/ - - -static BLAKE2sObject * -new_BLAKE2sObject(PyTypeObject *type) -{ - BLAKE2sObject *self; - self = (BLAKE2sObject *)type->tp_alloc(type, 0); - if (self == NULL) { - return NULL; - } - HASHLIB_INIT_MUTEX(self); - - return self; -} - -/*[clinic input] -@classmethod -_blake2.blake2s.__new__ as py_blake2s_new - data: object(c_default="NULL") = b'' - / - * - digest_size: int(c_default="BLAKE2S_OUTBYTES") = _blake2.blake2s.MAX_DIGEST_SIZE - key: Py_buffer(c_default="NULL", py_default="b''") = None - salt: Py_buffer(c_default="NULL", py_default="b''") = None - person: Py_buffer(c_default="NULL", py_default="b''") = None - fanout: int = 1 - depth: int = 1 - leaf_size: unsigned_long = 0 - node_offset: unsigned_long_long = 0 - node_depth: int = 0 - inner_size: int = 0 - last_node: bool = False - usedforsecurity: bool = True - -Return a new BLAKE2s hash object. -[clinic start generated code]*/ - -static PyObject * -py_blake2s_new_impl(PyTypeObject *type, PyObject *data, int digest_size, - Py_buffer *key, Py_buffer *salt, Py_buffer *person, - int fanout, int depth, unsigned long leaf_size, - unsigned long long node_offset, int node_depth, - int inner_size, int last_node, int usedforsecurity) -/*[clinic end generated code: output=556181f73905c686 input=4dda87723f23abb0]*/ -{ - BLAKE2sObject *self = NULL; - Py_buffer buf; - - self = new_BLAKE2sObject(type); - if (self == NULL) { - goto error; - } - - /* Zero parameter block. */ - memset(&self->param, 0, sizeof(self->param)); - - /* Set digest size. */ - if (digest_size <= 0 || digest_size > BLAKE2S_OUTBYTES) { - PyErr_Format(PyExc_ValueError, - "digest_size must be between 1 and %d bytes", - BLAKE2S_OUTBYTES); - goto error; - } - self->param.digest_length = digest_size; - - /* Set salt parameter. */ - if ((salt->obj != NULL) && salt->len) { - if (salt->len > BLAKE2S_SALTBYTES) { - PyErr_Format(PyExc_ValueError, - "maximum salt length is %d bytes", - BLAKE2S_SALTBYTES); - goto error; - } - memcpy(self->param.salt, salt->buf, salt->len); - } - - /* Set personalization parameter. */ - if ((person->obj != NULL) && person->len) { - if (person->len > BLAKE2S_PERSONALBYTES) { - PyErr_Format(PyExc_ValueError, - "maximum person length is %d bytes", - BLAKE2S_PERSONALBYTES); - goto error; - } - memcpy(self->param.personal, person->buf, person->len); - } - - /* Set tree parameters. */ - if (fanout < 0 || fanout > 255) { - PyErr_SetString(PyExc_ValueError, - "fanout must be between 0 and 255"); - goto error; - } - self->param.fanout = (uint8_t)fanout; - - if (depth <= 0 || depth > 255) { - PyErr_SetString(PyExc_ValueError, - "depth must be between 1 and 255"); - goto error; - } - self->param.depth = (uint8_t)depth; - - if (leaf_size > 0xFFFFFFFFU) { - PyErr_SetString(PyExc_OverflowError, "leaf_size is too large"); - goto error; - } - // NB: Simple assignment here would be incorrect on big endian platforms. - store32(&(self->param.leaf_length), leaf_size); - -#ifdef HAVE_BLAKE2S - if (node_offset > 0xFFFFFFFFFFFFULL) { - /* maximum 2**48 - 1 */ - PyErr_SetString(PyExc_OverflowError, "node_offset is too large"); - goto error; - } - store48(&(self->param.node_offset), node_offset); -#else - // NB: Simple assignment here would be incorrect on big endian platforms. - store64(&(self->param.node_offset), node_offset); -#endif - - if (node_depth < 0 || node_depth > 255) { - PyErr_SetString(PyExc_ValueError, - "node_depth must be between 0 and 255"); - goto error; - } - self->param.node_depth = node_depth; - - if (inner_size < 0 || inner_size > BLAKE2S_OUTBYTES) { - PyErr_Format(PyExc_ValueError, - "inner_size must be between 0 and is %d", - BLAKE2S_OUTBYTES); - goto error; - } - self->param.inner_length = inner_size; - - /* Set key length. */ - if ((key->obj != NULL) && key->len) { - if (key->len > BLAKE2S_KEYBYTES) { - PyErr_Format(PyExc_ValueError, - "maximum key length is %d bytes", - BLAKE2S_KEYBYTES); - goto error; - } - self->param.key_length = (uint8_t)key->len; - } - - /* Initialize hash state. */ - if (blake2s_init_param(&self->state, &self->param) < 0) { - PyErr_SetString(PyExc_RuntimeError, - "error initializing hash state"); - goto error; - } - - /* Set last node flag (must come after initialization). */ - self->state.last_node = last_node; - - /* Process key block if any. */ - if (self->param.key_length) { - uint8_t block[BLAKE2S_BLOCKBYTES]; - memset(block, 0, sizeof(block)); - memcpy(block, key->buf, key->len); - blake2s_update(&self->state, block, sizeof(block)); - secure_zero_memory(block, sizeof(block)); - } - - /* Process initial data if any. */ - if (data != NULL) { - GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); - - if (buf.len >= HASHLIB_GIL_MINSIZE) { - Py_BEGIN_ALLOW_THREADS - blake2s_update(&self->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } else { - blake2s_update(&self->state, buf.buf, buf.len); - } - PyBuffer_Release(&buf); - } - - return (PyObject *)self; - - error: - if (self != NULL) { - Py_DECREF(self); - } - return NULL; -} - -/*[clinic input] -_blake2.blake2s.copy - -Return a copy of the hash object. -[clinic start generated code]*/ - -static PyObject * -_blake2_blake2s_copy_impl(BLAKE2sObject *self) -/*[clinic end generated code: output=5b90131c4eae275e input=0b9d44942f0fe4b2]*/ -{ - BLAKE2sObject *cpy; - - if ((cpy = new_BLAKE2sObject(Py_TYPE(self))) == NULL) - return NULL; - - ENTER_HASHLIB(self); - cpy->param = self->param; - cpy->state = self->state; - LEAVE_HASHLIB(self); - return (PyObject *)cpy; -} - -/*[clinic input] -_blake2.blake2s.update - - data: object - / - -Update this hash object's state with the provided bytes-like object. -[clinic start generated code]*/ - -static PyObject * -_blake2_blake2s_update(BLAKE2sObject *self, PyObject *data) -/*[clinic end generated code: output=757dc087fec37815 input=97500db2f9de4aaa]*/ -{ - Py_buffer buf; - - GET_BUFFER_VIEW_OR_ERROUT(data, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - blake2s_update(&self->state, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - blake2s_update(&self->state, buf.buf, buf.len); - } - - PyBuffer_Release(&buf); - - Py_RETURN_NONE; -} - -/*[clinic input] -_blake2.blake2s.digest - -Return the digest value as a bytes object. -[clinic start generated code]*/ - -static PyObject * -_blake2_blake2s_digest_impl(BLAKE2sObject *self) -/*[clinic end generated code: output=40c566ca4bc6bc51 input=f41e0b8d6d937454]*/ -{ - uint8_t digest[BLAKE2S_OUTBYTES]; - blake2s_state state_cpy; - - ENTER_HASHLIB(self); - state_cpy = self->state; - blake2s_final(&state_cpy, digest, self->param.digest_length); - LEAVE_HASHLIB(self); - return PyBytes_FromStringAndSize((const char *)digest, - self->param.digest_length); -} - -/*[clinic input] -_blake2.blake2s.hexdigest - -Return the digest value as a string of hexadecimal digits. -[clinic start generated code]*/ - -static PyObject * -_blake2_blake2s_hexdigest_impl(BLAKE2sObject *self) -/*[clinic end generated code: output=15153eb5e59c52eb input=c77a1321567e8952]*/ -{ - uint8_t digest[BLAKE2S_OUTBYTES]; - blake2s_state state_cpy; - - ENTER_HASHLIB(self); - state_cpy = self->state; - blake2s_final(&state_cpy, digest, self->param.digest_length); - LEAVE_HASHLIB(self); - return _Py_strhex((const char *)digest, self->param.digest_length); -} - - -static PyMethodDef py_blake2s_methods[] = { - _BLAKE2_BLAKE2S_COPY_METHODDEF - _BLAKE2_BLAKE2S_DIGEST_METHODDEF - _BLAKE2_BLAKE2S_HEXDIGEST_METHODDEF - _BLAKE2_BLAKE2S_UPDATE_METHODDEF - {NULL, NULL} -}; - - - -static PyObject * -py_blake2s_get_name(BLAKE2sObject *self, void *closure) -{ - return PyUnicode_FromString("blake2s"); -} - - - -static PyObject * -py_blake2s_get_block_size(BLAKE2sObject *self, void *closure) -{ - return PyLong_FromLong(BLAKE2S_BLOCKBYTES); -} - - - -static PyObject * -py_blake2s_get_digest_size(BLAKE2sObject *self, void *closure) -{ - return PyLong_FromLong(self->param.digest_length); -} - - -static PyGetSetDef py_blake2s_getsetters[] = { - {"name", (getter)py_blake2s_get_name, - NULL, NULL, NULL}, - {"block_size", (getter)py_blake2s_get_block_size, - NULL, NULL, NULL}, - {"digest_size", (getter)py_blake2s_get_digest_size, - NULL, NULL, NULL}, - {NULL} -}; - - -static void -py_blake2s_dealloc(PyObject *self) -{ - BLAKE2sObject *obj = (BLAKE2sObject *)self; - - /* Try not to leave state in memory. */ - secure_zero_memory(&obj->param, sizeof(obj->param)); - secure_zero_memory(&obj->state, sizeof(obj->state)); - - PyTypeObject *type = Py_TYPE(self); - PyObject_Free(self); - Py_DECREF(type); -} - -static PyType_Slot blake2s_type_slots[] = { - {Py_tp_dealloc, py_blake2s_dealloc}, - {Py_tp_doc, (char *)py_blake2s_new__doc__}, - {Py_tp_methods, py_blake2s_methods}, - {Py_tp_getset, py_blake2s_getsetters}, - {Py_tp_new, py_blake2s_new}, - {0,0} -}; - -PyType_Spec blake2s_type_spec = { - .name = "_blake2.blake2s", - .basicsize = sizeof(BLAKE2sObject), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE, - .slots = blake2s_type_slots -}; diff --git a/Modules/_blake2/clinic/blake2s_impl.c.h b/Modules/_blake2/clinic/blake2s_impl.c.h deleted file mode 100644 index 7a0f6eeff5b..00000000000 --- a/Modules/_blake2/clinic/blake2s_impl.c.h +++ /dev/null @@ -1,268 +0,0 @@ -/*[clinic input] -preserve -[clinic start generated code]*/ - -#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) -# include "pycore_gc.h" // PyGC_Head -# include "pycore_runtime.h" // _Py_ID() -#endif -#include "pycore_long.h" // _PyLong_UnsignedLong_Converter() -#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() - -PyDoc_STRVAR(py_blake2s_new__doc__, -"blake2s(data=b\'\', /, *, digest_size=_blake2.blake2s.MAX_DIGEST_SIZE,\n" -" key=b\'\', salt=b\'\', person=b\'\', fanout=1, depth=1, leaf_size=0,\n" -" node_offset=0, node_depth=0, inner_size=0, last_node=False,\n" -" usedforsecurity=True)\n" -"--\n" -"\n" -"Return a new BLAKE2s hash object."); - -static PyObject * -py_blake2s_new_impl(PyTypeObject *type, PyObject *data, int digest_size, - Py_buffer *key, Py_buffer *salt, Py_buffer *person, - int fanout, int depth, unsigned long leaf_size, - unsigned long long node_offset, int node_depth, - int inner_size, int last_node, int usedforsecurity); - -static PyObject * -py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) -{ - PyObject *return_value = NULL; - #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - - #define NUM_KEYWORDS 12 - static struct { - PyGC_Head _this_is_not_used; - PyObject_VAR_HEAD - PyObject *ob_item[NUM_KEYWORDS]; - } _kwtuple = { - .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) - .ob_item = { &_Py_ID(digest_size), &_Py_ID(key), &_Py_ID(salt), &_Py_ID(person), &_Py_ID(fanout), &_Py_ID(depth), &_Py_ID(leaf_size), &_Py_ID(node_offset), &_Py_ID(node_depth), &_Py_ID(inner_size), &_Py_ID(last_node), &_Py_ID(usedforsecurity), }, - }; - #undef NUM_KEYWORDS - #define KWTUPLE (&_kwtuple.ob_base.ob_base) - - #else // !Py_BUILD_CORE - # define KWTUPLE NULL - #endif // !Py_BUILD_CORE - - static const char * const _keywords[] = {"", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", "usedforsecurity", NULL}; - static _PyArg_Parser _parser = { - .keywords = _keywords, - .fname = "blake2s", - .kwtuple = KWTUPLE, - }; - #undef KWTUPLE - PyObject *argsbuf[13]; - PyObject * const *fastargs; - Py_ssize_t nargs = PyTuple_GET_SIZE(args); - Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; - PyObject *data = NULL; - int digest_size = BLAKE2S_OUTBYTES; - Py_buffer key = {NULL, NULL}; - Py_buffer salt = {NULL, NULL}; - Py_buffer person = {NULL, NULL}; - int fanout = 1; - int depth = 1; - unsigned long leaf_size = 0; - unsigned long long node_offset = 0; - int node_depth = 0; - int inner_size = 0; - int last_node = 0; - int usedforsecurity = 1; - - fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 0, 1, 0, argsbuf); - if (!fastargs) { - goto exit; - } - if (nargs < 1) { - goto skip_optional_posonly; - } - noptargs--; - data = fastargs[0]; -skip_optional_posonly: - if (!noptargs) { - goto skip_optional_kwonly; - } - if (fastargs[1]) { - digest_size = PyLong_AsInt(fastargs[1]); - if (digest_size == -1 && PyErr_Occurred()) { - goto exit; - } - if (!--noptargs) { - goto skip_optional_kwonly; - } - } - if (fastargs[2]) { - if (PyObject_GetBuffer(fastargs[2], &key, PyBUF_SIMPLE) != 0) { - goto exit; - } - if (!--noptargs) { - goto skip_optional_kwonly; - } - } - if (fastargs[3]) { - if (PyObject_GetBuffer(fastargs[3], &salt, PyBUF_SIMPLE) != 0) { - goto exit; - } - if (!--noptargs) { - goto skip_optional_kwonly; - } - } - if (fastargs[4]) { - if (PyObject_GetBuffer(fastargs[4], &person, PyBUF_SIMPLE) != 0) { - goto exit; - } - if (!--noptargs) { - goto skip_optional_kwonly; - } - } - if (fastargs[5]) { - fanout = PyLong_AsInt(fastargs[5]); - if (fanout == -1 && PyErr_Occurred()) { - goto exit; - } - if (!--noptargs) { - goto skip_optional_kwonly; - } - } - if (fastargs[6]) { - depth = PyLong_AsInt(fastargs[6]); - if (depth == -1 && PyErr_Occurred()) { - goto exit; - } - if (!--noptargs) { - goto skip_optional_kwonly; - } - } - if (fastargs[7]) { - if (!_PyLong_UnsignedLong_Converter(fastargs[7], &leaf_size)) { - goto exit; - } - if (!--noptargs) { - goto skip_optional_kwonly; - } - } - if (fastargs[8]) { - if (!_PyLong_UnsignedLongLong_Converter(fastargs[8], &node_offset)) { - goto exit; - } - if (!--noptargs) { - goto skip_optional_kwonly; - } - } - if (fastargs[9]) { - node_depth = PyLong_AsInt(fastargs[9]); - if (node_depth == -1 && PyErr_Occurred()) { - goto exit; - } - if (!--noptargs) { - goto skip_optional_kwonly; - } - } - if (fastargs[10]) { - inner_size = PyLong_AsInt(fastargs[10]); - if (inner_size == -1 && PyErr_Occurred()) { - goto exit; - } - if (!--noptargs) { - goto skip_optional_kwonly; - } - } - if (fastargs[11]) { - last_node = PyObject_IsTrue(fastargs[11]); - if (last_node < 0) { - goto exit; - } - if (!--noptargs) { - goto skip_optional_kwonly; - } - } - usedforsecurity = PyObject_IsTrue(fastargs[12]); - if (usedforsecurity < 0) { - goto exit; - } -skip_optional_kwonly: - return_value = py_blake2s_new_impl(type, data, digest_size, &key, &salt, &person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); - -exit: - /* Cleanup for key */ - if (key.obj) { - PyBuffer_Release(&key); - } - /* Cleanup for salt */ - if (salt.obj) { - PyBuffer_Release(&salt); - } - /* Cleanup for person */ - if (person.obj) { - PyBuffer_Release(&person); - } - - return return_value; -} - -PyDoc_STRVAR(_blake2_blake2s_copy__doc__, -"copy($self, /)\n" -"--\n" -"\n" -"Return a copy of the hash object."); - -#define _BLAKE2_BLAKE2S_COPY_METHODDEF \ - {"copy", (PyCFunction)_blake2_blake2s_copy, METH_NOARGS, _blake2_blake2s_copy__doc__}, - -static PyObject * -_blake2_blake2s_copy_impl(BLAKE2sObject *self); - -static PyObject * -_blake2_blake2s_copy(BLAKE2sObject *self, PyObject *Py_UNUSED(ignored)) -{ - return _blake2_blake2s_copy_impl(self); -} - -PyDoc_STRVAR(_blake2_blake2s_update__doc__, -"update($self, data, /)\n" -"--\n" -"\n" -"Update this hash object\'s state with the provided bytes-like object."); - -#define _BLAKE2_BLAKE2S_UPDATE_METHODDEF \ - {"update", (PyCFunction)_blake2_blake2s_update, METH_O, _blake2_blake2s_update__doc__}, - -PyDoc_STRVAR(_blake2_blake2s_digest__doc__, -"digest($self, /)\n" -"--\n" -"\n" -"Return the digest value as a bytes object."); - -#define _BLAKE2_BLAKE2S_DIGEST_METHODDEF \ - {"digest", (PyCFunction)_blake2_blake2s_digest, METH_NOARGS, _blake2_blake2s_digest__doc__}, - -static PyObject * -_blake2_blake2s_digest_impl(BLAKE2sObject *self); - -static PyObject * -_blake2_blake2s_digest(BLAKE2sObject *self, PyObject *Py_UNUSED(ignored)) -{ - return _blake2_blake2s_digest_impl(self); -} - -PyDoc_STRVAR(_blake2_blake2s_hexdigest__doc__, -"hexdigest($self, /)\n" -"--\n" -"\n" -"Return the digest value as a string of hexadecimal digits."); - -#define _BLAKE2_BLAKE2S_HEXDIGEST_METHODDEF \ - {"hexdigest", (PyCFunction)_blake2_blake2s_hexdigest, METH_NOARGS, _blake2_blake2s_hexdigest__doc__}, - -static PyObject * -_blake2_blake2s_hexdigest_impl(BLAKE2sObject *self); - -static PyObject * -_blake2_blake2s_hexdigest(BLAKE2sObject *self, PyObject *Py_UNUSED(ignored)) -{ - return _blake2_blake2s_hexdigest_impl(self); -} -/*[clinic end generated code: output=24690e4e2586cafd input=a9049054013a1b77]*/ diff --git a/Modules/_blake2/impl/blake2-config.h b/Modules/_blake2/impl/blake2-config.h deleted file mode 100644 index c09cb4bcf06..00000000000 --- a/Modules/_blake2/impl/blake2-config.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - BLAKE2 reference source code package - optimized C implementations - - Written in 2012 by Samuel Neves - - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . -*/ -#pragma once -#ifndef __BLAKE2_CONFIG_H__ -#define __BLAKE2_CONFIG_H__ - -#if defined(__SSE2__) -#define HAVE_SSE2 -#endif - -#if defined(__SSSE3__) -#define HAVE_SSSE3 -#endif - -#if defined(__SSE4_1__) -#define HAVE_SSE4_1 -#endif - -#if defined(__AVX__) -#define HAVE_AVX -#endif - -#if defined(__XOP__) -#define HAVE_XOP -#endif - - -#ifdef HAVE_AVX2 -#ifndef HAVE_AVX -#define HAVE_AVX -#endif -#endif - -#ifdef HAVE_XOP -#ifndef HAVE_AVX -#define HAVE_AVX -#endif -#endif - -#ifdef HAVE_AVX -#ifndef HAVE_SSE4_1 -#define HAVE_SSE4_1 -#endif -#endif - -#ifdef HAVE_SSE4_1 -#ifndef HAVE_SSSE3 -#define HAVE_SSSE3 -#endif -#endif - -#ifdef HAVE_SSSE3 -#define HAVE_SSE2 -#endif - -#if !defined(HAVE_SSE2) -#error "This code requires at least SSE2." -#endif - -#endif - diff --git a/Modules/_blake2/impl/blake2-impl.h b/Modules/_blake2/impl/blake2-impl.h deleted file mode 100644 index 9d2fbb72fc1..00000000000 --- a/Modules/_blake2/impl/blake2-impl.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - BLAKE2 reference source code package - optimized C implementations - - Written in 2012 by Samuel Neves - - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . -*/ -#pragma once -#ifndef __BLAKE2_IMPL_H__ -#define __BLAKE2_IMPL_H__ - -#if defined(_WIN32) || defined(WIN32) -#include -#endif - -#include -#include -#include - -#define BLAKE2_IMPL_CAT(x,y) x ## y -#define BLAKE2_IMPL_EVAL(x,y) BLAKE2_IMPL_CAT(x,y) -#define BLAKE2_IMPL_NAME(fun) BLAKE2_IMPL_EVAL(fun, SUFFIX) - -static inline uint32_t load32( const void *src ) -{ -#if defined(NATIVE_LITTLE_ENDIAN) - uint32_t w; - memcpy( &w, src, sizeof( w ) ); - return w; -#else - const uint8_t *p = ( uint8_t * )src; - uint32_t w = *p++; - w |= ( uint32_t )( *p++ ) << 8; - w |= ( uint32_t )( *p++ ) << 16; - w |= ( uint32_t )( *p++ ) << 24; - return w; -#endif -} - -static inline uint64_t load64( const void *src ) -{ -#if defined(NATIVE_LITTLE_ENDIAN) - uint64_t w; - memcpy( &w, src, sizeof( w ) ); - return w; -#else - const uint8_t *p = ( uint8_t * )src; - uint64_t w = *p++; - w |= ( uint64_t )( *p++ ) << 8; - w |= ( uint64_t )( *p++ ) << 16; - w |= ( uint64_t )( *p++ ) << 24; - w |= ( uint64_t )( *p++ ) << 32; - w |= ( uint64_t )( *p++ ) << 40; - w |= ( uint64_t )( *p++ ) << 48; - w |= ( uint64_t )( *p++ ) << 56; - return w; -#endif -} - -static inline void store32( void *dst, uint32_t w ) -{ -#if defined(NATIVE_LITTLE_ENDIAN) - memcpy( dst, &w, sizeof( w ) ); -#else - uint8_t *p = ( uint8_t * )dst; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; -#endif -} - -static inline void store64( void *dst, uint64_t w ) -{ -#if defined(NATIVE_LITTLE_ENDIAN) - memcpy( dst, &w, sizeof( w ) ); -#else - uint8_t *p = ( uint8_t * )dst; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; -#endif -} - -static inline uint64_t load48( const void *src ) -{ - const uint8_t *p = ( const uint8_t * )src; - uint64_t w = *p++; - w |= ( uint64_t )( *p++ ) << 8; - w |= ( uint64_t )( *p++ ) << 16; - w |= ( uint64_t )( *p++ ) << 24; - w |= ( uint64_t )( *p++ ) << 32; - w |= ( uint64_t )( *p++ ) << 40; - return w; -} - -static inline void store48( void *dst, uint64_t w ) -{ - uint8_t *p = ( uint8_t * )dst; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; -} - -static inline uint32_t rotl32( const uint32_t w, const unsigned c ) -{ - return ( w << c ) | ( w >> ( 32 - c ) ); -} - -static inline uint64_t rotl64( const uint64_t w, const unsigned c ) -{ - return ( w << c ) | ( w >> ( 64 - c ) ); -} - -static inline uint32_t rotr32( const uint32_t w, const unsigned c ) -{ - return ( w >> c ) | ( w << ( 32 - c ) ); -} - -static inline uint64_t rotr64( const uint64_t w, const unsigned c ) -{ - return ( w >> c ) | ( w << ( 64 - c ) ); -} - -/* prevents compiler optimizing out memset() */ -static inline void secure_zero_memory(void *v, size_t n) -{ -#if defined(_WIN32) || defined(WIN32) - SecureZeroMemory(v, n); -#elif defined(__hpux) - static void *(*const volatile memset_v)(void *, int, size_t) = &memset; - memset_v(v, 0, n); -#else -// prioritize first the general C11 call -#if defined(HAVE_MEMSET_S) - memset_s(v, n, 0, n); -#elif defined(HAVE_EXPLICIT_BZERO) - explicit_bzero(v, n); -#elif defined(HAVE_EXPLICIT_MEMSET) - explicit_memset(v, 0, n); -#else - memset(v, 0, n); - __asm__ __volatile__("" :: "r"(v) : "memory"); -#endif -#endif -} - -#endif - diff --git a/Modules/_blake2/impl/blake2.h b/Modules/_blake2/impl/blake2.h deleted file mode 100644 index a08d82efefe..00000000000 --- a/Modules/_blake2/impl/blake2.h +++ /dev/null @@ -1,177 +0,0 @@ -/* - BLAKE2 reference source code package - optimized C implementations - - Written in 2012 by Samuel Neves - - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . -*/ -#pragma once -#ifndef __BLAKE2_H__ -#define __BLAKE2_H__ - -#include -#include - -#if defined(_WIN32) || defined(__CYGWIN__) - #define BLAKE2_DLL_IMPORT __declspec(dllimport) - #define BLAKE2_DLL_EXPORT __declspec(dllexport) - #define BLAKE2_DLL_PRIVATE -#elif __GNUC__ >= 4 - #define BLAKE2_DLL_IMPORT __attribute__ ((visibility ("default"))) - #define BLAKE2_DLL_EXPORT __attribute__ ((visibility ("default"))) - #define BLAKE2_DLL_PRIVATE __attribute__ ((visibility ("hidden"))) -#else - #define BLAKE2_DLL_IMPORT - #define BLAKE2_DLL_EXPORT - #define BLAKE2_DLL_PRIVATE -#endif - -#if defined(BLAKE2_DLL) - #if defined(BLAKE2_DLL_EXPORTS) // defined if we are building the DLL - #define BLAKE2_API BLAKE2_DLL_EXPORT - #else - #define BLAKE2_API BLAKE2_DLL_IMPORT - #endif - #define BLAKE2_PRIVATE BLAKE2_DLL_PRIVATE // must only be used by hidden logic -#else - #define BLAKE2_API - #define BLAKE2_PRIVATE -#endif - -#if defined(__cplusplus) -extern "C" { -#elif defined(_MSC_VER) && !defined(inline) -#define inline __inline -#endif - - enum blake2s_constant - { - BLAKE2S_BLOCKBYTES = 64, - BLAKE2S_OUTBYTES = 32, - BLAKE2S_KEYBYTES = 32, - BLAKE2S_SALTBYTES = 8, - BLAKE2S_PERSONALBYTES = 8 - }; - - enum blake2b_constant - { - BLAKE2B_BLOCKBYTES = 128, - BLAKE2B_OUTBYTES = 64, - BLAKE2B_KEYBYTES = 64, - BLAKE2B_SALTBYTES = 16, - BLAKE2B_PERSONALBYTES = 16 - }; - -#pragma pack(push, 1) - typedef struct __blake2s_param - { - uint8_t digest_length; // 1 - uint8_t key_length; // 2 - uint8_t fanout; // 3 - uint8_t depth; // 4 - uint32_t leaf_length; // 8 - uint8_t node_offset[6];// 14 - uint8_t node_depth; // 15 - uint8_t inner_length; // 16 - // uint8_t reserved[0]; - uint8_t salt[BLAKE2S_SALTBYTES]; // 24 - uint8_t personal[BLAKE2S_PERSONALBYTES]; // 32 - } blake2s_param; - - typedef struct __blake2s_state - { - uint32_t h[8]; - uint32_t t[2]; - uint32_t f[2]; - uint8_t buf[2 * BLAKE2S_BLOCKBYTES]; - uint32_t buflen; - uint8_t outlen; - uint8_t last_node; - } blake2s_state; - - typedef struct __blake2b_param - { - uint8_t digest_length; // 1 - uint8_t key_length; // 2 - uint8_t fanout; // 3 - uint8_t depth; // 4 - uint32_t leaf_length; // 8 - uint64_t node_offset; // 16 - uint8_t node_depth; // 17 - uint8_t inner_length; // 18 - uint8_t reserved[14]; // 32 - uint8_t salt[BLAKE2B_SALTBYTES]; // 48 - uint8_t personal[BLAKE2B_PERSONALBYTES]; // 64 - } blake2b_param; - - typedef struct __blake2b_state - { - uint64_t h[8]; - uint64_t t[2]; - uint64_t f[2]; - uint8_t buf[2 * BLAKE2B_BLOCKBYTES]; - uint32_t buflen; - uint8_t outlen; - uint8_t last_node; - } blake2b_state; - - typedef struct __blake2sp_state - { - blake2s_state S[8][1]; - blake2s_state R[1]; - uint8_t buf[8 * BLAKE2S_BLOCKBYTES]; - uint32_t buflen; - uint8_t outlen; - } blake2sp_state; - - typedef struct __blake2bp_state - { - blake2b_state S[4][1]; - blake2b_state R[1]; - uint8_t buf[4 * BLAKE2B_BLOCKBYTES]; - uint32_t buflen; - uint8_t outlen; - } blake2bp_state; -#pragma pack(pop) - - // Streaming API - BLAKE2_API int blake2s_init( blake2s_state *S, size_t outlen ); - BLAKE2_API int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); - BLAKE2_API int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); - BLAKE2_API int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ); - BLAKE2_API int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ); - - BLAKE2_API int blake2b_init( blake2b_state *S, size_t outlen ); - BLAKE2_API int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); - BLAKE2_API int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); - BLAKE2_API int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ); - BLAKE2_API int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ); - - BLAKE2_API int blake2sp_init( blake2sp_state *S, size_t outlen ); - BLAKE2_API int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen ); - BLAKE2_API int blake2sp_update( blake2sp_state *S, const uint8_t *in, size_t inlen ); - BLAKE2_API int blake2sp_final( blake2sp_state *S, uint8_t *out, size_t outlen ); - - BLAKE2_API int blake2bp_init( blake2bp_state *S, size_t outlen ); - BLAKE2_API int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen ); - BLAKE2_API int blake2bp_update( blake2bp_state *S, const uint8_t *in, size_t inlen ); - BLAKE2_API int blake2bp_final( blake2bp_state *S, uint8_t *out, size_t outlen ); - - // Simple API - BLAKE2_API int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); - BLAKE2_API int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); - - BLAKE2_API int blake2sp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); - BLAKE2_API int blake2bp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); - -#if defined(__cplusplus) -} -#endif - -#endif - diff --git a/Modules/_blake2/impl/blake2b-load-sse2.h b/Modules/_blake2/impl/blake2b-load-sse2.h deleted file mode 100644 index 1ba153c87d7..00000000000 --- a/Modules/_blake2/impl/blake2b-load-sse2.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - BLAKE2 reference source code package - optimized C implementations - - Written in 2012 by Samuel Neves - - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . -*/ -#pragma once -#ifndef __BLAKE2B_LOAD_SSE2_H__ -#define __BLAKE2B_LOAD_SSE2_H__ - -#define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) -#define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) -#define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12) -#define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13) -#define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9) -#define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15) -#define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11) -#define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7) -#define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5) -#define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2) -#define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7) -#define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1) -#define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13) -#define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12) -#define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4) -#define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0) -#define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2) -#define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4) -#define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6) -#define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8) -#define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0) -#define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11) -#define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15) -#define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14) -#define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14) -#define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13) -#define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9) -#define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2) -#define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12) -#define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1) -#define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8) -#define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6) -#define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11) -#define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3) -#define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1) -#define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4) -#define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7) -#define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6) -#define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3) -#define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12) -#define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) -#define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) -#define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12) -#define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13) -#define LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9) -#define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15) -#define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11) -#define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7) - - -#endif - diff --git a/Modules/_blake2/impl/blake2b-load-sse41.h b/Modules/_blake2/impl/blake2b-load-sse41.h deleted file mode 100644 index f6c1bc8393f..00000000000 --- a/Modules/_blake2/impl/blake2b-load-sse41.h +++ /dev/null @@ -1,402 +0,0 @@ -/* - BLAKE2 reference source code package - optimized C implementations - - Written in 2012 by Samuel Neves - - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . -*/ -#pragma once -#ifndef __BLAKE2B_LOAD_SSE41_H__ -#define __BLAKE2B_LOAD_SSE41_H__ - -#define LOAD_MSG_0_1(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m0, m1); \ -b1 = _mm_unpacklo_epi64(m2, m3); \ -} while(0) - - -#define LOAD_MSG_0_2(b0, b1) \ -do \ -{ \ -b0 = _mm_unpackhi_epi64(m0, m1); \ -b1 = _mm_unpackhi_epi64(m2, m3); \ -} while(0) - - -#define LOAD_MSG_0_3(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m4, m5); \ -b1 = _mm_unpacklo_epi64(m6, m7); \ -} while(0) - - -#define LOAD_MSG_0_4(b0, b1) \ -do \ -{ \ -b0 = _mm_unpackhi_epi64(m4, m5); \ -b1 = _mm_unpackhi_epi64(m6, m7); \ -} while(0) - - -#define LOAD_MSG_1_1(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m7, m2); \ -b1 = _mm_unpackhi_epi64(m4, m6); \ -} while(0) - - -#define LOAD_MSG_1_2(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m5, m4); \ -b1 = _mm_alignr_epi8(m3, m7, 8); \ -} while(0) - - -#define LOAD_MSG_1_3(b0, b1) \ -do \ -{ \ -b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \ -b1 = _mm_unpackhi_epi64(m5, m2); \ -} while(0) - - -#define LOAD_MSG_1_4(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m6, m1); \ -b1 = _mm_unpackhi_epi64(m3, m1); \ -} while(0) - - -#define LOAD_MSG_2_1(b0, b1) \ -do \ -{ \ -b0 = _mm_alignr_epi8(m6, m5, 8); \ -b1 = _mm_unpackhi_epi64(m2, m7); \ -} while(0) - - -#define LOAD_MSG_2_2(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m4, m0); \ -b1 = _mm_blend_epi16(m1, m6, 0xF0); \ -} while(0) - - -#define LOAD_MSG_2_3(b0, b1) \ -do \ -{ \ -b0 = _mm_blend_epi16(m5, m1, 0xF0); \ -b1 = _mm_unpackhi_epi64(m3, m4); \ -} while(0) - - -#define LOAD_MSG_2_4(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m7, m3); \ -b1 = _mm_alignr_epi8(m2, m0, 8); \ -} while(0) - - -#define LOAD_MSG_3_1(b0, b1) \ -do \ -{ \ -b0 = _mm_unpackhi_epi64(m3, m1); \ -b1 = _mm_unpackhi_epi64(m6, m5); \ -} while(0) - - -#define LOAD_MSG_3_2(b0, b1) \ -do \ -{ \ -b0 = _mm_unpackhi_epi64(m4, m0); \ -b1 = _mm_unpacklo_epi64(m6, m7); \ -} while(0) - - -#define LOAD_MSG_3_3(b0, b1) \ -do \ -{ \ -b0 = _mm_blend_epi16(m1, m2, 0xF0); \ -b1 = _mm_blend_epi16(m2, m7, 0xF0); \ -} while(0) - - -#define LOAD_MSG_3_4(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m3, m5); \ -b1 = _mm_unpacklo_epi64(m0, m4); \ -} while(0) - - -#define LOAD_MSG_4_1(b0, b1) \ -do \ -{ \ -b0 = _mm_unpackhi_epi64(m4, m2); \ -b1 = _mm_unpacklo_epi64(m1, m5); \ -} while(0) - - -#define LOAD_MSG_4_2(b0, b1) \ -do \ -{ \ -b0 = _mm_blend_epi16(m0, m3, 0xF0); \ -b1 = _mm_blend_epi16(m2, m7, 0xF0); \ -} while(0) - - -#define LOAD_MSG_4_3(b0, b1) \ -do \ -{ \ -b0 = _mm_blend_epi16(m7, m5, 0xF0); \ -b1 = _mm_blend_epi16(m3, m1, 0xF0); \ -} while(0) - - -#define LOAD_MSG_4_4(b0, b1) \ -do \ -{ \ -b0 = _mm_alignr_epi8(m6, m0, 8); \ -b1 = _mm_blend_epi16(m4, m6, 0xF0); \ -} while(0) - - -#define LOAD_MSG_5_1(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m1, m3); \ -b1 = _mm_unpacklo_epi64(m0, m4); \ -} while(0) - - -#define LOAD_MSG_5_2(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m6, m5); \ -b1 = _mm_unpackhi_epi64(m5, m1); \ -} while(0) - - -#define LOAD_MSG_5_3(b0, b1) \ -do \ -{ \ -b0 = _mm_blend_epi16(m2, m3, 0xF0); \ -b1 = _mm_unpackhi_epi64(m7, m0); \ -} while(0) - - -#define LOAD_MSG_5_4(b0, b1) \ -do \ -{ \ -b0 = _mm_unpackhi_epi64(m6, m2); \ -b1 = _mm_blend_epi16(m7, m4, 0xF0); \ -} while(0) - - -#define LOAD_MSG_6_1(b0, b1) \ -do \ -{ \ -b0 = _mm_blend_epi16(m6, m0, 0xF0); \ -b1 = _mm_unpacklo_epi64(m7, m2); \ -} while(0) - - -#define LOAD_MSG_6_2(b0, b1) \ -do \ -{ \ -b0 = _mm_unpackhi_epi64(m2, m7); \ -b1 = _mm_alignr_epi8(m5, m6, 8); \ -} while(0) - - -#define LOAD_MSG_6_3(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m0, m3); \ -b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \ -} while(0) - - -#define LOAD_MSG_6_4(b0, b1) \ -do \ -{ \ -b0 = _mm_unpackhi_epi64(m3, m1); \ -b1 = _mm_blend_epi16(m1, m5, 0xF0); \ -} while(0) - - -#define LOAD_MSG_7_1(b0, b1) \ -do \ -{ \ -b0 = _mm_unpackhi_epi64(m6, m3); \ -b1 = _mm_blend_epi16(m6, m1, 0xF0); \ -} while(0) - - -#define LOAD_MSG_7_2(b0, b1) \ -do \ -{ \ -b0 = _mm_alignr_epi8(m7, m5, 8); \ -b1 = _mm_unpackhi_epi64(m0, m4); \ -} while(0) - - -#define LOAD_MSG_7_3(b0, b1) \ -do \ -{ \ -b0 = _mm_unpackhi_epi64(m2, m7); \ -b1 = _mm_unpacklo_epi64(m4, m1); \ -} while(0) - - -#define LOAD_MSG_7_4(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m0, m2); \ -b1 = _mm_unpacklo_epi64(m3, m5); \ -} while(0) - - -#define LOAD_MSG_8_1(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m3, m7); \ -b1 = _mm_alignr_epi8(m0, m5, 8); \ -} while(0) - - -#define LOAD_MSG_8_2(b0, b1) \ -do \ -{ \ -b0 = _mm_unpackhi_epi64(m7, m4); \ -b1 = _mm_alignr_epi8(m4, m1, 8); \ -} while(0) - - -#define LOAD_MSG_8_3(b0, b1) \ -do \ -{ \ -b0 = m6; \ -b1 = _mm_alignr_epi8(m5, m0, 8); \ -} while(0) - - -#define LOAD_MSG_8_4(b0, b1) \ -do \ -{ \ -b0 = _mm_blend_epi16(m1, m3, 0xF0); \ -b1 = m2; \ -} while(0) - - -#define LOAD_MSG_9_1(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m5, m4); \ -b1 = _mm_unpackhi_epi64(m3, m0); \ -} while(0) - - -#define LOAD_MSG_9_2(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m1, m2); \ -b1 = _mm_blend_epi16(m3, m2, 0xF0); \ -} while(0) - - -#define LOAD_MSG_9_3(b0, b1) \ -do \ -{ \ -b0 = _mm_unpackhi_epi64(m7, m4); \ -b1 = _mm_unpackhi_epi64(m1, m6); \ -} while(0) - - -#define LOAD_MSG_9_4(b0, b1) \ -do \ -{ \ -b0 = _mm_alignr_epi8(m7, m5, 8); \ -b1 = _mm_unpacklo_epi64(m6, m0); \ -} while(0) - - -#define LOAD_MSG_10_1(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m0, m1); \ -b1 = _mm_unpacklo_epi64(m2, m3); \ -} while(0) - - -#define LOAD_MSG_10_2(b0, b1) \ -do \ -{ \ -b0 = _mm_unpackhi_epi64(m0, m1); \ -b1 = _mm_unpackhi_epi64(m2, m3); \ -} while(0) - - -#define LOAD_MSG_10_3(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m4, m5); \ -b1 = _mm_unpacklo_epi64(m6, m7); \ -} while(0) - - -#define LOAD_MSG_10_4(b0, b1) \ -do \ -{ \ -b0 = _mm_unpackhi_epi64(m4, m5); \ -b1 = _mm_unpackhi_epi64(m6, m7); \ -} while(0) - - -#define LOAD_MSG_11_1(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m7, m2); \ -b1 = _mm_unpackhi_epi64(m4, m6); \ -} while(0) - - -#define LOAD_MSG_11_2(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m5, m4); \ -b1 = _mm_alignr_epi8(m3, m7, 8); \ -} while(0) - - -#define LOAD_MSG_11_3(b0, b1) \ -do \ -{ \ -b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \ -b1 = _mm_unpackhi_epi64(m5, m2); \ -} while(0) - - -#define LOAD_MSG_11_4(b0, b1) \ -do \ -{ \ -b0 = _mm_unpacklo_epi64(m6, m1); \ -b1 = _mm_unpackhi_epi64(m3, m1); \ -} while(0) - - -#endif - diff --git a/Modules/_blake2/impl/blake2b-ref.c b/Modules/_blake2/impl/blake2b-ref.c deleted file mode 100644 index e58c43659d9..00000000000 --- a/Modules/_blake2/impl/blake2b-ref.c +++ /dev/null @@ -1,379 +0,0 @@ -/* - BLAKE2 reference source code package - reference C implementations - - Written in 2012 by Samuel Neves - - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . -*/ - -#include -#include -#include - -#include "blake2.h" -#include "blake2-impl.h" - -static const uint64_t blake2b_IV[8] = -{ - 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, - 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, - 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, - 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL -}; - -static const uint8_t blake2b_sigma[12][16] = -{ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } -}; - - -static inline int blake2b_set_lastnode( blake2b_state *S ) -{ - S->f[1] = ~0ULL; - return 0; -} - -static inline int blake2b_clear_lastnode( blake2b_state *S ) -{ - S->f[1] = 0ULL; - return 0; -} - -/* Some helper functions, not necessarily useful */ -static inline int blake2b_set_lastblock( blake2b_state *S ) -{ - if( S->last_node ) blake2b_set_lastnode( S ); - - S->f[0] = ~0ULL; - return 0; -} - -static inline int blake2b_clear_lastblock( blake2b_state *S ) -{ - if( S->last_node ) blake2b_clear_lastnode( S ); - - S->f[0] = 0ULL; - return 0; -} - -static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) -{ - S->t[0] += inc; - S->t[1] += ( S->t[0] < inc ); - return 0; -} - - - -// Parameter-related functions -static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length ) -{ - P->digest_length = digest_length; - return 0; -} - -static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout ) -{ - P->fanout = fanout; - return 0; -} - -static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth ) -{ - P->depth = depth; - return 0; -} - -static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length ) -{ - store32( &P->leaf_length, leaf_length ); - return 0; -} - -static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset ) -{ - store64( &P->node_offset, node_offset ); - return 0; -} - -static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth ) -{ - P->node_depth = node_depth; - return 0; -} - -static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length ) -{ - P->inner_length = inner_length; - return 0; -} - -static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] ) -{ - memcpy( P->salt, salt, BLAKE2B_SALTBYTES ); - return 0; -} - -static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] ) -{ - memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES ); - return 0; -} - -static inline int blake2b_init0( blake2b_state *S ) -{ - memset( S, 0, sizeof( blake2b_state ) ); - - for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; - - return 0; -} - -#if defined(__cplusplus) -extern "C" { -#endif - int blake2b_init( blake2b_state *S, size_t outlen ); - int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); - int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); - int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); -#if defined(__cplusplus) -} -#endif - -/* init xors IV with input parameter block */ -int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) -{ - blake2b_init0( S ); - uint8_t *p = ( uint8_t * )( P ); - - /* IV XOR ParamBlock */ - for( size_t i = 0; i < 8; ++i ) - S->h[i] ^= load64( p + sizeof( S->h[i] ) * i ); - - S->outlen = P->digest_length; - return 0; -} - - - -int blake2b_init( blake2b_state *S, size_t outlen ) -{ - blake2b_param P[1]; - - if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; - - P->digest_length = ( uint8_t ) outlen; - P->key_length = 0; - P->fanout = 1; - P->depth = 1; - store32( &P->leaf_length, 0 ); - store64( &P->node_offset, 0 ); - P->node_depth = 0; - P->inner_length = 0; - memset( P->reserved, 0, sizeof( P->reserved ) ); - memset( P->salt, 0, sizeof( P->salt ) ); - memset( P->personal, 0, sizeof( P->personal ) ); - return blake2b_init_param( S, P ); -} - - -int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ) -{ - blake2b_param P[1]; - - if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; - - if ( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1; - - P->digest_length = ( uint8_t ) outlen; - P->key_length = ( uint8_t ) keylen; - P->fanout = 1; - P->depth = 1; - store32( &P->leaf_length, 0 ); - store64( &P->node_offset, 0 ); - P->node_depth = 0; - P->inner_length = 0; - memset( P->reserved, 0, sizeof( P->reserved ) ); - memset( P->salt, 0, sizeof( P->salt ) ); - memset( P->personal, 0, sizeof( P->personal ) ); - - if( blake2b_init_param( S, P ) < 0 ) return -1; - - { - uint8_t block[BLAKE2B_BLOCKBYTES]; - memset( block, 0, BLAKE2B_BLOCKBYTES ); - memcpy( block, key, keylen ); - blake2b_update( S, block, BLAKE2B_BLOCKBYTES ); - secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ - } - return 0; -} - -static int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) -{ - uint64_t m[16]; - uint64_t v[16]; - size_t i; - - for( i = 0; i < 16; ++i ) - m[i] = load64( block + i * sizeof( m[i] ) ); - - for( i = 0; i < 8; ++i ) - v[i] = S->h[i]; - - v[ 8] = blake2b_IV[0]; - v[ 9] = blake2b_IV[1]; - v[10] = blake2b_IV[2]; - v[11] = blake2b_IV[3]; - v[12] = S->t[0] ^ blake2b_IV[4]; - v[13] = S->t[1] ^ blake2b_IV[5]; - v[14] = S->f[0] ^ blake2b_IV[6]; - v[15] = S->f[1] ^ blake2b_IV[7]; -#define G(r,i,a,b,c,d) \ - do { \ - a = a + b + m[blake2b_sigma[r][2*i+0]]; \ - d = rotr64(d ^ a, 32); \ - c = c + d; \ - b = rotr64(b ^ c, 24); \ - a = a + b + m[blake2b_sigma[r][2*i+1]]; \ - d = rotr64(d ^ a, 16); \ - c = c + d; \ - b = rotr64(b ^ c, 63); \ - } while(0) -#define ROUND(r) \ - do { \ - G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ - G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ - G(r,2,v[ 2],v[ 6],v[10],v[14]); \ - G(r,3,v[ 3],v[ 7],v[11],v[15]); \ - G(r,4,v[ 0],v[ 5],v[10],v[15]); \ - G(r,5,v[ 1],v[ 6],v[11],v[12]); \ - G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ - G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ - } while(0) - ROUND( 0 ); - ROUND( 1 ); - ROUND( 2 ); - ROUND( 3 ); - ROUND( 4 ); - ROUND( 5 ); - ROUND( 6 ); - ROUND( 7 ); - ROUND( 8 ); - ROUND( 9 ); - ROUND( 10 ); - ROUND( 11 ); - - for( i = 0; i < 8; ++i ) - S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; - -#undef G -#undef ROUND - return 0; -} - - -int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ) -{ - while( inlen > 0 ) - { - uint32_t left = S->buflen; - uint32_t fill = 2 * BLAKE2B_BLOCKBYTES - left; - - if( inlen > fill ) - { - memcpy( S->buf + left, in, fill ); // Fill buffer - S->buflen += fill; - blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); - blake2b_compress( S, S->buf ); // Compress - memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left - S->buflen -= BLAKE2B_BLOCKBYTES; - in += fill; - inlen -= fill; - } - else // inlen <= fill - { - memcpy( S->buf + left, in, inlen ); - S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress - in += inlen; - inlen -= inlen; - } - } - - return 0; -} - -int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ) -{ - uint8_t buffer[BLAKE2B_OUTBYTES]; - size_t i; - - if(S->outlen != outlen) return -1; - - if( S->buflen > BLAKE2B_BLOCKBYTES ) - { - blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); - blake2b_compress( S, S->buf ); - S->buflen -= BLAKE2B_BLOCKBYTES; - memmove( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen ); - } - - blake2b_increment_counter( S, S->buflen ); - blake2b_set_lastblock( S ); - memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ - blake2b_compress( S, S->buf ); - - for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ - store64( buffer + sizeof( S->h[i] ) * i, S->h[i] ); - - memcpy( out, buffer, outlen ); - return 0; -} - -int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) -{ - blake2b_state S[1]; - - /* Verify parameters */ - if ( NULL == in && inlen > 0 ) return -1; - - if ( NULL == out ) return -1; - - if( NULL == key && keylen > 0 ) return -1; - - if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; - - if( keylen > BLAKE2B_KEYBYTES ) return -1; - - if( keylen > 0 ) - { - if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1; - } - else - { - if( blake2b_init( S, outlen ) < 0 ) return -1; - } - - if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0 ) return -1; - return blake2b_final( S, out, outlen ); -} - - diff --git a/Modules/_blake2/impl/blake2b-round.h b/Modules/_blake2/impl/blake2b-round.h deleted file mode 100644 index 5b452c4d63b..00000000000 --- a/Modules/_blake2/impl/blake2b-round.h +++ /dev/null @@ -1,160 +0,0 @@ -/* - BLAKE2 reference source code package - optimized C implementations - - Written in 2012 by Samuel Neves - - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . -*/ -#pragma once -#ifndef __BLAKE2B_ROUND_H__ -#define __BLAKE2B_ROUND_H__ - -#define LOAD(p) _mm_load_si128( (__m128i *)(p) ) -#define STORE(p,r) _mm_store_si128((__m128i *)(p), r) - -#define LOADU(p) _mm_loadu_si128( (__m128i *)(p) ) -#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) - -#define TOF(reg) _mm_castsi128_ps((reg)) -#define TOI(reg) _mm_castps_si128((reg)) - -#define LIKELY(x) __builtin_expect((x),1) - - -/* Microarchitecture-specific macros */ -#ifndef HAVE_XOP -#ifdef HAVE_SSSE3 -#define _mm_roti_epi64(x, c) \ - (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \ - : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \ - : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \ - : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \ - : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c)))) -#else -#define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-(c)) )) -#endif -#else -/* ... */ -#endif - - - -#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ - \ - row4l = _mm_xor_si128(row4l, row1l); \ - row4h = _mm_xor_si128(row4h, row1h); \ - \ - row4l = _mm_roti_epi64(row4l, -32); \ - row4h = _mm_roti_epi64(row4h, -32); \ - \ - row3l = _mm_add_epi64(row3l, row4l); \ - row3h = _mm_add_epi64(row3h, row4h); \ - \ - row2l = _mm_xor_si128(row2l, row3l); \ - row2h = _mm_xor_si128(row2h, row3h); \ - \ - row2l = _mm_roti_epi64(row2l, -24); \ - row2h = _mm_roti_epi64(row2h, -24); \ - -#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ - \ - row4l = _mm_xor_si128(row4l, row1l); \ - row4h = _mm_xor_si128(row4h, row1h); \ - \ - row4l = _mm_roti_epi64(row4l, -16); \ - row4h = _mm_roti_epi64(row4h, -16); \ - \ - row3l = _mm_add_epi64(row3l, row4l); \ - row3h = _mm_add_epi64(row3h, row4h); \ - \ - row2l = _mm_xor_si128(row2l, row3l); \ - row2h = _mm_xor_si128(row2h, row3h); \ - \ - row2l = _mm_roti_epi64(row2l, -63); \ - row2h = _mm_roti_epi64(row2h, -63); \ - -#if defined(HAVE_SSSE3) -#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ - t0 = _mm_alignr_epi8(row2h, row2l, 8); \ - t1 = _mm_alignr_epi8(row2l, row2h, 8); \ - row2l = t0; \ - row2h = t1; \ - \ - t0 = row3l; \ - row3l = row3h; \ - row3h = t0; \ - \ - t0 = _mm_alignr_epi8(row4h, row4l, 8); \ - t1 = _mm_alignr_epi8(row4l, row4h, 8); \ - row4l = t1; \ - row4h = t0; - -#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ - t0 = _mm_alignr_epi8(row2l, row2h, 8); \ - t1 = _mm_alignr_epi8(row2h, row2l, 8); \ - row2l = t0; \ - row2h = t1; \ - \ - t0 = row3l; \ - row3l = row3h; \ - row3h = t0; \ - \ - t0 = _mm_alignr_epi8(row4l, row4h, 8); \ - t1 = _mm_alignr_epi8(row4h, row4l, 8); \ - row4l = t1; \ - row4h = t0; -#else - -#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ - t0 = row4l;\ - t1 = row2l;\ - row4l = row3l;\ - row3l = row3h;\ - row3h = row4l;\ - row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \ - row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \ - row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \ - row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1)) - -#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ - t0 = row3l;\ - row3l = row3h;\ - row3h = t0;\ - t0 = row2l;\ - t1 = row4l;\ - row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \ - row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \ - row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \ - row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1)) - -#endif - -#if defined(HAVE_SSE4_1) -#include "blake2b-load-sse41.h" -#else -#include "blake2b-load-sse2.h" -#endif - -#define ROUND(r) \ - LOAD_MSG_ ##r ##_1(b0, b1); \ - G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ - LOAD_MSG_ ##r ##_2(b0, b1); \ - G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ - DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ - LOAD_MSG_ ##r ##_3(b0, b1); \ - G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ - LOAD_MSG_ ##r ##_4(b0, b1); \ - G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ - UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); - -#endif - diff --git a/Modules/_blake2/impl/blake2b.c b/Modules/_blake2/impl/blake2b.c deleted file mode 100644 index cef22838917..00000000000 --- a/Modules/_blake2/impl/blake2b.c +++ /dev/null @@ -1,436 +0,0 @@ -/* - BLAKE2 reference source code package - optimized C implementations - - Written in 2012 by Samuel Neves - - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . -*/ - -#include -#include -#include - -#include "blake2.h" -#include "blake2-impl.h" - -#include "blake2-config.h" - -#if defined(_MSC_VER) -#include -#endif - -#if defined(HAVE_SSE2) -#include -// MSVC only defines _mm_set_epi64x for x86_64... -#if defined(_MSC_VER) && !defined(_M_X64) && !defined(__clang__) -static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 ) -{ - return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 ); -} -#endif -#endif - -#if defined(HAVE_SSSE3) -#include -#endif -#if defined(HAVE_SSE4_1) -#include -#endif -#if defined(HAVE_AVX) -#include -#endif -#if defined(HAVE_XOP) && !defined(_MSC_VER) -#include -#endif - - - -#include "blake2b-round.h" - -static const uint64_t blake2b_IV[8] = -{ - 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, - 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, - 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, - 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL -}; - -static const uint8_t blake2b_sigma[12][16] = -{ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } -}; - - -/* Some helper functions, not necessarily useful */ -static inline int blake2b_set_lastnode( blake2b_state *S ) -{ - S->f[1] = ~0ULL; - return 0; -} - -static inline int blake2b_clear_lastnode( blake2b_state *S ) -{ - S->f[1] = 0ULL; - return 0; -} - -static inline int blake2b_set_lastblock( blake2b_state *S ) -{ - if( S->last_node ) blake2b_set_lastnode( S ); - - S->f[0] = ~0ULL; - return 0; -} - -static inline int blake2b_clear_lastblock( blake2b_state *S ) -{ - if( S->last_node ) blake2b_clear_lastnode( S ); - - S->f[0] = 0ULL; - return 0; -} - - -static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) -{ -#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__)) - // ADD/ADC chain - __uint128_t t = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0]; - t += inc; - S->t[0] = ( uint64_t )( t >> 0 ); - S->t[1] = ( uint64_t )( t >> 64 ); -#else - S->t[0] += inc; - S->t[1] += ( S->t[0] < inc ); -#endif - return 0; -} - - -// Parameter-related functions -static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length ) -{ - P->digest_length = digest_length; - return 0; -} - -static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout ) -{ - P->fanout = fanout; - return 0; -} - -static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth ) -{ - P->depth = depth; - return 0; -} - -static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length ) -{ - P->leaf_length = leaf_length; - return 0; -} - -static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset ) -{ - P->node_offset = node_offset; - return 0; -} - -static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth ) -{ - P->node_depth = node_depth; - return 0; -} - -static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length ) -{ - P->inner_length = inner_length; - return 0; -} - -static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] ) -{ - memcpy( P->salt, salt, BLAKE2B_SALTBYTES ); - return 0; -} - -static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] ) -{ - memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES ); - return 0; -} - -static inline int blake2b_init0( blake2b_state *S ) -{ - memset( S, 0, sizeof( blake2b_state ) ); - - for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; - - return 0; -} - - - -#if defined(__cplusplus) -extern "C" { -#endif - int blake2b_init( blake2b_state *S, size_t outlen ); - int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); - int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); - int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); -#if defined(__cplusplus) -} -#endif - -/* init xors IV with input parameter block */ -int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) -{ - uint8_t *p, *h, *v; - //blake2b_init0( S ); - v = ( uint8_t * )( blake2b_IV ); - h = ( uint8_t * )( S->h ); - p = ( uint8_t * )( P ); - /* IV XOR ParamBlock */ - memset( S, 0, sizeof( blake2b_state ) ); - - for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; - - S->outlen = P->digest_length; - return 0; -} - - -/* Some sort of default parameter block initialization, for sequential blake2b */ - -int blake2b_init( blake2b_state *S, size_t outlen ) -{ - if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; - - const blake2b_param P = - { - ( uint8_t ) outlen, - 0, - 1, - 1, - 0, - 0, - 0, - 0, - {0}, - {0}, - {0} - }; - return blake2b_init_param( S, &P ); -} - -int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ) -{ - if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; - - if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1; - - const blake2b_param P = - { - ( uint8_t ) outlen, - ( uint8_t ) keylen, - 1, - 1, - 0, - 0, - 0, - 0, - {0}, - {0}, - {0} - }; - - if( blake2b_init_param( S, &P ) < 0 ) - return 0; - - { - uint8_t block[BLAKE2B_BLOCKBYTES]; - memset( block, 0, BLAKE2B_BLOCKBYTES ); - memcpy( block, key, keylen ); - blake2b_update( S, block, BLAKE2B_BLOCKBYTES ); - secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ - } - return 0; -} - -static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) -{ - __m128i row1l, row1h; - __m128i row2l, row2h; - __m128i row3l, row3h; - __m128i row4l, row4h; - __m128i b0, b1; - __m128i t0, t1; -#if defined(HAVE_SSSE3) && !defined(HAVE_XOP) - const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 ); - const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 ); -#endif -#if defined(HAVE_SSE4_1) - const __m128i m0 = LOADU( block + 00 ); - const __m128i m1 = LOADU( block + 16 ); - const __m128i m2 = LOADU( block + 32 ); - const __m128i m3 = LOADU( block + 48 ); - const __m128i m4 = LOADU( block + 64 ); - const __m128i m5 = LOADU( block + 80 ); - const __m128i m6 = LOADU( block + 96 ); - const __m128i m7 = LOADU( block + 112 ); -#else - const uint64_t m0 = ( ( uint64_t * )block )[ 0]; - const uint64_t m1 = ( ( uint64_t * )block )[ 1]; - const uint64_t m2 = ( ( uint64_t * )block )[ 2]; - const uint64_t m3 = ( ( uint64_t * )block )[ 3]; - const uint64_t m4 = ( ( uint64_t * )block )[ 4]; - const uint64_t m5 = ( ( uint64_t * )block )[ 5]; - const uint64_t m6 = ( ( uint64_t * )block )[ 6]; - const uint64_t m7 = ( ( uint64_t * )block )[ 7]; - const uint64_t m8 = ( ( uint64_t * )block )[ 8]; - const uint64_t m9 = ( ( uint64_t * )block )[ 9]; - const uint64_t m10 = ( ( uint64_t * )block )[10]; - const uint64_t m11 = ( ( uint64_t * )block )[11]; - const uint64_t m12 = ( ( uint64_t * )block )[12]; - const uint64_t m13 = ( ( uint64_t * )block )[13]; - const uint64_t m14 = ( ( uint64_t * )block )[14]; - const uint64_t m15 = ( ( uint64_t * )block )[15]; -#endif - row1l = LOADU( &S->h[0] ); - row1h = LOADU( &S->h[2] ); - row2l = LOADU( &S->h[4] ); - row2h = LOADU( &S->h[6] ); - row3l = LOADU( &blake2b_IV[0] ); - row3h = LOADU( &blake2b_IV[2] ); - row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) ); - row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) ); - ROUND( 0 ); - ROUND( 1 ); - ROUND( 2 ); - ROUND( 3 ); - ROUND( 4 ); - ROUND( 5 ); - ROUND( 6 ); - ROUND( 7 ); - ROUND( 8 ); - ROUND( 9 ); - ROUND( 10 ); - ROUND( 11 ); - row1l = _mm_xor_si128( row3l, row1l ); - row1h = _mm_xor_si128( row3h, row1h ); - STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) ); - STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) ); - row2l = _mm_xor_si128( row4l, row2l ); - row2h = _mm_xor_si128( row4h, row2h ); - STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) ); - STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) ); - return 0; -} - - -int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ) -{ - while( inlen > 0 ) - { - uint32_t left = S->buflen; - uint32_t fill = 2 * BLAKE2B_BLOCKBYTES - left; - - if( inlen > fill ) - { - memcpy( S->buf + left, in, fill ); // Fill buffer - S->buflen += fill; - blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); - blake2b_compress( S, S->buf ); // Compress - memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left - S->buflen -= BLAKE2B_BLOCKBYTES; - in += fill; - inlen -= fill; - } - else // inlen <= fill - { - memcpy( S->buf + left, in, inlen ); - S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress - in += inlen; - inlen -= inlen; - } - } - - return 0; -} - - -int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ) -{ - if(S->outlen != outlen) return -1; - - if( S->buflen > BLAKE2B_BLOCKBYTES ) - { - blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); - blake2b_compress( S, S->buf ); - S->buflen -= BLAKE2B_BLOCKBYTES; - memmove( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen ); - } - - blake2b_increment_counter( S, S->buflen ); - blake2b_set_lastblock( S ); - memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ - blake2b_compress( S, S->buf ); - memcpy( out, &S->h[0], outlen ); - return 0; -} - - -int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) -{ - blake2b_state S[1]; - - /* Verify parameters */ - if ( NULL == in && inlen > 0 ) return -1; - - if ( NULL == out ) return -1; - - if( NULL == key && keylen > 0 ) return -1; - - if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; - - if( keylen > BLAKE2B_KEYBYTES ) return -1; - - if( keylen ) - { - if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1; - } - else - { - if( blake2b_init( S, outlen ) < 0 ) return -1; - } - - if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0) return -1; - return blake2b_final( S, out, outlen ); -} - -#if defined(SUPERCOP) -int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) -{ - return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 ); -} -#endif diff --git a/Modules/_blake2/impl/blake2s-load-sse2.h b/Modules/_blake2/impl/blake2s-load-sse2.h deleted file mode 100644 index b24483cf931..00000000000 --- a/Modules/_blake2/impl/blake2s-load-sse2.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - BLAKE2 reference source code package - optimized C implementations - - Written in 2012 by Samuel Neves - - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . -*/ -#pragma once -#ifndef __BLAKE2S_LOAD_SSE2_H__ -#define __BLAKE2S_LOAD_SSE2_H__ - -#define LOAD_MSG_0_1(buf) buf = _mm_set_epi32(m6,m4,m2,m0) -#define LOAD_MSG_0_2(buf) buf = _mm_set_epi32(m7,m5,m3,m1) -#define LOAD_MSG_0_3(buf) buf = _mm_set_epi32(m14,m12,m10,m8) -#define LOAD_MSG_0_4(buf) buf = _mm_set_epi32(m15,m13,m11,m9) -#define LOAD_MSG_1_1(buf) buf = _mm_set_epi32(m13,m9,m4,m14) -#define LOAD_MSG_1_2(buf) buf = _mm_set_epi32(m6,m15,m8,m10) -#define LOAD_MSG_1_3(buf) buf = _mm_set_epi32(m5,m11,m0,m1) -#define LOAD_MSG_1_4(buf) buf = _mm_set_epi32(m3,m7,m2,m12) -#define LOAD_MSG_2_1(buf) buf = _mm_set_epi32(m15,m5,m12,m11) -#define LOAD_MSG_2_2(buf) buf = _mm_set_epi32(m13,m2,m0,m8) -#define LOAD_MSG_2_3(buf) buf = _mm_set_epi32(m9,m7,m3,m10) -#define LOAD_MSG_2_4(buf) buf = _mm_set_epi32(m4,m1,m6,m14) -#define LOAD_MSG_3_1(buf) buf = _mm_set_epi32(m11,m13,m3,m7) -#define LOAD_MSG_3_2(buf) buf = _mm_set_epi32(m14,m12,m1,m9) -#define LOAD_MSG_3_3(buf) buf = _mm_set_epi32(m15,m4,m5,m2) -#define LOAD_MSG_3_4(buf) buf = _mm_set_epi32(m8,m0,m10,m6) -#define LOAD_MSG_4_1(buf) buf = _mm_set_epi32(m10,m2,m5,m9) -#define LOAD_MSG_4_2(buf) buf = _mm_set_epi32(m15,m4,m7,m0) -#define LOAD_MSG_4_3(buf) buf = _mm_set_epi32(m3,m6,m11,m14) -#define LOAD_MSG_4_4(buf) buf = _mm_set_epi32(m13,m8,m12,m1) -#define LOAD_MSG_5_1(buf) buf = _mm_set_epi32(m8,m0,m6,m2) -#define LOAD_MSG_5_2(buf) buf = _mm_set_epi32(m3,m11,m10,m12) -#define LOAD_MSG_5_3(buf) buf = _mm_set_epi32(m1,m15,m7,m4) -#define LOAD_MSG_5_4(buf) buf = _mm_set_epi32(m9,m14,m5,m13) -#define LOAD_MSG_6_1(buf) buf = _mm_set_epi32(m4,m14,m1,m12) -#define LOAD_MSG_6_2(buf) buf = _mm_set_epi32(m10,m13,m15,m5) -#define LOAD_MSG_6_3(buf) buf = _mm_set_epi32(m8,m9,m6,m0) -#define LOAD_MSG_6_4(buf) buf = _mm_set_epi32(m11,m2,m3,m7) -#define LOAD_MSG_7_1(buf) buf = _mm_set_epi32(m3,m12,m7,m13) -#define LOAD_MSG_7_2(buf) buf = _mm_set_epi32(m9,m1,m14,m11) -#define LOAD_MSG_7_3(buf) buf = _mm_set_epi32(m2,m8,m15,m5) -#define LOAD_MSG_7_4(buf) buf = _mm_set_epi32(m10,m6,m4,m0) -#define LOAD_MSG_8_1(buf) buf = _mm_set_epi32(m0,m11,m14,m6) -#define LOAD_MSG_8_2(buf) buf = _mm_set_epi32(m8,m3,m9,m15) -#define LOAD_MSG_8_3(buf) buf = _mm_set_epi32(m10,m1,m13,m12) -#define LOAD_MSG_8_4(buf) buf = _mm_set_epi32(m5,m4,m7,m2) -#define LOAD_MSG_9_1(buf) buf = _mm_set_epi32(m1,m7,m8,m10) -#define LOAD_MSG_9_2(buf) buf = _mm_set_epi32(m5,m6,m4,m2) -#define LOAD_MSG_9_3(buf) buf = _mm_set_epi32(m13,m3,m9,m15) -#define LOAD_MSG_9_4(buf) buf = _mm_set_epi32(m0,m12,m14,m11) - - -#endif diff --git a/Modules/_blake2/impl/blake2s-load-sse41.h b/Modules/_blake2/impl/blake2s-load-sse41.h deleted file mode 100644 index 3ac12eb6f5d..00000000000 --- a/Modules/_blake2/impl/blake2s-load-sse41.h +++ /dev/null @@ -1,229 +0,0 @@ -/* - BLAKE2 reference source code package - optimized C implementations - - Written in 2012 by Samuel Neves - - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . -*/ -#pragma once -#ifndef __BLAKE2S_LOAD_SSE41_H__ -#define __BLAKE2S_LOAD_SSE41_H__ - -#define LOAD_MSG_0_1(buf) \ -buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2,0,2,0))); - -#define LOAD_MSG_0_2(buf) \ -buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(3,1,3,1))); - -#define LOAD_MSG_0_3(buf) \ -buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(2,0,2,0))); - -#define LOAD_MSG_0_4(buf) \ -buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(3,1,3,1))); - -#define LOAD_MSG_1_1(buf) \ -t0 = _mm_blend_epi16(m1, m2, 0x0C); \ -t1 = _mm_slli_si128(m3, 4); \ -t2 = _mm_blend_epi16(t0, t1, 0xF0); \ -buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3)); - -#define LOAD_MSG_1_2(buf) \ -t0 = _mm_shuffle_epi32(m2,_MM_SHUFFLE(0,0,2,0)); \ -t1 = _mm_blend_epi16(m1,m3,0xC0); \ -t2 = _mm_blend_epi16(t0, t1, 0xF0); \ -buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1)); - -#define LOAD_MSG_1_3(buf) \ -t0 = _mm_slli_si128(m1, 4); \ -t1 = _mm_blend_epi16(m2, t0, 0x30); \ -t2 = _mm_blend_epi16(m0, t1, 0xF0); \ -buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1)); - -#define LOAD_MSG_1_4(buf) \ -t0 = _mm_unpackhi_epi32(m0,m1); \ -t1 = _mm_slli_si128(m3, 4); \ -t2 = _mm_blend_epi16(t0, t1, 0x0C); \ -buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1)); - -#define LOAD_MSG_2_1(buf) \ -t0 = _mm_unpackhi_epi32(m2,m3); \ -t1 = _mm_blend_epi16(m3,m1,0x0C); \ -t2 = _mm_blend_epi16(t0, t1, 0x0F); \ -buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2)); - -#define LOAD_MSG_2_2(buf) \ -t0 = _mm_unpacklo_epi32(m2,m0); \ -t1 = _mm_blend_epi16(t0, m0, 0xF0); \ -t2 = _mm_slli_si128(m3, 8); \ -buf = _mm_blend_epi16(t1, t2, 0xC0); - -#define LOAD_MSG_2_3(buf) \ -t0 = _mm_blend_epi16(m0, m2, 0x3C); \ -t1 = _mm_srli_si128(m1, 12); \ -t2 = _mm_blend_epi16(t0,t1,0x03); \ -buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,3,2)); - -#define LOAD_MSG_2_4(buf) \ -t0 = _mm_slli_si128(m3, 4); \ -t1 = _mm_blend_epi16(m0, m1, 0x33); \ -t2 = _mm_blend_epi16(t1, t0, 0xC0); \ -buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(0,1,2,3)); - -#define LOAD_MSG_3_1(buf) \ -t0 = _mm_unpackhi_epi32(m0,m1); \ -t1 = _mm_unpackhi_epi32(t0, m2); \ -t2 = _mm_blend_epi16(t1, m3, 0x0C); \ -buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2)); - -#define LOAD_MSG_3_2(buf) \ -t0 = _mm_slli_si128(m2, 8); \ -t1 = _mm_blend_epi16(m3,m0,0x0C); \ -t2 = _mm_blend_epi16(t1, t0, 0xC0); \ -buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3)); - -#define LOAD_MSG_3_3(buf) \ -t0 = _mm_blend_epi16(m0,m1,0x0F); \ -t1 = _mm_blend_epi16(t0, m3, 0xC0); \ -buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2)); - -#define LOAD_MSG_3_4(buf) \ -t0 = _mm_unpacklo_epi32(m0,m2); \ -t1 = _mm_unpackhi_epi32(m1,m2); \ -buf = _mm_unpacklo_epi64(t1,t0); - -#define LOAD_MSG_4_1(buf) \ -t0 = _mm_unpacklo_epi64(m1,m2); \ -t1 = _mm_unpackhi_epi64(m0,m2); \ -t2 = _mm_blend_epi16(t0,t1,0x33); \ -buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3)); - -#define LOAD_MSG_4_2(buf) \ -t0 = _mm_unpackhi_epi64(m1,m3); \ -t1 = _mm_unpacklo_epi64(m0,m1); \ -buf = _mm_blend_epi16(t0,t1,0x33); - -#define LOAD_MSG_4_3(buf) \ -t0 = _mm_unpackhi_epi64(m3,m1); \ -t1 = _mm_unpackhi_epi64(m2,m0); \ -buf = _mm_blend_epi16(t1,t0,0x33); - -#define LOAD_MSG_4_4(buf) \ -t0 = _mm_blend_epi16(m0,m2,0x03); \ -t1 = _mm_slli_si128(t0, 8); \ -t2 = _mm_blend_epi16(t1,m3,0x0F); \ -buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,0,3)); - -#define LOAD_MSG_5_1(buf) \ -t0 = _mm_unpackhi_epi32(m0,m1); \ -t1 = _mm_unpacklo_epi32(m0,m2); \ -buf = _mm_unpacklo_epi64(t0,t1); - -#define LOAD_MSG_5_2(buf) \ -t0 = _mm_srli_si128(m2, 4); \ -t1 = _mm_blend_epi16(m0,m3,0x03); \ -buf = _mm_blend_epi16(t1,t0,0x3C); - -#define LOAD_MSG_5_3(buf) \ -t0 = _mm_blend_epi16(m1,m0,0x0C); \ -t1 = _mm_srli_si128(m3, 4); \ -t2 = _mm_blend_epi16(t0,t1,0x30); \ -buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,3,0)); - -#define LOAD_MSG_5_4(buf) \ -t0 = _mm_unpacklo_epi64(m1,m2); \ -t1= _mm_shuffle_epi32(m3, _MM_SHUFFLE(0,2,0,1)); \ -buf = _mm_blend_epi16(t0,t1,0x33); - -#define LOAD_MSG_6_1(buf) \ -t0 = _mm_slli_si128(m1, 12); \ -t1 = _mm_blend_epi16(m0,m3,0x33); \ -buf = _mm_blend_epi16(t1,t0,0xC0); - -#define LOAD_MSG_6_2(buf) \ -t0 = _mm_blend_epi16(m3,m2,0x30); \ -t1 = _mm_srli_si128(m1, 4); \ -t2 = _mm_blend_epi16(t0,t1,0x03); \ -buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,3,0)); - -#define LOAD_MSG_6_3(buf) \ -t0 = _mm_unpacklo_epi64(m0,m2); \ -t1 = _mm_srli_si128(m1, 4); \ -buf = _mm_shuffle_epi32(_mm_blend_epi16(t0,t1,0x0C), _MM_SHUFFLE(2,3,1,0)); - -#define LOAD_MSG_6_4(buf) \ -t0 = _mm_unpackhi_epi32(m1,m2); \ -t1 = _mm_unpackhi_epi64(m0,t0); \ -buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2)); - -#define LOAD_MSG_7_1(buf) \ -t0 = _mm_unpackhi_epi32(m0,m1); \ -t1 = _mm_blend_epi16(t0,m3,0x0F); \ -buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(2,0,3,1)); - -#define LOAD_MSG_7_2(buf) \ -t0 = _mm_blend_epi16(m2,m3,0x30); \ -t1 = _mm_srli_si128(m0,4); \ -t2 = _mm_blend_epi16(t0,t1,0x03); \ -buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,2,3)); - -#define LOAD_MSG_7_3(buf) \ -t0 = _mm_unpackhi_epi64(m0,m3); \ -t1 = _mm_unpacklo_epi64(m1,m2); \ -t2 = _mm_blend_epi16(t0,t1,0x3C); \ -buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,2,3,1)); - -#define LOAD_MSG_7_4(buf) \ -t0 = _mm_unpacklo_epi32(m0,m1); \ -t1 = _mm_unpackhi_epi32(m1,m2); \ -buf = _mm_unpacklo_epi64(t0,t1); - -#define LOAD_MSG_8_1(buf) \ -t0 = _mm_unpackhi_epi32(m1,m3); \ -t1 = _mm_unpacklo_epi64(t0,m0); \ -t2 = _mm_blend_epi16(t1,m2,0xC0); \ -buf = _mm_shufflehi_epi16(t2,_MM_SHUFFLE(1,0,3,2)); - -#define LOAD_MSG_8_2(buf) \ -t0 = _mm_unpackhi_epi32(m0,m3); \ -t1 = _mm_blend_epi16(m2,t0,0xF0); \ -buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(0,2,1,3)); - -#define LOAD_MSG_8_3(buf) \ -t0 = _mm_blend_epi16(m2,m0,0x0C); \ -t1 = _mm_slli_si128(t0,4); \ -buf = _mm_blend_epi16(t1,m3,0x0F); - -#define LOAD_MSG_8_4(buf) \ -t0 = _mm_blend_epi16(m1,m0,0x30); \ -buf = _mm_shuffle_epi32(t0,_MM_SHUFFLE(1,0,3,2)); - -#define LOAD_MSG_9_1(buf) \ -t0 = _mm_blend_epi16(m0,m2,0x03); \ -t1 = _mm_blend_epi16(m1,m2,0x30); \ -t2 = _mm_blend_epi16(t1,t0,0x0F); \ -buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(1,3,0,2)); - -#define LOAD_MSG_9_2(buf) \ -t0 = _mm_slli_si128(m0,4); \ -t1 = _mm_blend_epi16(m1,t0,0xC0); \ -buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(1,2,0,3)); - -#define LOAD_MSG_9_3(buf) \ -t0 = _mm_unpackhi_epi32(m0,m3); \ -t1 = _mm_unpacklo_epi32(m2,m3); \ -t2 = _mm_unpackhi_epi64(t0,t1); \ -buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(3,0,2,1)); - -#define LOAD_MSG_9_4(buf) \ -t0 = _mm_blend_epi16(m3,m2,0xC0); \ -t1 = _mm_unpacklo_epi32(m0,m3); \ -t2 = _mm_blend_epi16(t0,t1,0x0F); \ -buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,1,2,3)); - -#endif - diff --git a/Modules/_blake2/impl/blake2s-load-xop.h b/Modules/_blake2/impl/blake2s-load-xop.h deleted file mode 100644 index 14d9e7f7640..00000000000 --- a/Modules/_blake2/impl/blake2s-load-xop.h +++ /dev/null @@ -1,189 +0,0 @@ -/* - BLAKE2 reference source code package - optimized C implementations - - Written in 2012 by Samuel Neves - - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . -*/ -#pragma once -#ifndef __BLAKE2S_LOAD_XOP_H__ -#define __BLAKE2S_LOAD_XOP_H__ - -#define TOB(x) ((x)*4*0x01010101 + 0x03020100) // ..or not TOB - -/* Basic VPPERM emulation, for testing purposes */ -/*static __m128i _mm_perm_epi8(const __m128i src1, const __m128i src2, const __m128i sel) -{ - const __m128i sixteen = _mm_set1_epi8(16); - const __m128i t0 = _mm_shuffle_epi8(src1, sel); - const __m128i s1 = _mm_shuffle_epi8(src2, _mm_sub_epi8(sel, sixteen)); - const __m128i mask = _mm_or_si128(_mm_cmpeq_epi8(sel, sixteen), - _mm_cmpgt_epi8(sel, sixteen)); // (>=16) = 0xff : 00 - return _mm_blendv_epi8(t0, s1, mask); -}*/ - -#define LOAD_MSG_0_1(buf) \ -buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) ); - -#define LOAD_MSG_0_2(buf) \ -buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) ); - -#define LOAD_MSG_0_3(buf) \ -buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) ); - -#define LOAD_MSG_0_4(buf) \ -buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) ); - -#define LOAD_MSG_1_1(buf) \ -t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(5),TOB(0),TOB(0)) ); \ -buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) ); - -#define LOAD_MSG_1_2(buf) \ -t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(2),TOB(0),TOB(4),TOB(6)) ); \ -buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); - -#define LOAD_MSG_1_3(buf) \ -t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(0),TOB(0),TOB(1)) ); \ -buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); - -#define LOAD_MSG_1_4(buf) \ -t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(7),TOB(2),TOB(0)) ); \ -buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) ); - -#define LOAD_MSG_2_1(buf) \ -t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(1),TOB(0),TOB(7)) ); \ -buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(4),TOB(0)) ); - -#define LOAD_MSG_2_2(buf) \ -t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(2),TOB(0),TOB(4)) ); \ -buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(0)) ); - -#define LOAD_MSG_2_3(buf) \ -t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(7),TOB(3),TOB(0)) ); \ -buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) ); - -#define LOAD_MSG_2_4(buf) \ -t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(1),TOB(6),TOB(0)) ); \ -buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) ); - -#define LOAD_MSG_3_1(buf) \ -t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(3),TOB(7)) ); \ -t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); \ -buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(1),TOB(0)) ); - -#define LOAD_MSG_3_2(buf) \ -t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(1),TOB(5)) ); \ -buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(1),TOB(0)) ); - -#define LOAD_MSG_3_3(buf) \ -t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(5),TOB(2)) ); \ -buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); - -#define LOAD_MSG_3_4(buf) \ -t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \ -buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(6),TOB(0)) ); - -#define LOAD_MSG_4_1(buf) \ -t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(5),TOB(0)) ); \ -buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(5)) ); - -#define LOAD_MSG_4_2(buf) \ -t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(7),TOB(0)) ); \ -buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); - -#define LOAD_MSG_4_3(buf) \ -t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(6),TOB(0),TOB(0)) ); \ -t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) ); \ -buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) ); - -#define LOAD_MSG_4_4(buf) \ -t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(4),TOB(0),TOB(1)) ); \ -buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(4),TOB(0)) ); - -#define LOAD_MSG_5_1(buf) \ -t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(2)) ); \ -buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(1),TOB(0)) ); - -#define LOAD_MSG_5_2(buf) \ -t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(6),TOB(0)) ); \ -buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) ); - -#define LOAD_MSG_5_3(buf) \ -t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(0),TOB(7),TOB(4)) ); \ -buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); - -#define LOAD_MSG_5_4(buf) \ -t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(5),TOB(0),TOB(1),TOB(0)) ); \ -buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(5)) ); - -#define LOAD_MSG_6_1(buf) \ -t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(0),TOB(1),TOB(0)) ); \ -buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(4)) ); - -#define LOAD_MSG_6_2(buf) \ -t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(6),TOB(0),TOB(0),TOB(1)) ); \ -buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(7),TOB(0)) ); - -#define LOAD_MSG_6_3(buf) \ -t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(0)) ); \ -buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(5),TOB(1),TOB(0)) ); - -#define LOAD_MSG_6_4(buf) \ -t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(3),TOB(7)) ); \ -buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); - -#define LOAD_MSG_7_1(buf) \ -t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(0),TOB(7),TOB(0)) ); \ -buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(5)) ); - -#define LOAD_MSG_7_2(buf) \ -t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(5),TOB(1),TOB(0),TOB(7)) ); \ -buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) ); - -#define LOAD_MSG_7_3(buf) \ -t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(2),TOB(0),TOB(0),TOB(5)) ); \ -t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(0)) ); \ -buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) ); - -#define LOAD_MSG_7_4(buf) \ -t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(6),TOB(4),TOB(0)) ); \ -buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(0)) ); - -#define LOAD_MSG_8_1(buf) \ -t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \ -t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); \ -buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) ); - -#define LOAD_MSG_8_2(buf) \ -t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(4),TOB(3),TOB(5),TOB(0)) ); \ -buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(7)) ); - -#define LOAD_MSG_8_3(buf) \ -t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(6),TOB(1),TOB(0),TOB(0)) ); \ -buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(5),TOB(4)) ); \ - -#define LOAD_MSG_8_4(buf) \ -buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(4),TOB(7),TOB(2)) ); - -#define LOAD_MSG_9_1(buf) \ -t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(7),TOB(0),TOB(0)) ); \ -buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(4),TOB(6)) ); - -#define LOAD_MSG_9_2(buf) \ -buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(6),TOB(4),TOB(2)) ); - -#define LOAD_MSG_9_3(buf) \ -t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(3),TOB(5),TOB(0)) ); \ -buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(7)) ); - -#define LOAD_MSG_9_4(buf) \ -t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(7)) ); \ -buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(6),TOB(0)) ); - -#endif - diff --git a/Modules/_blake2/impl/blake2s-ref.c b/Modules/_blake2/impl/blake2s-ref.c deleted file mode 100644 index ab86cc1b34e..00000000000 --- a/Modules/_blake2/impl/blake2s-ref.c +++ /dev/null @@ -1,368 +0,0 @@ -/* - BLAKE2 reference source code package - reference C implementations - - Written in 2012 by Samuel Neves - - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . -*/ - -#include -#include -#include - -#include "blake2.h" -#include "blake2-impl.h" - -static const uint32_t blake2s_IV[8] = -{ - 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, - 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL -}; - -static const uint8_t blake2s_sigma[10][16] = -{ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , -}; - -static inline int blake2s_set_lastnode( blake2s_state *S ) -{ - S->f[1] = ~0U; - return 0; -} - -static inline int blake2s_clear_lastnode( blake2s_state *S ) -{ - S->f[1] = 0U; - return 0; -} - -/* Some helper functions, not necessarily useful */ -static inline int blake2s_set_lastblock( blake2s_state *S ) -{ - if( S->last_node ) blake2s_set_lastnode( S ); - - S->f[0] = ~0U; - return 0; -} - -static inline int blake2s_clear_lastblock( blake2s_state *S ) -{ - if( S->last_node ) blake2s_clear_lastnode( S ); - - S->f[0] = 0U; - return 0; -} - -static inline int blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) -{ - S->t[0] += inc; - S->t[1] += ( S->t[0] < inc ); - return 0; -} - -// Parameter-related functions -static inline int blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length ) -{ - P->digest_length = digest_length; - return 0; -} - -static inline int blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout ) -{ - P->fanout = fanout; - return 0; -} - -static inline int blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth ) -{ - P->depth = depth; - return 0; -} - -static inline int blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length ) -{ - store32( &P->leaf_length, leaf_length ); - return 0; -} - -static inline int blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset ) -{ - store48( P->node_offset, node_offset ); - return 0; -} - -static inline int blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth ) -{ - P->node_depth = node_depth; - return 0; -} - -static inline int blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length ) -{ - P->inner_length = inner_length; - return 0; -} - -static inline int blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] ) -{ - memcpy( P->salt, salt, BLAKE2S_SALTBYTES ); - return 0; -} - -static inline int blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] ) -{ - memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES ); - return 0; -} - -static inline int blake2s_init0( blake2s_state *S ) -{ - memset( S, 0, sizeof( blake2s_state ) ); - - for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; - - return 0; -} - -#if defined(__cplusplus) -extern "C" { -#endif - int blake2s_init( blake2s_state *S, size_t outlen ); - int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); - int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); - int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); -#if defined(__cplusplus) -} -#endif - -/* init2 xors IV with input parameter block */ -int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) -{ - blake2s_init0( S ); - uint32_t *p = ( uint32_t * )( P ); - - /* IV XOR ParamBlock */ - for( size_t i = 0; i < 8; ++i ) - S->h[i] ^= load32( &p[i] ); - - S->outlen = P->digest_length; - return 0; -} - - -// Sequential blake2s initialization -int blake2s_init( blake2s_state *S, size_t outlen ) -{ - blake2s_param P[1]; - - /* Move interval verification here? */ - if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; - - P->digest_length = ( uint8_t) outlen; - P->key_length = 0; - P->fanout = 1; - P->depth = 1; - store32( &P->leaf_length, 0 ); - store48( &P->node_offset, 0 ); - P->node_depth = 0; - P->inner_length = 0; - // memset(P->reserved, 0, sizeof(P->reserved) ); - memset( P->salt, 0, sizeof( P->salt ) ); - memset( P->personal, 0, sizeof( P->personal ) ); - return blake2s_init_param( S, P ); -} - -int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ) -{ - blake2s_param P[1]; - - if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; - - if ( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1; - - P->digest_length = ( uint8_t ) outlen; - P->key_length = ( uint8_t ) keylen; - P->fanout = 1; - P->depth = 1; - store32( &P->leaf_length, 0 ); - store48( &P->node_offset, 0 ); - P->node_depth = 0; - P->inner_length = 0; - // memset(P->reserved, 0, sizeof(P->reserved) ); - memset( P->salt, 0, sizeof( P->salt ) ); - memset( P->personal, 0, sizeof( P->personal ) ); - - if( blake2s_init_param( S, P ) < 0 ) return -1; - - { - uint8_t block[BLAKE2S_BLOCKBYTES]; - memset( block, 0, BLAKE2S_BLOCKBYTES ); - memcpy( block, key, keylen ); - blake2s_update( S, block, BLAKE2S_BLOCKBYTES ); - secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ - } - return 0; -} - -static int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) -{ - uint32_t m[16]; - uint32_t v[16]; - - for( size_t i = 0; i < 16; ++i ) - m[i] = load32( block + i * sizeof( m[i] ) ); - - for( size_t i = 0; i < 8; ++i ) - v[i] = S->h[i]; - - v[ 8] = blake2s_IV[0]; - v[ 9] = blake2s_IV[1]; - v[10] = blake2s_IV[2]; - v[11] = blake2s_IV[3]; - v[12] = S->t[0] ^ blake2s_IV[4]; - v[13] = S->t[1] ^ blake2s_IV[5]; - v[14] = S->f[0] ^ blake2s_IV[6]; - v[15] = S->f[1] ^ blake2s_IV[7]; -#define G(r,i,a,b,c,d) \ - do { \ - a = a + b + m[blake2s_sigma[r][2*i+0]]; \ - d = rotr32(d ^ a, 16); \ - c = c + d; \ - b = rotr32(b ^ c, 12); \ - a = a + b + m[blake2s_sigma[r][2*i+1]]; \ - d = rotr32(d ^ a, 8); \ - c = c + d; \ - b = rotr32(b ^ c, 7); \ - } while(0) -#define ROUND(r) \ - do { \ - G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ - G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ - G(r,2,v[ 2],v[ 6],v[10],v[14]); \ - G(r,3,v[ 3],v[ 7],v[11],v[15]); \ - G(r,4,v[ 0],v[ 5],v[10],v[15]); \ - G(r,5,v[ 1],v[ 6],v[11],v[12]); \ - G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ - G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ - } while(0) - ROUND( 0 ); - ROUND( 1 ); - ROUND( 2 ); - ROUND( 3 ); - ROUND( 4 ); - ROUND( 5 ); - ROUND( 6 ); - ROUND( 7 ); - ROUND( 8 ); - ROUND( 9 ); - - for( size_t i = 0; i < 8; ++i ) - S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; - -#undef G -#undef ROUND - return 0; -} - - -int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ) -{ - while( inlen > 0 ) - { - uint32_t left = S->buflen; - uint32_t fill = 2 * BLAKE2S_BLOCKBYTES - left; - - if( inlen > fill ) - { - memcpy( S->buf + left, in, fill ); // Fill buffer - S->buflen += fill; - blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); - blake2s_compress( S, S->buf ); // Compress - memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left - S->buflen -= BLAKE2S_BLOCKBYTES; - in += fill; - inlen -= fill; - } - else // inlen <= fill - { - memcpy( S->buf + left, in, inlen ); - S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress - in += inlen; - inlen -= inlen; - } - } - - return 0; -} - -int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ) -{ - uint8_t buffer[BLAKE2S_OUTBYTES]; - size_t i; - - if(S->outlen != outlen) return -1; - - if( S->buflen > BLAKE2S_BLOCKBYTES ) - { - blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); - blake2s_compress( S, S->buf ); - S->buflen -= BLAKE2S_BLOCKBYTES; - memmove( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen ); - } - - blake2s_increment_counter( S, ( uint32_t )S->buflen ); - blake2s_set_lastblock( S ); - memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ - blake2s_compress( S, S->buf ); - - for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ - store32( buffer + sizeof( S->h[i] ) * i, S->h[i] ); - - memcpy( out, buffer, outlen ); - return 0; -} - -int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) -{ - blake2s_state S[1]; - - /* Verify parameters */ - if ( NULL == in && inlen > 0 ) return -1; - - if ( NULL == out ) return -1; - - if ( NULL == key && keylen > 0 ) return -1; - - if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; - - if( keylen > BLAKE2S_KEYBYTES ) return -1; - - if( keylen > 0 ) - { - if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1; - } - else - { - if( blake2s_init( S, outlen ) < 0 ) return -1; - } - - if( blake2s_update( S, ( uint8_t * )in, inlen ) < 0) return -1; - return blake2s_final( S, out, outlen ); -} - diff --git a/Modules/_blake2/impl/blake2s-round.h b/Modules/_blake2/impl/blake2s-round.h deleted file mode 100644 index 3af4be35bee..00000000000 --- a/Modules/_blake2/impl/blake2s-round.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - BLAKE2 reference source code package - optimized C implementations - - Written in 2012 by Samuel Neves - - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . -*/ -#pragma once -#ifndef __BLAKE2S_ROUND_H__ -#define __BLAKE2S_ROUND_H__ - -#define LOAD(p) _mm_load_si128( (__m128i *)(p) ) -#define STORE(p,r) _mm_store_si128((__m128i *)(p), r) - -#define LOADU(p) _mm_loadu_si128( (__m128i *)(p) ) -#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) - -#define TOF(reg) _mm_castsi128_ps((reg)) -#define TOI(reg) _mm_castps_si128((reg)) - -#define LIKELY(x) __builtin_expect((x),1) - - -/* Microarchitecture-specific macros */ -#ifndef HAVE_XOP -#ifdef HAVE_SSSE3 -#define _mm_roti_epi32(r, c) ( \ - (8==-(c)) ? _mm_shuffle_epi8(r,r8) \ - : (16==-(c)) ? _mm_shuffle_epi8(r,r16) \ - : _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) ) -#else -#define _mm_roti_epi32(r, c) _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) -#endif -#else -/* ... */ -#endif - - -#define G1(row1,row2,row3,row4,buf) \ - row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \ - row4 = _mm_xor_si128( row4, row1 ); \ - row4 = _mm_roti_epi32(row4, -16); \ - row3 = _mm_add_epi32( row3, row4 ); \ - row2 = _mm_xor_si128( row2, row3 ); \ - row2 = _mm_roti_epi32(row2, -12); - -#define G2(row1,row2,row3,row4,buf) \ - row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \ - row4 = _mm_xor_si128( row4, row1 ); \ - row4 = _mm_roti_epi32(row4, -8); \ - row3 = _mm_add_epi32( row3, row4 ); \ - row2 = _mm_xor_si128( row2, row3 ); \ - row2 = _mm_roti_epi32(row2, -7); - -#define DIAGONALIZE(row1,row2,row3,row4) \ - row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(2,1,0,3) ); \ - row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \ - row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(0,3,2,1) ); - -#define UNDIAGONALIZE(row1,row2,row3,row4) \ - row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(0,3,2,1) ); \ - row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \ - row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(2,1,0,3) ); - -#if defined(HAVE_XOP) -#include "blake2s-load-xop.h" -#elif defined(HAVE_SSE4_1) -#include "blake2s-load-sse41.h" -#else -#include "blake2s-load-sse2.h" -#endif - -#define ROUND(r) \ - LOAD_MSG_ ##r ##_1(buf1); \ - G1(row1,row2,row3,row4,buf1); \ - LOAD_MSG_ ##r ##_2(buf2); \ - G2(row1,row2,row3,row4,buf2); \ - DIAGONALIZE(row1,row2,row3,row4); \ - LOAD_MSG_ ##r ##_3(buf3); \ - G1(row1,row2,row3,row4,buf3); \ - LOAD_MSG_ ##r ##_4(buf4); \ - G2(row1,row2,row3,row4,buf4); \ - UNDIAGONALIZE(row1,row2,row3,row4); \ - -#endif - diff --git a/Modules/_blake2/impl/blake2s.c b/Modules/_blake2/impl/blake2s.c deleted file mode 100644 index e7f63fd274f..00000000000 --- a/Modules/_blake2/impl/blake2s.c +++ /dev/null @@ -1,415 +0,0 @@ -/* - BLAKE2 reference source code package - optimized C implementations - - Written in 2012 by Samuel Neves - - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . -*/ - -#include -#include -#include - -#include "blake2.h" -#include "blake2-impl.h" - -#include "blake2-config.h" - -#if defined(_MSC_VER) -#include -#endif - -#if defined(HAVE_SSE2) -#include -// MSVC only defines _mm_set_epi64x for x86_64... -#if defined(_MSC_VER) && !defined(_M_X64) && !defined(__clang__) -static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 ) -{ - return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 ); -} -#endif -#endif - - -#if defined(HAVE_SSSE3) -#include -#endif -#if defined(HAVE_SSE4_1) -#include -#endif -#if defined(HAVE_AVX) -#include -#endif -#if defined(HAVE_XOP) && !defined(_MSC_VER) -#include -#endif - -#include "blake2s-round.h" - -static const uint32_t blake2s_IV[8] = -{ - 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, - 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL -}; - -static const uint8_t blake2s_sigma[10][16] = -{ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , -}; - - -/* Some helper functions, not necessarily useful */ -static inline int blake2s_set_lastnode( blake2s_state *S ) -{ - S->f[1] = ~0U; - return 0; -} - -static inline int blake2s_clear_lastnode( blake2s_state *S ) -{ - S->f[1] = 0U; - return 0; -} - -static inline int blake2s_set_lastblock( blake2s_state *S ) -{ - if( S->last_node ) blake2s_set_lastnode( S ); - - S->f[0] = ~0U; - return 0; -} - -static inline int blake2s_clear_lastblock( blake2s_state *S ) -{ - if( S->last_node ) blake2s_clear_lastnode( S ); - - S->f[0] = 0U; - return 0; -} - -static inline int blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) -{ - uint64_t t = ( ( uint64_t )S->t[1] << 32 ) | S->t[0]; - t += inc; - S->t[0] = ( uint32_t )( t >> 0 ); - S->t[1] = ( uint32_t )( t >> 32 ); - return 0; -} - - -// Parameter-related functions -static inline int blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length ) -{ - P->digest_length = digest_length; - return 0; -} - -static inline int blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout ) -{ - P->fanout = fanout; - return 0; -} - -static inline int blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth ) -{ - P->depth = depth; - return 0; -} - -static inline int blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length ) -{ - P->leaf_length = leaf_length; - return 0; -} - -static inline int blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset ) -{ - store48( P->node_offset, node_offset ); - return 0; -} - -static inline int blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth ) -{ - P->node_depth = node_depth; - return 0; -} - -static inline int blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length ) -{ - P->inner_length = inner_length; - return 0; -} - -static inline int blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] ) -{ - memcpy( P->salt, salt, BLAKE2S_SALTBYTES ); - return 0; -} - -static inline int blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] ) -{ - memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES ); - return 0; -} - -static inline int blake2s_init0( blake2s_state *S ) -{ - memset( S, 0, sizeof( blake2s_state ) ); - - for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; - - return 0; -} - -#if defined(__cplusplus) -extern "C" { -#endif - int blake2s_init( blake2s_state *S, size_t outlen ); - int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); - int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); - int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); -#if defined(__cplusplus) -} -#endif - - -/* init2 xors IV with input parameter block */ -int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) -{ - uint8_t *p, *h, *v; - //blake2s_init0( S ); - v = ( uint8_t * )( blake2s_IV ); - h = ( uint8_t * )( S->h ); - p = ( uint8_t * )( P ); - /* IV XOR ParamBlock */ - memset( S, 0, sizeof( blake2s_state ) ); - - for( int i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; - - S->outlen = P->digest_length; - return 0; -} - - -/* Some sort of default parameter block initialization, for sequential blake2s */ -int blake2s_init( blake2s_state *S, size_t outlen ) -{ - if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; - - const blake2s_param P = - { - outlen, - 0, - 1, - 1, - 0, - {0}, - 0, - 0, - {0}, - {0} - }; - return blake2s_init_param( S, &P ); -} - - -int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ) -{ - if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; - - if ( ( !key ) || ( !keylen ) || keylen > BLAKE2S_KEYBYTES ) return -1; - - const blake2s_param P = - { - outlen, - keylen, - 1, - 1, - 0, - {0}, - 0, - 0, - {0}, - {0} - }; - - if( blake2s_init_param( S, &P ) < 0 ) - return -1; - - { - uint8_t block[BLAKE2S_BLOCKBYTES]; - memset( block, 0, BLAKE2S_BLOCKBYTES ); - memcpy( block, key, keylen ); - blake2s_update( S, block, BLAKE2S_BLOCKBYTES ); - secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ - } - return 0; -} - - -static inline int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) -{ - __m128i row1, row2, row3, row4; - __m128i buf1, buf2, buf3, buf4; -#if defined(HAVE_SSE4_1) - __m128i t0, t1; -#if !defined(HAVE_XOP) - __m128i t2; -#endif -#endif - __m128i ff0, ff1; -#if defined(HAVE_SSSE3) && !defined(HAVE_XOP) - const __m128i r8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 ); - const __m128i r16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 ); -#endif -#if defined(HAVE_SSE4_1) - const __m128i m0 = LOADU( block + 00 ); - const __m128i m1 = LOADU( block + 16 ); - const __m128i m2 = LOADU( block + 32 ); - const __m128i m3 = LOADU( block + 48 ); -#else - const uint32_t m0 = ( ( uint32_t * )block )[ 0]; - const uint32_t m1 = ( ( uint32_t * )block )[ 1]; - const uint32_t m2 = ( ( uint32_t * )block )[ 2]; - const uint32_t m3 = ( ( uint32_t * )block )[ 3]; - const uint32_t m4 = ( ( uint32_t * )block )[ 4]; - const uint32_t m5 = ( ( uint32_t * )block )[ 5]; - const uint32_t m6 = ( ( uint32_t * )block )[ 6]; - const uint32_t m7 = ( ( uint32_t * )block )[ 7]; - const uint32_t m8 = ( ( uint32_t * )block )[ 8]; - const uint32_t m9 = ( ( uint32_t * )block )[ 9]; - const uint32_t m10 = ( ( uint32_t * )block )[10]; - const uint32_t m11 = ( ( uint32_t * )block )[11]; - const uint32_t m12 = ( ( uint32_t * )block )[12]; - const uint32_t m13 = ( ( uint32_t * )block )[13]; - const uint32_t m14 = ( ( uint32_t * )block )[14]; - const uint32_t m15 = ( ( uint32_t * )block )[15]; -#endif - row1 = ff0 = LOADU( &S->h[0] ); - row2 = ff1 = LOADU( &S->h[4] ); - row3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A ); - row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOADU( &S->t[0] ) ); - ROUND( 0 ); - ROUND( 1 ); - ROUND( 2 ); - ROUND( 3 ); - ROUND( 4 ); - ROUND( 5 ); - ROUND( 6 ); - ROUND( 7 ); - ROUND( 8 ); - ROUND( 9 ); - STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) ); - STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) ); - return 0; -} - - -int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ) -{ - while( inlen > 0 ) - { - size_t left = S->buflen; - size_t fill = 2 * BLAKE2S_BLOCKBYTES - left; - - if( inlen > fill ) - { - memcpy( S->buf + left, in, fill ); // Fill buffer - S->buflen += fill; - blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); - blake2s_compress( S, S->buf ); // Compress - memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left - S->buflen -= BLAKE2S_BLOCKBYTES; - in += fill; - inlen -= fill; - } - else /* inlen <= fill */ - { - memcpy( S->buf + left, in, inlen ); - S->buflen += inlen; // Be lazy, do not compress - in += inlen; - inlen -= inlen; - } - } - - return 0; -} - - -int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ) -{ - uint8_t buffer[BLAKE2S_OUTBYTES]; - - if(outlen != S->outlen ) return -1; - - if( S->buflen > BLAKE2S_BLOCKBYTES ) - { - blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); - blake2s_compress( S, S->buf ); - S->buflen -= BLAKE2S_BLOCKBYTES; - memmove( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen ); - } - - blake2s_increment_counter( S, ( uint32_t )S->buflen ); - blake2s_set_lastblock( S ); - memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ - blake2s_compress( S, S->buf ); - - for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ - store32( buffer + sizeof( S->h[i] ) * i, S->h[i] ); - - memcpy( out, buffer, outlen ); - return 0; -} - -int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) -{ - blake2s_state S[1]; - - /* Verify parameters */ - if ( NULL == in && inlen > 0 ) return -1; - - if ( NULL == out ) return -1; - - if ( NULL == key && keylen > 0) return -1; - - if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; - - if( keylen > BLAKE2S_KEYBYTES ) return -1; - - if( keylen > 0 ) - { - if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1; - } - else - { - if( blake2s_init( S, outlen ) < 0 ) return -1; - } - - if( blake2s_update( S, ( uint8_t * )in, inlen ) < 0) return -1; - return blake2s_final( S, out, outlen ); -} - -#if defined(SUPERCOP) -int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) -{ - return blake2s( out, in, NULL, BLAKE2S_OUTBYTES, (size_t)inlen, 0 ); -} -#endif - diff --git a/Modules/_hacl/Hacl_Hash_Blake2b.c b/Modules/_hacl/Hacl_Hash_Blake2b.c new file mode 100644 index 00000000000..e13f16fd971 --- /dev/null +++ b/Modules/_hacl/Hacl_Hash_Blake2b.c @@ -0,0 +1,1493 @@ +/* MIT License + * + * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation + * Copyright (c) 2022-2023 HACL* Contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "internal/Hacl_Hash_Blake2b.h" + +#include "internal/Hacl_Impl_Blake2_Constants.h" +#include "lib_memzero0.h" + +static void +update_block( + uint64_t *wv, + uint64_t *hash, + bool flag, + bool last_node, + FStar_UInt128_uint128 totlen, + uint8_t *d +) +{ + uint64_t m_w[16U] = { 0U }; + KRML_MAYBE_FOR16(i, + 0U, + 16U, + 1U, + uint64_t *os = m_w; + uint8_t *bj = d + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + uint64_t mask[4U] = { 0U }; + uint64_t wv_14; + if (flag) + { + wv_14 = 0xFFFFFFFFFFFFFFFFULL; + } + else + { + wv_14 = 0ULL; + } + uint64_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFFFFFFFFFULL; + } + else + { + wv_15 = 0ULL; + } + mask[0U] = FStar_UInt128_uint128_to_uint64(totlen); + mask[1U] = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(totlen, 64U)); + mask[2U] = wv_14; + mask[3U] = wv_15; + memcpy(wv, hash, 16U * sizeof (uint64_t)); + uint64_t *wv3 = wv + 12U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv3; + uint64_t x = wv3[i] ^ mask[i]; + os[i] = x;); + KRML_MAYBE_FOR12(i0, + 0U, + 12U, + 1U, + uint32_t start_idx = i0 % 10U * 16U; + uint64_t m_st[16U] = { 0U }; + uint64_t *r0 = m_st; + uint64_t *r1 = m_st + 4U; + uint64_t *r20 = m_st + 8U; + uint64_t *r30 = m_st + 12U; + uint32_t s0 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 0U]; + uint32_t s1 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 1U]; + uint32_t s2 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 2U]; + uint32_t s3 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 3U]; + uint32_t s4 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 4U]; + uint32_t s5 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 5U]; + uint32_t s6 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 6U]; + uint32_t s7 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 7U]; + uint32_t s8 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 8U]; + uint32_t s9 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 9U]; + uint32_t s10 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 10U]; + uint32_t s11 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 11U]; + uint32_t s12 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 12U]; + uint32_t s13 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 13U]; + uint32_t s14 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 14U]; + uint32_t s15 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 15U]; + uint64_t uu____0 = m_w[s2]; + uint64_t uu____1 = m_w[s4]; + uint64_t uu____2 = m_w[s6]; + r0[0U] = m_w[s0]; + r0[1U] = uu____0; + r0[2U] = uu____1; + r0[3U] = uu____2; + uint64_t uu____3 = m_w[s3]; + uint64_t uu____4 = m_w[s5]; + uint64_t uu____5 = m_w[s7]; + r1[0U] = m_w[s1]; + r1[1U] = uu____3; + r1[2U] = uu____4; + r1[3U] = uu____5; + uint64_t uu____6 = m_w[s10]; + uint64_t uu____7 = m_w[s12]; + uint64_t uu____8 = m_w[s14]; + r20[0U] = m_w[s8]; + r20[1U] = uu____6; + r20[2U] = uu____7; + r20[3U] = uu____8; + uint64_t uu____9 = m_w[s11]; + uint64_t uu____10 = m_w[s13]; + uint64_t uu____11 = m_w[s15]; + r30[0U] = m_w[s9]; + r30[1U] = uu____9; + r30[2U] = uu____10; + r30[3U] = uu____11; + uint64_t *x = m_st; + uint64_t *y = m_st + 4U; + uint64_t *z = m_st + 8U; + uint64_t *w = m_st + 12U; + uint32_t a = 0U; + uint32_t b0 = 1U; + uint32_t c0 = 2U; + uint32_t d10 = 3U; + uint64_t *wv_a0 = wv + a * 4U; + uint64_t *wv_b0 = wv + b0 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a0; + uint64_t x1 = wv_a0[i] + wv_b0[i]; + os[i] = x1;); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a0; + uint64_t x1 = wv_a0[i] + x[i]; + os[i] = x1;); + uint64_t *wv_a1 = wv + d10 * 4U; + uint64_t *wv_b1 = wv + a * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a1; + uint64_t x1 = wv_a1[i] ^ wv_b1[i]; + os[i] = x1;); + uint64_t *r10 = wv_a1; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = r10; + uint64_t x1 = r10[i]; + uint64_t x10 = x1 >> 32U | x1 << 32U; + os[i] = x10;); + uint64_t *wv_a2 = wv + c0 * 4U; + uint64_t *wv_b2 = wv + d10 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a2; + uint64_t x1 = wv_a2[i] + wv_b2[i]; + os[i] = x1;); + uint64_t *wv_a3 = wv + b0 * 4U; + uint64_t *wv_b3 = wv + c0 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a3; + uint64_t x1 = wv_a3[i] ^ wv_b3[i]; + os[i] = x1;); + uint64_t *r12 = wv_a3; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = r12; + uint64_t x1 = r12[i]; + uint64_t x10 = x1 >> 24U | x1 << 40U; + os[i] = x10;); + uint64_t *wv_a4 = wv + a * 4U; + uint64_t *wv_b4 = wv + b0 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a4; + uint64_t x1 = wv_a4[i] + wv_b4[i]; + os[i] = x1;); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a4; + uint64_t x1 = wv_a4[i] + y[i]; + os[i] = x1;); + uint64_t *wv_a5 = wv + d10 * 4U; + uint64_t *wv_b5 = wv + a * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a5; + uint64_t x1 = wv_a5[i] ^ wv_b5[i]; + os[i] = x1;); + uint64_t *r13 = wv_a5; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = r13; + uint64_t x1 = r13[i]; + uint64_t x10 = x1 >> 16U | x1 << 48U; + os[i] = x10;); + uint64_t *wv_a6 = wv + c0 * 4U; + uint64_t *wv_b6 = wv + d10 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a6; + uint64_t x1 = wv_a6[i] + wv_b6[i]; + os[i] = x1;); + uint64_t *wv_a7 = wv + b0 * 4U; + uint64_t *wv_b7 = wv + c0 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a7; + uint64_t x1 = wv_a7[i] ^ wv_b7[i]; + os[i] = x1;); + uint64_t *r14 = wv_a7; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = r14; + uint64_t x1 = r14[i]; + uint64_t x10 = x1 >> 63U | x1 << 1U; + os[i] = x10;); + uint64_t *r15 = wv + 4U; + uint64_t *r21 = wv + 8U; + uint64_t *r31 = wv + 12U; + uint64_t *r110 = r15; + uint64_t x00 = r110[1U]; + uint64_t x10 = r110[2U]; + uint64_t x20 = r110[3U]; + uint64_t x30 = r110[0U]; + r110[0U] = x00; + r110[1U] = x10; + r110[2U] = x20; + r110[3U] = x30; + uint64_t *r111 = r21; + uint64_t x01 = r111[2U]; + uint64_t x11 = r111[3U]; + uint64_t x21 = r111[0U]; + uint64_t x31 = r111[1U]; + r111[0U] = x01; + r111[1U] = x11; + r111[2U] = x21; + r111[3U] = x31; + uint64_t *r112 = r31; + uint64_t x02 = r112[3U]; + uint64_t x12 = r112[0U]; + uint64_t x22 = r112[1U]; + uint64_t x32 = r112[2U]; + r112[0U] = x02; + r112[1U] = x12; + r112[2U] = x22; + r112[3U] = x32; + uint32_t a0 = 0U; + uint32_t b = 1U; + uint32_t c = 2U; + uint32_t d1 = 3U; + uint64_t *wv_a = wv + a0 * 4U; + uint64_t *wv_b8 = wv + b * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a; + uint64_t x1 = wv_a[i] + wv_b8[i]; + os[i] = x1;); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a; + uint64_t x1 = wv_a[i] + z[i]; + os[i] = x1;); + uint64_t *wv_a8 = wv + d1 * 4U; + uint64_t *wv_b9 = wv + a0 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a8; + uint64_t x1 = wv_a8[i] ^ wv_b9[i]; + os[i] = x1;); + uint64_t *r16 = wv_a8; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = r16; + uint64_t x1 = r16[i]; + uint64_t x13 = x1 >> 32U | x1 << 32U; + os[i] = x13;); + uint64_t *wv_a9 = wv + c * 4U; + uint64_t *wv_b10 = wv + d1 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a9; + uint64_t x1 = wv_a9[i] + wv_b10[i]; + os[i] = x1;); + uint64_t *wv_a10 = wv + b * 4U; + uint64_t *wv_b11 = wv + c * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a10; + uint64_t x1 = wv_a10[i] ^ wv_b11[i]; + os[i] = x1;); + uint64_t *r17 = wv_a10; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = r17; + uint64_t x1 = r17[i]; + uint64_t x13 = x1 >> 24U | x1 << 40U; + os[i] = x13;); + uint64_t *wv_a11 = wv + a0 * 4U; + uint64_t *wv_b12 = wv + b * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a11; + uint64_t x1 = wv_a11[i] + wv_b12[i]; + os[i] = x1;); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a11; + uint64_t x1 = wv_a11[i] + w[i]; + os[i] = x1;); + uint64_t *wv_a12 = wv + d1 * 4U; + uint64_t *wv_b13 = wv + a0 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a12; + uint64_t x1 = wv_a12[i] ^ wv_b13[i]; + os[i] = x1;); + uint64_t *r18 = wv_a12; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = r18; + uint64_t x1 = r18[i]; + uint64_t x13 = x1 >> 16U | x1 << 48U; + os[i] = x13;); + uint64_t *wv_a13 = wv + c * 4U; + uint64_t *wv_b14 = wv + d1 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a13; + uint64_t x1 = wv_a13[i] + wv_b14[i]; + os[i] = x1;); + uint64_t *wv_a14 = wv + b * 4U; + uint64_t *wv_b = wv + c * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = wv_a14; + uint64_t x1 = wv_a14[i] ^ wv_b[i]; + os[i] = x1;); + uint64_t *r19 = wv_a14; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = r19; + uint64_t x1 = r19[i]; + uint64_t x13 = x1 >> 63U | x1 << 1U; + os[i] = x13;); + uint64_t *r113 = wv + 4U; + uint64_t *r2 = wv + 8U; + uint64_t *r3 = wv + 12U; + uint64_t *r11 = r113; + uint64_t x03 = r11[3U]; + uint64_t x13 = r11[0U]; + uint64_t x23 = r11[1U]; + uint64_t x33 = r11[2U]; + r11[0U] = x03; + r11[1U] = x13; + r11[2U] = x23; + r11[3U] = x33; + uint64_t *r114 = r2; + uint64_t x04 = r114[2U]; + uint64_t x14 = r114[3U]; + uint64_t x24 = r114[0U]; + uint64_t x34 = r114[1U]; + r114[0U] = x04; + r114[1U] = x14; + r114[2U] = x24; + r114[3U] = x34; + uint64_t *r115 = r3; + uint64_t x0 = r115[1U]; + uint64_t x1 = r115[2U]; + uint64_t x2 = r115[3U]; + uint64_t x3 = r115[0U]; + r115[0U] = x0; + r115[1U] = x1; + r115[2U] = x2; + r115[3U] = x3;); + uint64_t *s0 = hash; + uint64_t *s1 = hash + 4U; + uint64_t *r0 = wv; + uint64_t *r1 = wv + 4U; + uint64_t *r2 = wv + 8U; + uint64_t *r3 = wv + 12U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = s0; + uint64_t x = s0[i] ^ r0[i]; + os[i] = x;); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = s0; + uint64_t x = s0[i] ^ r2[i]; + os[i] = x;); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = s1; + uint64_t x = s1[i] ^ r1[i]; + os[i] = x;); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = s1; + uint64_t x = s1[i] ^ r3[i]; + os[i] = x;); +} + +void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn) +{ + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = 64U, .key_length = 0U, .fanout = 1U, .depth = 1U, .leaf_length = 0U, + .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal + }; + uint64_t tmp[8U] = { 0U }; + uint64_t *r0 = hash; + uint64_t *r1 = hash + 4U; + uint64_t *r2 = hash + 8U; + uint64_t *r3 = hash + 12U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint8_t kk1 = (uint8_t)kk; + uint8_t nn1 = (uint8_t)nn; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk1 + << 8U + ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); + tmp[1U] = p.node_offset; + tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; +} + +static void init_with_params(uint64_t *hash, Hacl_Hash_Blake2b_blake2_params p) +{ + uint64_t tmp[8U] = { 0U }; + uint64_t *r0 = hash; + uint64_t *r1 = hash + 4U; + uint64_t *r2 = hash + 8U; + uint64_t *r3 = hash + 12U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint8_t kk = p.key_length; + uint8_t nn = p.digest_length; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn + ^ + ((uint64_t)kk + << 8U + ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); + tmp[1U] = p.node_offset; + tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; +} + +static void update_key(uint64_t *wv, uint64_t *hash, uint32_t kk, uint8_t *k, uint32_t ll) +{ + FStar_UInt128_uint128 lb = FStar_UInt128_uint64_to_uint128((uint64_t)128U); + uint8_t b[128U] = { 0U }; + memcpy(b, k, kk * sizeof (uint8_t)); + if (ll == 0U) + { + update_block(wv, hash, true, false, lb, b); + } + else + { + update_block(wv, hash, false, false, lb, b); + } + Lib_Memzero0_memzero(b, 128U, uint8_t, void *); +} + +void +Hacl_Hash_Blake2b_update_multi( + uint32_t len, + uint64_t *wv, + uint64_t *hash, + FStar_UInt128_uint128 prev, + uint8_t *blocks, + uint32_t nb +) +{ + KRML_MAYBE_UNUSED_VAR(len); + for (uint32_t i = 0U; i < nb; i++) + { + FStar_UInt128_uint128 + totlen = + FStar_UInt128_add_mod(prev, + FStar_UInt128_uint64_to_uint128((uint64_t)((i + 1U) * 128U))); + uint8_t *b = blocks + i * 128U; + update_block(wv, hash, false, false, totlen, b); + } +} + +void +Hacl_Hash_Blake2b_update_last( + uint32_t len, + uint64_t *wv, + uint64_t *hash, + bool last_node, + FStar_UInt128_uint128 prev, + uint32_t rem, + uint8_t *d +) +{ + uint8_t b[128U] = { 0U }; + uint8_t *last = d + len - rem; + memcpy(b, last, rem * sizeof (uint8_t)); + FStar_UInt128_uint128 + totlen = FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)len)); + update_block(wv, hash, true, last_node, totlen, b); + Lib_Memzero0_memzero(b, 128U, uint8_t, void *); +} + +static void +update_blocks( + uint32_t len, + uint64_t *wv, + uint64_t *hash, + FStar_UInt128_uint128 prev, + uint8_t *blocks +) +{ + uint32_t nb0 = len / 128U; + uint32_t rem0 = len % 128U; + uint32_t nb; + if (rem0 == 0U && nb0 > 0U) + { + nb = nb0 - 1U; + } + else + { + nb = nb0; + } + uint32_t rem; + if (rem0 == 0U && nb0 > 0U) + { + rem = 128U; + } + else + { + rem = rem0; + } + Hacl_Hash_Blake2b_update_multi(len, wv, hash, prev, blocks, nb); + Hacl_Hash_Blake2b_update_last(len, wv, hash, false, prev, rem, blocks); +} + +static inline void +update(uint64_t *wv, uint64_t *hash, uint32_t kk, uint8_t *k, uint32_t ll, uint8_t *d) +{ + FStar_UInt128_uint128 lb = FStar_UInt128_uint64_to_uint128((uint64_t)128U); + if (kk > 0U) + { + update_key(wv, hash, kk, k, ll); + if (!(ll == 0U)) + { + update_blocks(ll, wv, hash, lb, d); + return; + } + return; + } + update_blocks(ll, wv, hash, FStar_UInt128_uint64_to_uint128((uint64_t)0U), d); +} + +void Hacl_Hash_Blake2b_finish(uint32_t nn, uint8_t *output, uint64_t *hash) +{ + uint8_t b[64U] = { 0U }; + uint8_t *first = b; + uint8_t *second = b + 32U; + uint64_t *row0 = hash; + uint64_t *row1 = hash + 4U; + KRML_MAYBE_FOR4(i, 0U, 4U, 1U, store64_le(first + i * 8U, row0[i]);); + KRML_MAYBE_FOR4(i, 0U, 4U, 1U, store64_le(second + i * 8U, row1[i]);); + uint8_t *final = b; + memcpy(output, final, nn * sizeof (uint8_t)); + Lib_Memzero0_memzero(b, 64U, uint8_t, void *); +} + +static Hacl_Hash_Blake2b_state_t +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) +{ + uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); + uint64_t *wv = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); + uint64_t *b = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); + Hacl_Hash_Blake2b_block_state_t + block_state = + { + .fst = kk.key_length, + .snd = kk.digest_length, + .thd = kk.last_node, + .f3 = { .fst = wv, .snd = b } + }; + uint8_t kk10 = kk.key_length; + uint32_t ite; + if (kk10 != 0U) + { + ite = 128U; + } + else + { + ite = 0U; + } + Hacl_Hash_Blake2b_state_t + s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + Hacl_Hash_Blake2b_state_t + *p = (Hacl_Hash_Blake2b_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_state_t)); + p[0U] = s; + Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; + uint8_t kk1 = p1->key_length; + uint8_t nn = p1->digest_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t kk2 = (uint32_t)i.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; + init_with_params(block_state.f3.snd, pv); + return p; +} + +/** + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 32 for S, 64 for B. +- The digest_length must not exceed 32 for S, 64 for B. + +*/ +Hacl_Hash_Blake2b_state_t +*Hacl_Hash_Blake2b_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +) +{ + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + Hacl_Hash_Blake2b_index + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); +} + +/** + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 32 for S, 64 for B. + +*/ +Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc_with_key(uint8_t *k, uint8_t kk) +{ + uint8_t nn = 64U; + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = i.digest_length, .key_length = i.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params p0 = p; + Hacl_Hash_Blake2b_state_t *s = Hacl_Hash_Blake2b_malloc_with_params_and_key(&p0, false, k); + return s; +} + +/** + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. +*/ +Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc(void) +{ + return Hacl_Hash_Blake2b_malloc_with_key(NULL, 0U); +} + +static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2b_state_t *s) +{ + Hacl_Hash_Blake2b_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk1 = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); +} + +static void reset_raw(Hacl_Hash_Blake2b_state_t *state, Hacl_Hash_Blake2b_params_and_key key) +{ + Hacl_Hash_Blake2b_state_t scrut = *state; + uint8_t *buf = scrut.buf; + Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; + bool last_node0 = block_state.thd; + uint8_t nn0 = block_state.snd; + uint8_t kk10 = block_state.fst; + Hacl_Hash_Blake2b_index + i = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; + KRML_MAYBE_UNUSED_VAR(i); + Hacl_Hash_Blake2b_blake2_params *p = key.fst; + uint8_t kk1 = p->key_length; + uint8_t nn = p->digest_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t kk2 = (uint32_t)i1.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + init_with_params(block_state.f3.snd, pv); + uint8_t kk11 = i.key_length; + uint32_t ite; + if (kk11 != 0U) + { + ite = 128U; + } + else + { + ite = 0U; + } + Hacl_Hash_Blake2b_state_t + tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp; +} + +/** + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. +*/ +void +Hacl_Hash_Blake2b_reset_with_key_and_params( + Hacl_Hash_Blake2b_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +) +{ + index_of_state(s); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); +} + +/** + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2b_reset_with_key(Hacl_Hash_Blake2b_state_t *s, uint8_t *k) +{ + Hacl_Hash_Blake2b_index idx = index_of_state(s); + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = idx.digest_length, .key_length = idx.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params p0 = p; + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); +} + +/** + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2b_reset(Hacl_Hash_Blake2b_state_t *s) +{ + Hacl_Hash_Blake2b_reset_with_key(s, NULL); +} + +/** + Update function; 0 = success, 1 = max length exceeded +*/ +Hacl_Streaming_Types_error_code +Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint32_t chunk_len) +{ + Hacl_Hash_Blake2b_state_t s = *state; + uint64_t total_len = s.total_len; + if ((uint64_t)chunk_len > 0xffffffffffffffffULL - total_len) + { + return Hacl_Streaming_Types_MaximumLengthExceeded; + } + uint32_t sz; + if (total_len % (uint64_t)128U == 0ULL && total_len > 0ULL) + { + sz = 128U; + } + else + { + sz = (uint32_t)(total_len % (uint64_t)128U); + } + if (chunk_len <= 128U - sz) + { + Hacl_Hash_Blake2b_state_t s1 = *state; + Hacl_Hash_Blake2b_block_state_t block_state1 = s1.block_state; + uint8_t *buf = s1.buf; + uint64_t total_len1 = s1.total_len; + uint32_t sz1; + if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) + { + sz1 = 128U; + } + else + { + sz1 = (uint32_t)(total_len1 % (uint64_t)128U); + } + uint8_t *buf2 = buf + sz1; + memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); + uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; + *state + = + ( + (Hacl_Hash_Blake2b_state_t){ + .block_state = block_state1, + .buf = buf, + .total_len = total_len2 + } + ); + } + else if (sz == 0U) + { + Hacl_Hash_Blake2b_state_t s1 = *state; + Hacl_Hash_Blake2b_block_state_t block_state1 = s1.block_state; + uint8_t *buf = s1.buf; + uint64_t total_len1 = s1.total_len; + uint32_t sz1; + if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) + { + sz1 = 128U; + } + else + { + sz1 = (uint32_t)(total_len1 % (uint64_t)128U); + } + if (!(sz1 == 0U)) + { + uint64_t prevlen = total_len1 - (uint64_t)sz1; + K____uint64_t___uint64_t_ acc = block_state1.f3; + uint64_t *wv = acc.fst; + uint64_t *hash = acc.snd; + uint32_t nb = 1U; + Hacl_Hash_Blake2b_update_multi(128U, + wv, + hash, + FStar_UInt128_uint64_to_uint128(prevlen), + buf, + nb); + } + uint32_t ite; + if ((uint64_t)chunk_len % (uint64_t)128U == 0ULL && (uint64_t)chunk_len > 0ULL) + { + ite = 128U; + } + else + { + ite = (uint32_t)((uint64_t)chunk_len % (uint64_t)128U); + } + uint32_t n_blocks = (chunk_len - ite) / 128U; + uint32_t data1_len = n_blocks * 128U; + uint32_t data2_len = chunk_len - data1_len; + uint8_t *data1 = chunk; + uint8_t *data2 = chunk + data1_len; + K____uint64_t___uint64_t_ acc = block_state1.f3; + uint64_t *wv = acc.fst; + uint64_t *hash = acc.snd; + uint32_t nb = data1_len / 128U; + Hacl_Hash_Blake2b_update_multi(data1_len, + wv, + hash, + FStar_UInt128_uint64_to_uint128(total_len1), + data1, + nb); + uint8_t *dst = buf; + memcpy(dst, data2, data2_len * sizeof (uint8_t)); + *state + = + ( + (Hacl_Hash_Blake2b_state_t){ + .block_state = block_state1, + .buf = buf, + .total_len = total_len1 + (uint64_t)chunk_len + } + ); + } + else + { + uint32_t diff = 128U - sz; + uint8_t *chunk1 = chunk; + uint8_t *chunk2 = chunk + diff; + Hacl_Hash_Blake2b_state_t s1 = *state; + Hacl_Hash_Blake2b_block_state_t block_state10 = s1.block_state; + uint8_t *buf0 = s1.buf; + uint64_t total_len10 = s1.total_len; + uint32_t sz10; + if (total_len10 % (uint64_t)128U == 0ULL && total_len10 > 0ULL) + { + sz10 = 128U; + } + else + { + sz10 = (uint32_t)(total_len10 % (uint64_t)128U); + } + uint8_t *buf2 = buf0 + sz10; + memcpy(buf2, chunk1, diff * sizeof (uint8_t)); + uint64_t total_len2 = total_len10 + (uint64_t)diff; + *state + = + ( + (Hacl_Hash_Blake2b_state_t){ + .block_state = block_state10, + .buf = buf0, + .total_len = total_len2 + } + ); + Hacl_Hash_Blake2b_state_t s10 = *state; + Hacl_Hash_Blake2b_block_state_t block_state1 = s10.block_state; + uint8_t *buf = s10.buf; + uint64_t total_len1 = s10.total_len; + uint32_t sz1; + if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) + { + sz1 = 128U; + } + else + { + sz1 = (uint32_t)(total_len1 % (uint64_t)128U); + } + if (!(sz1 == 0U)) + { + uint64_t prevlen = total_len1 - (uint64_t)sz1; + K____uint64_t___uint64_t_ acc = block_state1.f3; + uint64_t *wv = acc.fst; + uint64_t *hash = acc.snd; + uint32_t nb = 1U; + Hacl_Hash_Blake2b_update_multi(128U, + wv, + hash, + FStar_UInt128_uint64_to_uint128(prevlen), + buf, + nb); + } + uint32_t ite; + if + ((uint64_t)(chunk_len - diff) % (uint64_t)128U == 0ULL && (uint64_t)(chunk_len - diff) > 0ULL) + { + ite = 128U; + } + else + { + ite = (uint32_t)((uint64_t)(chunk_len - diff) % (uint64_t)128U); + } + uint32_t n_blocks = (chunk_len - diff - ite) / 128U; + uint32_t data1_len = n_blocks * 128U; + uint32_t data2_len = chunk_len - diff - data1_len; + uint8_t *data1 = chunk2; + uint8_t *data2 = chunk2 + data1_len; + K____uint64_t___uint64_t_ acc = block_state1.f3; + uint64_t *wv = acc.fst; + uint64_t *hash = acc.snd; + uint32_t nb = data1_len / 128U; + Hacl_Hash_Blake2b_update_multi(data1_len, + wv, + hash, + FStar_UInt128_uint64_to_uint128(total_len1), + data1, + nb); + uint8_t *dst = buf; + memcpy(dst, data2, data2_len * sizeof (uint8_t)); + *state + = + ( + (Hacl_Hash_Blake2b_state_t){ + .block_state = block_state1, + .buf = buf, + .total_len = total_len1 + (uint64_t)(chunk_len - diff) + } + ); + } + return Hacl_Streaming_Types_Success; +} + +/** + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. +*/ +uint8_t Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *s, uint8_t *dst) +{ + Hacl_Hash_Blake2b_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2b_state_t scrut = *s; + Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; + uint8_t *buf_ = scrut.buf; + uint64_t total_len = scrut.total_len; + uint32_t r; + if (total_len % (uint64_t)128U == 0ULL && total_len > 0ULL) + { + r = 128U; + } + else + { + r = (uint32_t)(total_len % (uint64_t)128U); + } + uint8_t *buf_1 = buf_; + uint64_t wv0[16U] = { 0U }; + uint64_t b[16U] = { 0U }; + Hacl_Hash_Blake2b_block_state_t + tmp_block_state = + { + .fst = i1.key_length, + .snd = i1.digest_length, + .thd = i1.last_node, + .f3 = { .fst = wv0, .snd = b } + }; + uint64_t *src_b = block_state.f3.snd; + uint64_t *dst_b = tmp_block_state.f3.snd; + memcpy(dst_b, src_b, 16U * sizeof (uint64_t)); + uint64_t prev_len = total_len - (uint64_t)r; + uint32_t ite; + if (r % 128U == 0U && r > 0U) + { + ite = 128U; + } + else + { + ite = r % 128U; + } + uint8_t *buf_last = buf_1 + r - ite; + uint8_t *buf_multi = buf_1; + K____uint64_t___uint64_t_ acc0 = tmp_block_state.f3; + uint64_t *wv1 = acc0.fst; + uint64_t *hash0 = acc0.snd; + uint32_t nb = 0U; + Hacl_Hash_Blake2b_update_multi(0U, + wv1, + hash0, + FStar_UInt128_uint64_to_uint128(prev_len), + buf_multi, + nb); + uint64_t prev_len_last = total_len - (uint64_t)r; + K____uint64_t___uint64_t_ acc = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; + uint64_t *wv = acc.fst; + uint64_t *hash = acc.snd; + Hacl_Hash_Blake2b_update_last(r, + wv, + hash, + last_node1, + FStar_UInt128_uint64_to_uint128(prev_len_last), + r, + buf_last); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2b_finish((uint32_t)nn1, dst, tmp_block_state.f3.snd); + Hacl_Hash_Blake2b_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_info(Hacl_Hash_Blake2b_state_t *s) +{ + Hacl_Hash_Blake2b_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); +} + +/** + Free state function when there is no key +*/ +void Hacl_Hash_Blake2b_free(Hacl_Hash_Blake2b_state_t *state) +{ + Hacl_Hash_Blake2b_state_t scrut = *state; + uint8_t *buf = scrut.buf; + Hacl_Hash_Blake2b_block_state_t block_state = scrut.block_state; + uint64_t *b = block_state.f3.snd; + uint64_t *wv = block_state.f3.fst; + KRML_HOST_FREE(wv); + KRML_HOST_FREE(b); + KRML_HOST_FREE(buf); + KRML_HOST_FREE(state); +} + +/** + Copying. This preserves all parameters. +*/ +Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_copy(Hacl_Hash_Blake2b_state_t *state) +{ + Hacl_Hash_Blake2b_state_t scrut = *state; + Hacl_Hash_Blake2b_block_state_t block_state0 = scrut.block_state; + uint8_t *buf0 = scrut.buf; + uint64_t total_len0 = scrut.total_len; + bool last_node = block_state0.thd; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); + memcpy(buf, buf0, 128U * sizeof (uint8_t)); + uint64_t *wv = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); + uint64_t *b = (uint64_t *)KRML_HOST_CALLOC(16U, sizeof (uint64_t)); + Hacl_Hash_Blake2b_block_state_t + block_state = + { + .fst = i.key_length, + .snd = i.digest_length, + .thd = i.last_node, + .f3 = { .fst = wv, .snd = b } + }; + uint64_t *src_b = block_state0.f3.snd; + uint64_t *dst_b = block_state.f3.snd; + memcpy(dst_b, src_b, 16U * sizeof (uint64_t)); + Hacl_Hash_Blake2b_state_t + s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; + Hacl_Hash_Blake2b_state_t + *p = (Hacl_Hash_Blake2b_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2b_state_t)); + p[0U] = s; + return p; +} + +/** +Write the BLAKE2b digest of message `input` using key `key` into `output`. + +@param output Pointer to `output_len` bytes of memory where the digest is written to. +@param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 64. +@param input Pointer to `input_len` bytes of memory where the input message is read from. +@param input_len Length of the input message. +@param key Pointer to `key_len` bytes of memory where the key is read from. +@param key_len Length of the key. Can be 0. +*/ +void +Hacl_Hash_Blake2b_hash_with_key( + uint8_t *output, + uint32_t output_len, + uint8_t *input, + uint32_t input_len, + uint8_t *key, + uint32_t key_len +) +{ + uint64_t b[16U] = { 0U }; + uint64_t b1[16U] = { 0U }; + Hacl_Hash_Blake2b_init(b, key_len, output_len); + update(b1, b, key_len, key, input_len, input); + Hacl_Hash_Blake2b_finish(output_len, output, b); + Lib_Memzero0_memzero(b1, 16U, uint64_t, void *); + Lib_Memzero0_memzero(b, 16U, uint64_t, void *); +} + +/** +Write the BLAKE2b digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ +void +Hacl_Hash_Blake2b_hash_with_key_and_params( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +) +{ + uint64_t b[16U] = { 0U }; + uint64_t b1[16U] = { 0U }; + uint64_t tmp[8U] = { 0U }; + uint64_t *r0 = b; + uint64_t *r1 = b + 4U; + uint64_t *r2 = b + 8U; + uint64_t *r3 = b + 12U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint8_t kk = params.key_length; + uint8_t nn = params.digest_length; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = params.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = params.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn + ^ + ((uint64_t)kk + << 8U + ^ + ((uint64_t)params.fanout + << 16U + ^ ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); + tmp[1U] = params.node_offset; + tmp[2U] = (uint64_t)params.node_depth ^ (uint64_t)params.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; + update(b1, b, (uint32_t)params.key_length, key, input_len, input); + Hacl_Hash_Blake2b_finish((uint32_t)params.digest_length, output, b); + Lib_Memzero0_memzero(b1, 16U, uint64_t, void *); + Lib_Memzero0_memzero(b, 16U, uint64_t, void *); +} + diff --git a/Modules/_hacl/Hacl_Hash_Blake2b.h b/Modules/_hacl/Hacl_Hash_Blake2b.h new file mode 100644 index 00000000000..5b5b037bcdc --- /dev/null +++ b/Modules/_hacl/Hacl_Hash_Blake2b.h @@ -0,0 +1,245 @@ +/* MIT License + * + * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation + * Copyright (c) 2022-2023 HACL* Contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef __Hacl_Hash_Blake2b_H +#define __Hacl_Hash_Blake2b_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include +#include "python_hacl_namespaces.h" +#include "krml/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Streaming_Types.h" + + +typedef struct Hacl_Hash_Blake2b_blake2_params_s +{ + uint8_t digest_length; + uint8_t key_length; + uint8_t fanout; + uint8_t depth; + uint32_t leaf_length; + uint64_t node_offset; + uint8_t node_depth; + uint8_t inner_length; + uint8_t *salt; + uint8_t *personal; +} +Hacl_Hash_Blake2b_blake2_params; + +typedef struct Hacl_Hash_Blake2b_index_s +{ + uint8_t key_length; + uint8_t digest_length; + bool last_node; +} +Hacl_Hash_Blake2b_index; + +#define HACL_HASH_BLAKE2B_BLOCK_BYTES (128U) + +#define HACL_HASH_BLAKE2B_OUT_BYTES (64U) + +#define HACL_HASH_BLAKE2B_KEY_BYTES (64U) + +#define HACL_HASH_BLAKE2B_SALT_BYTES (16U) + +#define HACL_HASH_BLAKE2B_PERSONAL_BYTES (16U) + +typedef struct K____uint64_t___uint64_t__s +{ + uint64_t *fst; + uint64_t *snd; +} +K____uint64_t___uint64_t_; + +typedef struct Hacl_Hash_Blake2b_block_state_t_s +{ + uint8_t fst; + uint8_t snd; + bool thd; + K____uint64_t___uint64_t_ f3; +} +Hacl_Hash_Blake2b_block_state_t; + +typedef struct Hacl_Hash_Blake2b_state_t_s +{ + Hacl_Hash_Blake2b_block_state_t block_state; + uint8_t *buf; + uint64_t total_len; +} +Hacl_Hash_Blake2b_state_t; + +/** + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 32 for S, 64 for B. +- The digest_length must not exceed 32 for S, 64 for B. + +*/ +Hacl_Hash_Blake2b_state_t +*Hacl_Hash_Blake2b_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +); + +/** + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 32 for S, 64 for B. + +*/ +Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc_with_key(uint8_t *k, uint8_t kk); + +/** + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. +*/ +Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_malloc(void); + +/** + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. +*/ +void +Hacl_Hash_Blake2b_reset_with_key_and_params( + Hacl_Hash_Blake2b_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +); + +/** + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2b_reset_with_key(Hacl_Hash_Blake2b_state_t *s, uint8_t *k); + +/** + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2b_reset(Hacl_Hash_Blake2b_state_t *s); + +/** + Update function; 0 = success, 1 = max length exceeded +*/ +Hacl_Streaming_Types_error_code +Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint32_t chunk_len); + +/** + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. +*/ +uint8_t Hacl_Hash_Blake2b_digest(Hacl_Hash_Blake2b_state_t *s, uint8_t *dst); + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_info(Hacl_Hash_Blake2b_state_t *s); + +/** + Free state function when there is no key +*/ +void Hacl_Hash_Blake2b_free(Hacl_Hash_Blake2b_state_t *state); + +/** + Copying. This preserves all parameters. +*/ +Hacl_Hash_Blake2b_state_t *Hacl_Hash_Blake2b_copy(Hacl_Hash_Blake2b_state_t *state); + +/** +Write the BLAKE2b digest of message `input` using key `key` into `output`. + +@param output Pointer to `output_len` bytes of memory where the digest is written to. +@param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 64. +@param input Pointer to `input_len` bytes of memory where the input message is read from. +@param input_len Length of the input message. +@param key Pointer to `key_len` bytes of memory where the key is read from. +@param key_len Length of the key. Can be 0. +*/ +void +Hacl_Hash_Blake2b_hash_with_key( + uint8_t *output, + uint32_t output_len, + uint8_t *input, + uint32_t input_len, + uint8_t *key, + uint32_t key_len +); + +/** +Write the BLAKE2b digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ +void +Hacl_Hash_Blake2b_hash_with_key_and_params( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Hash_Blake2b_H_DEFINED +#endif diff --git a/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c new file mode 100644 index 00000000000..35608aea71a --- /dev/null +++ b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c @@ -0,0 +1,1338 @@ +/* MIT License + * + * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation + * Copyright (c) 2022-2023 HACL* Contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "internal/Hacl_Hash_Blake2b_Simd256.h" + +#include "internal/Hacl_Impl_Blake2_Constants.h" +#include "internal/Hacl_Hash_Blake2b.h" +#include "lib_memzero0.h" + +static inline void +update_block( + Lib_IntVector_Intrinsics_vec256 *wv, + Lib_IntVector_Intrinsics_vec256 *hash, + bool flag, + bool last_node, + FStar_UInt128_uint128 totlen, + uint8_t *d +) +{ + uint64_t m_w[16U] = { 0U }; + KRML_MAYBE_FOR16(i, + 0U, + 16U, + 1U, + uint64_t *os = m_w; + uint8_t *bj = d + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_zero; + uint64_t wv_14; + if (flag) + { + wv_14 = 0xFFFFFFFFFFFFFFFFULL; + } + else + { + wv_14 = 0ULL; + } + uint64_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFFFFFFFFFULL; + } + else + { + wv_15 = 0ULL; + } + mask = + Lib_IntVector_Intrinsics_vec256_load64s(FStar_UInt128_uint128_to_uint64(totlen), + FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(totlen, 64U)), + wv_14, + wv_15); + memcpy(wv, hash, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 *wv3 = wv + 3U; + wv3[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv3[0U], mask); + KRML_MAYBE_FOR12(i, + 0U, + 12U, + 1U, + uint32_t start_idx = i % 10U * 16U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 m_st[4U] KRML_POST_ALIGN(32) = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = m_st; + Lib_IntVector_Intrinsics_vec256 *r1 = m_st + 1U; + Lib_IntVector_Intrinsics_vec256 *r20 = m_st + 2U; + Lib_IntVector_Intrinsics_vec256 *r30 = m_st + 3U; + uint32_t s0 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 0U]; + uint32_t s1 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 1U]; + uint32_t s2 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 2U]; + uint32_t s3 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 3U]; + uint32_t s4 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 4U]; + uint32_t s5 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 5U]; + uint32_t s6 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 6U]; + uint32_t s7 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 7U]; + uint32_t s8 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 8U]; + uint32_t s9 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 9U]; + uint32_t s10 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 10U]; + uint32_t s11 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 11U]; + uint32_t s12 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 12U]; + uint32_t s13 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 13U]; + uint32_t s14 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 14U]; + uint32_t s15 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 15U]; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(m_w[s0], m_w[s2], m_w[s4], m_w[s6]); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(m_w[s1], m_w[s3], m_w[s5], m_w[s7]); + r20[0U] = Lib_IntVector_Intrinsics_vec256_load64s(m_w[s8], m_w[s10], m_w[s12], m_w[s14]); + r30[0U] = Lib_IntVector_Intrinsics_vec256_load64s(m_w[s9], m_w[s11], m_w[s13], m_w[s15]); + Lib_IntVector_Intrinsics_vec256 *x = m_st; + Lib_IntVector_Intrinsics_vec256 *y = m_st + 1U; + Lib_IntVector_Intrinsics_vec256 *z = m_st + 2U; + Lib_IntVector_Intrinsics_vec256 *w = m_st + 3U; + uint32_t a = 0U; + uint32_t b0 = 1U; + uint32_t c0 = 2U; + uint32_t d10 = 3U; + Lib_IntVector_Intrinsics_vec256 *wv_a0 = wv + a * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b0 = wv + b0 * 1U; + wv_a0[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a0[0U], wv_b0[0U]); + wv_a0[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a0[0U], x[0U]); + Lib_IntVector_Intrinsics_vec256 *wv_a1 = wv + d10 * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b1 = wv + a * 1U; + wv_a1[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a1[0U], wv_b1[0U]); + wv_a1[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a1[0U], 32U); + Lib_IntVector_Intrinsics_vec256 *wv_a2 = wv + c0 * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b2 = wv + d10 * 1U; + wv_a2[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a2[0U], wv_b2[0U]); + Lib_IntVector_Intrinsics_vec256 *wv_a3 = wv + b0 * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b3 = wv + c0 * 1U; + wv_a3[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a3[0U], wv_b3[0U]); + wv_a3[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a3[0U], 24U); + Lib_IntVector_Intrinsics_vec256 *wv_a4 = wv + a * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b4 = wv + b0 * 1U; + wv_a4[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a4[0U], wv_b4[0U]); + wv_a4[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a4[0U], y[0U]); + Lib_IntVector_Intrinsics_vec256 *wv_a5 = wv + d10 * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b5 = wv + a * 1U; + wv_a5[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a5[0U], wv_b5[0U]); + wv_a5[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a5[0U], 16U); + Lib_IntVector_Intrinsics_vec256 *wv_a6 = wv + c0 * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b6 = wv + d10 * 1U; + wv_a6[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a6[0U], wv_b6[0U]); + Lib_IntVector_Intrinsics_vec256 *wv_a7 = wv + b0 * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b7 = wv + c0 * 1U; + wv_a7[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a7[0U], wv_b7[0U]); + wv_a7[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a7[0U], 63U); + Lib_IntVector_Intrinsics_vec256 *r10 = wv + 1U; + Lib_IntVector_Intrinsics_vec256 *r21 = wv + 2U; + Lib_IntVector_Intrinsics_vec256 *r31 = wv + 3U; + Lib_IntVector_Intrinsics_vec256 v00 = r10[0U]; + Lib_IntVector_Intrinsics_vec256 + v1 = Lib_IntVector_Intrinsics_vec256_rotate_right_lanes64(v00, 1U); + r10[0U] = v1; + Lib_IntVector_Intrinsics_vec256 v01 = r21[0U]; + Lib_IntVector_Intrinsics_vec256 + v10 = Lib_IntVector_Intrinsics_vec256_rotate_right_lanes64(v01, 2U); + r21[0U] = v10; + Lib_IntVector_Intrinsics_vec256 v02 = r31[0U]; + Lib_IntVector_Intrinsics_vec256 + v11 = Lib_IntVector_Intrinsics_vec256_rotate_right_lanes64(v02, 3U); + r31[0U] = v11; + uint32_t a0 = 0U; + uint32_t b = 1U; + uint32_t c = 2U; + uint32_t d1 = 3U; + Lib_IntVector_Intrinsics_vec256 *wv_a = wv + a0 * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b8 = wv + b * 1U; + wv_a[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a[0U], wv_b8[0U]); + wv_a[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a[0U], z[0U]); + Lib_IntVector_Intrinsics_vec256 *wv_a8 = wv + d1 * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b9 = wv + a0 * 1U; + wv_a8[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a8[0U], wv_b9[0U]); + wv_a8[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a8[0U], 32U); + Lib_IntVector_Intrinsics_vec256 *wv_a9 = wv + c * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b10 = wv + d1 * 1U; + wv_a9[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a9[0U], wv_b10[0U]); + Lib_IntVector_Intrinsics_vec256 *wv_a10 = wv + b * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b11 = wv + c * 1U; + wv_a10[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a10[0U], wv_b11[0U]); + wv_a10[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a10[0U], 24U); + Lib_IntVector_Intrinsics_vec256 *wv_a11 = wv + a0 * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b12 = wv + b * 1U; + wv_a11[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a11[0U], wv_b12[0U]); + wv_a11[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a11[0U], w[0U]); + Lib_IntVector_Intrinsics_vec256 *wv_a12 = wv + d1 * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b13 = wv + a0 * 1U; + wv_a12[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a12[0U], wv_b13[0U]); + wv_a12[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a12[0U], 16U); + Lib_IntVector_Intrinsics_vec256 *wv_a13 = wv + c * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b14 = wv + d1 * 1U; + wv_a13[0U] = Lib_IntVector_Intrinsics_vec256_add64(wv_a13[0U], wv_b14[0U]); + Lib_IntVector_Intrinsics_vec256 *wv_a14 = wv + b * 1U; + Lib_IntVector_Intrinsics_vec256 *wv_b = wv + c * 1U; + wv_a14[0U] = Lib_IntVector_Intrinsics_vec256_xor(wv_a14[0U], wv_b[0U]); + wv_a14[0U] = Lib_IntVector_Intrinsics_vec256_rotate_right64(wv_a14[0U], 63U); + Lib_IntVector_Intrinsics_vec256 *r11 = wv + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = wv + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = wv + 3U; + Lib_IntVector_Intrinsics_vec256 v0 = r11[0U]; + Lib_IntVector_Intrinsics_vec256 + v12 = Lib_IntVector_Intrinsics_vec256_rotate_right_lanes64(v0, 3U); + r11[0U] = v12; + Lib_IntVector_Intrinsics_vec256 v03 = r2[0U]; + Lib_IntVector_Intrinsics_vec256 + v13 = Lib_IntVector_Intrinsics_vec256_rotate_right_lanes64(v03, 2U); + r2[0U] = v13; + Lib_IntVector_Intrinsics_vec256 v04 = r3[0U]; + Lib_IntVector_Intrinsics_vec256 + v14 = Lib_IntVector_Intrinsics_vec256_rotate_right_lanes64(v04, 1U); + r3[0U] = v14;); + Lib_IntVector_Intrinsics_vec256 *s0 = hash; + Lib_IntVector_Intrinsics_vec256 *s1 = hash + 1U; + Lib_IntVector_Intrinsics_vec256 *r0 = wv; + Lib_IntVector_Intrinsics_vec256 *r1 = wv + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = wv + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = wv + 3U; + s0[0U] = Lib_IntVector_Intrinsics_vec256_xor(s0[0U], r0[0U]); + s0[0U] = Lib_IntVector_Intrinsics_vec256_xor(s0[0U], r2[0U]); + s1[0U] = Lib_IntVector_Intrinsics_vec256_xor(s1[0U], r1[0U]); + s1[0U] = Lib_IntVector_Intrinsics_vec256_xor(s1[0U], r3[0U]); +} + +void +Hacl_Hash_Blake2b_Simd256_init(Lib_IntVector_Intrinsics_vec256 *hash, uint32_t kk, uint32_t nn) +{ + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = 64U, .key_length = 0U, .fanout = 1U, .depth = 1U, .leaf_length = 0U, + .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal + }; + uint64_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = hash; + Lib_IntVector_Intrinsics_vec256 *r1 = hash + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = hash + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = hash + 3U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk1 = (uint8_t)kk; + uint8_t nn1 = (uint8_t)nn; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk1 + << 8U + ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); + tmp[1U] = p.node_offset; + tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); +} + +static void +init_with_params(Lib_IntVector_Intrinsics_vec256 *hash, Hacl_Hash_Blake2b_blake2_params p) +{ + uint64_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = hash; + Lib_IntVector_Intrinsics_vec256 *r1 = hash + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = hash + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = hash + 3U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk = p.key_length; + uint8_t nn = p.digest_length; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn + ^ + ((uint64_t)kk + << 8U + ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); + tmp[1U] = p.node_offset; + tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); +} + +static void +update_key( + Lib_IntVector_Intrinsics_vec256 *wv, + Lib_IntVector_Intrinsics_vec256 *hash, + uint32_t kk, + uint8_t *k, + uint32_t ll +) +{ + FStar_UInt128_uint128 lb = FStar_UInt128_uint64_to_uint128((uint64_t)128U); + uint8_t b[128U] = { 0U }; + memcpy(b, k, kk * sizeof (uint8_t)); + if (ll == 0U) + { + update_block(wv, hash, true, false, lb, b); + } + else + { + update_block(wv, hash, false, false, lb, b); + } + Lib_Memzero0_memzero(b, 128U, uint8_t, void *); +} + +void +Hacl_Hash_Blake2b_Simd256_update_multi( + uint32_t len, + Lib_IntVector_Intrinsics_vec256 *wv, + Lib_IntVector_Intrinsics_vec256 *hash, + FStar_UInt128_uint128 prev, + uint8_t *blocks, + uint32_t nb +) +{ + KRML_MAYBE_UNUSED_VAR(len); + for (uint32_t i = 0U; i < nb; i++) + { + FStar_UInt128_uint128 + totlen = + FStar_UInt128_add_mod(prev, + FStar_UInt128_uint64_to_uint128((uint64_t)((i + 1U) * 128U))); + uint8_t *b = blocks + i * 128U; + update_block(wv, hash, false, false, totlen, b); + } +} + +void +Hacl_Hash_Blake2b_Simd256_update_last( + uint32_t len, + Lib_IntVector_Intrinsics_vec256 *wv, + Lib_IntVector_Intrinsics_vec256 *hash, + bool last_node, + FStar_UInt128_uint128 prev, + uint32_t rem, + uint8_t *d +) +{ + uint8_t b[128U] = { 0U }; + uint8_t *last = d + len - rem; + memcpy(b, last, rem * sizeof (uint8_t)); + FStar_UInt128_uint128 + totlen = FStar_UInt128_add_mod(prev, FStar_UInt128_uint64_to_uint128((uint64_t)len)); + update_block(wv, hash, true, last_node, totlen, b); + Lib_Memzero0_memzero(b, 128U, uint8_t, void *); +} + +static inline void +update_blocks( + uint32_t len, + Lib_IntVector_Intrinsics_vec256 *wv, + Lib_IntVector_Intrinsics_vec256 *hash, + FStar_UInt128_uint128 prev, + uint8_t *blocks +) +{ + uint32_t nb0 = len / 128U; + uint32_t rem0 = len % 128U; + uint32_t nb; + if (rem0 == 0U && nb0 > 0U) + { + nb = nb0 - 1U; + } + else + { + nb = nb0; + } + uint32_t rem; + if (rem0 == 0U && nb0 > 0U) + { + rem = 128U; + } + else + { + rem = rem0; + } + Hacl_Hash_Blake2b_Simd256_update_multi(len, wv, hash, prev, blocks, nb); + Hacl_Hash_Blake2b_Simd256_update_last(len, wv, hash, false, prev, rem, blocks); +} + +static inline void +update( + Lib_IntVector_Intrinsics_vec256 *wv, + Lib_IntVector_Intrinsics_vec256 *hash, + uint32_t kk, + uint8_t *k, + uint32_t ll, + uint8_t *d +) +{ + FStar_UInt128_uint128 lb = FStar_UInt128_uint64_to_uint128((uint64_t)128U); + if (kk > 0U) + { + update_key(wv, hash, kk, k, ll); + if (!(ll == 0U)) + { + update_blocks(ll, wv, hash, lb, d); + return; + } + return; + } + update_blocks(ll, wv, hash, FStar_UInt128_uint64_to_uint128((uint64_t)0U), d); +} + +void +Hacl_Hash_Blake2b_Simd256_finish( + uint32_t nn, + uint8_t *output, + Lib_IntVector_Intrinsics_vec256 *hash +) +{ + uint8_t b[64U] = { 0U }; + uint8_t *first = b; + uint8_t *second = b + 32U; + Lib_IntVector_Intrinsics_vec256 *row0 = hash; + Lib_IntVector_Intrinsics_vec256 *row1 = hash + 1U; + Lib_IntVector_Intrinsics_vec256_store64_le(first, row0[0U]); + Lib_IntVector_Intrinsics_vec256_store64_le(second, row1[0U]); + uint8_t *final = b; + memcpy(output, final, nn * sizeof (uint8_t)); + Lib_Memzero0_memzero(b, 64U, uint8_t, void *); +} + +void +Hacl_Hash_Blake2b_Simd256_load_state256b_from_state32( + Lib_IntVector_Intrinsics_vec256 *st, + uint64_t *st32 +) +{ + Lib_IntVector_Intrinsics_vec256 *r0 = st; + Lib_IntVector_Intrinsics_vec256 *r1 = st + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = st + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = st + 3U; + uint64_t *b0 = st32; + uint64_t *b1 = st32 + 4U; + uint64_t *b2 = st32 + 8U; + uint64_t *b3 = st32 + 12U; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(b0[0U], b0[1U], b0[2U], b0[3U]); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(b1[0U], b1[1U], b1[2U], b1[3U]); + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(b2[0U], b2[1U], b2[2U], b2[3U]); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(b3[0U], b3[1U], b3[2U], b3[3U]); +} + +void +Hacl_Hash_Blake2b_Simd256_store_state256b_to_state32( + uint64_t *st32, + Lib_IntVector_Intrinsics_vec256 *st +) +{ + Lib_IntVector_Intrinsics_vec256 *r0 = st; + Lib_IntVector_Intrinsics_vec256 *r1 = st + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = st + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = st + 3U; + uint64_t *b0 = st32; + uint64_t *b1 = st32 + 4U; + uint64_t *b2 = st32 + 8U; + uint64_t *b3 = st32 + 12U; + uint8_t b8[32U] = { 0U }; + Lib_IntVector_Intrinsics_vec256_store64_le(b8, r0[0U]); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = b0; + uint8_t *bj = b8 + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + uint8_t b80[32U] = { 0U }; + Lib_IntVector_Intrinsics_vec256_store64_le(b80, r1[0U]); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = b1; + uint8_t *bj = b80 + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + uint8_t b81[32U] = { 0U }; + Lib_IntVector_Intrinsics_vec256_store64_le(b81, r2[0U]); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = b2; + uint8_t *bj = b81 + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + uint8_t b82[32U] = { 0U }; + Lib_IntVector_Intrinsics_vec256_store64_le(b82, r3[0U]); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint64_t *os = b3; + uint8_t *bj = b82 + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); +} + +Lib_IntVector_Intrinsics_vec256 *Hacl_Hash_Blake2b_Simd256_malloc_with_key(void) +{ + Lib_IntVector_Intrinsics_vec256 + *buf = + (Lib_IntVector_Intrinsics_vec256 *)KRML_ALIGNED_MALLOC(32, + sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); + memset(buf, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); + return buf; +} + +static Hacl_Hash_Blake2b_Simd256_state_t +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) +{ + uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); + Lib_IntVector_Intrinsics_vec256 + *wv = + (Lib_IntVector_Intrinsics_vec256 *)KRML_ALIGNED_MALLOC(32, + sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); + memset(wv, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 + *b = + (Lib_IntVector_Intrinsics_vec256 *)KRML_ALIGNED_MALLOC(32, + sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); + memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Hacl_Hash_Blake2b_Simd256_block_state_t + block_state = + { + .fst = kk.key_length, + .snd = kk.digest_length, + .thd = kk.last_node, + .f3 = { .fst = wv, .snd = b } + }; + uint8_t kk10 = kk.key_length; + uint32_t ite; + if (kk10 != 0U) + { + ite = 128U; + } + else + { + ite = 0U; + } + Hacl_Hash_Blake2b_Simd256_state_t + s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + Hacl_Hash_Blake2b_Simd256_state_t + *p = + (Hacl_Hash_Blake2b_Simd256_state_t *)KRML_HOST_MALLOC(sizeof ( + Hacl_Hash_Blake2b_Simd256_state_t + )); + p[0U] = s; + Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; + uint8_t kk1 = p1->key_length; + uint8_t nn = p1->digest_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t kk2 = (uint32_t)i.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; + init_with_params(block_state.f3.snd, pv); + return p; +} + +/** + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 256 for S, 64 for B. +- The digest_length must not exceed 256 for S, 64 for B. + +*/ +Hacl_Hash_Blake2b_Simd256_state_t +*Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +) +{ + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + Hacl_Hash_Blake2b_index + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); +} + +/** + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 256 for S, 64 for B. + +*/ +Hacl_Hash_Blake2b_Simd256_state_t +*Hacl_Hash_Blake2b_Simd256_malloc_with_key0(uint8_t *k, uint8_t kk) +{ + uint8_t nn = 64U; + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = i.digest_length, .key_length = i.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params p0 = p; + Hacl_Hash_Blake2b_Simd256_state_t + *s = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(&p0, false, k); + return s; +} + +/** + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. +*/ +Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void) +{ + return Hacl_Hash_Blake2b_Simd256_malloc_with_key0(NULL, 0U); +} + +static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2b_Simd256_state_t *s) +{ + Hacl_Hash_Blake2b_Simd256_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk1 = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); +} + +static void +reset_raw(Hacl_Hash_Blake2b_Simd256_state_t *state, Hacl_Hash_Blake2b_params_and_key key) +{ + Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; + uint8_t *buf = scrut.buf; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; + bool last_node0 = block_state.thd; + uint8_t nn0 = block_state.snd; + uint8_t kk10 = block_state.fst; + Hacl_Hash_Blake2b_index + i = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; + KRML_MAYBE_UNUSED_VAR(i); + Hacl_Hash_Blake2b_blake2_params *p = key.fst; + uint8_t kk1 = p->key_length; + uint8_t nn = p->digest_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t kk2 = (uint32_t)i1.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + init_with_params(block_state.f3.snd, pv); + uint8_t kk11 = i.key_length; + uint32_t ite; + if (kk11 != 0U) + { + ite = 128U; + } + else + { + ite = 0U; + } + Hacl_Hash_Blake2b_Simd256_state_t + tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp; +} + +/** + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. +*/ +void +Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( + Hacl_Hash_Blake2b_Simd256_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +) +{ + index_of_state(s); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); +} + +/** + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *k) +{ + Hacl_Hash_Blake2b_index idx = index_of_state(s); + uint8_t salt[16U] = { 0U }; + uint8_t personal[16U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = idx.digest_length, .key_length = idx.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params p0 = p; + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); +} + +/** + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s) +{ + Hacl_Hash_Blake2b_Simd256_reset_with_key(s, NULL); +} + +/** + Update function; 0 = success, 1 = max length exceeded +*/ +Hacl_Streaming_Types_error_code +Hacl_Hash_Blake2b_Simd256_update( + Hacl_Hash_Blake2b_Simd256_state_t *state, + uint8_t *chunk, + uint32_t chunk_len +) +{ + Hacl_Hash_Blake2b_Simd256_state_t s = *state; + uint64_t total_len = s.total_len; + if ((uint64_t)chunk_len > 0xffffffffffffffffULL - total_len) + { + return Hacl_Streaming_Types_MaximumLengthExceeded; + } + uint32_t sz; + if (total_len % (uint64_t)128U == 0ULL && total_len > 0ULL) + { + sz = 128U; + } + else + { + sz = (uint32_t)(total_len % (uint64_t)128U); + } + if (chunk_len <= 128U - sz) + { + Hacl_Hash_Blake2b_Simd256_state_t s1 = *state; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state1 = s1.block_state; + uint8_t *buf = s1.buf; + uint64_t total_len1 = s1.total_len; + uint32_t sz1; + if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) + { + sz1 = 128U; + } + else + { + sz1 = (uint32_t)(total_len1 % (uint64_t)128U); + } + uint8_t *buf2 = buf + sz1; + memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); + uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; + *state + = + ( + (Hacl_Hash_Blake2b_Simd256_state_t){ + .block_state = block_state1, + .buf = buf, + .total_len = total_len2 + } + ); + } + else if (sz == 0U) + { + Hacl_Hash_Blake2b_Simd256_state_t s1 = *state; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state1 = s1.block_state; + uint8_t *buf = s1.buf; + uint64_t total_len1 = s1.total_len; + uint32_t sz1; + if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) + { + sz1 = 128U; + } + else + { + sz1 = (uint32_t)(total_len1 % (uint64_t)128U); + } + if (!(sz1 == 0U)) + { + uint64_t prevlen = total_len1 - (uint64_t)sz1; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.f3; + Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; + uint32_t nb = 1U; + Hacl_Hash_Blake2b_Simd256_update_multi(128U, + wv, + hash, + FStar_UInt128_uint64_to_uint128(prevlen), + buf, + nb); + } + uint32_t ite; + if ((uint64_t)chunk_len % (uint64_t)128U == 0ULL && (uint64_t)chunk_len > 0ULL) + { + ite = 128U; + } + else + { + ite = (uint32_t)((uint64_t)chunk_len % (uint64_t)128U); + } + uint32_t n_blocks = (chunk_len - ite) / 128U; + uint32_t data1_len = n_blocks * 128U; + uint32_t data2_len = chunk_len - data1_len; + uint8_t *data1 = chunk; + uint8_t *data2 = chunk + data1_len; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.f3; + Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; + uint32_t nb = data1_len / 128U; + Hacl_Hash_Blake2b_Simd256_update_multi(data1_len, + wv, + hash, + FStar_UInt128_uint64_to_uint128(total_len1), + data1, + nb); + uint8_t *dst = buf; + memcpy(dst, data2, data2_len * sizeof (uint8_t)); + *state + = + ( + (Hacl_Hash_Blake2b_Simd256_state_t){ + .block_state = block_state1, + .buf = buf, + .total_len = total_len1 + (uint64_t)chunk_len + } + ); + } + else + { + uint32_t diff = 128U - sz; + uint8_t *chunk1 = chunk; + uint8_t *chunk2 = chunk + diff; + Hacl_Hash_Blake2b_Simd256_state_t s1 = *state; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state10 = s1.block_state; + uint8_t *buf0 = s1.buf; + uint64_t total_len10 = s1.total_len; + uint32_t sz10; + if (total_len10 % (uint64_t)128U == 0ULL && total_len10 > 0ULL) + { + sz10 = 128U; + } + else + { + sz10 = (uint32_t)(total_len10 % (uint64_t)128U); + } + uint8_t *buf2 = buf0 + sz10; + memcpy(buf2, chunk1, diff * sizeof (uint8_t)); + uint64_t total_len2 = total_len10 + (uint64_t)diff; + *state + = + ( + (Hacl_Hash_Blake2b_Simd256_state_t){ + .block_state = block_state10, + .buf = buf0, + .total_len = total_len2 + } + ); + Hacl_Hash_Blake2b_Simd256_state_t s10 = *state; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state1 = s10.block_state; + uint8_t *buf = s10.buf; + uint64_t total_len1 = s10.total_len; + uint32_t sz1; + if (total_len1 % (uint64_t)128U == 0ULL && total_len1 > 0ULL) + { + sz1 = 128U; + } + else + { + sz1 = (uint32_t)(total_len1 % (uint64_t)128U); + } + if (!(sz1 == 0U)) + { + uint64_t prevlen = total_len1 - (uint64_t)sz1; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.f3; + Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; + uint32_t nb = 1U; + Hacl_Hash_Blake2b_Simd256_update_multi(128U, + wv, + hash, + FStar_UInt128_uint64_to_uint128(prevlen), + buf, + nb); + } + uint32_t ite; + if + ((uint64_t)(chunk_len - diff) % (uint64_t)128U == 0ULL && (uint64_t)(chunk_len - diff) > 0ULL) + { + ite = 128U; + } + else + { + ite = (uint32_t)((uint64_t)(chunk_len - diff) % (uint64_t)128U); + } + uint32_t n_blocks = (chunk_len - diff - ite) / 128U; + uint32_t data1_len = n_blocks * 128U; + uint32_t data2_len = chunk_len - diff - data1_len; + uint8_t *data1 = chunk2; + uint8_t *data2 = chunk2 + data1_len; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ acc = block_state1.f3; + Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; + uint32_t nb = data1_len / 128U; + Hacl_Hash_Blake2b_Simd256_update_multi(data1_len, + wv, + hash, + FStar_UInt128_uint64_to_uint128(total_len1), + data1, + nb); + uint8_t *dst = buf; + memcpy(dst, data2, data2_len * sizeof (uint8_t)); + *state + = + ( + (Hacl_Hash_Blake2b_Simd256_state_t){ + .block_state = block_state1, + .buf = buf, + .total_len = total_len1 + (uint64_t)(chunk_len - diff) + } + ); + } + return Hacl_Streaming_Types_Success; +} + +/** + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 256 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_256_OUT_BYTES, then use the return value +to see how many bytes were actually written. +*/ +uint8_t Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *dst) +{ + Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2b_Simd256_state_t scrut = *s; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; + uint8_t *buf_ = scrut.buf; + uint64_t total_len = scrut.total_len; + uint32_t r; + if (total_len % (uint64_t)128U == 0ULL && total_len > 0ULL) + { + r = 128U; + } + else + { + r = (uint32_t)(total_len % (uint64_t)128U); + } + uint8_t *buf_1 = buf_; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 wv0[4U] KRML_POST_ALIGN(32) = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 b[4U] KRML_POST_ALIGN(32) = { 0U }; + Hacl_Hash_Blake2b_Simd256_block_state_t + tmp_block_state = + { + .fst = i1.key_length, + .snd = i1.digest_length, + .thd = i1.last_node, + .f3 = { .fst = wv0, .snd = b } + }; + Lib_IntVector_Intrinsics_vec256 *src_b = block_state.f3.snd; + Lib_IntVector_Intrinsics_vec256 *dst_b = tmp_block_state.f3.snd; + memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); + uint64_t prev_len = total_len - (uint64_t)r; + uint32_t ite; + if (r % 128U == 0U && r > 0U) + { + ite = 128U; + } + else + { + ite = r % 128U; + } + uint8_t *buf_last = buf_1 + r - ite; + uint8_t *buf_multi = buf_1; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ + acc0 = tmp_block_state.f3; + Lib_IntVector_Intrinsics_vec256 *wv1 = acc0.fst; + Lib_IntVector_Intrinsics_vec256 *hash0 = acc0.snd; + uint32_t nb = 0U; + Hacl_Hash_Blake2b_Simd256_update_multi(0U, + wv1, + hash0, + FStar_UInt128_uint64_to_uint128(prev_len), + buf_multi, + nb); + uint64_t prev_len_last = total_len - (uint64_t)r; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ + acc = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; + Lib_IntVector_Intrinsics_vec256 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec256 *hash = acc.snd; + Hacl_Hash_Blake2b_Simd256_update_last(r, + wv, + hash, + last_node1, + FStar_UInt128_uint64_to_uint128(prev_len_last), + r, + buf_last); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2b_Simd256_finish((uint32_t)nn1, dst, tmp_block_state.f3.snd); + Hacl_Hash_Blake2b_Simd256_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_Simd256_info(Hacl_Hash_Blake2b_Simd256_state_t *s) +{ + Hacl_Hash_Blake2b_Simd256_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); +} + +/** + Free state function when there is no key +*/ +void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state) +{ + Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; + uint8_t *buf = scrut.buf; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state = scrut.block_state; + Lib_IntVector_Intrinsics_vec256 *b = block_state.f3.snd; + Lib_IntVector_Intrinsics_vec256 *wv = block_state.f3.fst; + KRML_ALIGNED_FREE(wv); + KRML_ALIGNED_FREE(b); + KRML_HOST_FREE(buf); + KRML_HOST_FREE(state); +} + +/** + Copying. This preserves all parameters. +*/ +Hacl_Hash_Blake2b_Simd256_state_t +*Hacl_Hash_Blake2b_Simd256_copy(Hacl_Hash_Blake2b_Simd256_state_t *state) +{ + Hacl_Hash_Blake2b_Simd256_state_t scrut = *state; + Hacl_Hash_Blake2b_Simd256_block_state_t block_state0 = scrut.block_state; + uint8_t *buf0 = scrut.buf; + uint64_t total_len0 = scrut.total_len; + bool last_node = block_state0.thd; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(128U, sizeof (uint8_t)); + memcpy(buf, buf0, 128U * sizeof (uint8_t)); + Lib_IntVector_Intrinsics_vec256 + *wv = + (Lib_IntVector_Intrinsics_vec256 *)KRML_ALIGNED_MALLOC(32, + sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); + memset(wv, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 + *b = + (Lib_IntVector_Intrinsics_vec256 *)KRML_ALIGNED_MALLOC(32, + sizeof (Lib_IntVector_Intrinsics_vec256) * 4U); + memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Hacl_Hash_Blake2b_Simd256_block_state_t + block_state = + { + .fst = i.key_length, + .snd = i.digest_length, + .thd = i.last_node, + .f3 = { .fst = wv, .snd = b } + }; + Lib_IntVector_Intrinsics_vec256 *src_b = block_state0.f3.snd; + Lib_IntVector_Intrinsics_vec256 *dst_b = block_state.f3.snd; + memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Hacl_Hash_Blake2b_Simd256_state_t + s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; + Hacl_Hash_Blake2b_Simd256_state_t + *p = + (Hacl_Hash_Blake2b_Simd256_state_t *)KRML_HOST_MALLOC(sizeof ( + Hacl_Hash_Blake2b_Simd256_state_t + )); + p[0U] = s; + return p; +} + +/** +Write the BLAKE2b digest of message `input` using key `key` into `output`. + +@param output Pointer to `output_len` bytes of memory where the digest is written to. +@param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 64. +@param input Pointer to `input_len` bytes of memory where the input message is read from. +@param input_len Length of the input message. +@param key Pointer to `key_len` bytes of memory where the key is read from. +@param key_len Length of the key. Can be 0. +*/ +void +Hacl_Hash_Blake2b_Simd256_hash_with_key( + uint8_t *output, + uint32_t output_len, + uint8_t *input, + uint32_t input_len, + uint8_t *key, + uint32_t key_len +) +{ + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 b[4U] KRML_POST_ALIGN(32) = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 b1[4U] KRML_POST_ALIGN(32) = { 0U }; + Hacl_Hash_Blake2b_Simd256_init(b, key_len, output_len); + update(b1, b, key_len, key, input_len, input); + Hacl_Hash_Blake2b_Simd256_finish(output_len, output, b); + Lib_Memzero0_memzero(b1, 4U, Lib_IntVector_Intrinsics_vec256, void *); + Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec256, void *); +} + +/** +Write the BLAKE2b digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ +void +Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +) +{ + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 b[4U] KRML_POST_ALIGN(32) = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 b1[4U] KRML_POST_ALIGN(32) = { 0U }; + uint64_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = b; + Lib_IntVector_Intrinsics_vec256 *r1 = b + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = b + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = b + 3U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk = params.key_length; + uint8_t nn = params.digest_length; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = params.salt + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = params.personal + i * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + tmp[0U] = + (uint64_t)nn + ^ + ((uint64_t)kk + << 8U + ^ + ((uint64_t)params.fanout + << 16U + ^ ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); + tmp[1U] = params.node_offset; + tmp[2U] = (uint64_t)params.node_depth ^ (uint64_t)params.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); + update(b1, b, (uint32_t)params.key_length, key, input_len, input); + Hacl_Hash_Blake2b_Simd256_finish((uint32_t)params.digest_length, output, b); + Lib_Memzero0_memzero(b1, 4U, Lib_IntVector_Intrinsics_vec256, void *); + Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec256, void *); +} + diff --git a/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.h b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.h new file mode 100644 index 00000000000..6c11a4ba321 --- /dev/null +++ b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.h @@ -0,0 +1,231 @@ +/* MIT License + * + * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation + * Copyright (c) 2022-2023 HACL* Contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef __Hacl_Hash_Blake2b_Simd256_H +#define __Hacl_Hash_Blake2b_Simd256_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include +#include "python_hacl_namespaces.h" +#include "krml/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Streaming_Types.h" + +#include "Hacl_Hash_Blake2b.h" +#include "libintvector.h" + +#define HACL_HASH_BLAKE2B_SIMD256_BLOCK_BYTES (128U) + +#define HACL_HASH_BLAKE2B_SIMD256_OUT_BYTES (64U) + +#define HACL_HASH_BLAKE2B_SIMD256_KEY_BYTES (64U) + +#define HACL_HASH_BLAKE2B_SIMD256_SALT_BYTES (16U) + +#define HACL_HASH_BLAKE2B_SIMD256_PERSONAL_BYTES (16U) + +typedef struct K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256__s +{ + Lib_IntVector_Intrinsics_vec256 *fst; + Lib_IntVector_Intrinsics_vec256 *snd; +} +K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_; + +typedef struct Hacl_Hash_Blake2b_Simd256_block_state_t_s +{ + uint8_t fst; + uint8_t snd; + bool thd; + K____Lib_IntVector_Intrinsics_vec256___Lib_IntVector_Intrinsics_vec256_ f3; +} +Hacl_Hash_Blake2b_Simd256_block_state_t; + +typedef struct Hacl_Hash_Blake2b_Simd256_state_t_s +{ + Hacl_Hash_Blake2b_Simd256_block_state_t block_state; + uint8_t *buf; + uint64_t total_len; +} +Hacl_Hash_Blake2b_Simd256_state_t; + +/** + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 256 for S, 64 for B. +- The digest_length must not exceed 256 for S, 64 for B. + +*/ +Hacl_Hash_Blake2b_Simd256_state_t +*Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +); + +/** + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 256 for S, 64 for B. + +*/ +Hacl_Hash_Blake2b_Simd256_state_t +*Hacl_Hash_Blake2b_Simd256_malloc_with_key0(uint8_t *k, uint8_t kk); + +/** + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. +*/ +Hacl_Hash_Blake2b_Simd256_state_t *Hacl_Hash_Blake2b_Simd256_malloc(void); + +/** + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. +*/ +void +Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params( + Hacl_Hash_Blake2b_Simd256_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +); + +/** + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. +*/ +void +Hacl_Hash_Blake2b_Simd256_reset_with_key(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *k); + +/** + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2b_Simd256_reset(Hacl_Hash_Blake2b_Simd256_state_t *s); + +/** + Update function; 0 = success, 1 = max length exceeded +*/ +Hacl_Streaming_Types_error_code +Hacl_Hash_Blake2b_Simd256_update( + Hacl_Hash_Blake2b_Simd256_state_t *state, + uint8_t *chunk, + uint32_t chunk_len +); + +/** + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 256 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2B_256_OUT_BYTES, then use the return value +to see how many bytes were actually written. +*/ +uint8_t Hacl_Hash_Blake2b_Simd256_digest(Hacl_Hash_Blake2b_Simd256_state_t *s, uint8_t *dst); + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2b_Simd256_info(Hacl_Hash_Blake2b_Simd256_state_t *s); + +/** + Free state function when there is no key +*/ +void Hacl_Hash_Blake2b_Simd256_free(Hacl_Hash_Blake2b_Simd256_state_t *state); + +/** + Copying. This preserves all parameters. +*/ +Hacl_Hash_Blake2b_Simd256_state_t +*Hacl_Hash_Blake2b_Simd256_copy(Hacl_Hash_Blake2b_Simd256_state_t *state); + +/** +Write the BLAKE2b digest of message `input` using key `key` into `output`. + +@param output Pointer to `output_len` bytes of memory where the digest is written to. +@param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 64. +@param input Pointer to `input_len` bytes of memory where the input message is read from. +@param input_len Length of the input message. +@param key Pointer to `key_len` bytes of memory where the key is read from. +@param key_len Length of the key. Can be 0. +*/ +void +Hacl_Hash_Blake2b_Simd256_hash_with_key( + uint8_t *output, + uint32_t output_len, + uint8_t *input, + uint32_t input_len, + uint8_t *key, + uint32_t key_len +); + +/** +Write the BLAKE2b digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ +void +Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Hash_Blake2b_Simd256_H_DEFINED +#endif diff --git a/Modules/_hacl/Hacl_Hash_Blake2s.c b/Modules/_hacl/Hacl_Hash_Blake2s.c new file mode 100644 index 00000000000..167f38fbd1c --- /dev/null +++ b/Modules/_hacl/Hacl_Hash_Blake2s.c @@ -0,0 +1,1444 @@ +/* MIT License + * + * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation + * Copyright (c) 2022-2023 HACL* Contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "internal/Hacl_Hash_Blake2s.h" + +#include "internal/Hacl_Impl_Blake2_Constants.h" +#include "internal/Hacl_Hash_Blake2b.h" +#include "lib_memzero0.h" + +static inline void +update_block( + uint32_t *wv, + uint32_t *hash, + bool flag, + bool last_node, + uint64_t totlen, + uint8_t *d +) +{ + uint32_t m_w[16U] = { 0U }; + KRML_MAYBE_FOR16(i, + 0U, + 16U, + 1U, + uint32_t *os = m_w; + uint8_t *bj = d + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + uint32_t mask[4U] = { 0U }; + uint32_t wv_14; + if (flag) + { + wv_14 = 0xFFFFFFFFU; + } + else + { + wv_14 = 0U; + } + uint32_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFU; + } + else + { + wv_15 = 0U; + } + mask[0U] = (uint32_t)totlen; + mask[1U] = (uint32_t)(totlen >> 32U); + mask[2U] = wv_14; + mask[3U] = wv_15; + memcpy(wv, hash, 16U * sizeof (uint32_t)); + uint32_t *wv3 = wv + 12U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv3; + uint32_t x = wv3[i] ^ mask[i]; + os[i] = x;); + KRML_MAYBE_FOR10(i0, + 0U, + 10U, + 1U, + uint32_t start_idx = i0 % 10U * 16U; + uint32_t m_st[16U] = { 0U }; + uint32_t *r0 = m_st; + uint32_t *r1 = m_st + 4U; + uint32_t *r20 = m_st + 8U; + uint32_t *r30 = m_st + 12U; + uint32_t s0 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 0U]; + uint32_t s1 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 1U]; + uint32_t s2 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 2U]; + uint32_t s3 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 3U]; + uint32_t s4 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 4U]; + uint32_t s5 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 5U]; + uint32_t s6 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 6U]; + uint32_t s7 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 7U]; + uint32_t s8 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 8U]; + uint32_t s9 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 9U]; + uint32_t s10 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 10U]; + uint32_t s11 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 11U]; + uint32_t s12 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 12U]; + uint32_t s13 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 13U]; + uint32_t s14 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 14U]; + uint32_t s15 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 15U]; + uint32_t uu____0 = m_w[s2]; + uint32_t uu____1 = m_w[s4]; + uint32_t uu____2 = m_w[s6]; + r0[0U] = m_w[s0]; + r0[1U] = uu____0; + r0[2U] = uu____1; + r0[3U] = uu____2; + uint32_t uu____3 = m_w[s3]; + uint32_t uu____4 = m_w[s5]; + uint32_t uu____5 = m_w[s7]; + r1[0U] = m_w[s1]; + r1[1U] = uu____3; + r1[2U] = uu____4; + r1[3U] = uu____5; + uint32_t uu____6 = m_w[s10]; + uint32_t uu____7 = m_w[s12]; + uint32_t uu____8 = m_w[s14]; + r20[0U] = m_w[s8]; + r20[1U] = uu____6; + r20[2U] = uu____7; + r20[3U] = uu____8; + uint32_t uu____9 = m_w[s11]; + uint32_t uu____10 = m_w[s13]; + uint32_t uu____11 = m_w[s15]; + r30[0U] = m_w[s9]; + r30[1U] = uu____9; + r30[2U] = uu____10; + r30[3U] = uu____11; + uint32_t *x = m_st; + uint32_t *y = m_st + 4U; + uint32_t *z = m_st + 8U; + uint32_t *w = m_st + 12U; + uint32_t a = 0U; + uint32_t b0 = 1U; + uint32_t c0 = 2U; + uint32_t d10 = 3U; + uint32_t *wv_a0 = wv + a * 4U; + uint32_t *wv_b0 = wv + b0 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a0; + uint32_t x1 = wv_a0[i] + wv_b0[i]; + os[i] = x1;); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a0; + uint32_t x1 = wv_a0[i] + x[i]; + os[i] = x1;); + uint32_t *wv_a1 = wv + d10 * 4U; + uint32_t *wv_b1 = wv + a * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a1; + uint32_t x1 = wv_a1[i] ^ wv_b1[i]; + os[i] = x1;); + uint32_t *r10 = wv_a1; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = r10; + uint32_t x1 = r10[i]; + uint32_t x10 = x1 >> 16U | x1 << 16U; + os[i] = x10;); + uint32_t *wv_a2 = wv + c0 * 4U; + uint32_t *wv_b2 = wv + d10 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a2; + uint32_t x1 = wv_a2[i] + wv_b2[i]; + os[i] = x1;); + uint32_t *wv_a3 = wv + b0 * 4U; + uint32_t *wv_b3 = wv + c0 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a3; + uint32_t x1 = wv_a3[i] ^ wv_b3[i]; + os[i] = x1;); + uint32_t *r12 = wv_a3; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = r12; + uint32_t x1 = r12[i]; + uint32_t x10 = x1 >> 12U | x1 << 20U; + os[i] = x10;); + uint32_t *wv_a4 = wv + a * 4U; + uint32_t *wv_b4 = wv + b0 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a4; + uint32_t x1 = wv_a4[i] + wv_b4[i]; + os[i] = x1;); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a4; + uint32_t x1 = wv_a4[i] + y[i]; + os[i] = x1;); + uint32_t *wv_a5 = wv + d10 * 4U; + uint32_t *wv_b5 = wv + a * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a5; + uint32_t x1 = wv_a5[i] ^ wv_b5[i]; + os[i] = x1;); + uint32_t *r13 = wv_a5; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = r13; + uint32_t x1 = r13[i]; + uint32_t x10 = x1 >> 8U | x1 << 24U; + os[i] = x10;); + uint32_t *wv_a6 = wv + c0 * 4U; + uint32_t *wv_b6 = wv + d10 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a6; + uint32_t x1 = wv_a6[i] + wv_b6[i]; + os[i] = x1;); + uint32_t *wv_a7 = wv + b0 * 4U; + uint32_t *wv_b7 = wv + c0 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a7; + uint32_t x1 = wv_a7[i] ^ wv_b7[i]; + os[i] = x1;); + uint32_t *r14 = wv_a7; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = r14; + uint32_t x1 = r14[i]; + uint32_t x10 = x1 >> 7U | x1 << 25U; + os[i] = x10;); + uint32_t *r15 = wv + 4U; + uint32_t *r21 = wv + 8U; + uint32_t *r31 = wv + 12U; + uint32_t *r110 = r15; + uint32_t x00 = r110[1U]; + uint32_t x10 = r110[2U]; + uint32_t x20 = r110[3U]; + uint32_t x30 = r110[0U]; + r110[0U] = x00; + r110[1U] = x10; + r110[2U] = x20; + r110[3U] = x30; + uint32_t *r111 = r21; + uint32_t x01 = r111[2U]; + uint32_t x11 = r111[3U]; + uint32_t x21 = r111[0U]; + uint32_t x31 = r111[1U]; + r111[0U] = x01; + r111[1U] = x11; + r111[2U] = x21; + r111[3U] = x31; + uint32_t *r112 = r31; + uint32_t x02 = r112[3U]; + uint32_t x12 = r112[0U]; + uint32_t x22 = r112[1U]; + uint32_t x32 = r112[2U]; + r112[0U] = x02; + r112[1U] = x12; + r112[2U] = x22; + r112[3U] = x32; + uint32_t a0 = 0U; + uint32_t b = 1U; + uint32_t c = 2U; + uint32_t d1 = 3U; + uint32_t *wv_a = wv + a0 * 4U; + uint32_t *wv_b8 = wv + b * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a; + uint32_t x1 = wv_a[i] + wv_b8[i]; + os[i] = x1;); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a; + uint32_t x1 = wv_a[i] + z[i]; + os[i] = x1;); + uint32_t *wv_a8 = wv + d1 * 4U; + uint32_t *wv_b9 = wv + a0 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a8; + uint32_t x1 = wv_a8[i] ^ wv_b9[i]; + os[i] = x1;); + uint32_t *r16 = wv_a8; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = r16; + uint32_t x1 = r16[i]; + uint32_t x13 = x1 >> 16U | x1 << 16U; + os[i] = x13;); + uint32_t *wv_a9 = wv + c * 4U; + uint32_t *wv_b10 = wv + d1 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a9; + uint32_t x1 = wv_a9[i] + wv_b10[i]; + os[i] = x1;); + uint32_t *wv_a10 = wv + b * 4U; + uint32_t *wv_b11 = wv + c * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a10; + uint32_t x1 = wv_a10[i] ^ wv_b11[i]; + os[i] = x1;); + uint32_t *r17 = wv_a10; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = r17; + uint32_t x1 = r17[i]; + uint32_t x13 = x1 >> 12U | x1 << 20U; + os[i] = x13;); + uint32_t *wv_a11 = wv + a0 * 4U; + uint32_t *wv_b12 = wv + b * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a11; + uint32_t x1 = wv_a11[i] + wv_b12[i]; + os[i] = x1;); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a11; + uint32_t x1 = wv_a11[i] + w[i]; + os[i] = x1;); + uint32_t *wv_a12 = wv + d1 * 4U; + uint32_t *wv_b13 = wv + a0 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a12; + uint32_t x1 = wv_a12[i] ^ wv_b13[i]; + os[i] = x1;); + uint32_t *r18 = wv_a12; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = r18; + uint32_t x1 = r18[i]; + uint32_t x13 = x1 >> 8U | x1 << 24U; + os[i] = x13;); + uint32_t *wv_a13 = wv + c * 4U; + uint32_t *wv_b14 = wv + d1 * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a13; + uint32_t x1 = wv_a13[i] + wv_b14[i]; + os[i] = x1;); + uint32_t *wv_a14 = wv + b * 4U; + uint32_t *wv_b = wv + c * 4U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = wv_a14; + uint32_t x1 = wv_a14[i] ^ wv_b[i]; + os[i] = x1;); + uint32_t *r19 = wv_a14; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = r19; + uint32_t x1 = r19[i]; + uint32_t x13 = x1 >> 7U | x1 << 25U; + os[i] = x13;); + uint32_t *r113 = wv + 4U; + uint32_t *r2 = wv + 8U; + uint32_t *r3 = wv + 12U; + uint32_t *r11 = r113; + uint32_t x03 = r11[3U]; + uint32_t x13 = r11[0U]; + uint32_t x23 = r11[1U]; + uint32_t x33 = r11[2U]; + r11[0U] = x03; + r11[1U] = x13; + r11[2U] = x23; + r11[3U] = x33; + uint32_t *r114 = r2; + uint32_t x04 = r114[2U]; + uint32_t x14 = r114[3U]; + uint32_t x24 = r114[0U]; + uint32_t x34 = r114[1U]; + r114[0U] = x04; + r114[1U] = x14; + r114[2U] = x24; + r114[3U] = x34; + uint32_t *r115 = r3; + uint32_t x0 = r115[1U]; + uint32_t x1 = r115[2U]; + uint32_t x2 = r115[3U]; + uint32_t x3 = r115[0U]; + r115[0U] = x0; + r115[1U] = x1; + r115[2U] = x2; + r115[3U] = x3;); + uint32_t *s0 = hash; + uint32_t *s1 = hash + 4U; + uint32_t *r0 = wv; + uint32_t *r1 = wv + 4U; + uint32_t *r2 = wv + 8U; + uint32_t *r3 = wv + 12U; + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = s0; + uint32_t x = s0[i] ^ r0[i]; + os[i] = x;); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = s0; + uint32_t x = s0[i] ^ r2[i]; + os[i] = x;); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = s1; + uint32_t x = s1[i] ^ r1[i]; + os[i] = x;); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = s1; + uint32_t x = s1[i] ^ r3[i]; + os[i] = x;); +} + +void Hacl_Hash_Blake2s_init(uint32_t *hash, uint32_t kk, uint32_t nn) +{ + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = 32U, .key_length = 0U, .fanout = 1U, .depth = 1U, .leaf_length = 0U, + .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal + }; + uint32_t tmp[8U] = { 0U }; + uint32_t *r0 = hash; + uint32_t *r1 = hash + 4U; + uint32_t *r2 = hash + 8U; + uint32_t *r3 = hash + 12U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)(uint8_t)nn + ^ ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + tmp[1U] = p.leaf_length; + tmp[2U] = (uint32_t)p.node_offset; + tmp[3U] = + (uint32_t)(p.node_offset >> 32U) + ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; +} + +static void init_with_params(uint32_t *hash, Hacl_Hash_Blake2b_blake2_params p) +{ + uint32_t tmp[8U] = { 0U }; + uint32_t *r0 = hash; + uint32_t *r1 = hash + 4U; + uint32_t *r2 = hash + 8U; + uint32_t *r3 = hash + 12U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)p.digest_length + ^ ((uint32_t)p.key_length << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + tmp[1U] = p.leaf_length; + tmp[2U] = (uint32_t)p.node_offset; + tmp[3U] = + (uint32_t)(p.node_offset >> 32U) + ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; +} + +static void update_key(uint32_t *wv, uint32_t *hash, uint32_t kk, uint8_t *k, uint32_t ll) +{ + uint64_t lb = (uint64_t)64U; + uint8_t b[64U] = { 0U }; + memcpy(b, k, kk * sizeof (uint8_t)); + if (ll == 0U) + { + update_block(wv, hash, true, false, lb, b); + } + else + { + update_block(wv, hash, false, false, lb, b); + } + Lib_Memzero0_memzero(b, 64U, uint8_t, void *); +} + +void +Hacl_Hash_Blake2s_update_multi( + uint32_t len, + uint32_t *wv, + uint32_t *hash, + uint64_t prev, + uint8_t *blocks, + uint32_t nb +) +{ + KRML_MAYBE_UNUSED_VAR(len); + for (uint32_t i = 0U; i < nb; i++) + { + uint64_t totlen = prev + (uint64_t)((i + 1U) * 64U); + uint8_t *b = blocks + i * 64U; + update_block(wv, hash, false, false, totlen, b); + } +} + +void +Hacl_Hash_Blake2s_update_last( + uint32_t len, + uint32_t *wv, + uint32_t *hash, + bool last_node, + uint64_t prev, + uint32_t rem, + uint8_t *d +) +{ + uint8_t b[64U] = { 0U }; + uint8_t *last = d + len - rem; + memcpy(b, last, rem * sizeof (uint8_t)); + uint64_t totlen = prev + (uint64_t)len; + update_block(wv, hash, true, last_node, totlen, b); + Lib_Memzero0_memzero(b, 64U, uint8_t, void *); +} + +static void +update_blocks(uint32_t len, uint32_t *wv, uint32_t *hash, uint64_t prev, uint8_t *blocks) +{ + uint32_t nb0 = len / 64U; + uint32_t rem0 = len % 64U; + uint32_t nb; + if (rem0 == 0U && nb0 > 0U) + { + nb = nb0 - 1U; + } + else + { + nb = nb0; + } + uint32_t rem; + if (rem0 == 0U && nb0 > 0U) + { + rem = 64U; + } + else + { + rem = rem0; + } + Hacl_Hash_Blake2s_update_multi(len, wv, hash, prev, blocks, nb); + Hacl_Hash_Blake2s_update_last(len, wv, hash, false, prev, rem, blocks); +} + +static inline void +update(uint32_t *wv, uint32_t *hash, uint32_t kk, uint8_t *k, uint32_t ll, uint8_t *d) +{ + uint64_t lb = (uint64_t)64U; + if (kk > 0U) + { + update_key(wv, hash, kk, k, ll); + if (!(ll == 0U)) + { + update_blocks(ll, wv, hash, lb, d); + return; + } + return; + } + update_blocks(ll, wv, hash, (uint64_t)0U, d); +} + +void Hacl_Hash_Blake2s_finish(uint32_t nn, uint8_t *output, uint32_t *hash) +{ + uint8_t b[32U] = { 0U }; + uint8_t *first = b; + uint8_t *second = b + 16U; + uint32_t *row0 = hash; + uint32_t *row1 = hash + 4U; + KRML_MAYBE_FOR4(i, 0U, 4U, 1U, store32_le(first + i * 4U, row0[i]);); + KRML_MAYBE_FOR4(i, 0U, 4U, 1U, store32_le(second + i * 4U, row1[i]);); + uint8_t *final = b; + memcpy(output, final, nn * sizeof (uint8_t)); + Lib_Memzero0_memzero(b, 32U, uint8_t, void *); +} + +static Hacl_Hash_Blake2s_state_t +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) +{ + uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); + uint32_t *wv = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); + uint32_t *b = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); + Hacl_Hash_Blake2s_block_state_t + block_state = + { + .fst = kk.key_length, + .snd = kk.digest_length, + .thd = kk.last_node, + .f3 = { .fst = wv, .snd = b } + }; + uint8_t kk10 = kk.key_length; + uint32_t ite; + if (kk10 != 0U) + { + ite = 64U; + } + else + { + ite = 0U; + } + Hacl_Hash_Blake2s_state_t + s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + Hacl_Hash_Blake2s_state_t + *p = (Hacl_Hash_Blake2s_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2s_state_t)); + p[0U] = s; + Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; + uint8_t kk1 = p1->key_length; + uint8_t nn = p1->digest_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t kk2 = (uint32_t)i.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (64U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; + init_with_params(block_state.f3.snd, pv); + return p; +} + +/** + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 32 for S, 64 for B. +- The digest_length must not exceed 32 for S, 64 for B. + +*/ +Hacl_Hash_Blake2s_state_t +*Hacl_Hash_Blake2s_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +) +{ + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + Hacl_Hash_Blake2b_index + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); +} + +/** + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 32 for S, 64 for B. + +*/ +Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t kk) +{ + uint8_t nn = 32U; + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = i.digest_length, .key_length = i.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params p0 = p; + Hacl_Hash_Blake2s_state_t *s = Hacl_Hash_Blake2s_malloc_with_params_and_key(&p0, false, k); + return s; +} + +/** + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. +*/ +Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void) +{ + return Hacl_Hash_Blake2s_malloc_with_key(NULL, 0U); +} + +static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2s_state_t *s) +{ + Hacl_Hash_Blake2s_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk1 = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); +} + +static void reset_raw(Hacl_Hash_Blake2s_state_t *state, Hacl_Hash_Blake2b_params_and_key key) +{ + Hacl_Hash_Blake2s_state_t scrut = *state; + uint8_t *buf = scrut.buf; + Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; + bool last_node0 = block_state.thd; + uint8_t nn0 = block_state.snd; + uint8_t kk10 = block_state.fst; + Hacl_Hash_Blake2b_index + i = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; + KRML_MAYBE_UNUSED_VAR(i); + Hacl_Hash_Blake2b_blake2_params *p = key.fst; + uint8_t kk1 = p->key_length; + uint8_t nn = p->digest_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t kk2 = (uint32_t)i1.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (64U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + init_with_params(block_state.f3.snd, pv); + uint8_t kk11 = i.key_length; + uint32_t ite; + if (kk11 != 0U) + { + ite = 64U; + } + else + { + ite = 0U; + } + Hacl_Hash_Blake2s_state_t + tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp; +} + +/** + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. +*/ +void +Hacl_Hash_Blake2s_reset_with_key_and_params( + Hacl_Hash_Blake2s_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +) +{ + index_of_state(s); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); +} + +/** + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k) +{ + Hacl_Hash_Blake2b_index idx = index_of_state(s); + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = idx.digest_length, .key_length = idx.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params p0 = p; + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); +} + +/** + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s) +{ + Hacl_Hash_Blake2s_reset_with_key(s, NULL); +} + +/** + Update function; 0 = success, 1 = max length exceeded +*/ +Hacl_Streaming_Types_error_code +Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint32_t chunk_len) +{ + Hacl_Hash_Blake2s_state_t s = *state; + uint64_t total_len = s.total_len; + if ((uint64_t)chunk_len > 0xffffffffffffffffULL - total_len) + { + return Hacl_Streaming_Types_MaximumLengthExceeded; + } + uint32_t sz; + if (total_len % (uint64_t)64U == 0ULL && total_len > 0ULL) + { + sz = 64U; + } + else + { + sz = (uint32_t)(total_len % (uint64_t)64U); + } + if (chunk_len <= 64U - sz) + { + Hacl_Hash_Blake2s_state_t s1 = *state; + Hacl_Hash_Blake2s_block_state_t block_state1 = s1.block_state; + uint8_t *buf = s1.buf; + uint64_t total_len1 = s1.total_len; + uint32_t sz1; + if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) + { + sz1 = 64U; + } + else + { + sz1 = (uint32_t)(total_len1 % (uint64_t)64U); + } + uint8_t *buf2 = buf + sz1; + memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); + uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; + *state + = + ( + (Hacl_Hash_Blake2s_state_t){ + .block_state = block_state1, + .buf = buf, + .total_len = total_len2 + } + ); + } + else if (sz == 0U) + { + Hacl_Hash_Blake2s_state_t s1 = *state; + Hacl_Hash_Blake2s_block_state_t block_state1 = s1.block_state; + uint8_t *buf = s1.buf; + uint64_t total_len1 = s1.total_len; + uint32_t sz1; + if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) + { + sz1 = 64U; + } + else + { + sz1 = (uint32_t)(total_len1 % (uint64_t)64U); + } + if (!(sz1 == 0U)) + { + uint64_t prevlen = total_len1 - (uint64_t)sz1; + K____uint32_t___uint32_t_ acc = block_state1.f3; + uint32_t *wv = acc.fst; + uint32_t *hash = acc.snd; + uint32_t nb = 1U; + Hacl_Hash_Blake2s_update_multi(64U, wv, hash, prevlen, buf, nb); + } + uint32_t ite; + if ((uint64_t)chunk_len % (uint64_t)64U == 0ULL && (uint64_t)chunk_len > 0ULL) + { + ite = 64U; + } + else + { + ite = (uint32_t)((uint64_t)chunk_len % (uint64_t)64U); + } + uint32_t n_blocks = (chunk_len - ite) / 64U; + uint32_t data1_len = n_blocks * 64U; + uint32_t data2_len = chunk_len - data1_len; + uint8_t *data1 = chunk; + uint8_t *data2 = chunk + data1_len; + K____uint32_t___uint32_t_ acc = block_state1.f3; + uint32_t *wv = acc.fst; + uint32_t *hash = acc.snd; + uint32_t nb = data1_len / 64U; + Hacl_Hash_Blake2s_update_multi(data1_len, wv, hash, total_len1, data1, nb); + uint8_t *dst = buf; + memcpy(dst, data2, data2_len * sizeof (uint8_t)); + *state + = + ( + (Hacl_Hash_Blake2s_state_t){ + .block_state = block_state1, + .buf = buf, + .total_len = total_len1 + (uint64_t)chunk_len + } + ); + } + else + { + uint32_t diff = 64U - sz; + uint8_t *chunk1 = chunk; + uint8_t *chunk2 = chunk + diff; + Hacl_Hash_Blake2s_state_t s1 = *state; + Hacl_Hash_Blake2s_block_state_t block_state10 = s1.block_state; + uint8_t *buf0 = s1.buf; + uint64_t total_len10 = s1.total_len; + uint32_t sz10; + if (total_len10 % (uint64_t)64U == 0ULL && total_len10 > 0ULL) + { + sz10 = 64U; + } + else + { + sz10 = (uint32_t)(total_len10 % (uint64_t)64U); + } + uint8_t *buf2 = buf0 + sz10; + memcpy(buf2, chunk1, diff * sizeof (uint8_t)); + uint64_t total_len2 = total_len10 + (uint64_t)diff; + *state + = + ( + (Hacl_Hash_Blake2s_state_t){ + .block_state = block_state10, + .buf = buf0, + .total_len = total_len2 + } + ); + Hacl_Hash_Blake2s_state_t s10 = *state; + Hacl_Hash_Blake2s_block_state_t block_state1 = s10.block_state; + uint8_t *buf = s10.buf; + uint64_t total_len1 = s10.total_len; + uint32_t sz1; + if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) + { + sz1 = 64U; + } + else + { + sz1 = (uint32_t)(total_len1 % (uint64_t)64U); + } + if (!(sz1 == 0U)) + { + uint64_t prevlen = total_len1 - (uint64_t)sz1; + K____uint32_t___uint32_t_ acc = block_state1.f3; + uint32_t *wv = acc.fst; + uint32_t *hash = acc.snd; + uint32_t nb = 1U; + Hacl_Hash_Blake2s_update_multi(64U, wv, hash, prevlen, buf, nb); + } + uint32_t ite; + if + ((uint64_t)(chunk_len - diff) % (uint64_t)64U == 0ULL && (uint64_t)(chunk_len - diff) > 0ULL) + { + ite = 64U; + } + else + { + ite = (uint32_t)((uint64_t)(chunk_len - diff) % (uint64_t)64U); + } + uint32_t n_blocks = (chunk_len - diff - ite) / 64U; + uint32_t data1_len = n_blocks * 64U; + uint32_t data2_len = chunk_len - diff - data1_len; + uint8_t *data1 = chunk2; + uint8_t *data2 = chunk2 + data1_len; + K____uint32_t___uint32_t_ acc = block_state1.f3; + uint32_t *wv = acc.fst; + uint32_t *hash = acc.snd; + uint32_t nb = data1_len / 64U; + Hacl_Hash_Blake2s_update_multi(data1_len, wv, hash, total_len1, data1, nb); + uint8_t *dst = buf; + memcpy(dst, data2, data2_len * sizeof (uint8_t)); + *state + = + ( + (Hacl_Hash_Blake2s_state_t){ + .block_state = block_state1, + .buf = buf, + .total_len = total_len1 + (uint64_t)(chunk_len - diff) + } + ); + } + return Hacl_Streaming_Types_Success; +} + +/** + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2S_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. +*/ +uint8_t Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *s, uint8_t *dst) +{ + Hacl_Hash_Blake2s_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2s_state_t scrut = *s; + Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; + uint8_t *buf_ = scrut.buf; + uint64_t total_len = scrut.total_len; + uint32_t r; + if (total_len % (uint64_t)64U == 0ULL && total_len > 0ULL) + { + r = 64U; + } + else + { + r = (uint32_t)(total_len % (uint64_t)64U); + } + uint8_t *buf_1 = buf_; + uint32_t wv0[16U] = { 0U }; + uint32_t b[16U] = { 0U }; + Hacl_Hash_Blake2s_block_state_t + tmp_block_state = + { + .fst = i1.key_length, + .snd = i1.digest_length, + .thd = i1.last_node, + .f3 = { .fst = wv0, .snd = b } + }; + uint32_t *src_b = block_state.f3.snd; + uint32_t *dst_b = tmp_block_state.f3.snd; + memcpy(dst_b, src_b, 16U * sizeof (uint32_t)); + uint64_t prev_len = total_len - (uint64_t)r; + uint32_t ite; + if (r % 64U == 0U && r > 0U) + { + ite = 64U; + } + else + { + ite = r % 64U; + } + uint8_t *buf_last = buf_1 + r - ite; + uint8_t *buf_multi = buf_1; + K____uint32_t___uint32_t_ acc0 = tmp_block_state.f3; + uint32_t *wv1 = acc0.fst; + uint32_t *hash0 = acc0.snd; + uint32_t nb = 0U; + Hacl_Hash_Blake2s_update_multi(0U, wv1, hash0, prev_len, buf_multi, nb); + uint64_t prev_len_last = total_len - (uint64_t)r; + K____uint32_t___uint32_t_ acc = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; + uint32_t *wv = acc.fst; + uint32_t *hash = acc.snd; + Hacl_Hash_Blake2s_update_last(r, wv, hash, last_node1, prev_len_last, r, buf_last); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2s_finish((uint32_t)nn1, dst, tmp_block_state.f3.snd); + Hacl_Hash_Blake2s_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_info(Hacl_Hash_Blake2s_state_t *s) +{ + Hacl_Hash_Blake2s_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); +} + +/** + Free state function when there is no key +*/ +void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t *state) +{ + Hacl_Hash_Blake2s_state_t scrut = *state; + uint8_t *buf = scrut.buf; + Hacl_Hash_Blake2s_block_state_t block_state = scrut.block_state; + uint32_t *b = block_state.f3.snd; + uint32_t *wv = block_state.f3.fst; + KRML_HOST_FREE(wv); + KRML_HOST_FREE(b); + KRML_HOST_FREE(buf); + KRML_HOST_FREE(state); +} + +/** + Copying. This preserves all parameters. +*/ +Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_copy(Hacl_Hash_Blake2s_state_t *state) +{ + Hacl_Hash_Blake2s_state_t scrut = *state; + Hacl_Hash_Blake2s_block_state_t block_state0 = scrut.block_state; + uint8_t *buf0 = scrut.buf; + uint64_t total_len0 = scrut.total_len; + bool last_node = block_state0.thd; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); + memcpy(buf, buf0, 64U * sizeof (uint8_t)); + uint32_t *wv = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); + uint32_t *b = (uint32_t *)KRML_HOST_CALLOC(16U, sizeof (uint32_t)); + Hacl_Hash_Blake2s_block_state_t + block_state = + { + .fst = i.key_length, + .snd = i.digest_length, + .thd = i.last_node, + .f3 = { .fst = wv, .snd = b } + }; + uint32_t *src_b = block_state0.f3.snd; + uint32_t *dst_b = block_state.f3.snd; + memcpy(dst_b, src_b, 16U * sizeof (uint32_t)); + Hacl_Hash_Blake2s_state_t + s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; + Hacl_Hash_Blake2s_state_t + *p = (Hacl_Hash_Blake2s_state_t *)KRML_HOST_MALLOC(sizeof (Hacl_Hash_Blake2s_state_t)); + p[0U] = s; + return p; +} + +/** +Write the BLAKE2s digest of message `input` using key `key` into `output`. + +@param output Pointer to `output_len` bytes of memory where the digest is written to. +@param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 64. +@param input Pointer to `input_len` bytes of memory where the input message is read from. +@param input_len Length of the input message. +@param key Pointer to `key_len` bytes of memory where the key is read from. +@param key_len Length of the key. Can be 0. +*/ +void +Hacl_Hash_Blake2s_hash_with_key( + uint8_t *output, + uint32_t output_len, + uint8_t *input, + uint32_t input_len, + uint8_t *key, + uint32_t key_len +) +{ + uint32_t b[16U] = { 0U }; + uint32_t b1[16U] = { 0U }; + Hacl_Hash_Blake2s_init(b, key_len, output_len); + update(b1, b, key_len, key, input_len, input); + Hacl_Hash_Blake2s_finish(output_len, output, b); + Lib_Memzero0_memzero(b1, 16U, uint32_t, void *); + Lib_Memzero0_memzero(b, 16U, uint32_t, void *); +} + +/** +Write the BLAKE2s digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ +void +Hacl_Hash_Blake2s_hash_with_key_and_params( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +) +{ + uint32_t b[16U] = { 0U }; + uint32_t b1[16U] = { 0U }; + uint32_t tmp[8U] = { 0U }; + uint32_t *r0 = b; + uint32_t *r1 = b + 4U; + uint32_t *r2 = b + 8U; + uint32_t *r3 = b + 12U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = params.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = params.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)params.digest_length + ^ + ((uint32_t)params.key_length + << 8U + ^ ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); + tmp[1U] = params.leaf_length; + tmp[2U] = (uint32_t)params.node_offset; + tmp[3U] = + (uint32_t)(params.node_offset >> 32U) + ^ ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; + update(b1, b, (uint32_t)params.key_length, key, input_len, input); + Hacl_Hash_Blake2s_finish((uint32_t)params.digest_length, output, b); + Lib_Memzero0_memzero(b1, 16U, uint32_t, void *); + Lib_Memzero0_memzero(b, 16U, uint32_t, void *); +} + diff --git a/Modules/_hacl/Hacl_Hash_Blake2s.h b/Modules/_hacl/Hacl_Hash_Blake2s.h new file mode 100644 index 00000000000..5c01da14401 --- /dev/null +++ b/Modules/_hacl/Hacl_Hash_Blake2s.h @@ -0,0 +1,222 @@ +/* MIT License + * + * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation + * Copyright (c) 2022-2023 HACL* Contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef __Hacl_Hash_Blake2s_H +#define __Hacl_Hash_Blake2s_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include +#include "python_hacl_namespaces.h" +#include "krml/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Streaming_Types.h" +#include "Hacl_Hash_Blake2b.h" + +#define HACL_HASH_BLAKE2S_BLOCK_BYTES (64U) + +#define HACL_HASH_BLAKE2S_OUT_BYTES (32U) + +#define HACL_HASH_BLAKE2S_KEY_BYTES (32U) + +#define HACL_HASH_BLAKE2S_SALT_BYTES (8U) + +#define HACL_HASH_BLAKE2S_PERSONAL_BYTES (8U) + +typedef struct K____uint32_t___uint32_t__s +{ + uint32_t *fst; + uint32_t *snd; +} +K____uint32_t___uint32_t_; + +typedef struct Hacl_Hash_Blake2s_block_state_t_s +{ + uint8_t fst; + uint8_t snd; + bool thd; + K____uint32_t___uint32_t_ f3; +} +Hacl_Hash_Blake2s_block_state_t; + +typedef struct Hacl_Hash_Blake2s_state_t_s +{ + Hacl_Hash_Blake2s_block_state_t block_state; + uint8_t *buf; + uint64_t total_len; +} +Hacl_Hash_Blake2s_state_t; + +/** + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 32 for S, 64 for B. +- The digest_length must not exceed 32 for S, 64 for B. + +*/ +Hacl_Hash_Blake2s_state_t +*Hacl_Hash_Blake2s_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +); + +/** + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 32 for S, 64 for B. + +*/ +Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc_with_key(uint8_t *k, uint8_t kk); + +/** + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. +*/ +Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_malloc(void); + +/** + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. +*/ +void +Hacl_Hash_Blake2s_reset_with_key_and_params( + Hacl_Hash_Blake2s_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +); + +/** + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2s_reset_with_key(Hacl_Hash_Blake2s_state_t *s, uint8_t *k); + +/** + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2s_reset(Hacl_Hash_Blake2s_state_t *s); + +/** + Update function; 0 = success, 1 = max length exceeded +*/ +Hacl_Streaming_Types_error_code +Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint32_t chunk_len); + +/** + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 32 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2S_32_OUT_BYTES, then use the return value +to see how many bytes were actually written. +*/ +uint8_t Hacl_Hash_Blake2s_digest(Hacl_Hash_Blake2s_state_t *s, uint8_t *dst); + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_info(Hacl_Hash_Blake2s_state_t *s); + +/** + Free state function when there is no key +*/ +void Hacl_Hash_Blake2s_free(Hacl_Hash_Blake2s_state_t *state); + +/** + Copying. This preserves all parameters. +*/ +Hacl_Hash_Blake2s_state_t *Hacl_Hash_Blake2s_copy(Hacl_Hash_Blake2s_state_t *state); + +/** +Write the BLAKE2s digest of message `input` using key `key` into `output`. + +@param output Pointer to `output_len` bytes of memory where the digest is written to. +@param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 64. +@param input Pointer to `input_len` bytes of memory where the input message is read from. +@param input_len Length of the input message. +@param key Pointer to `key_len` bytes of memory where the key is read from. +@param key_len Length of the key. Can be 0. +*/ +void +Hacl_Hash_Blake2s_hash_with_key( + uint8_t *output, + uint32_t output_len, + uint8_t *input, + uint32_t input_len, + uint8_t *key, + uint32_t key_len +); + +/** +Write the BLAKE2s digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ +void +Hacl_Hash_Blake2s_hash_with_key_and_params( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Hash_Blake2s_H_DEFINED +#endif diff --git a/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c new file mode 100644 index 00000000000..a85b18a4d29 --- /dev/null +++ b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c @@ -0,0 +1,1294 @@ +/* MIT License + * + * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation + * Copyright (c) 2022-2023 HACL* Contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include "internal/Hacl_Hash_Blake2s_Simd128.h" + +#include "internal/Hacl_Impl_Blake2_Constants.h" +#include "internal/Hacl_Hash_Blake2b.h" +#include "lib_memzero0.h" + +static inline void +update_block( + Lib_IntVector_Intrinsics_vec128 *wv, + Lib_IntVector_Intrinsics_vec128 *hash, + bool flag, + bool last_node, + uint64_t totlen, + uint8_t *d +) +{ + uint32_t m_w[16U] = { 0U }; + KRML_MAYBE_FOR16(i, + 0U, + 16U, + 1U, + uint32_t *os = m_w; + uint8_t *bj = d + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_zero; + uint32_t wv_14; + if (flag) + { + wv_14 = 0xFFFFFFFFU; + } + else + { + wv_14 = 0U; + } + uint32_t wv_15; + if (last_node) + { + wv_15 = 0xFFFFFFFFU; + } + else + { + wv_15 = 0U; + } + mask = + Lib_IntVector_Intrinsics_vec128_load32s((uint32_t)totlen, + (uint32_t)(totlen >> 32U), + wv_14, + wv_15); + memcpy(wv, hash, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); + Lib_IntVector_Intrinsics_vec128 *wv3 = wv + 3U; + wv3[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv3[0U], mask); + KRML_MAYBE_FOR10(i, + 0U, + 10U, + 1U, + uint32_t start_idx = i % 10U * 16U; + KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 m_st[4U] KRML_POST_ALIGN(16) = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = m_st; + Lib_IntVector_Intrinsics_vec128 *r1 = m_st + 1U; + Lib_IntVector_Intrinsics_vec128 *r20 = m_st + 2U; + Lib_IntVector_Intrinsics_vec128 *r30 = m_st + 3U; + uint32_t s0 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 0U]; + uint32_t s1 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 1U]; + uint32_t s2 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 2U]; + uint32_t s3 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 3U]; + uint32_t s4 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 4U]; + uint32_t s5 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 5U]; + uint32_t s6 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 6U]; + uint32_t s7 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 7U]; + uint32_t s8 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 8U]; + uint32_t s9 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 9U]; + uint32_t s10 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 10U]; + uint32_t s11 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 11U]; + uint32_t s12 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 12U]; + uint32_t s13 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 13U]; + uint32_t s14 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 14U]; + uint32_t s15 = Hacl_Hash_Blake2b_sigmaTable[start_idx + 15U]; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(m_w[s0], m_w[s2], m_w[s4], m_w[s6]); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(m_w[s1], m_w[s3], m_w[s5], m_w[s7]); + r20[0U] = Lib_IntVector_Intrinsics_vec128_load32s(m_w[s8], m_w[s10], m_w[s12], m_w[s14]); + r30[0U] = Lib_IntVector_Intrinsics_vec128_load32s(m_w[s9], m_w[s11], m_w[s13], m_w[s15]); + Lib_IntVector_Intrinsics_vec128 *x = m_st; + Lib_IntVector_Intrinsics_vec128 *y = m_st + 1U; + Lib_IntVector_Intrinsics_vec128 *z = m_st + 2U; + Lib_IntVector_Intrinsics_vec128 *w = m_st + 3U; + uint32_t a = 0U; + uint32_t b0 = 1U; + uint32_t c0 = 2U; + uint32_t d10 = 3U; + Lib_IntVector_Intrinsics_vec128 *wv_a0 = wv + a * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b0 = wv + b0 * 1U; + wv_a0[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a0[0U], wv_b0[0U]); + wv_a0[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a0[0U], x[0U]); + Lib_IntVector_Intrinsics_vec128 *wv_a1 = wv + d10 * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b1 = wv + a * 1U; + wv_a1[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a1[0U], wv_b1[0U]); + wv_a1[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a1[0U], 16U); + Lib_IntVector_Intrinsics_vec128 *wv_a2 = wv + c0 * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b2 = wv + d10 * 1U; + wv_a2[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a2[0U], wv_b2[0U]); + Lib_IntVector_Intrinsics_vec128 *wv_a3 = wv + b0 * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b3 = wv + c0 * 1U; + wv_a3[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a3[0U], wv_b3[0U]); + wv_a3[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a3[0U], 12U); + Lib_IntVector_Intrinsics_vec128 *wv_a4 = wv + a * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b4 = wv + b0 * 1U; + wv_a4[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a4[0U], wv_b4[0U]); + wv_a4[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a4[0U], y[0U]); + Lib_IntVector_Intrinsics_vec128 *wv_a5 = wv + d10 * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b5 = wv + a * 1U; + wv_a5[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a5[0U], wv_b5[0U]); + wv_a5[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a5[0U], 8U); + Lib_IntVector_Intrinsics_vec128 *wv_a6 = wv + c0 * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b6 = wv + d10 * 1U; + wv_a6[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a6[0U], wv_b6[0U]); + Lib_IntVector_Intrinsics_vec128 *wv_a7 = wv + b0 * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b7 = wv + c0 * 1U; + wv_a7[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a7[0U], wv_b7[0U]); + wv_a7[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a7[0U], 7U); + Lib_IntVector_Intrinsics_vec128 *r10 = wv + 1U; + Lib_IntVector_Intrinsics_vec128 *r21 = wv + 2U; + Lib_IntVector_Intrinsics_vec128 *r31 = wv + 3U; + Lib_IntVector_Intrinsics_vec128 v00 = r10[0U]; + Lib_IntVector_Intrinsics_vec128 + v1 = Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(v00, 1U); + r10[0U] = v1; + Lib_IntVector_Intrinsics_vec128 v01 = r21[0U]; + Lib_IntVector_Intrinsics_vec128 + v10 = Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(v01, 2U); + r21[0U] = v10; + Lib_IntVector_Intrinsics_vec128 v02 = r31[0U]; + Lib_IntVector_Intrinsics_vec128 + v11 = Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(v02, 3U); + r31[0U] = v11; + uint32_t a0 = 0U; + uint32_t b = 1U; + uint32_t c = 2U; + uint32_t d1 = 3U; + Lib_IntVector_Intrinsics_vec128 *wv_a = wv + a0 * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b8 = wv + b * 1U; + wv_a[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a[0U], wv_b8[0U]); + wv_a[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a[0U], z[0U]); + Lib_IntVector_Intrinsics_vec128 *wv_a8 = wv + d1 * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b9 = wv + a0 * 1U; + wv_a8[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a8[0U], wv_b9[0U]); + wv_a8[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a8[0U], 16U); + Lib_IntVector_Intrinsics_vec128 *wv_a9 = wv + c * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b10 = wv + d1 * 1U; + wv_a9[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a9[0U], wv_b10[0U]); + Lib_IntVector_Intrinsics_vec128 *wv_a10 = wv + b * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b11 = wv + c * 1U; + wv_a10[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a10[0U], wv_b11[0U]); + wv_a10[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a10[0U], 12U); + Lib_IntVector_Intrinsics_vec128 *wv_a11 = wv + a0 * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b12 = wv + b * 1U; + wv_a11[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a11[0U], wv_b12[0U]); + wv_a11[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a11[0U], w[0U]); + Lib_IntVector_Intrinsics_vec128 *wv_a12 = wv + d1 * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b13 = wv + a0 * 1U; + wv_a12[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a12[0U], wv_b13[0U]); + wv_a12[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a12[0U], 8U); + Lib_IntVector_Intrinsics_vec128 *wv_a13 = wv + c * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b14 = wv + d1 * 1U; + wv_a13[0U] = Lib_IntVector_Intrinsics_vec128_add32(wv_a13[0U], wv_b14[0U]); + Lib_IntVector_Intrinsics_vec128 *wv_a14 = wv + b * 1U; + Lib_IntVector_Intrinsics_vec128 *wv_b = wv + c * 1U; + wv_a14[0U] = Lib_IntVector_Intrinsics_vec128_xor(wv_a14[0U], wv_b[0U]); + wv_a14[0U] = Lib_IntVector_Intrinsics_vec128_rotate_right32(wv_a14[0U], 7U); + Lib_IntVector_Intrinsics_vec128 *r11 = wv + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = wv + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = wv + 3U; + Lib_IntVector_Intrinsics_vec128 v0 = r11[0U]; + Lib_IntVector_Intrinsics_vec128 + v12 = Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(v0, 3U); + r11[0U] = v12; + Lib_IntVector_Intrinsics_vec128 v03 = r2[0U]; + Lib_IntVector_Intrinsics_vec128 + v13 = Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(v03, 2U); + r2[0U] = v13; + Lib_IntVector_Intrinsics_vec128 v04 = r3[0U]; + Lib_IntVector_Intrinsics_vec128 + v14 = Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(v04, 1U); + r3[0U] = v14;); + Lib_IntVector_Intrinsics_vec128 *s0 = hash; + Lib_IntVector_Intrinsics_vec128 *s1 = hash + 1U; + Lib_IntVector_Intrinsics_vec128 *r0 = wv; + Lib_IntVector_Intrinsics_vec128 *r1 = wv + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = wv + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = wv + 3U; + s0[0U] = Lib_IntVector_Intrinsics_vec128_xor(s0[0U], r0[0U]); + s0[0U] = Lib_IntVector_Intrinsics_vec128_xor(s0[0U], r2[0U]); + s1[0U] = Lib_IntVector_Intrinsics_vec128_xor(s1[0U], r1[0U]); + s1[0U] = Lib_IntVector_Intrinsics_vec128_xor(s1[0U], r3[0U]); +} + +void +Hacl_Hash_Blake2s_Simd128_init(Lib_IntVector_Intrinsics_vec128 *hash, uint32_t kk, uint32_t nn) +{ + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = 32U, .key_length = 0U, .fanout = 1U, .depth = 1U, .leaf_length = 0U, + .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, .personal = personal + }; + uint32_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = hash; + Lib_IntVector_Intrinsics_vec128 *r1 = hash + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = hash + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = hash + 3U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)(uint8_t)nn + ^ ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + tmp[1U] = p.leaf_length; + tmp[2U] = (uint32_t)p.node_offset; + tmp[3U] = + (uint32_t)(p.node_offset >> 32U) + ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); +} + +static void +init_with_params(Lib_IntVector_Intrinsics_vec128 *hash, Hacl_Hash_Blake2b_blake2_params p) +{ + uint32_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = hash; + Lib_IntVector_Intrinsics_vec128 *r1 = hash + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = hash + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = hash + 3U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = p.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = p.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)p.digest_length + ^ ((uint32_t)p.key_length << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + tmp[1U] = p.leaf_length; + tmp[2U] = (uint32_t)p.node_offset; + tmp[3U] = + (uint32_t)(p.node_offset >> 32U) + ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); +} + +static void +update_key( + Lib_IntVector_Intrinsics_vec128 *wv, + Lib_IntVector_Intrinsics_vec128 *hash, + uint32_t kk, + uint8_t *k, + uint32_t ll +) +{ + uint64_t lb = (uint64_t)64U; + uint8_t b[64U] = { 0U }; + memcpy(b, k, kk * sizeof (uint8_t)); + if (ll == 0U) + { + update_block(wv, hash, true, false, lb, b); + } + else + { + update_block(wv, hash, false, false, lb, b); + } + Lib_Memzero0_memzero(b, 64U, uint8_t, void *); +} + +void +Hacl_Hash_Blake2s_Simd128_update_multi( + uint32_t len, + Lib_IntVector_Intrinsics_vec128 *wv, + Lib_IntVector_Intrinsics_vec128 *hash, + uint64_t prev, + uint8_t *blocks, + uint32_t nb +) +{ + KRML_MAYBE_UNUSED_VAR(len); + for (uint32_t i = 0U; i < nb; i++) + { + uint64_t totlen = prev + (uint64_t)((i + 1U) * 64U); + uint8_t *b = blocks + i * 64U; + update_block(wv, hash, false, false, totlen, b); + } +} + +void +Hacl_Hash_Blake2s_Simd128_update_last( + uint32_t len, + Lib_IntVector_Intrinsics_vec128 *wv, + Lib_IntVector_Intrinsics_vec128 *hash, + bool last_node, + uint64_t prev, + uint32_t rem, + uint8_t *d +) +{ + uint8_t b[64U] = { 0U }; + uint8_t *last = d + len - rem; + memcpy(b, last, rem * sizeof (uint8_t)); + uint64_t totlen = prev + (uint64_t)len; + update_block(wv, hash, true, last_node, totlen, b); + Lib_Memzero0_memzero(b, 64U, uint8_t, void *); +} + +static inline void +update_blocks( + uint32_t len, + Lib_IntVector_Intrinsics_vec128 *wv, + Lib_IntVector_Intrinsics_vec128 *hash, + uint64_t prev, + uint8_t *blocks +) +{ + uint32_t nb0 = len / 64U; + uint32_t rem0 = len % 64U; + uint32_t nb; + if (rem0 == 0U && nb0 > 0U) + { + nb = nb0 - 1U; + } + else + { + nb = nb0; + } + uint32_t rem; + if (rem0 == 0U && nb0 > 0U) + { + rem = 64U; + } + else + { + rem = rem0; + } + Hacl_Hash_Blake2s_Simd128_update_multi(len, wv, hash, prev, blocks, nb); + Hacl_Hash_Blake2s_Simd128_update_last(len, wv, hash, false, prev, rem, blocks); +} + +static inline void +update( + Lib_IntVector_Intrinsics_vec128 *wv, + Lib_IntVector_Intrinsics_vec128 *hash, + uint32_t kk, + uint8_t *k, + uint32_t ll, + uint8_t *d +) +{ + uint64_t lb = (uint64_t)64U; + if (kk > 0U) + { + update_key(wv, hash, kk, k, ll); + if (!(ll == 0U)) + { + update_blocks(ll, wv, hash, lb, d); + return; + } + return; + } + update_blocks(ll, wv, hash, (uint64_t)0U, d); +} + +void +Hacl_Hash_Blake2s_Simd128_finish( + uint32_t nn, + uint8_t *output, + Lib_IntVector_Intrinsics_vec128 *hash +) +{ + uint8_t b[32U] = { 0U }; + uint8_t *first = b; + uint8_t *second = b + 16U; + Lib_IntVector_Intrinsics_vec128 *row0 = hash; + Lib_IntVector_Intrinsics_vec128 *row1 = hash + 1U; + Lib_IntVector_Intrinsics_vec128_store32_le(first, row0[0U]); + Lib_IntVector_Intrinsics_vec128_store32_le(second, row1[0U]); + uint8_t *final = b; + memcpy(output, final, nn * sizeof (uint8_t)); + Lib_Memzero0_memzero(b, 32U, uint8_t, void *); +} + +void +Hacl_Hash_Blake2s_Simd128_store_state128s_to_state32( + uint32_t *st32, + Lib_IntVector_Intrinsics_vec128 *st +) +{ + Lib_IntVector_Intrinsics_vec128 *r0 = st; + Lib_IntVector_Intrinsics_vec128 *r1 = st + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = st + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = st + 3U; + uint32_t *b0 = st32; + uint32_t *b1 = st32 + 4U; + uint32_t *b2 = st32 + 8U; + uint32_t *b3 = st32 + 12U; + uint8_t b8[16U] = { 0U }; + Lib_IntVector_Intrinsics_vec128_store32_le(b8, r0[0U]); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = b0; + uint8_t *bj = b8 + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + uint8_t b80[16U] = { 0U }; + Lib_IntVector_Intrinsics_vec128_store32_le(b80, r1[0U]); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = b1; + uint8_t *bj = b80 + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + uint8_t b81[16U] = { 0U }; + Lib_IntVector_Intrinsics_vec128_store32_le(b81, r2[0U]); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = b2; + uint8_t *bj = b81 + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + uint8_t b82[16U] = { 0U }; + Lib_IntVector_Intrinsics_vec128_store32_le(b82, r3[0U]); + KRML_MAYBE_FOR4(i, + 0U, + 4U, + 1U, + uint32_t *os = b3; + uint8_t *bj = b82 + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); +} + +void +Hacl_Hash_Blake2s_Simd128_load_state128s_from_state32( + Lib_IntVector_Intrinsics_vec128 *st, + uint32_t *st32 +) +{ + Lib_IntVector_Intrinsics_vec128 *r0 = st; + Lib_IntVector_Intrinsics_vec128 *r1 = st + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = st + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = st + 3U; + uint32_t *b0 = st32; + uint32_t *b1 = st32 + 4U; + uint32_t *b2 = st32 + 8U; + uint32_t *b3 = st32 + 12U; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(b0[0U], b0[1U], b0[2U], b0[3U]); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(b1[0U], b1[1U], b1[2U], b1[3U]); + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(b2[0U], b2[1U], b2[2U], b2[3U]); + r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(b3[0U], b3[1U], b3[2U], b3[3U]); +} + +Lib_IntVector_Intrinsics_vec128 *Hacl_Hash_Blake2s_Simd128_malloc_with_key(void) +{ + Lib_IntVector_Intrinsics_vec128 + *buf = + (Lib_IntVector_Intrinsics_vec128 *)KRML_ALIGNED_MALLOC(16, + sizeof (Lib_IntVector_Intrinsics_vec128) * 4U); + memset(buf, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); + return buf; +} + +static Hacl_Hash_Blake2s_Simd128_state_t +*malloc_raw(Hacl_Hash_Blake2b_index kk, Hacl_Hash_Blake2b_params_and_key key) +{ + uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); + Lib_IntVector_Intrinsics_vec128 + *wv = + (Lib_IntVector_Intrinsics_vec128 *)KRML_ALIGNED_MALLOC(16, + sizeof (Lib_IntVector_Intrinsics_vec128) * 4U); + memset(wv, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); + Lib_IntVector_Intrinsics_vec128 + *b = + (Lib_IntVector_Intrinsics_vec128 *)KRML_ALIGNED_MALLOC(16, + sizeof (Lib_IntVector_Intrinsics_vec128) * 4U); + memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); + Hacl_Hash_Blake2s_Simd128_block_state_t + block_state = + { + .fst = kk.key_length, + .snd = kk.digest_length, + .thd = kk.last_node, + .f3 = { .fst = wv, .snd = b } + }; + uint8_t kk10 = kk.key_length; + uint32_t ite; + if (kk10 != 0U) + { + ite = 64U; + } + else + { + ite = 0U; + } + Hacl_Hash_Blake2s_Simd128_state_t + s = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + Hacl_Hash_Blake2s_Simd128_state_t + *p = + (Hacl_Hash_Blake2s_Simd128_state_t *)KRML_HOST_MALLOC(sizeof ( + Hacl_Hash_Blake2s_Simd128_state_t + )); + p[0U] = s; + Hacl_Hash_Blake2b_blake2_params *p1 = key.fst; + uint8_t kk1 = p1->key_length; + uint8_t nn = p1->digest_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t kk2 = (uint32_t)i.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (64U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; + init_with_params(block_state.f3.snd, pv); + return p; +} + +/** + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 128 for S, 64 for B. +- The digest_length must not exceed 128 for S, 64 for B. + +*/ +Hacl_Hash_Blake2s_Simd128_state_t +*Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +) +{ + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + Hacl_Hash_Blake2b_index + i1 = { .key_length = pv.key_length, .digest_length = pv.digest_length, .last_node = last_node }; + return malloc_raw(i1, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); +} + +/** + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 128 for S, 64 for B. + +*/ +Hacl_Hash_Blake2s_Simd128_state_t +*Hacl_Hash_Blake2s_Simd128_malloc_with_key0(uint8_t *k, uint8_t kk) +{ + uint8_t nn = 32U; + Hacl_Hash_Blake2b_index i = { .key_length = kk, .digest_length = nn, .last_node = false }; + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = i.digest_length, .key_length = i.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params p0 = p; + Hacl_Hash_Blake2s_Simd128_state_t + *s = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(&p0, false, k); + return s; +} + +/** + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. +*/ +Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void) +{ + return Hacl_Hash_Blake2s_Simd128_malloc_with_key0(NULL, 0U); +} + +static Hacl_Hash_Blake2b_index index_of_state(Hacl_Hash_Blake2s_Simd128_state_t *s) +{ + Hacl_Hash_Blake2s_Simd128_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk1 = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk1, .digest_length = nn, .last_node = last_node }); +} + +static void +reset_raw(Hacl_Hash_Blake2s_Simd128_state_t *state, Hacl_Hash_Blake2b_params_and_key key) +{ + Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; + uint8_t *buf = scrut.buf; + Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; + bool last_node0 = block_state.thd; + uint8_t nn0 = block_state.snd; + uint8_t kk10 = block_state.fst; + Hacl_Hash_Blake2b_index + i = { .key_length = kk10, .digest_length = nn0, .last_node = last_node0 }; + KRML_MAYBE_UNUSED_VAR(i); + Hacl_Hash_Blake2b_blake2_params *p = key.fst; + uint8_t kk1 = p->key_length; + uint8_t nn = p->digest_length; + bool last_node = block_state.thd; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t kk2 = (uint32_t)i1.key_length; + uint8_t *k_1 = key.snd; + if (!(kk2 == 0U)) + { + uint8_t *sub_b = buf + kk2; + memset(sub_b, 0U, (64U - kk2) * sizeof (uint8_t)); + memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + } + Hacl_Hash_Blake2b_blake2_params pv = p[0U]; + init_with_params(block_state.f3.snd, pv); + uint8_t kk11 = i.key_length; + uint32_t ite; + if (kk11 != 0U) + { + ite = 64U; + } + else + { + ite = 0U; + } + Hacl_Hash_Blake2s_Simd128_state_t + tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp; +} + +/** + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. +*/ +void +Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( + Hacl_Hash_Blake2s_Simd128_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +) +{ + index_of_state(s); + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = p, .snd = k })); +} + +/** + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *k) +{ + Hacl_Hash_Blake2b_index idx = index_of_state(s); + uint8_t salt[8U] = { 0U }; + uint8_t personal[8U] = { 0U }; + Hacl_Hash_Blake2b_blake2_params + p = + { + .digest_length = idx.digest_length, .key_length = idx.key_length, .fanout = 1U, .depth = 1U, + .leaf_length = 0U, .node_offset = 0ULL, .node_depth = 0U, .inner_length = 0U, .salt = salt, + .personal = personal + }; + Hacl_Hash_Blake2b_blake2_params p0 = p; + reset_raw(s, ((Hacl_Hash_Blake2b_params_and_key){ .fst = &p0, .snd = k })); +} + +/** + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s) +{ + Hacl_Hash_Blake2s_Simd128_reset_with_key(s, NULL); +} + +/** + Update function; 0 = success, 1 = max length exceeded +*/ +Hacl_Streaming_Types_error_code +Hacl_Hash_Blake2s_Simd128_update( + Hacl_Hash_Blake2s_Simd128_state_t *state, + uint8_t *chunk, + uint32_t chunk_len +) +{ + Hacl_Hash_Blake2s_Simd128_state_t s = *state; + uint64_t total_len = s.total_len; + if ((uint64_t)chunk_len > 0xffffffffffffffffULL - total_len) + { + return Hacl_Streaming_Types_MaximumLengthExceeded; + } + uint32_t sz; + if (total_len % (uint64_t)64U == 0ULL && total_len > 0ULL) + { + sz = 64U; + } + else + { + sz = (uint32_t)(total_len % (uint64_t)64U); + } + if (chunk_len <= 64U - sz) + { + Hacl_Hash_Blake2s_Simd128_state_t s1 = *state; + Hacl_Hash_Blake2s_Simd128_block_state_t block_state1 = s1.block_state; + uint8_t *buf = s1.buf; + uint64_t total_len1 = s1.total_len; + uint32_t sz1; + if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) + { + sz1 = 64U; + } + else + { + sz1 = (uint32_t)(total_len1 % (uint64_t)64U); + } + uint8_t *buf2 = buf + sz1; + memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); + uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; + *state + = + ( + (Hacl_Hash_Blake2s_Simd128_state_t){ + .block_state = block_state1, + .buf = buf, + .total_len = total_len2 + } + ); + } + else if (sz == 0U) + { + Hacl_Hash_Blake2s_Simd128_state_t s1 = *state; + Hacl_Hash_Blake2s_Simd128_block_state_t block_state1 = s1.block_state; + uint8_t *buf = s1.buf; + uint64_t total_len1 = s1.total_len; + uint32_t sz1; + if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) + { + sz1 = 64U; + } + else + { + sz1 = (uint32_t)(total_len1 % (uint64_t)64U); + } + if (!(sz1 == 0U)) + { + uint64_t prevlen = total_len1 - (uint64_t)sz1; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.f3; + Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; + uint32_t nb = 1U; + Hacl_Hash_Blake2s_Simd128_update_multi(64U, wv, hash, prevlen, buf, nb); + } + uint32_t ite; + if ((uint64_t)chunk_len % (uint64_t)64U == 0ULL && (uint64_t)chunk_len > 0ULL) + { + ite = 64U; + } + else + { + ite = (uint32_t)((uint64_t)chunk_len % (uint64_t)64U); + } + uint32_t n_blocks = (chunk_len - ite) / 64U; + uint32_t data1_len = n_blocks * 64U; + uint32_t data2_len = chunk_len - data1_len; + uint8_t *data1 = chunk; + uint8_t *data2 = chunk + data1_len; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.f3; + Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; + uint32_t nb = data1_len / 64U; + Hacl_Hash_Blake2s_Simd128_update_multi(data1_len, wv, hash, total_len1, data1, nb); + uint8_t *dst = buf; + memcpy(dst, data2, data2_len * sizeof (uint8_t)); + *state + = + ( + (Hacl_Hash_Blake2s_Simd128_state_t){ + .block_state = block_state1, + .buf = buf, + .total_len = total_len1 + (uint64_t)chunk_len + } + ); + } + else + { + uint32_t diff = 64U - sz; + uint8_t *chunk1 = chunk; + uint8_t *chunk2 = chunk + diff; + Hacl_Hash_Blake2s_Simd128_state_t s1 = *state; + Hacl_Hash_Blake2s_Simd128_block_state_t block_state10 = s1.block_state; + uint8_t *buf0 = s1.buf; + uint64_t total_len10 = s1.total_len; + uint32_t sz10; + if (total_len10 % (uint64_t)64U == 0ULL && total_len10 > 0ULL) + { + sz10 = 64U; + } + else + { + sz10 = (uint32_t)(total_len10 % (uint64_t)64U); + } + uint8_t *buf2 = buf0 + sz10; + memcpy(buf2, chunk1, diff * sizeof (uint8_t)); + uint64_t total_len2 = total_len10 + (uint64_t)diff; + *state + = + ( + (Hacl_Hash_Blake2s_Simd128_state_t){ + .block_state = block_state10, + .buf = buf0, + .total_len = total_len2 + } + ); + Hacl_Hash_Blake2s_Simd128_state_t s10 = *state; + Hacl_Hash_Blake2s_Simd128_block_state_t block_state1 = s10.block_state; + uint8_t *buf = s10.buf; + uint64_t total_len1 = s10.total_len; + uint32_t sz1; + if (total_len1 % (uint64_t)64U == 0ULL && total_len1 > 0ULL) + { + sz1 = 64U; + } + else + { + sz1 = (uint32_t)(total_len1 % (uint64_t)64U); + } + if (!(sz1 == 0U)) + { + uint64_t prevlen = total_len1 - (uint64_t)sz1; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.f3; + Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; + uint32_t nb = 1U; + Hacl_Hash_Blake2s_Simd128_update_multi(64U, wv, hash, prevlen, buf, nb); + } + uint32_t ite; + if + ((uint64_t)(chunk_len - diff) % (uint64_t)64U == 0ULL && (uint64_t)(chunk_len - diff) > 0ULL) + { + ite = 64U; + } + else + { + ite = (uint32_t)((uint64_t)(chunk_len - diff) % (uint64_t)64U); + } + uint32_t n_blocks = (chunk_len - diff - ite) / 64U; + uint32_t data1_len = n_blocks * 64U; + uint32_t data2_len = chunk_len - diff - data1_len; + uint8_t *data1 = chunk2; + uint8_t *data2 = chunk2 + data1_len; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ acc = block_state1.f3; + Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; + uint32_t nb = data1_len / 64U; + Hacl_Hash_Blake2s_Simd128_update_multi(data1_len, wv, hash, total_len1, data1, nb); + uint8_t *dst = buf; + memcpy(dst, data2, data2_len * sizeof (uint8_t)); + *state + = + ( + (Hacl_Hash_Blake2s_Simd128_state_t){ + .block_state = block_state1, + .buf = buf, + .total_len = total_len1 + (uint64_t)(chunk_len - diff) + } + ); + } + return Hacl_Streaming_Types_Success; +} + +/** + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 128 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2S_128_OUT_BYTES, then use the return value +to see how many bytes were actually written. +*/ +uint8_t Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *dst) +{ + Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = (*s).block_state; + bool last_node0 = block_state0.thd; + uint8_t nn0 = block_state0.snd; + uint8_t kk0 = block_state0.fst; + Hacl_Hash_Blake2b_index + i1 = { .key_length = kk0, .digest_length = nn0, .last_node = last_node0 }; + Hacl_Hash_Blake2s_Simd128_state_t scrut = *s; + Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; + uint8_t *buf_ = scrut.buf; + uint64_t total_len = scrut.total_len; + uint32_t r; + if (total_len % (uint64_t)64U == 0ULL && total_len > 0ULL) + { + r = 64U; + } + else + { + r = (uint32_t)(total_len % (uint64_t)64U); + } + uint8_t *buf_1 = buf_; + KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 wv0[4U] KRML_POST_ALIGN(16) = { 0U }; + KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 b[4U] KRML_POST_ALIGN(16) = { 0U }; + Hacl_Hash_Blake2s_Simd128_block_state_t + tmp_block_state = + { + .fst = i1.key_length, + .snd = i1.digest_length, + .thd = i1.last_node, + .f3 = { .fst = wv0, .snd = b } + }; + Lib_IntVector_Intrinsics_vec128 *src_b = block_state.f3.snd; + Lib_IntVector_Intrinsics_vec128 *dst_b = tmp_block_state.f3.snd; + memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); + uint64_t prev_len = total_len - (uint64_t)r; + uint32_t ite; + if (r % 64U == 0U && r > 0U) + { + ite = 64U; + } + else + { + ite = r % 64U; + } + uint8_t *buf_last = buf_1 + r - ite; + uint8_t *buf_multi = buf_1; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ + acc0 = tmp_block_state.f3; + Lib_IntVector_Intrinsics_vec128 *wv1 = acc0.fst; + Lib_IntVector_Intrinsics_vec128 *hash0 = acc0.snd; + uint32_t nb = 0U; + Hacl_Hash_Blake2s_Simd128_update_multi(0U, wv1, hash0, prev_len, buf_multi, nb); + uint64_t prev_len_last = total_len - (uint64_t)r; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ + acc = tmp_block_state.f3; + bool last_node1 = tmp_block_state.thd; + Lib_IntVector_Intrinsics_vec128 *wv = acc.fst; + Lib_IntVector_Intrinsics_vec128 *hash = acc.snd; + Hacl_Hash_Blake2s_Simd128_update_last(r, wv, hash, last_node1, prev_len_last, r, buf_last); + uint8_t nn1 = tmp_block_state.snd; + Hacl_Hash_Blake2s_Simd128_finish((uint32_t)nn1, dst, tmp_block_state.f3.snd); + Hacl_Hash_Blake2s_Simd128_block_state_t block_state1 = (*s).block_state; + bool last_node = block_state1.thd; + uint8_t nn = block_state1.snd; + uint8_t kk = block_state1.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }).digest_length; +} + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_Simd128_info(Hacl_Hash_Blake2s_Simd128_state_t *s) +{ + Hacl_Hash_Blake2s_Simd128_block_state_t block_state = (*s).block_state; + bool last_node = block_state.thd; + uint8_t nn = block_state.snd; + uint8_t kk = block_state.fst; + return + ((Hacl_Hash_Blake2b_index){ .key_length = kk, .digest_length = nn, .last_node = last_node }); +} + +/** + Free state function when there is no key +*/ +void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state) +{ + Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; + uint8_t *buf = scrut.buf; + Hacl_Hash_Blake2s_Simd128_block_state_t block_state = scrut.block_state; + Lib_IntVector_Intrinsics_vec128 *b = block_state.f3.snd; + Lib_IntVector_Intrinsics_vec128 *wv = block_state.f3.fst; + KRML_ALIGNED_FREE(wv); + KRML_ALIGNED_FREE(b); + KRML_HOST_FREE(buf); + KRML_HOST_FREE(state); +} + +/** + Copying. This preserves all parameters. +*/ +Hacl_Hash_Blake2s_Simd128_state_t +*Hacl_Hash_Blake2s_Simd128_copy(Hacl_Hash_Blake2s_Simd128_state_t *state) +{ + Hacl_Hash_Blake2s_Simd128_state_t scrut = *state; + Hacl_Hash_Blake2s_Simd128_block_state_t block_state0 = scrut.block_state; + uint8_t *buf0 = scrut.buf; + uint64_t total_len0 = scrut.total_len; + bool last_node = block_state0.thd; + uint8_t nn = block_state0.snd; + uint8_t kk1 = block_state0.fst; + Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint8_t *buf = (uint8_t *)KRML_HOST_CALLOC(64U, sizeof (uint8_t)); + memcpy(buf, buf0, 64U * sizeof (uint8_t)); + Lib_IntVector_Intrinsics_vec128 + *wv = + (Lib_IntVector_Intrinsics_vec128 *)KRML_ALIGNED_MALLOC(16, + sizeof (Lib_IntVector_Intrinsics_vec128) * 4U); + memset(wv, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); + Lib_IntVector_Intrinsics_vec128 + *b = + (Lib_IntVector_Intrinsics_vec128 *)KRML_ALIGNED_MALLOC(16, + sizeof (Lib_IntVector_Intrinsics_vec128) * 4U); + memset(b, 0U, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); + Hacl_Hash_Blake2s_Simd128_block_state_t + block_state = + { + .fst = i.key_length, + .snd = i.digest_length, + .thd = i.last_node, + .f3 = { .fst = wv, .snd = b } + }; + Lib_IntVector_Intrinsics_vec128 *src_b = block_state0.f3.snd; + Lib_IntVector_Intrinsics_vec128 *dst_b = block_state.f3.snd; + memcpy(dst_b, src_b, 4U * sizeof (Lib_IntVector_Intrinsics_vec128)); + Hacl_Hash_Blake2s_Simd128_state_t + s = { .block_state = block_state, .buf = buf, .total_len = total_len0 }; + Hacl_Hash_Blake2s_Simd128_state_t + *p = + (Hacl_Hash_Blake2s_Simd128_state_t *)KRML_HOST_MALLOC(sizeof ( + Hacl_Hash_Blake2s_Simd128_state_t + )); + p[0U] = s; + return p; +} + +/** +Write the BLAKE2s digest of message `input` using key `key` into `output`. + +@param output Pointer to `output_len` bytes of memory where the digest is written to. +@param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 64. +@param input Pointer to `input_len` bytes of memory where the input message is read from. +@param input_len Length of the input message. +@param key Pointer to `key_len` bytes of memory where the key is read from. +@param key_len Length of the key. Can be 0. +*/ +void +Hacl_Hash_Blake2s_Simd128_hash_with_key( + uint8_t *output, + uint32_t output_len, + uint8_t *input, + uint32_t input_len, + uint8_t *key, + uint32_t key_len +) +{ + KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 b[4U] KRML_POST_ALIGN(16) = { 0U }; + KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 b1[4U] KRML_POST_ALIGN(16) = { 0U }; + Hacl_Hash_Blake2s_Simd128_init(b, key_len, output_len); + update(b1, b, key_len, key, input_len, input); + Hacl_Hash_Blake2s_Simd128_finish(output_len, output, b); + Lib_Memzero0_memzero(b1, 4U, Lib_IntVector_Intrinsics_vec128, void *); + Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec128, void *); +} + +/** +Write the BLAKE2s digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ +void +Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +) +{ + KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 b[4U] KRML_POST_ALIGN(16) = { 0U }; + KRML_PRE_ALIGN(16) Lib_IntVector_Intrinsics_vec128 b1[4U] KRML_POST_ALIGN(16) = { 0U }; + uint32_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = b; + Lib_IntVector_Intrinsics_vec128 *r1 = b + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = b + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = b + 3U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = params.salt + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR2(i, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = params.personal + i * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + tmp[0U] = + (uint32_t)params.digest_length + ^ + ((uint32_t)params.key_length + << 8U + ^ ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); + tmp[1U] = params.leaf_length; + tmp[2U] = (uint32_t)params.node_offset; + tmp[3U] = + (uint32_t)(params.node_offset >> 32U) + ^ ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); + update(b1, b, (uint32_t)params.key_length, key, input_len, input); + Hacl_Hash_Blake2s_Simd128_finish((uint32_t)params.digest_length, output, b); + Lib_Memzero0_memzero(b1, 4U, Lib_IntVector_Intrinsics_vec128, void *); + Lib_Memzero0_memzero(b, 4U, Lib_IntVector_Intrinsics_vec128, void *); +} + diff --git a/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.h b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.h new file mode 100644 index 00000000000..cd1654c9726 --- /dev/null +++ b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.h @@ -0,0 +1,230 @@ +/* MIT License + * + * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation + * Copyright (c) 2022-2023 HACL* Contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef __Hacl_Hash_Blake2s_Simd128_H +#define __Hacl_Hash_Blake2s_Simd128_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include +#include "python_hacl_namespaces.h" +#include "krml/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Streaming_Types.h" +#include "Hacl_Hash_Blake2b.h" +#include "libintvector.h" + +#define HACL_HASH_BLAKE2S_SIMD128_BLOCK_BYTES (64U) + +#define HACL_HASH_BLAKE2S_SIMD128_OUT_BYTES (32U) + +#define HACL_HASH_BLAKE2S_SIMD128_KEY_BYTES (32U) + +#define HACL_HASH_BLAKE2S_SIMD128_SALT_BYTES (8U) + +#define HACL_HASH_BLAKE2S_SIMD128_PERSONAL_BYTES (8U) + +typedef struct K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128__s +{ + Lib_IntVector_Intrinsics_vec128 *fst; + Lib_IntVector_Intrinsics_vec128 *snd; +} +K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_; + +typedef struct Hacl_Hash_Blake2s_Simd128_block_state_t_s +{ + uint8_t fst; + uint8_t snd; + bool thd; + K____Lib_IntVector_Intrinsics_vec128___Lib_IntVector_Intrinsics_vec128_ f3; +} +Hacl_Hash_Blake2s_Simd128_block_state_t; + +typedef struct Hacl_Hash_Blake2s_Simd128_state_t_s +{ + Hacl_Hash_Blake2s_Simd128_block_state_t block_state; + uint8_t *buf; + uint64_t total_len; +} +Hacl_Hash_Blake2s_Simd128_state_t; + +/** + General-purpose allocation function that gives control over all +Blake2 parameters, including the key. Further resettings of the state SHALL be +done with `reset_with_params_and_key`, and SHALL feature the exact same values +for the `key_length` and `digest_length` fields as passed here. In other words, +once you commit to a digest and key length, the only way to change these +parameters is to allocate a new object. + +The caller must satisfy the following requirements. +- The length of the key k MUST match the value of the field key_length in the + parameters. +- The key_length must not exceed 128 for S, 64 for B. +- The digest_length must not exceed 128 for S, 64 for B. + +*/ +Hacl_Hash_Blake2s_Simd128_state_t +*Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key( + Hacl_Hash_Blake2b_blake2_params *p, + bool last_node, + uint8_t *k +); + +/** + Specialized allocation function that picks default values for all +parameters, except for the key_length. Further resettings of the state SHALL be +done with `reset_with_key`, and SHALL feature the exact same key length `kk` as +passed here. In other words, once you commit to a key length, the only way to +change this parameter is to allocate a new object. + +The caller must satisfy the following requirements. +- The key_length must not exceed 128 for S, 64 for B. + +*/ +Hacl_Hash_Blake2s_Simd128_state_t +*Hacl_Hash_Blake2s_Simd128_malloc_with_key0(uint8_t *k, uint8_t kk); + +/** + Specialized allocation function that picks default values for all +parameters, and has no key. Effectively, this is what you want if you intend to +use Blake2 as a hash function. Further resettings of the state SHALL be done with `reset`. +*/ +Hacl_Hash_Blake2s_Simd128_state_t *Hacl_Hash_Blake2s_Simd128_malloc(void); + +/** + General-purpose re-initialization function with parameters and +key. You cannot change digest_length, key_length, or last_node, meaning those values in +the parameters object must be the same as originally decided via one of the +malloc functions. All other values of the parameter can be changed. The behavior +is unspecified if you violate this precondition. +*/ +void +Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params( + Hacl_Hash_Blake2s_Simd128_state_t *s, + Hacl_Hash_Blake2b_blake2_params *p, + uint8_t *k +); + +/** + Specialized-purpose re-initialization function with no parameters, +and a key. The key length must be the same as originally decided via your choice +of malloc function. All other parameters are reset to their default values. The +original call to malloc MUST have set digest_length to the default value. The +behavior is unspecified if you violate this precondition. +*/ +void +Hacl_Hash_Blake2s_Simd128_reset_with_key(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *k); + +/** + Specialized-purpose re-initialization function with no parameters +and no key. This is what you want if you intend to use Blake2 as a hash +function. The key length and digest length must have been set to their +respective default values via your choice of malloc function (always true if you +used `malloc`). All other parameters are reset to their default values. The +behavior is unspecified if you violate this precondition. +*/ +void Hacl_Hash_Blake2s_Simd128_reset(Hacl_Hash_Blake2s_Simd128_state_t *s); + +/** + Update function; 0 = success, 1 = max length exceeded +*/ +Hacl_Streaming_Types_error_code +Hacl_Hash_Blake2s_Simd128_update( + Hacl_Hash_Blake2s_Simd128_state_t *state, + uint8_t *chunk, + uint32_t chunk_len +); + +/** + Digest function. This function expects the `output` array to hold +at least `digest_length` bytes, where `digest_length` was determined by your +choice of `malloc` function. Concretely, if you used `malloc` or +`malloc_with_key`, then the expected length is 128 for S, or 64 for B (default +digest length). If you used `malloc_with_params_and_key`, then the expected +length is whatever you chose for the `digest_length` field of your parameters. +For convenience, this function returns `digest_length`. When in doubt, callers +can pass an array of size HACL_BLAKE2S_128_OUT_BYTES, then use the return value +to see how many bytes were actually written. +*/ +uint8_t Hacl_Hash_Blake2s_Simd128_digest(Hacl_Hash_Blake2s_Simd128_state_t *s, uint8_t *dst); + +Hacl_Hash_Blake2b_index Hacl_Hash_Blake2s_Simd128_info(Hacl_Hash_Blake2s_Simd128_state_t *s); + +/** + Free state function when there is no key +*/ +void Hacl_Hash_Blake2s_Simd128_free(Hacl_Hash_Blake2s_Simd128_state_t *state); + +/** + Copying. This preserves all parameters. +*/ +Hacl_Hash_Blake2s_Simd128_state_t +*Hacl_Hash_Blake2s_Simd128_copy(Hacl_Hash_Blake2s_Simd128_state_t *state); + +/** +Write the BLAKE2s digest of message `input` using key `key` into `output`. + +@param output Pointer to `output_len` bytes of memory where the digest is written to. +@param output_len Length of the to-be-generated digest with 1 <= `output_len` <= 64. +@param input Pointer to `input_len` bytes of memory where the input message is read from. +@param input_len Length of the input message. +@param key Pointer to `key_len` bytes of memory where the key is read from. +@param key_len Length of the key. Can be 0. +*/ +void +Hacl_Hash_Blake2s_Simd128_hash_with_key( + uint8_t *output, + uint32_t output_len, + uint8_t *input, + uint32_t input_len, + uint8_t *key, + uint32_t key_len +); + +/** +Write the BLAKE2s digest of message `input` using key `key` and +parameters `params` into `output`. The `key` array must be of length +`params.key_length`. The `output` array must be of length +`params.digest_length`. +*/ +void +Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params( + uint8_t *output, + uint8_t *input, + uint32_t input_len, + Hacl_Hash_Blake2b_blake2_params params, + uint8_t *key +); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Hash_Blake2s_Simd128_H_DEFINED +#endif diff --git a/Modules/_hacl/Hacl_Hash_SHA3.c b/Modules/_hacl/Hacl_Hash_SHA3.c index 4f502866fe0..9cf5abb330b 100644 --- a/Modules/_hacl/Hacl_Hash_SHA3.c +++ b/Modules/_hacl/Hacl_Hash_SHA3.c @@ -25,6 +25,151 @@ #include "internal/Hacl_Hash_SHA3.h" +const +uint32_t +Hacl_Hash_SHA3_keccak_rotc[24U] = + { + 1U, 3U, 6U, 10U, 15U, 21U, 28U, 36U, 45U, 55U, 2U, 14U, 27U, 41U, 56U, 8U, 25U, 43U, 62U, 18U, + 39U, 61U, 20U, 44U + }; + +const +uint32_t +Hacl_Hash_SHA3_keccak_piln[24U] = + { + 10U, 7U, 11U, 17U, 18U, 3U, 5U, 16U, 8U, 21U, 24U, 4U, 15U, 23U, 19U, 13U, 12U, 2U, 20U, 14U, + 22U, 9U, 6U, 1U + }; + +const +uint64_t +Hacl_Hash_SHA3_keccak_rndc[24U] = + { + 0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, + 0x000000000000808bULL, 0x0000000080000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, + 0x000000000000008aULL, 0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL, + 0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, + 0x8000000000008002ULL, 0x8000000000000080ULL, 0x000000000000800aULL, 0x800000008000000aULL, + 0x8000000080008081ULL, 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL + }; + +static void absorb_inner_32(uint8_t *b, uint64_t *s) +{ + uint64_t ws[32U] = { 0U }; + uint8_t *b1 = b; + uint64_t u = load64_le(b1); + ws[0U] = u; + uint64_t u0 = load64_le(b1 + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b1 + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b1 + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b1 + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b1 + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b1 + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b1 + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b1 + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b1 + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b1 + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b1 + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b1 + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b1 + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b1 + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b1 + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b1 + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b1 + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b1 + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b1 + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b1 + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b1 + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b1 + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b1 + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b1 + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b1 + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b1 + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b1 + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b1 + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b1 + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b1 + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b1 + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws[i]; + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i1 + 1U) % 5U]; + uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i1 + 5U * i] = s[i1 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i0]; + s[0U] = s[0U] ^ c; + } +} + static uint32_t block_len(Spec_Hash_Definitions_hash_alg a) { switch (a) @@ -97,10 +242,17 @@ Hacl_Hash_SHA3_update_multi_sha3( uint32_t n_blocks ) { - for (uint32_t i = 0U; i < n_blocks; i++) + uint32_t l = block_len(a) * n_blocks; + for (uint32_t i = 0U; i < l / block_len(a); i++) { - uint8_t *block = blocks + i * block_len(a); - Hacl_Hash_SHA3_absorb_inner(block_len(a), block, s); + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = blocks; + uint8_t *bl0 = b_; + uint8_t *uu____0 = b0 + i * block_len(a); + memcpy(bl0, uu____0, block_len(a) * sizeof (uint8_t)); + block_len(a); + absorb_inner_32(b_, s); } } @@ -124,37 +276,272 @@ Hacl_Hash_SHA3_update_last_sha3( uint32_t len = block_len(a); if (input_len == len) { - Hacl_Hash_SHA3_absorb_inner(len, input, s); - uint8_t lastBlock_[200U] = { 0U }; - uint8_t *lastBlock = lastBlock_; - memcpy(lastBlock, input + input_len, 0U * sizeof (uint8_t)); - lastBlock[0U] = suffix; - Hacl_Hash_SHA3_loadState(len, lastBlock, s); - if (!(((uint32_t)suffix & 0x80U) == 0U) && 0U == len - 1U) + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint8_t *b00 = input; + uint8_t *bl00 = b_; + memcpy(bl00, b00 + 0U * len, len * sizeof (uint8_t)); + absorb_inner_32(b_, s); + uint8_t b2[256U] = { 0U }; + uint8_t *b_0 = b2; + uint32_t rem = 0U % len; + uint8_t *b01 = input + input_len; + uint8_t *bl0 = b_0; + memcpy(bl0, b01 + 0U - rem, rem * sizeof (uint8_t)); + uint8_t *b02 = b_0; + b02[0U % len] = suffix; + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_0; + uint64_t u = load64_le(b); + ws[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) { - Hacl_Hash_SHA3_state_permute(s); + s[i] = s[i] ^ ws[i]; } - uint8_t nextBlock_[200U] = { 0U }; - uint8_t *nextBlock = nextBlock_; - nextBlock[len - 1U] = 0x80U; - Hacl_Hash_SHA3_loadState(len, nextBlock, s); - Hacl_Hash_SHA3_state_permute(s); + if (!(((uint32_t)suffix & 0x80U) == 0U) && 0U % len == len - 1U) + { + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i1 + 1U) % 5U]; + uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i1 + 5U * i] = s[i1 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i0]; + s[0U] = s[0U] ^ c; + } + } + uint8_t b3[256U] = { 0U }; + uint8_t *b4 = b3; + uint8_t *b0 = b4; + b0[len - 1U] = 0x80U; + absorb_inner_32(b4, s); return; } - uint8_t lastBlock_[200U] = { 0U }; - uint8_t *lastBlock = lastBlock_; - memcpy(lastBlock, input, input_len * sizeof (uint8_t)); - lastBlock[input_len] = suffix; - Hacl_Hash_SHA3_loadState(len, lastBlock, s); - if (!(((uint32_t)suffix & 0x80U) == 0U) && input_len == len - 1U) + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = input_len % len; + uint8_t *b00 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + input_len - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[input_len % len] = suffix; + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) { - Hacl_Hash_SHA3_state_permute(s); + s[i] = s[i] ^ ws[i]; } - uint8_t nextBlock_[200U] = { 0U }; - uint8_t *nextBlock = nextBlock_; - nextBlock[len - 1U] = 0x80U; - Hacl_Hash_SHA3_loadState(len, nextBlock, s); - Hacl_Hash_SHA3_state_permute(s); + if (!(((uint32_t)suffix & 0x80U) == 0U) && input_len % len == len - 1U) + { + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + uint64_t uu____2 = _C[(i1 + 1U) % 5U]; + uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____2 << 1U | uu____2 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i1 + 5U * i] = s[i1 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____3 = current; + s[_Y] = uu____3 << r | uu____3 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i0]; + s[0U] = s[0U] ^ c; + } + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[len - 1U] = 0x80U; + absorb_inner_32(b3, s); } typedef struct hash_buf2_s @@ -463,10 +850,139 @@ digest_( uint64_t *s = tmp_block_state.snd; if (a11 == Spec_Hash_Definitions_Shake128 || a11 == Spec_Hash_Definitions_Shake256) { - Hacl_Hash_SHA3_squeeze0(s, block_len(a11), l, output); + for (uint32_t i0 = 0U; i0 < l / block_len(a11); i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b0 = output; + uint8_t *uu____0 = hbuf; + memcpy(b0 + i0 * block_len(a11), uu____0, block_len(a11) * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____1 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____1 << 1U | uu____1 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r1 = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____2 = current; + s[_Y] = uu____2 << r1 | uu____2 >> (64U - r1); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = l % block_len(a11); + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(output + l - remOut, hbuf, remOut * sizeof (uint8_t)); return; } - Hacl_Hash_SHA3_squeeze0(s, block_len(a11), hash_len(a11), output); + for (uint32_t i0 = 0U; i0 < hash_len(a11) / block_len(a11); i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b0 = output; + uint8_t *uu____3 = hbuf; + memcpy(b0 + i0 * block_len(a11), uu____3, block_len(a11) * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____4 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____4 << 1U | uu____4 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r1 = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____5 = current; + s[_Y] = uu____5 << r1 | uu____5 >> (64U - r1); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = hash_len(a11) % block_len(a11); + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *uu____6 = hbuf; + memcpy(output + hash_len(a11) - remOut, uu____6, remOut * sizeof (uint8_t)); } Hacl_Streaming_Types_error_code @@ -515,78 +1031,79 @@ bool Hacl_Hash_SHA3_is_shake(Hacl_Hash_SHA3_state_t *s) return uu____0 == Spec_Hash_Definitions_Shake128 || uu____0 == Spec_Hash_Definitions_Shake256; } -void -Hacl_Hash_SHA3_shake128_hacl( - uint32_t inputByteLen, - uint8_t *input, - uint32_t outputByteLen, - uint8_t *output -) +void Hacl_Hash_SHA3_absorb_inner_32(uint32_t rateInBytes, uint8_t *b, uint64_t *s) { - Hacl_Hash_SHA3_keccak(1344U, 256U, inputByteLen, input, 0x1FU, outputByteLen, output); -} - -void -Hacl_Hash_SHA3_shake256_hacl( - uint32_t inputByteLen, - uint8_t *input, - uint32_t outputByteLen, - uint8_t *output -) -{ - Hacl_Hash_SHA3_keccak(1088U, 512U, inputByteLen, input, 0x1FU, outputByteLen, output); -} - -void Hacl_Hash_SHA3_sha3_224(uint8_t *output, uint8_t *input, uint32_t input_len) -{ - Hacl_Hash_SHA3_keccak(1152U, 448U, input_len, input, 0x06U, 28U, output); -} - -void Hacl_Hash_SHA3_sha3_256(uint8_t *output, uint8_t *input, uint32_t input_len) -{ - Hacl_Hash_SHA3_keccak(1088U, 512U, input_len, input, 0x06U, 32U, output); -} - -void Hacl_Hash_SHA3_sha3_384(uint8_t *output, uint8_t *input, uint32_t input_len) -{ - Hacl_Hash_SHA3_keccak(832U, 768U, input_len, input, 0x06U, 48U, output); -} - -void Hacl_Hash_SHA3_sha3_512(uint8_t *output, uint8_t *input, uint32_t input_len) -{ - Hacl_Hash_SHA3_keccak(576U, 1024U, input_len, input, 0x06U, 64U, output); -} - -static const -uint32_t -keccak_rotc[24U] = + KRML_MAYBE_UNUSED_VAR(rateInBytes); + uint64_t ws[32U] = { 0U }; + uint8_t *b1 = b; + uint64_t u = load64_le(b1); + ws[0U] = u; + uint64_t u0 = load64_le(b1 + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b1 + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b1 + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b1 + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b1 + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b1 + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b1 + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b1 + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b1 + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b1 + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b1 + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b1 + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b1 + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b1 + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b1 + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b1 + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b1 + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b1 + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b1 + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b1 + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b1 + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b1 + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b1 + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b1 + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b1 + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b1 + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b1 + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b1 + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b1 + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b1 + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b1 + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) { - 1U, 3U, 6U, 10U, 15U, 21U, 28U, 36U, 45U, 55U, 2U, 14U, 27U, 41U, 56U, 8U, 25U, 43U, 62U, 18U, - 39U, 61U, 20U, 44U - }; - -static const -uint32_t -keccak_piln[24U] = - { - 10U, 7U, 11U, 17U, 18U, 3U, 5U, 16U, 8U, 21U, 24U, 4U, 15U, 23U, 19U, 13U, 12U, 2U, 20U, 14U, - 22U, 9U, 6U, 1U - }; - -static const -uint64_t -keccak_rndc[24U] = - { - 0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, - 0x000000000000808bULL, 0x0000000080000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, - 0x000000000000008aULL, 0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL, - 0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, - 0x8000000000008002ULL, 0x8000000000000080ULL, 0x000000000000800aULL, 0x800000008000000aULL, - 0x8000000080008081ULL, 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL - }; - -void Hacl_Hash_SHA3_state_permute(uint64_t *s) -{ + s[i] = s[i] ^ ws[i]; + } for (uint32_t i0 = 0U; i0 < 24U; i0++) { uint64_t _C[5U] = { 0U }; @@ -606,8 +1123,8 @@ void Hacl_Hash_SHA3_state_permute(uint64_t *s) uint64_t current = x; for (uint32_t i = 0U; i < 24U; i++) { - uint32_t _Y = keccak_piln[i]; - uint32_t r = keccak_rotc[i]; + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; uint64_t temp = s[_Y]; uint64_t uu____1 = current; s[_Y] = uu____1 << r | uu____1 >> (64U - r); @@ -627,108 +1144,1227 @@ void Hacl_Hash_SHA3_state_permute(uint64_t *s) s[2U + 5U * i] = v2; s[3U + 5U * i] = v3; s[4U + 5U * i] = v4;); - uint64_t c = keccak_rndc[i0]; + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i0]; s[0U] = s[0U] ^ c; } } -void Hacl_Hash_SHA3_loadState(uint32_t rateInBytes, uint8_t *input, uint64_t *s) -{ - uint8_t block[200U] = { 0U }; - memcpy(block, input, rateInBytes * sizeof (uint8_t)); - for (uint32_t i = 0U; i < 25U; i++) - { - uint64_t u = load64_le(block + i * 8U); - uint64_t x = u; - s[i] = s[i] ^ x; - } -} - -static void storeState(uint32_t rateInBytes, uint64_t *s, uint8_t *res) -{ - uint8_t block[200U] = { 0U }; - for (uint32_t i = 0U; i < 25U; i++) - { - uint64_t sj = s[i]; - store64_le(block + i * 8U, sj); - } - memcpy(res, block, rateInBytes * sizeof (uint8_t)); -} - -void Hacl_Hash_SHA3_absorb_inner(uint32_t rateInBytes, uint8_t *block, uint64_t *s) -{ - Hacl_Hash_SHA3_loadState(rateInBytes, block, s); - Hacl_Hash_SHA3_state_permute(s); -} - -static void -absorb( - uint64_t *s, - uint32_t rateInBytes, - uint32_t inputByteLen, - uint8_t *input, - uint8_t delimitedSuffix -) -{ - uint32_t n_blocks = inputByteLen / rateInBytes; - uint32_t rem = inputByteLen % rateInBytes; - for (uint32_t i = 0U; i < n_blocks; i++) - { - uint8_t *block = input + i * rateInBytes; - Hacl_Hash_SHA3_absorb_inner(rateInBytes, block, s); - } - uint8_t *last = input + n_blocks * rateInBytes; - uint8_t lastBlock_[200U] = { 0U }; - uint8_t *lastBlock = lastBlock_; - memcpy(lastBlock, last, rem * sizeof (uint8_t)); - lastBlock[rem] = delimitedSuffix; - Hacl_Hash_SHA3_loadState(rateInBytes, lastBlock, s); - if (!(((uint32_t)delimitedSuffix & 0x80U) == 0U) && rem == rateInBytes - 1U) - { - Hacl_Hash_SHA3_state_permute(s); - } - uint8_t nextBlock_[200U] = { 0U }; - uint8_t *nextBlock = nextBlock_; - nextBlock[rateInBytes - 1U] = 0x80U; - Hacl_Hash_SHA3_loadState(rateInBytes, nextBlock, s); - Hacl_Hash_SHA3_state_permute(s); -} - void -Hacl_Hash_SHA3_squeeze0( - uint64_t *s, - uint32_t rateInBytes, +Hacl_Hash_SHA3_shake128( + uint8_t *output, uint32_t outputByteLen, - uint8_t *output -) -{ - uint32_t outBlocks = outputByteLen / rateInBytes; - uint32_t remOut = outputByteLen % rateInBytes; - uint8_t *last = output + outputByteLen - remOut; - uint8_t *blocks = output; - for (uint32_t i = 0U; i < outBlocks; i++) - { - storeState(rateInBytes, s, blocks + i * rateInBytes); - Hacl_Hash_SHA3_state_permute(s); - } - storeState(remOut, s, last); -} - -void -Hacl_Hash_SHA3_keccak( - uint32_t rate, - uint32_t capacity, - uint32_t inputByteLen, uint8_t *input, - uint8_t delimitedSuffix, - uint32_t outputByteLen, - uint8_t *output + uint32_t inputByteLen ) { - KRML_MAYBE_UNUSED_VAR(capacity); - uint32_t rateInBytes = rate / 8U; + uint8_t *ib = input; + uint8_t *rb = output; uint64_t s[25U] = { 0U }; - absorb(s, rateInBytes, inputByteLen, input, delimitedSuffix); - Hacl_Hash_SHA3_squeeze0(s, rateInBytes, outputByteLen, output); + uint32_t rateInBytes1 = 168U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % rateInBytes1] = 0x1FU; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws0[i]; + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < outputByteLen / rateInBytes1; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b02 = rb; + memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = outputByteLen % rateInBytes1; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(rb + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); +} + +void +Hacl_Hash_SHA3_shake256( + uint8_t *output, + uint32_t outputByteLen, + uint8_t *input, + uint32_t inputByteLen +) +{ + uint8_t *ib = input; + uint8_t *rb = output; + uint64_t s[25U] = { 0U }; + uint32_t rateInBytes1 = 136U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % rateInBytes1] = 0x1FU; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws0[i]; + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < outputByteLen / rateInBytes1; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b02 = rb; + memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = outputByteLen % rateInBytes1; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(rb + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); +} + +void Hacl_Hash_SHA3_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen) +{ + uint8_t *ib = input; + uint8_t *rb = output; + uint64_t s[25U] = { 0U }; + uint32_t rateInBytes1 = 144U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % rateInBytes1] = 0x06U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws0[i]; + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < 28U / rateInBytes1; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b02 = rb; + memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = 28U % rateInBytes1; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(rb + 28U - remOut, hbuf, remOut * sizeof (uint8_t)); +} + +void Hacl_Hash_SHA3_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen) +{ + uint8_t *ib = input; + uint8_t *rb = output; + uint64_t s[25U] = { 0U }; + uint32_t rateInBytes1 = 136U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % rateInBytes1] = 0x06U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws0[i]; + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < 32U / rateInBytes1; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b02 = rb; + memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = 32U % rateInBytes1; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(rb + 32U - remOut, hbuf, remOut * sizeof (uint8_t)); +} + +void Hacl_Hash_SHA3_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen) +{ + uint8_t *ib = input; + uint8_t *rb = output; + uint64_t s[25U] = { 0U }; + uint32_t rateInBytes1 = 104U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % rateInBytes1] = 0x06U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws0[i]; + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < 48U / rateInBytes1; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b02 = rb; + memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = 48U % rateInBytes1; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(rb + 48U - remOut, hbuf, remOut * sizeof (uint8_t)); +} + +void Hacl_Hash_SHA3_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen) +{ + uint8_t *ib = input; + uint8_t *rb = output; + uint64_t s[25U] = { 0U }; + uint32_t rateInBytes1 = 72U; + for (uint32_t i = 0U; i < inputByteLen / rateInBytes1; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * rateInBytes1, rateInBytes1 * sizeof (uint8_t)); + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b_, s); + } + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % rateInBytes1; + uint8_t *b00 = ib; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % rateInBytes1] = 0x06U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws0[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws0[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws0[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws0[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws0[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws0[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws0[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws0[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws0[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws0[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws0[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws0[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws0[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws0[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws0[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws0[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws0[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws0[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws0[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws0[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws0[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws0[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws0[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws0[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws0[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws0[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws0[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws0[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws0[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws0[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws0[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws0[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + s[i] = s[i] ^ ws0[i]; + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[rateInBytes1 - 1U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(rateInBytes1, b3, s); + for (uint32_t i0 = 0U; i0 < 64U / rateInBytes1; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b02 = rb; + memcpy(b02 + i0 * rateInBytes1, hbuf, rateInBytes1 * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = s[i + 0U] ^ (s[i + 5U] ^ (s[i + 10U] ^ (s[i + 15U] ^ s[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, s[i2 + 5U * i] = s[i2 + 5U * i] ^ _D;);); + uint64_t x = s[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = s[_Y]; + uint64_t uu____1 = current; + s[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = s[0U + 5U * i] ^ (~s[1U + 5U * i] & s[2U + 5U * i]); + uint64_t v1 = s[1U + 5U * i] ^ (~s[2U + 5U * i] & s[3U + 5U * i]); + uint64_t v2 = s[2U + 5U * i] ^ (~s[3U + 5U * i] & s[4U + 5U * i]); + uint64_t v3 = s[3U + 5U * i] ^ (~s[4U + 5U * i] & s[0U + 5U * i]); + uint64_t v4 = s[4U + 5U * i] ^ (~s[0U + 5U * i] & s[1U + 5U * i]); + s[0U + 5U * i] = v0; + s[1U + 5U * i] = v1; + s[2U + 5U * i] = v2; + s[3U + 5U * i] = v3; + s[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + s[0U] = s[0U] ^ c; + } + } + uint32_t remOut = 64U % rateInBytes1; + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, s, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(rb + 64U - remOut, hbuf, remOut * sizeof (uint8_t)); +} + +/** +Allocate state buffer of 200-bytes +*/ +uint64_t *Hacl_Hash_SHA3_state_malloc(void) +{ + uint64_t *buf = (uint64_t *)KRML_HOST_CALLOC(25U, sizeof (uint64_t)); + return buf; +} + +/** +Free state buffer +*/ +void Hacl_Hash_SHA3_state_free(uint64_t *s) +{ + KRML_HOST_FREE(s); +} + +/** +Absorb number of input blocks and write the output state + + This function is intended to receive a hash state and input buffer. + It processes an input of multiple of 168-bytes (SHAKE128 block size), + any additional bytes of final partial block are ignored. + + The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] + The argument `input` (IN) points to `inputByteLen` bytes of valid memory, + i.e., uint8_t[inputByteLen] +*/ +void +Hacl_Hash_SHA3_shake128_absorb_nblocks(uint64_t *state, uint8_t *input, uint32_t inputByteLen) +{ + for (uint32_t i = 0U; i < inputByteLen / 168U; i++) + { + uint8_t b[256U] = { 0U }; + uint8_t *b_ = b; + uint8_t *b0 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i * 168U, 168U * sizeof (uint8_t)); + Hacl_Hash_SHA3_absorb_inner_32(168U, b_, state); + } +} + +/** +Absorb a final partial block of input and write the output state + + This function is intended to receive a hash state and input buffer. + It processes a sequence of bytes at end of input buffer that is less + than 168-bytes (SHAKE128 block size), + any bytes of full blocks at start of input buffer are ignored. + + The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] + The argument `input` (IN) points to `inputByteLen` bytes of valid memory, + i.e., uint8_t[inputByteLen] + + Note: Full size of input buffer must be passed to `inputByteLen` including + the number of full-block bytes at start of input buffer that are ignored +*/ +void +Hacl_Hash_SHA3_shake128_absorb_final(uint64_t *state, uint8_t *input, uint32_t inputByteLen) +{ + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint32_t rem = inputByteLen % 168U; + uint8_t *b00 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem, rem * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[inputByteLen % 168U] = 0x1FU; + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws[i]; + } + uint8_t b2[256U] = { 0U }; + uint8_t *b3 = b2; + uint8_t *b0 = b3; + b0[167U] = 0x80U; + Hacl_Hash_SHA3_absorb_inner_32(168U, b3, state); +} + +/** +Squeeze a hash state to output buffer + + This function is intended to receive a hash state and output buffer. + It produces an output of multiple of 168-bytes (SHAKE128 block size), + any additional bytes of final partial block are ignored. + + The argument `state` (IN) points to hash state, i.e., uint64_t[25] + The argument `output` (OUT) points to `outputByteLen` bytes of valid memory, + i.e., uint8_t[outputByteLen] +*/ +void +Hacl_Hash_SHA3_shake128_squeeze_nblocks( + uint64_t *state, + uint8_t *output, + uint32_t outputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < outputByteLen / 168U; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, state, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + uint8_t *b0 = output; + memcpy(b0 + i0 * 168U, hbuf, 168U * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = + state[i + + 0U] + ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Hash_SHA3_keccak_piln[i]; + uint32_t r = Hacl_Hash_SHA3_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Hash_SHA3_keccak_rndc[i1]; + state[0U] = state[0U] ^ c; + } + } } diff --git a/Modules/_hacl/Hacl_Hash_SHA3.h b/Modules/_hacl/Hacl_Hash_SHA3.h index 678e9f2fbe1..4d85bb6902c 100644 --- a/Modules/_hacl/Hacl_Hash_SHA3.h +++ b/Modules/_hacl/Hacl_Hash_SHA3.h @@ -78,49 +78,90 @@ uint32_t Hacl_Hash_SHA3_hash_len(Hacl_Hash_SHA3_state_t *s); bool Hacl_Hash_SHA3_is_shake(Hacl_Hash_SHA3_state_t *s); +void Hacl_Hash_SHA3_absorb_inner_32(uint32_t rateInBytes, uint8_t *b, uint64_t *s); + void -Hacl_Hash_SHA3_shake128_hacl( - uint32_t inputByteLen, - uint8_t *input, +Hacl_Hash_SHA3_shake128( + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ); void -Hacl_Hash_SHA3_shake256_hacl( - uint32_t inputByteLen, - uint8_t *input, +Hacl_Hash_SHA3_shake256( + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ); -void Hacl_Hash_SHA3_sha3_224(uint8_t *output, uint8_t *input, uint32_t input_len); +void Hacl_Hash_SHA3_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen); -void Hacl_Hash_SHA3_sha3_256(uint8_t *output, uint8_t *input, uint32_t input_len); +void Hacl_Hash_SHA3_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen); -void Hacl_Hash_SHA3_sha3_384(uint8_t *output, uint8_t *input, uint32_t input_len); +void Hacl_Hash_SHA3_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen); -void Hacl_Hash_SHA3_sha3_512(uint8_t *output, uint8_t *input, uint32_t input_len); +void Hacl_Hash_SHA3_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen); -void Hacl_Hash_SHA3_absorb_inner(uint32_t rateInBytes, uint8_t *block, uint64_t *s); +/** +Allocate state buffer of 200-bytes +*/ +uint64_t *Hacl_Hash_SHA3_state_malloc(void); +/** +Free state buffer +*/ +void Hacl_Hash_SHA3_state_free(uint64_t *s); + +/** +Absorb number of input blocks and write the output state + + This function is intended to receive a hash state and input buffer. + It processes an input of multiple of 168-bytes (SHAKE128 block size), + any additional bytes of final partial block are ignored. + + The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] + The argument `input` (IN) points to `inputByteLen` bytes of valid memory, + i.e., uint8_t[inputByteLen] +*/ void -Hacl_Hash_SHA3_squeeze0( - uint64_t *s, - uint32_t rateInBytes, - uint32_t outputByteLen, - uint8_t *output -); +Hacl_Hash_SHA3_shake128_absorb_nblocks(uint64_t *state, uint8_t *input, uint32_t inputByteLen); +/** +Absorb a final partial block of input and write the output state + + This function is intended to receive a hash state and input buffer. + It processes a sequence of bytes at end of input buffer that is less + than 168-bytes (SHAKE128 block size), + any bytes of full blocks at start of input buffer are ignored. + + The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25] + The argument `input` (IN) points to `inputByteLen` bytes of valid memory, + i.e., uint8_t[inputByteLen] + + Note: Full size of input buffer must be passed to `inputByteLen` including + the number of full-block bytes at start of input buffer that are ignored +*/ void -Hacl_Hash_SHA3_keccak( - uint32_t rate, - uint32_t capacity, - uint32_t inputByteLen, - uint8_t *input, - uint8_t delimitedSuffix, - uint32_t outputByteLen, - uint8_t *output +Hacl_Hash_SHA3_shake128_absorb_final(uint64_t *state, uint8_t *input, uint32_t inputByteLen); + +/** +Squeeze a hash state to output buffer + + This function is intended to receive a hash state and output buffer. + It produces an output of multiple of 168-bytes (SHAKE128 block size), + any additional bytes of final partial block are ignored. + + The argument `state` (IN) points to hash state, i.e., uint64_t[25] + The argument `output` (OUT) points to `outputByteLen` bytes of valid memory, + i.e., uint8_t[outputByteLen] +*/ +void +Hacl_Hash_SHA3_shake128_squeeze_nblocks( + uint64_t *state, + uint8_t *output, + uint32_t outputByteLen ); #if defined(__cplusplus) diff --git a/Modules/_hacl/Lib_Memzero0.c b/Modules/_hacl/Lib_Memzero0.c new file mode 100644 index 00000000000..3d8a1e5f265 --- /dev/null +++ b/Modules/_hacl/Lib_Memzero0.c @@ -0,0 +1,54 @@ +#if defined(__has_include) +#if __has_include("config.h") +#include "config.h" +#endif +#endif + +#ifdef _WIN32 +#include +#endif + +#if (defined(__APPLE__) && defined(__MACH__)) || defined(__linux__) +#define __STDC_WANT_LIB_EXT1__ 1 +#include +#endif + +#ifdef __FreeBSD__ +#include +#endif + +#include +#include +#include +#include + +/* This is now a hand-written header */ +#include "lib_memzero0.h" +#include "krml/internal/target.h" + +/* The F* formalization talks about the number of elements in the array. The C + implementation wants a number of bytes in the array. KaRaMeL is aware of this + and inserts a sizeof multiplication. */ +void Lib_Memzero0_memzero0(void *dst, uint64_t len) { + /* This is safe: karamel checks at run-time (if needed) that all object sizes + fit within a size_t, so the size we receive has been checked at + allocation-time, possibly via KRML_CHECK_SIZE, to fit in a size_t. */ + size_t len_ = (size_t) len; + + #ifdef _WIN32 + SecureZeroMemory(dst, len); + #elif defined(__APPLE__) && defined(__MACH__) + memset_s(dst, len_, 0, len_); + #elif (defined(__linux__) && !defined(LINUX_NO_EXPLICIT_BZERO)) || defined(__FreeBSD__) + explicit_bzero(dst, len_); + #elif defined(__NetBSD__) + explicit_memset(dst, 0, len_); + #else + /* Default implementation for platforms with no particular support. */ + #warning "Your platform does not support any safe implementation of memzero -- consider a pull request!" + volatile unsigned char *volatile dst_ = (volatile unsigned char *volatile) dst; + size_t i = 0U; + while (i < len) + dst_[i++] = 0U; + #endif +} diff --git a/Modules/_hacl/include/krml/internal/target.h b/Modules/_hacl/include/krml/internal/target.h index c7fcc0151e6..292adc14235 100644 --- a/Modules/_hacl/include/krml/internal/target.h +++ b/Modules/_hacl/include/krml/internal/target.h @@ -69,6 +69,14 @@ # endif #endif +#ifndef KRML_ATTRIBUTE_TARGET +# if defined(__GNUC__) +# define KRML_ATTRIBUTE_TARGET(x) __attribute__((target(x))) +# else +# define KRML_ATTRIBUTE_TARGET(x) +# endif +#endif + #ifndef KRML_NOINLINE # if defined(_MSC_VER) # define KRML_NOINLINE __declspec(noinline) @@ -82,6 +90,67 @@ # endif #endif +#ifndef KRML_MUSTINLINE +# if defined(_MSC_VER) +# define KRML_MUSTINLINE inline __forceinline +# elif defined (__GNUC__) +# define KRML_MUSTINLINE inline __attribute__((always_inline)) +# else +# define KRML_MUSTINLINE inline +# warning "The KRML_MUSTINLINE macro defaults to plain inline for this toolchain!" +# warning "Please locate target.h and try to fill it out with a suitable definition for this compiler." +# endif +#endif + +#ifndef KRML_PRE_ALIGN +# ifdef _MSC_VER +# define KRML_PRE_ALIGN(X) __declspec(align(X)) +# else +# define KRML_PRE_ALIGN(X) +# endif +#endif + +#ifndef KRML_POST_ALIGN +# ifdef _MSC_VER +# define KRML_POST_ALIGN(X) +# else +# define KRML_POST_ALIGN(X) __attribute__((aligned(X))) +# endif +#endif + +/* MinGW-W64 does not support C11 aligned_alloc, but it supports + * MSVC's _aligned_malloc. + */ +#ifndef KRML_ALIGNED_MALLOC +# ifdef __MINGW32__ +# include <_mingw.h> +# endif +# if ( \ + defined(_MSC_VER) || \ + (defined(__MINGW32__) && defined(__MINGW64_VERSION_MAJOR))) +# define KRML_ALIGNED_MALLOC(X, Y) _aligned_malloc(Y, X) +# else +# define KRML_ALIGNED_MALLOC(X, Y) aligned_alloc(X, Y) +# endif +#endif + +/* Since aligned allocations with MinGW-W64 are done with + * _aligned_malloc (see above), such pointers must be freed with + * _aligned_free. + */ +#ifndef KRML_ALIGNED_FREE +# ifdef __MINGW32__ +# include <_mingw.h> +# endif +# if ( \ + defined(_MSC_VER) || \ + (defined(__MINGW32__) && defined(__MINGW64_VERSION_MAJOR))) +# define KRML_ALIGNED_FREE(X) _aligned_free(X) +# else +# define KRML_ALIGNED_FREE(X) free(X) +# endif +#endif + /* In FStar.Buffer.fst, the size of arrays is uint32_t, but it's a number of * *elements*. Do an ugly, run-time check (some of which KaRaMeL can eliminate). */ diff --git a/Modules/_hacl/internal/Hacl_Hash_Blake2b.h b/Modules/_hacl/internal/Hacl_Hash_Blake2b.h new file mode 100644 index 00000000000..8ee70282f4e --- /dev/null +++ b/Modules/_hacl/internal/Hacl_Hash_Blake2b.h @@ -0,0 +1,78 @@ +/* MIT License + * + * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation + * Copyright (c) 2022-2023 HACL* Contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef __internal_Hacl_Hash_Blake2b_H +#define __internal_Hacl_Hash_Blake2b_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include +#include "krml/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "internal/Hacl_Impl_Blake2_Constants.h" +#include "../Hacl_Hash_Blake2b.h" + +typedef struct Hacl_Hash_Blake2b_params_and_key_s +{ + Hacl_Hash_Blake2b_blake2_params *fst; + uint8_t *snd; +} +Hacl_Hash_Blake2b_params_and_key; + +void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn); + +void +Hacl_Hash_Blake2b_update_multi( + uint32_t len, + uint64_t *wv, + uint64_t *hash, + FStar_UInt128_uint128 prev, + uint8_t *blocks, + uint32_t nb +); + +void +Hacl_Hash_Blake2b_update_last( + uint32_t len, + uint64_t *wv, + uint64_t *hash, + bool last_node, + FStar_UInt128_uint128 prev, + uint32_t rem, + uint8_t *d +); + +void Hacl_Hash_Blake2b_finish(uint32_t nn, uint8_t *output, uint64_t *hash); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Hash_Blake2b_H_DEFINED +#endif diff --git a/Modules/_hacl/internal/Hacl_Hash_Blake2b_Simd256.h b/Modules/_hacl/internal/Hacl_Hash_Blake2b_Simd256.h new file mode 100644 index 00000000000..ab329b92c36 --- /dev/null +++ b/Modules/_hacl/internal/Hacl_Hash_Blake2b_Simd256.h @@ -0,0 +1,93 @@ +/* MIT License + * + * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation + * Copyright (c) 2022-2023 HACL* Contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef __internal_Hacl_Hash_Blake2b_Simd256_H +#define __internal_Hacl_Hash_Blake2b_Simd256_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include +#include "krml/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "internal/Hacl_Impl_Blake2_Constants.h" +#include "internal/Hacl_Hash_Blake2b.h" +#include "../Hacl_Hash_Blake2b_Simd256.h" +#include "libintvector.h" + +void +Hacl_Hash_Blake2b_Simd256_init(Lib_IntVector_Intrinsics_vec256 *hash, uint32_t kk, uint32_t nn); + +void +Hacl_Hash_Blake2b_Simd256_update_multi( + uint32_t len, + Lib_IntVector_Intrinsics_vec256 *wv, + Lib_IntVector_Intrinsics_vec256 *hash, + FStar_UInt128_uint128 prev, + uint8_t *blocks, + uint32_t nb +); + +void +Hacl_Hash_Blake2b_Simd256_update_last( + uint32_t len, + Lib_IntVector_Intrinsics_vec256 *wv, + Lib_IntVector_Intrinsics_vec256 *hash, + bool last_node, + FStar_UInt128_uint128 prev, + uint32_t rem, + uint8_t *d +); + +void +Hacl_Hash_Blake2b_Simd256_finish( + uint32_t nn, + uint8_t *output, + Lib_IntVector_Intrinsics_vec256 *hash +); + +void +Hacl_Hash_Blake2b_Simd256_load_state256b_from_state32( + Lib_IntVector_Intrinsics_vec256 *st, + uint64_t *st32 +); + +void +Hacl_Hash_Blake2b_Simd256_store_state256b_to_state32( + uint64_t *st32, + Lib_IntVector_Intrinsics_vec256 *st +); + +Lib_IntVector_Intrinsics_vec256 *Hacl_Hash_Blake2b_Simd256_malloc_with_key(void); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Hash_Blake2b_Simd256_H_DEFINED +#endif diff --git a/Modules/_hacl/internal/Hacl_Hash_Blake2s.h b/Modules/_hacl/internal/Hacl_Hash_Blake2s.h new file mode 100644 index 00000000000..6494075b60a --- /dev/null +++ b/Modules/_hacl/internal/Hacl_Hash_Blake2s.h @@ -0,0 +1,72 @@ +/* MIT License + * + * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation + * Copyright (c) 2022-2023 HACL* Contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef __internal_Hacl_Hash_Blake2s_H +#define __internal_Hacl_Hash_Blake2s_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include +#include "krml/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "internal/Hacl_Impl_Blake2_Constants.h" +#include "internal/Hacl_Hash_Blake2b.h" +#include "../Hacl_Hash_Blake2s.h" + +void Hacl_Hash_Blake2s_init(uint32_t *hash, uint32_t kk, uint32_t nn); + +void +Hacl_Hash_Blake2s_update_multi( + uint32_t len, + uint32_t *wv, + uint32_t *hash, + uint64_t prev, + uint8_t *blocks, + uint32_t nb +); + +void +Hacl_Hash_Blake2s_update_last( + uint32_t len, + uint32_t *wv, + uint32_t *hash, + bool last_node, + uint64_t prev, + uint32_t rem, + uint8_t *d +); + +void Hacl_Hash_Blake2s_finish(uint32_t nn, uint8_t *output, uint32_t *hash); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Hash_Blake2s_H_DEFINED +#endif diff --git a/Modules/_hacl/internal/Hacl_Hash_Blake2s_Simd128.h b/Modules/_hacl/internal/Hacl_Hash_Blake2s_Simd128.h new file mode 100644 index 00000000000..60c09a67b44 --- /dev/null +++ b/Modules/_hacl/internal/Hacl_Hash_Blake2s_Simd128.h @@ -0,0 +1,93 @@ +/* MIT License + * + * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation + * Copyright (c) 2022-2023 HACL* Contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef __internal_Hacl_Hash_Blake2s_Simd128_H +#define __internal_Hacl_Hash_Blake2s_Simd128_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include +#include "krml/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "internal/Hacl_Impl_Blake2_Constants.h" +#include "internal/Hacl_Hash_Blake2b.h" +#include "../Hacl_Hash_Blake2s_Simd128.h" +#include "libintvector.h" + +void +Hacl_Hash_Blake2s_Simd128_init(Lib_IntVector_Intrinsics_vec128 *hash, uint32_t kk, uint32_t nn); + +void +Hacl_Hash_Blake2s_Simd128_update_multi( + uint32_t len, + Lib_IntVector_Intrinsics_vec128 *wv, + Lib_IntVector_Intrinsics_vec128 *hash, + uint64_t prev, + uint8_t *blocks, + uint32_t nb +); + +void +Hacl_Hash_Blake2s_Simd128_update_last( + uint32_t len, + Lib_IntVector_Intrinsics_vec128 *wv, + Lib_IntVector_Intrinsics_vec128 *hash, + bool last_node, + uint64_t prev, + uint32_t rem, + uint8_t *d +); + +void +Hacl_Hash_Blake2s_Simd128_finish( + uint32_t nn, + uint8_t *output, + Lib_IntVector_Intrinsics_vec128 *hash +); + +void +Hacl_Hash_Blake2s_Simd128_store_state128s_to_state32( + uint32_t *st32, + Lib_IntVector_Intrinsics_vec128 *st +); + +void +Hacl_Hash_Blake2s_Simd128_load_state128s_from_state32( + Lib_IntVector_Intrinsics_vec128 *st, + uint32_t *st32 +); + +Lib_IntVector_Intrinsics_vec128 *Hacl_Hash_Blake2s_Simd128_malloc_with_key(void); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Hash_Blake2s_Simd128_H_DEFINED +#endif diff --git a/Modules/_hacl/internal/Hacl_Hash_SHA3.h b/Modules/_hacl/internal/Hacl_Hash_SHA3.h index b80e81fafb9..0a152b4c622 100644 --- a/Modules/_hacl/internal/Hacl_Hash_SHA3.h +++ b/Modules/_hacl/internal/Hacl_Hash_SHA3.h @@ -37,6 +37,12 @@ extern "C" { #include "../Hacl_Hash_SHA3.h" +extern const uint32_t Hacl_Hash_SHA3_keccak_rotc[24U]; + +extern const uint32_t Hacl_Hash_SHA3_keccak_piln[24U]; + +extern const uint64_t Hacl_Hash_SHA3_keccak_rndc[24U]; + void Hacl_Hash_SHA3_update_multi_sha3( Spec_Hash_Definitions_hash_alg a, @@ -53,10 +59,6 @@ Hacl_Hash_SHA3_update_last_sha3( uint32_t input_len ); -void Hacl_Hash_SHA3_state_permute(uint64_t *s); - -void Hacl_Hash_SHA3_loadState(uint32_t rateInBytes, uint8_t *input, uint64_t *s); - #if defined(__cplusplus) } #endif diff --git a/Modules/_hacl/internal/Hacl_Impl_Blake2_Constants.h b/Modules/_hacl/internal/Hacl_Impl_Blake2_Constants.h new file mode 100644 index 00000000000..f4cf516124a --- /dev/null +++ b/Modules/_hacl/internal/Hacl_Impl_Blake2_Constants.h @@ -0,0 +1,73 @@ +/* MIT License + * + * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation + * Copyright (c) 2022-2023 HACL* Contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef __internal_Hacl_Impl_Blake2_Constants_H +#define __internal_Hacl_Impl_Blake2_Constants_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include +#include "krml/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +static const +uint32_t +Hacl_Hash_Blake2b_sigmaTable[160U] = + { + 0U, 1U, 2U, 3U, 4U, 5U, 6U, 7U, 8U, 9U, 10U, 11U, 12U, 13U, 14U, 15U, 14U, 10U, 4U, 8U, 9U, 15U, + 13U, 6U, 1U, 12U, 0U, 2U, 11U, 7U, 5U, 3U, 11U, 8U, 12U, 0U, 5U, 2U, 15U, 13U, 10U, 14U, 3U, 6U, + 7U, 1U, 9U, 4U, 7U, 9U, 3U, 1U, 13U, 12U, 11U, 14U, 2U, 6U, 5U, 10U, 4U, 0U, 15U, 8U, 9U, 0U, + 5U, 7U, 2U, 4U, 10U, 15U, 14U, 1U, 11U, 12U, 6U, 8U, 3U, 13U, 2U, 12U, 6U, 10U, 0U, 11U, 8U, 3U, + 4U, 13U, 7U, 5U, 15U, 14U, 1U, 9U, 12U, 5U, 1U, 15U, 14U, 13U, 4U, 10U, 0U, 7U, 6U, 3U, 9U, 2U, + 8U, 11U, 13U, 11U, 7U, 14U, 12U, 1U, 3U, 9U, 5U, 0U, 15U, 4U, 8U, 6U, 2U, 10U, 6U, 15U, 14U, 9U, + 11U, 3U, 0U, 8U, 12U, 2U, 13U, 7U, 1U, 4U, 10U, 5U, 10U, 2U, 8U, 4U, 7U, 6U, 1U, 5U, 15U, 11U, + 9U, 14U, 3U, 12U, 13U + }; + +static const +uint32_t +Hacl_Hash_Blake2b_ivTable_S[8U] = + { + 0x6A09E667U, 0xBB67AE85U, 0x3C6EF372U, 0xA54FF53AU, 0x510E527FU, 0x9B05688CU, 0x1F83D9ABU, + 0x5BE0CD19U + }; + +static const +uint64_t +Hacl_Hash_Blake2b_ivTable_B[8U] = + { + 0x6A09E667F3BCC908ULL, 0xBB67AE8584CAA73BULL, 0x3C6EF372FE94F82BULL, 0xA54FF53A5F1D36F1ULL, + 0x510E527FADE682D1ULL, 0x9B05688C2B3E6C1FULL, 0x1F83D9ABFB41BD6BULL, 0x5BE0CD19137E2179ULL + }; + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Impl_Blake2_Constants_H_DEFINED +#endif diff --git a/Modules/_hacl/lib_memzero0.h b/Modules/_hacl/lib_memzero0.h new file mode 100644 index 00000000000..fea3e41c907 --- /dev/null +++ b/Modules/_hacl/lib_memzero0.h @@ -0,0 +1,5 @@ +#include + +void Lib_Memzero0_memzero0(void *dst, uint64_t len); + +#define Lib_Memzero0_memzero(dst, len, t, _ret_t) Lib_Memzero0_memzero0(dst, len * sizeof(t)) diff --git a/Modules/_hacl/libintvector.h b/Modules/_hacl/libintvector.h new file mode 100644 index 00000000000..99d11336942 --- /dev/null +++ b/Modules/_hacl/libintvector.h @@ -0,0 +1,936 @@ +#ifndef __Vec_Intrin_H +#define __Vec_Intrin_H + +#include + +/* We include config.h here to ensure that the various feature-flags are + * properly brought into scope. Users can either run the configure script, or + * write a config.h themselves and put it under version control. */ +#if defined(__has_include) +#if __has_include("config.h") +#include "config.h" +#endif +#endif + +/* # DEBUGGING: + * ============ + * It is possible to debug the current definitions by using libintvector_debug.h + * See the include at the bottom of the file. */ + +#define Lib_IntVector_Intrinsics_bit_mask64(x) -((x) & 1) + +#if defined(__x86_64__) || defined(_M_X64) + +#if defined(HACL_CAN_COMPILE_VEC128) + +#include +#include +#include + +typedef __m128i Lib_IntVector_Intrinsics_vec128; + +#define Lib_IntVector_Intrinsics_ni_aes_enc(x0, x1) \ + (_mm_aesenc_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_ni_aes_enc_last(x0, x1) \ + (_mm_aesenclast_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_ni_aes_keygen_assist(x0, x1) \ + (_mm_aeskeygenassist_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_ni_clmul(x0, x1, x2) \ + (_mm_clmulepi64_si128(x0, x1, x2)) + + +#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \ + (_mm_xor_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + (_mm_cmpeq_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + (_mm_cmpeq_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + (_mm_cmpgt_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + (_mm_cmpgt_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + (_mm_or_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + (_mm_and_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + (_mm_xor_si128(x0, _mm_set1_epi32(-1))) + + +#define Lib_IntVector_Intrinsics_vec128_shift_left(x0, x1) \ + (_mm_slli_si128(x0, (x1)/8)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right(x0, x1) \ + (_mm_srli_si128(x0, (x1)/8)) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + (_mm_slli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + (_mm_srli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_left32(x0, x1) \ + (_mm_slli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \ + (_mm_srli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) \ + (_mm_shuffle_epi8(x0, _mm_set_epi8(14,13,12,15,10,9,8,11,6,5,4,7,2,1,0,3))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) \ + (_mm_shuffle_epi8(x0, _mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_24(x0) \ + (_mm_shuffle_epi8(x0, _mm_set_epi8(12,15,14,13,8,11,10,9,4,7,6,5,0,3,2,1))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0,x1) \ + (((x1) == 8? Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) : \ + ((x1) == 16? Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) : \ + ((x1) == 24? Lib_IntVector_Intrinsics_vec128_rotate_left32_24(x0) : \ + _mm_xor_si128(_mm_slli_epi32(x0,x1),_mm_srli_epi32(x0,32-(x1))))))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0,x1) \ + (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0,32-(x1))) + +#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x4,x3,x2,x1))) + +#define Lib_IntVector_Intrinsics_vec128_shuffle64(x0, x1, x2) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2*x1+1,2*x1,2*x2+1,2*x2))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE((x1+3)%4,(x1+2)%4,(x1+1)%4,x1%4))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes64(x0, x1) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE((2*x1+3)%4,(2*x1+2)%4,(2*x1+1)%4,(2*x1)%4))) + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x0) \ + (_mm_loadu_si128((__m128i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x0) \ + (_mm_loadu_si128((__m128i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec128_load_be(x0) \ + (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))) + +#define Lib_IntVector_Intrinsics_vec128_load32_be(x0) \ + (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3))) + +#define Lib_IntVector_Intrinsics_vec128_load64_be(x0) \ + (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))) + +#define Lib_IntVector_Intrinsics_vec128_store_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)))) + + +#define Lib_IntVector_Intrinsics_vec128_store32_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)))) + +#define Lib_IntVector_Intrinsics_vec128_store64_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)))) + + + +#define Lib_IntVector_Intrinsics_vec128_insert8(x0, x1, x2) \ + (_mm_insert_epi8(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + (_mm_insert_epi32(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + (_mm_insert_epi64(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_extract8(x0, x1) \ + (_mm_extract_epi8(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + (_mm_extract_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + (_mm_extract_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_zero \ + (_mm_setzero_si128()) + + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + (_mm_add_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + (_mm_sub_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + (_mm_mul_epu32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + (_mm_mul_epu32(x0, _mm_set1_epi64x(x1))) + +#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ + (_mm_add_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_sub32(x0, x1) \ + (_mm_sub_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_mul32(x0, x1) \ + (_mm_mullo_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_smul32(x0, x1) \ + (_mm_mullo_epi32(x0, _mm_set1_epi32(x1))) + +#define Lib_IntVector_Intrinsics_vec128_load128(x) \ + ((__m128i)x) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + (_mm_set1_epi64x(x)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_load64s(x0, x1) \ + (_mm_set_epi64x(x1, x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + (_mm_set1_epi32(x)) + +#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \ + (_mm_set_epi32(x3, x2, x1, x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x1, x2) \ + (_mm_unpacklo_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x1, x2) \ + (_mm_unpackhi_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x1, x2) \ + (_mm_unpacklo_epi64(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \ + (_mm_unpackhi_epi64(x1, x2)) + +#endif /* HACL_CAN_COMPILE_VEC128 */ + +#if defined(HACL_CAN_COMPILE_VEC256) + +#include + +typedef __m256i Lib_IntVector_Intrinsics_vec256; + + +#define Lib_IntVector_Intrinsics_vec256_eq64(x0, x1) \ + (_mm256_cmpeq_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_eq32(x0, x1) \ + (_mm256_cmpeq_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_gt64(x0, x1) \ + (_mm256_cmpgt_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_gt32(x0, x1) \ + (_mm256_cmpgt_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_xor(x0, x1) \ + (_mm256_xor_si256(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_or(x0, x1) \ + (_mm256_or_si256(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_and(x0, x1) \ + (_mm256_and_si256(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_lognot(x0) \ + (_mm256_xor_si256(x0, _mm256_set1_epi32(-1))) + +#define Lib_IntVector_Intrinsics_vec256_shift_left(x0, x1) \ + (_mm256_slli_si256(x0, (x1)/8)) + +#define Lib_IntVector_Intrinsics_vec256_shift_right(x0, x1) \ + (_mm256_srli_si256(x0, (x1)/8)) + +#define Lib_IntVector_Intrinsics_vec256_shift_left64(x0, x1) \ + (_mm256_slli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_shift_right64(x0, x1) \ + (_mm256_srli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_shift_left32(x0, x1) \ + (_mm256_slli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_shift_right32(x0, x1) \ + (_mm256_srli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(14,13,12,15,10,9,8,11,6,5,4,7,2,1,0,3,14,13,12,15,10,9,8,11,6,5,4,7,2,1,0,3))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2,13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32_24(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(12,15,14,13,8,11,10,9,4,7,6,5,0,3,2,1,12,15,14,13,8,11,10,9,4,7,6,5,0,3,2,1))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32(x0,x1) \ + ((x1 == 8? Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) : \ + (x1 == 16? Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) : \ + (x1 == 24? Lib_IntVector_Intrinsics_vec256_rotate_left32_24(x0) : \ + _mm256_or_si256(_mm256_slli_epi32(x0,x1),_mm256_srli_epi32(x0,32-(x1))))))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right32(x0,x1) \ + (Lib_IntVector_Intrinsics_vec256_rotate_left32(x0,32-(x1))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_8(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(8,15,14,13,12,11,10,9,0,7,6,5,4,3,2,1,8,15,14,13,12,11,10,9,0,7,6,5,4,3,2,1))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_16(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(9,8,15,14,13,12,11,10,1,0,7,6,5,4,3,2,9,8,15,14,13,12,11,10,1,0,7,6,5,4,3,2))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_24(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(10,9,8,15,14,13,12,11,2,1,0,7,6,5,4,3,10,9,8,15,14,13,12,11,2,1,0,7,6,5,4,3))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_32(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_40(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(12,11,10,9,8,15,14,13,4,3,2,1,0,7,6,5,12,11,10,9,8,15,14,13,4,3,2,1,0,7,6,5))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_48(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(13,12,11,10,9,8,15,14,5,4,3,2,1,0,7,6,13,12,11,10,9,8,15,14,5,4,3,2,1,0,7,6))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_56(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(14,13,12,11,10,9,8,15,6,5,4,3,2,1,0,7,14,13,12,11,10,9,8,15,6,5,4,3,2,1,0,7))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64(x0,x1) \ + ((x1 == 8? Lib_IntVector_Intrinsics_vec256_rotate_right64_8(x0) : \ + (x1 == 16? Lib_IntVector_Intrinsics_vec256_rotate_right64_16(x0) : \ + (x1 == 24? Lib_IntVector_Intrinsics_vec256_rotate_right64_24(x0) : \ + (x1 == 32? Lib_IntVector_Intrinsics_vec256_rotate_right64_32(x0) : \ + (x1 == 40? Lib_IntVector_Intrinsics_vec256_rotate_right64_40(x0) : \ + (x1 == 48? Lib_IntVector_Intrinsics_vec256_rotate_right64_48(x0) : \ + (x1 == 56? Lib_IntVector_Intrinsics_vec256_rotate_right64_56(x0) : \ + _mm256_xor_si256(_mm256_srli_epi64((x0),(x1)),_mm256_slli_epi64((x0),(64-(x1)))))))))))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left64(x0,x1) \ + (Lib_IntVector_Intrinsics_vec256_rotate_right64(x0,64-(x1))) + +#define Lib_IntVector_Intrinsics_vec256_shuffle64(x0, x1, x2, x3, x4) \ + (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE(x4,x3,x2,x1))) + +#define Lib_IntVector_Intrinsics_vec256_shuffle32(x0, x1, x2, x3, x4, x5, x6, x7, x8) \ + (_mm256_permutevar8x32_epi32(x0, _mm256_set_epi32(x8,x7,x6,x5,x4,x3,x2,x1))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right_lanes32(x0, x1) \ + (_mm256_permutevar8x32_epi32(x0, _mm256_set_epi32((x1+7)%8,(x1+6)%8,(x1+5)%8,(x1+4)%8,(x1+3%8),(x1+2)%8,(x1+1)%8,x1%8))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right_lanes64(x0, x1) \ + (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE((x1+3)%4,(x1+2)%4,(x1+1)%4,x1%4))) + +#define Lib_IntVector_Intrinsics_vec256_load32_le(x0) \ + (_mm256_loadu_si256((__m256i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec256_load64_le(x0) \ + (_mm256_loadu_si256((__m256i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec256_load32_be(x0) \ + (_mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)(x0)), _mm256_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3))) + +#define Lib_IntVector_Intrinsics_vec256_load64_be(x0) \ + (_mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)(x0)), _mm256_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))) + + +#define Lib_IntVector_Intrinsics_vec256_store32_le(x0, x1) \ + (_mm256_storeu_si256((__m256i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec256_store64_le(x0, x1) \ + (_mm256_storeu_si256((__m256i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec256_store32_be(x0, x1) \ + (_mm256_storeu_si256((__m256i*)(x0), _mm256_shuffle_epi8(x1, _mm256_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)))) + +#define Lib_IntVector_Intrinsics_vec256_store64_be(x0, x1) \ + (_mm256_storeu_si256((__m256i*)(x0), _mm256_shuffle_epi8(x1, _mm256_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)))) + + +#define Lib_IntVector_Intrinsics_vec256_insert8(x0, x1, x2) \ + (_mm256_insert_epi8(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_insert32(x0, x1, x2) \ + (_mm256_insert_epi32(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_insert64(x0, x1, x2) \ + (_mm256_insert_epi64(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_extract8(x0, x1) \ + (_mm256_extract_epi8(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_extract32(x0, x1) \ + (_mm256_extract_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_extract64(x0, x1) \ + (_mm256_extract_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_zero \ + (_mm256_setzero_si256()) + +#define Lib_IntVector_Intrinsics_vec256_add64(x0, x1) \ + (_mm256_add_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_sub64(x0, x1) \ + (_mm256_sub_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_mul64(x0, x1) \ + (_mm256_mul_epu32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_smul64(x0, x1) \ + (_mm256_mul_epu32(x0, _mm256_set1_epi64x(x1))) + + +#define Lib_IntVector_Intrinsics_vec256_add32(x0, x1) \ + (_mm256_add_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_sub32(x0, x1) \ + (_mm256_sub_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_mul32(x0, x1) \ + (_mm256_mullo_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_smul32(x0, x1) \ + (_mm256_mullo_epi32(x0, _mm256_set1_epi32(x1))) + + +#define Lib_IntVector_Intrinsics_vec256_load64(x1) \ + (_mm256_set1_epi64x(x1)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec256_load64s(x0, x1, x2, x3) \ + (_mm256_set_epi64x(x3,x2,x1,x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec256_load32(x) \ + (_mm256_set1_epi32(x)) + +#define Lib_IntVector_Intrinsics_vec256_load32s(x0,x1,x2,x3,x4, x5, x6, x7) \ + (_mm256_set_epi32(x7, x6, x5, x4, x3, x2, x1, x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec256_load128(x) \ + (_mm256_set_m128i((__m128i)x)) + +#define Lib_IntVector_Intrinsics_vec256_load128s(x0,x1) \ + (_mm256_set_m128i((__m128i)x1,(__m128i)x0)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_low32(x1, x2) \ + (_mm256_unpacklo_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_high32(x1, x2) \ + (_mm256_unpackhi_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_low64(x1, x2) \ + (_mm256_unpacklo_epi64(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_high64(x1, x2) \ + (_mm256_unpackhi_epi64(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_low128(x1, x2) \ + (_mm256_permute2x128_si256(x1, x2, 0x20)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_high128(x1, x2) \ + (_mm256_permute2x128_si256(x1, x2, 0x31)) + +#endif /* HACL_CAN_COMPILE_VEC256 */ + +#elif (defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) \ + && !defined(__ARM_32BIT_STATE) + +#if defined(HACL_CAN_COMPILE_VEC128) + +#include + +typedef uint32x4_t Lib_IntVector_Intrinsics_vec128; + +#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \ + (veorq_u32(x0,x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + (vceqq_u32(x0,x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + (vceqq_u32(x0,x1)) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + (vcgtq_u32(x0, x1)) + +#define high32(x0) \ + (vmovn_u64(vshrq_n_u64(vreinterpretq_u64_u32(x0),32))) + +#define low32(x0) \ + (vmovn_u64(vreinterpretq_u64_u32(x0))) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + (vreinterpretq_u32_u64(vmovl_u32(vorr_u32(vcgt_u32(high32(x0),high32(x1)),vand_u32(vceq_u32(high32(x0),high32(x1)),vcgt_u32(low32(x0),low32(x1))))))) + +#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + (vorrq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + (vandq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + (vmvnq_u32(x0)) + + +#define Lib_IntVector_Intrinsics_vec128_shift_left(x0, x1) \ + (vextq_u32(x0, vdupq_n_u8(0), 16-(x1)/8)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right(x0, x1) \ + (vextq_u32(x0, vdupq_n_u8(0), (x1)/8)) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + (vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + (vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_shift_left32(x0, x1) \ + (vshlq_n_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \ + (vshrq_n_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x1) \ + (vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0,x1) \ + (((x1) == 16? Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) : \ + vsriq_n_u32(vshlq_n_u32((x0),(x1)),(x0),32-(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32_16(x1) \ + (vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0,x1) \ + (((x1) == 16? Lib_IntVector_Intrinsics_vec128_rotate_right32_16(x0) : \ + vsriq_n_u32(vshlq_n_u32((x0),32-(x1)),(x0),(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ + (vextq_u32(x0,x0,x1)) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes64(x0, x1) \ + (vextq_u64(x0,x0,x1)) + + +/* +#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x1,x2,x3,x4))) + +#define Lib_IntVector_Intrinsics_vec128_shuffle64(x0, x1, x2) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2*x1+1,2*x1,2*x2+1,2*x2))) +*/ + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x0) \ + (vld1q_u32((const uint32_t*) (x0))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x0) \ + (vld1q_u32((const uint32_t*) (x0))) + +#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ + (vst1q_u32((uint32_t*)(x0),(x1))) + +#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ + (vst1q_u32((uint32_t*)(x0),(x1))) + +/* +#define Lib_IntVector_Intrinsics_vec128_load_be(x0) \ + ( Lib_IntVector_Intrinsics_vec128 l = vrev64q_u8(vld1q_u32((uint32_t*)(x0))); + +*/ + +#define Lib_IntVector_Intrinsics_vec128_load32_be(x0) \ + (vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(vld1q_u32((const uint32_t*)(x0)))))) + +#define Lib_IntVector_Intrinsics_vec128_load64_be(x0) \ + (vreinterpretq_u32_u8(vrev64q_u8(vreinterpretq_u8_u32(vld1q_u32((const uint32_t*)(x0)))))) + +/* +#define Lib_IntVector_Intrinsics_vec128_store_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)))) +*/ + +#define Lib_IntVector_Intrinsics_vec128_store32_be(x0, x1) \ + (vst1q_u32((uint32_t*)(x0),(vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x1)))))) + +#define Lib_IntVector_Intrinsics_vec128_store64_be(x0, x1) \ + (vst1q_u32((uint32_t*)(x0),(vreinterpretq_u32_u8(vrev64q_u8(vreinterpretq_u8_u32(x1)))))) + +#define Lib_IntVector_Intrinsics_vec128_insert8(x0, x1, x2) \ + (vsetq_lane_u8(x1,x0,x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + (vsetq_lane_u32(x1,x0,x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + (vreinterpretq_u32_u64(vsetq_lane_u64(x1,vreinterpretq_u64_u32(x0),x2))) + +#define Lib_IntVector_Intrinsics_vec128_extract8(x0, x1) \ + (vgetq_lane_u8(x0,x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + (vgetq_lane_u32(x0,x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + (vgetq_lane_u64(vreinterpretq_u64_u32(x0),x1)) + +#define Lib_IntVector_Intrinsics_vec128_zero \ + (vdupq_n_u32(0)) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + (vreinterpretq_u32_u64(vaddq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + (vreinterpretq_u32_u64(vsubq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + (vreinterpretq_u32_u64(vmull_u32(vmovn_u64(vreinterpretq_u64_u32(x0)), vmovn_u64(vreinterpretq_u64_u32(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + (vreinterpretq_u32_u64(vmull_n_u32(vmovn_u64(vreinterpretq_u64_u32(x0)), (uint32_t)x1))) + +#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ + (vaddq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_sub32(x0, x1) \ + (vsubq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_mul32(x0, x1) \ + (vmulq_lane_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_smul32(x0, x1) \ + (vmulq_lane_u32(x0, vdupq_n_u32(x1))) + +#define Lib_IntVector_Intrinsics_vec128_load128(x) \ + ((uint32x4_t)(x)) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + (vreinterpretq_u32_u64(vdupq_n_u64(x))) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + (vdupq_n_u32(x)) /* hi lo */ + +static inline Lib_IntVector_Intrinsics_vec128 Lib_IntVector_Intrinsics_vec128_load64s(uint64_t x1, uint64_t x2){ + const uint64_t a[2] = {x1,x2}; + return vreinterpretq_u32_u64(vld1q_u64(a)); +} + +static inline Lib_IntVector_Intrinsics_vec128 Lib_IntVector_Intrinsics_vec128_load32s(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4){ + const uint32_t a[4] = {x1,x2,x3,x4}; + return vld1q_u32(a); +} + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x1, x2) \ + (vzip1q_u32(x1,x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x1, x2) \ + (vzip2q_u32(x1,x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x1,x2) \ + (vreinterpretq_u32_u64(vzip1q_u64(vreinterpretq_u64_u32(x1),vreinterpretq_u64_u32(x2)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1,x2) \ + (vreinterpretq_u32_u64(vzip2q_u64(vreinterpretq_u64_u32(x1),vreinterpretq_u64_u32(x2)))) + +#endif /* HACL_CAN_COMPILE_VEC128 */ + +/* IBM z architecture */ +#elif defined(__s390x__) /* this flag is for GCC only */ + +#if defined(HACL_CAN_COMPILE_VEC128) + +#include +#include + +/* The main vector 128 type + * We can't use uint8_t, uint32_t, uint64_t... instead of unsigned char, + * unsigned int, unsigned long long: the compiler complains that the parameter + * combination is invalid. */ +typedef unsigned char vector128_8 __attribute__ ((vector_size(16))); +typedef unsigned int vector128_32 __attribute__ ((vector_size(16))); +typedef unsigned long long vector128_64 __attribute__ ((vector_size(16))); + +typedef vector128_8 Lib_IntVector_Intrinsics_vec128; +typedef vector128_8 vector128; + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x) \ + (vector128) ((vector128_32) vec_revb(*((vector128_32*) (const uint8_t*)(x)))) + +#define Lib_IntVector_Intrinsics_vec128_load32_be(x) \ + (vector128) (*((vector128_32*) (const uint8_t*)(x))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x) \ + (vector128) ((vector128_64) vec_revb(*((vector128_64*) (const uint8_t*)(x)))) + +static inline +void Lib_IntVector_Intrinsics_vec128_store32_le(const uint8_t *x0, vector128 x1) { + *((vector128_32*)x0) = vec_revb((vector128_32) x1); +} + +static inline +void Lib_IntVector_Intrinsics_vec128_store32_be(const uint8_t *x0, vector128 x1) { + *((vector128_32*)x0) = (vector128_32) x1; +} + +static inline +void Lib_IntVector_Intrinsics_vec128_store64_le(const uint8_t *x0, vector128 x1) { + *((vector128_64*)x0) = vec_revb((vector128_64) x1); +} + +#define Lib_IntVector_Intrinsics_vec128_add32(x0,x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) + ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + ((vector128)(vec_and((vector128)(x0),(vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_32)(x0)),((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_64)(x0)),((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + ((unsigned int)(vec_extract((vector128_32)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + ((unsigned long long)(vec_extract((vector128_64)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) > ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) > ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + ((vector128)((vector128_32)vec_insert((unsigned int)(x1), (vector128_32)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + ((vector128)((vector128_64)vec_insert((unsigned long long)(x1), (vector128_64)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x0, x1) \ + ((vector128)((vector128_32)vec_mergel((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x0, x1) \ + ((vector128)((vector128_64)vec_mergel((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x0, x1) \ + ((vector128)((vector128_32)vec_mergeh((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x0, x1) \ + ((vector128)((vector128_64)vec_mergeh((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + ((vector128)((vector128_32){(unsigned int)(x), (unsigned int)(x), \ + (unsigned int)(x), (unsigned int)(x)})) + +#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \ + ((vector128)((vector128_32){(unsigned int)(x0),(unsigned int)(x1),(unsigned int)(x2),(unsigned int)(x3)})) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + ((vector128)((vector128_64)vec_load_pair((unsigned long long)(x),(unsigned long long)(x)))) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + ((vector128)(vec_xor((vector128)(x0), (vector128)vec_splat_u32(-1)))) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + ((vector128)(vec_mulo((vector128_32)(x0), \ + (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + ((vector128)(vec_or((vector128)(x0),(vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ + ((vector128)(vec_rli((vector128_32)(x0), (unsigned long)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ + (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0,(uint32_t)(32-(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ + ((vector128)(vec_sld((vector128)(x0), (vector128)(x0), (x1%4)*4))) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(x1)))) & \ + ((vector128)((vector128_64){0xffffffffffffffff << (x1), 0xffffffffffffffff << (x1)}))) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(64-(x1))))) & \ + ((vector128)((vector128_64){0xffffffffffffffff >> (x1), 0xffffffffffffffff >> (x1)}))) + +#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \ + (((vector128)((vector128_32)vec_rli((vector128_32)(x0), (unsigned int)(32-(x1))))) & \ + ((vector128)((vector128_32){0xffffffff >> (x1), 0xffffffff >> (x1), \ + 0xffffffff >> (x1), 0xffffffff >> (x1)}))) + +/* Doesn't work with vec_splat_u64 */ +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + ((vector128)(Lib_IntVector_Intrinsics_vec128_mul64(x0,((vector128_64){(unsigned long long)(x1),(unsigned long long)(x1)})))) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + ((vector128)((vector128_64)(x0) - (vector128_64)(x1))) + +static inline +vector128 Lib_IntVector_Intrinsics_vec128_xor(vector128 x0, vector128 x1) { + return ((vector128)(vec_xor((vector128)(x0), (vector128)(x1)))); +} + + +#define Lib_IntVector_Intrinsics_vec128_zero \ + ((vector128){}) + +#endif /* HACL_CAN_COMPILE_VEC128 */ + +#elif defined(__powerpc64__) // PowerPC 64 - this flag is for GCC only + +#if defined(HACL_CAN_COMPILE_VEC128) + +#include +#include // for memcpy +#include + +// The main vector 128 type +// We can't use uint8_t, uint32_t, uint64_t... instead of unsigned char, +// unsigned int, unsigned long long: the compiler complains that the parameter +// combination is invalid. +typedef vector unsigned char vector128_8; +typedef vector unsigned int vector128_32; +typedef vector unsigned long long vector128_64; + +typedef vector128_8 Lib_IntVector_Intrinsics_vec128; +typedef vector128_8 vector128; + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x) \ + ((vector128)((vector128_32)(vec_xl(0, (const unsigned int*) ((const uint8_t*)(x)))))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x) \ + ((vector128)((vector128_64)(vec_xl(0, (const unsigned long long*) ((const uint8_t*)(x)))))) + +#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ + (vec_xst((vector128_32)(x1), 0, (unsigned int*) ((uint8_t*)(x0)))) + +#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ + (vec_xst((vector128_64)(x1), 0, (unsigned long long*) ((uint8_t*)(x0)))) + +#define Lib_IntVector_Intrinsics_vec128_add32(x0,x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) + ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + ((vector128)(vec_and((vector128)(x0),(vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_32)(x0)),((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_64)(x0)),((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + ((unsigned int)(vec_extract((vector128_32)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + ((unsigned long long)(vec_extract((vector128_64)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) > ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) > ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + ((vector128)((vector128_32)vec_insert((unsigned int)(x1), (vector128_32)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + ((vector128)((vector128_64)vec_insert((unsigned long long)(x1), (vector128_64)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x0, x1) \ + ((vector128)((vector128_32)vec_mergel((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x0, x1) \ + ((vector128)((vector128_64)vec_mergel((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x0, x1) \ + ((vector128)((vector128_32)vec_mergeh((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x0, x1) \ + ((vector128)((vector128_64)vec_mergeh((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + ((vector128)((vector128_32){(unsigned int)(x), (unsigned int)(x), \ + (unsigned int)(x), (unsigned int)(x)})) + +#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \ + ((vector128)((vector128_32){(unsigned int)(x0),(unsigned int)(x1),(unsigned int)(x2),(unsigned int)(x3)})) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + ((vector128)((vector128_64){(unsigned long long)(x),(unsigned long long)(x)})) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + ((vector128)(vec_xor((vector128)(x0), (vector128)vec_splat_u32(-1)))) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + ((vector128)(vec_mule((vector128_32)(x0), \ + (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + ((vector128)(vec_or((vector128)(x0),(vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ + ((vector128)(vec_rl((vector128_32)(x0), (vector128_32){(unsigned int)(x1),(unsigned int)(x1),(unsigned int)(x1),(unsigned int)(x1)}))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ + (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0,(uint32_t)(32-(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ + ((vector128)(vec_sld((vector128)(x0), (vector128)(x0), ((4-(x1))%4)*4))) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + ((vector128)((vector128_64)vec_sl((vector128_64)(x0), (vector128_64){(unsigned long)(x1),(unsigned long)(x1)}))) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + ((vector128)((vector128_64)vec_sr((vector128_64)(x0), (vector128_64){(unsigned long)(x1),(unsigned long)(x1)}))) + +// Doesn't work with vec_splat_u64 +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + ((vector128)(Lib_IntVector_Intrinsics_vec128_mul64(x0,((vector128_64){(unsigned long long)(x1),(unsigned long long)(x1)})))) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + ((vector128)((vector128_64)(x0) - (vector128_64)(x1))) + +#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \ + ((vector128)(vec_xor((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_zero \ + ((vector128){}) + +#endif /* HACL_CAN_COMPILE_VEC128 */ + +#endif // PowerPC64 + +// DEBUGGING: +// If libintvector_debug.h exists, use it to debug the current implementations. +// Note that some flags must be enabled for the debugging to be effective: +// see libintvector_debug.h for more details. +#if defined(__has_include) +#if __has_include("libintvector_debug.h") +#include "libintvector_debug.h" +#endif +#endif + +#endif // __Vec_Intrin_H diff --git a/Modules/_hacl/python_hacl_namespaces.h b/Modules/_hacl/python_hacl_namespaces.h index 684e7fd2fbe..8a1f4aef384 100644 --- a/Modules/_hacl/python_hacl_namespaces.h +++ b/Modules/_hacl/python_hacl_namespaces.h @@ -6,7 +6,7 @@ * conflicts with builds linking or dynamically loading other code potentially * using HACL* libraries. * - * To make sure this is effective: cd Modules && nm -a *.o | grep Hacl + * Something like this to generate new entries for the list: nm *.o | grep Hacl | cut -c 20- | sort | uniq | grep -v python_hashlib | egrep ^_ | gsed 's/_\(.*\)/#define \1 python_hashlib_\1/' */ #define Hacl_Hash_SHA2_state_sha2_224_s python_hashlib_Hacl_Hash_SHA2_state_sha2_224_s @@ -86,4 +86,127 @@ #define Hacl_Hash_SHA3_update python_hashlib_Hacl_Hash_SHA3_update #define Hacl_Hash_SHA3_squeeze python_hashlib_Hacl_Hash_SHA3_squeeze +#define Hacl_Hash_Blake2b_Simd256_copy python_hashlib_Hacl_Hash_Blake2b_Simd256_copy +#define Hacl_Hash_Blake2b_Simd256_digest python_hashlib_Hacl_Hash_Blake2b_Simd256_digest +#define Hacl_Hash_Blake2b_Simd256_finish python_hashlib_Hacl_Hash_Blake2b_Simd256_finish +#define Hacl_Hash_Blake2b_Simd256_free python_hashlib_Hacl_Hash_Blake2b_Simd256_free +#define Hacl_Hash_Blake2b_Simd256_hash_with_key python_hashlib_Hacl_Hash_Blake2b_Simd256_hash_with_key +#define Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params python_hashlib_Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params +#define Hacl_Hash_Blake2b_Simd256_info python_hashlib_Hacl_Hash_Blake2b_Simd256_info +#define Hacl_Hash_Blake2b_Simd256_init python_hashlib_Hacl_Hash_Blake2b_Simd256_init +#define Hacl_Hash_Blake2b_Simd256_load_state256b_from_state32 python_hashlib_Hacl_Hash_Blake2b_Simd256_load_state256b_from_state32 +#define Hacl_Hash_Blake2b_Simd256_malloc python_hashlib_Hacl_Hash_Blake2b_Simd256_malloc +#define Hacl_Hash_Blake2b_Simd256_malloc_with_key python_hashlib_Hacl_Hash_Blake2b_Simd256_malloc_with_key +#define Hacl_Hash_Blake2b_Simd256_malloc_with_key0 python_hashlib_Hacl_Hash_Blake2b_Simd256_malloc_with_key0 +#define Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key python_hashlib_Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key +#define Hacl_Hash_Blake2b_Simd256_reset python_hashlib_Hacl_Hash_Blake2b_Simd256_reset +#define Hacl_Hash_Blake2b_Simd256_reset_with_key python_hashlib_Hacl_Hash_Blake2b_Simd256_reset_with_key +#define Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params python_hashlib_Hacl_Hash_Blake2b_Simd256_reset_with_key_and_params +#define Hacl_Hash_Blake2b_Simd256_store_state256b_to_state32 python_hashlib_Hacl_Hash_Blake2b_Simd256_store_state256b_to_state32 +#define Hacl_Hash_Blake2b_Simd256_update python_hashlib_Hacl_Hash_Blake2b_Simd256_update +#define Hacl_Hash_Blake2b_Simd256_update_last python_hashlib_Hacl_Hash_Blake2b_Simd256_update_last +#define Hacl_Hash_Blake2b_Simd256_update_multi python_hashlib_Hacl_Hash_Blake2b_Simd256_update_multi +#define Hacl_Hash_Blake2b_copy python_hashlib_Hacl_Hash_Blake2b_copy +#define Hacl_Hash_Blake2b_digest python_hashlib_Hacl_Hash_Blake2b_digest +#define Hacl_Hash_Blake2b_finish python_hashlib_Hacl_Hash_Blake2b_finish +#define Hacl_Hash_Blake2b_free python_hashlib_Hacl_Hash_Blake2b_free +#define Hacl_Hash_Blake2b_hash_with_key python_hashlib_Hacl_Hash_Blake2b_hash_with_key +#define Hacl_Hash_Blake2b_hash_with_key_and_params python_hashlib_Hacl_Hash_Blake2b_hash_with_key_and_params +#define Hacl_Hash_Blake2b_info python_hashlib_Hacl_Hash_Blake2b_info +#define Hacl_Hash_Blake2b_init python_hashlib_Hacl_Hash_Blake2b_init +#define Hacl_Hash_Blake2b_malloc python_hashlib_Hacl_Hash_Blake2b_malloc +#define Hacl_Hash_Blake2b_malloc_with_key python_hashlib_Hacl_Hash_Blake2b_malloc_with_key +#define Hacl_Hash_Blake2b_malloc_with_params_and_key python_hashlib_Hacl_Hash_Blake2b_malloc_with_params_and_key +#define Hacl_Hash_Blake2b_reset python_hashlib_Hacl_Hash_Blake2b_reset +#define Hacl_Hash_Blake2b_reset_with_key python_hashlib_Hacl_Hash_Blake2b_reset_with_key +#define Hacl_Hash_Blake2b_reset_with_key_and_params python_hashlib_Hacl_Hash_Blake2b_reset_with_key_and_params +#define Hacl_Hash_Blake2b_update python_hashlib_Hacl_Hash_Blake2b_update +#define Hacl_Hash_Blake2b_update_last python_hashlib_Hacl_Hash_Blake2b_update_last +#define Hacl_Hash_Blake2b_update_multi python_hashlib_Hacl_Hash_Blake2b_update_multi +#define Hacl_Hash_Blake2s_Simd128_copy python_hashlib_Hacl_Hash_Blake2s_Simd128_copy +#define Hacl_Hash_Blake2s_Simd128_digest python_hashlib_Hacl_Hash_Blake2s_Simd128_digest +#define Hacl_Hash_Blake2s_Simd128_finish python_hashlib_Hacl_Hash_Blake2s_Simd128_finish +#define Hacl_Hash_Blake2s_Simd128_free python_hashlib_Hacl_Hash_Blake2s_Simd128_free +#define Hacl_Hash_Blake2s_Simd128_hash_with_key python_hashlib_Hacl_Hash_Blake2s_Simd128_hash_with_key +#define Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params python_hashlib_Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params +#define Hacl_Hash_Blake2s_Simd128_info python_hashlib_Hacl_Hash_Blake2s_Simd128_info +#define Hacl_Hash_Blake2s_Simd128_init python_hashlib_Hacl_Hash_Blake2s_Simd128_init +#define Hacl_Hash_Blake2s_Simd128_load_state128s_from_state32 python_hashlib_Hacl_Hash_Blake2s_Simd128_load_state128s_from_state32 +#define Hacl_Hash_Blake2s_Simd128_malloc python_hashlib_Hacl_Hash_Blake2s_Simd128_malloc +#define Hacl_Hash_Blake2s_Simd128_malloc_with_key python_hashlib_Hacl_Hash_Blake2s_Simd128_malloc_with_key +#define Hacl_Hash_Blake2s_Simd128_malloc_with_key0 python_hashlib_Hacl_Hash_Blake2s_Simd128_malloc_with_key0 +#define Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key python_hashlib_Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key +#define Hacl_Hash_Blake2s_Simd128_reset python_hashlib_Hacl_Hash_Blake2s_Simd128_reset +#define Hacl_Hash_Blake2s_Simd128_reset_with_key python_hashlib_Hacl_Hash_Blake2s_Simd128_reset_with_key +#define Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params python_hashlib_Hacl_Hash_Blake2s_Simd128_reset_with_key_and_params +#define Hacl_Hash_Blake2s_Simd128_store_state128s_to_state32 python_hashlib_Hacl_Hash_Blake2s_Simd128_store_state128s_to_state32 +#define Hacl_Hash_Blake2s_Simd128_update python_hashlib_Hacl_Hash_Blake2s_Simd128_update +#define Hacl_Hash_Blake2s_Simd128_update_last python_hashlib_Hacl_Hash_Blake2s_Simd128_update_last +#define Hacl_Hash_Blake2s_Simd128_update_multi python_hashlib_Hacl_Hash_Blake2s_Simd128_update_multi +#define Hacl_Hash_Blake2s_copy python_hashlib_Hacl_Hash_Blake2s_copy +#define Hacl_Hash_Blake2s_digest python_hashlib_Hacl_Hash_Blake2s_digest +#define Hacl_Hash_Blake2s_finish python_hashlib_Hacl_Hash_Blake2s_finish +#define Hacl_Hash_Blake2s_free python_hashlib_Hacl_Hash_Blake2s_free +#define Hacl_Hash_Blake2s_hash_with_key python_hashlib_Hacl_Hash_Blake2s_hash_with_key +#define Hacl_Hash_Blake2s_hash_with_key_and_params python_hashlib_Hacl_Hash_Blake2s_hash_with_key_and_params +#define Hacl_Hash_Blake2s_info python_hashlib_Hacl_Hash_Blake2s_info +#define Hacl_Hash_Blake2s_init python_hashlib_Hacl_Hash_Blake2s_init +#define Hacl_Hash_Blake2s_malloc python_hashlib_Hacl_Hash_Blake2s_malloc +#define Hacl_Hash_Blake2s_malloc_with_key python_hashlib_Hacl_Hash_Blake2s_malloc_with_key +#define Hacl_Hash_Blake2s_malloc_with_params_and_key python_hashlib_Hacl_Hash_Blake2s_malloc_with_params_and_key +#define Hacl_Hash_Blake2s_reset python_hashlib_Hacl_Hash_Blake2s_reset +#define Hacl_Hash_Blake2s_reset_with_key python_hashlib_Hacl_Hash_Blake2s_reset_with_key +#define Hacl_Hash_Blake2s_reset_with_key_and_params python_hashlib_Hacl_Hash_Blake2s_reset_with_key_and_params +#define Hacl_Hash_Blake2s_update python_hashlib_Hacl_Hash_Blake2s_update +#define Hacl_Hash_Blake2s_update_last python_hashlib_Hacl_Hash_Blake2s_update_last +#define Hacl_Hash_Blake2s_update_multi python_hashlib_Hacl_Hash_Blake2s_update_multi +#define Hacl_Hash_MD5_finish python_hashlib_Hacl_Hash_MD5_finish +#define Hacl_Hash_MD5_hash_oneshot python_hashlib_Hacl_Hash_MD5_hash_oneshot +#define Hacl_Hash_MD5_reset python_hashlib_Hacl_Hash_MD5_reset +#define Hacl_Hash_MD5_update_last python_hashlib_Hacl_Hash_MD5_update_last +#define Hacl_Hash_MD5_update_multi python_hashlib_Hacl_Hash_MD5_update_multi +#define Hacl_Hash_SHA1_finish python_hashlib_Hacl_Hash_SHA1_finish +#define Hacl_Hash_SHA1_hash_oneshot python_hashlib_Hacl_Hash_SHA1_hash_oneshot +#define Hacl_Hash_SHA1_reset python_hashlib_Hacl_Hash_SHA1_reset +#define Hacl_Hash_SHA1_update_last python_hashlib_Hacl_Hash_SHA1_update_last +#define Hacl_Hash_SHA1_update_multi python_hashlib_Hacl_Hash_SHA1_update_multi +#define Hacl_Hash_SHA2_hash_224 python_hashlib_Hacl_Hash_SHA2_hash_224 +#define Hacl_Hash_SHA2_hash_256 python_hashlib_Hacl_Hash_SHA2_hash_256 +#define Hacl_Hash_SHA2_hash_384 python_hashlib_Hacl_Hash_SHA2_hash_384 +#define Hacl_Hash_SHA2_hash_512 python_hashlib_Hacl_Hash_SHA2_hash_512 +#define Hacl_Hash_SHA2_reset_224 python_hashlib_Hacl_Hash_SHA2_reset_224 +#define Hacl_Hash_SHA2_reset_256 python_hashlib_Hacl_Hash_SHA2_reset_256 +#define Hacl_Hash_SHA2_reset_384 python_hashlib_Hacl_Hash_SHA2_reset_384 +#define Hacl_Hash_SHA2_reset_512 python_hashlib_Hacl_Hash_SHA2_reset_512 +#define Hacl_Hash_SHA2_sha224_finish python_hashlib_Hacl_Hash_SHA2_sha224_finish +#define Hacl_Hash_SHA2_sha224_init python_hashlib_Hacl_Hash_SHA2_sha224_init +#define Hacl_Hash_SHA2_sha224_update_last python_hashlib_Hacl_Hash_SHA2_sha224_update_last +#define Hacl_Hash_SHA2_sha256_finish python_hashlib_Hacl_Hash_SHA2_sha256_finish +#define Hacl_Hash_SHA2_sha256_init python_hashlib_Hacl_Hash_SHA2_sha256_init +#define Hacl_Hash_SHA2_sha256_update_last python_hashlib_Hacl_Hash_SHA2_sha256_update_last +#define Hacl_Hash_SHA2_sha256_update_nblocks python_hashlib_Hacl_Hash_SHA2_sha256_update_nblocks +#define Hacl_Hash_SHA2_sha384_finish python_hashlib_Hacl_Hash_SHA2_sha384_finish +#define Hacl_Hash_SHA2_sha384_init python_hashlib_Hacl_Hash_SHA2_sha384_init +#define Hacl_Hash_SHA2_sha384_update_last python_hashlib_Hacl_Hash_SHA2_sha384_update_last +#define Hacl_Hash_SHA2_sha384_update_nblocks python_hashlib_Hacl_Hash_SHA2_sha384_update_nblocks +#define Hacl_Hash_SHA2_sha512_finish python_hashlib_Hacl_Hash_SHA2_sha512_finish +#define Hacl_Hash_SHA2_sha512_init python_hashlib_Hacl_Hash_SHA2_sha512_init +#define Hacl_Hash_SHA2_sha512_update_last python_hashlib_Hacl_Hash_SHA2_sha512_update_last +#define Hacl_Hash_SHA2_sha512_update_nblocks python_hashlib_Hacl_Hash_SHA2_sha512_update_nblocks +#define Hacl_Hash_SHA3_absorb_inner_32 python_hashlib_Hacl_Hash_SHA3_absorb_inner_32 +#define Hacl_Hash_SHA3_keccak_piln python_hashlib_Hacl_Hash_SHA3_keccak_piln +#define Hacl_Hash_SHA3_keccak_rndc python_hashlib_Hacl_Hash_SHA3_keccak_rndc +#define Hacl_Hash_SHA3_keccak_rotc python_hashlib_Hacl_Hash_SHA3_keccak_rotc +#define Hacl_Hash_SHA3_sha3_224 python_hashlib_Hacl_Hash_SHA3_sha3_224 +#define Hacl_Hash_SHA3_sha3_256 python_hashlib_Hacl_Hash_SHA3_sha3_256 +#define Hacl_Hash_SHA3_sha3_384 python_hashlib_Hacl_Hash_SHA3_sha3_384 +#define Hacl_Hash_SHA3_sha3_512 python_hashlib_Hacl_Hash_SHA3_sha3_512 +#define Hacl_Hash_SHA3_shake128 python_hashlib_Hacl_Hash_SHA3_shake128 +#define Hacl_Hash_SHA3_shake128_absorb_final python_hashlib_Hacl_Hash_SHA3_shake128_absorb_final +#define Hacl_Hash_SHA3_shake128_absorb_nblocks python_hashlib_Hacl_Hash_SHA3_shake128_absorb_nblocks +#define Hacl_Hash_SHA3_shake128_squeeze_nblocks python_hashlib_Hacl_Hash_SHA3_shake128_squeeze_nblocks +#define Hacl_Hash_SHA3_shake256 python_hashlib_Hacl_Hash_SHA3_shake256 +#define Hacl_Hash_SHA3_state_free python_hashlib_Hacl_Hash_SHA3_state_free +#define Hacl_Hash_SHA3_state_malloc python_hashlib_Hacl_Hash_SHA3_state_malloc + #endif // _PYTHON_HACL_NAMESPACES_H diff --git a/Modules/_hacl/refresh.sh b/Modules/_hacl/refresh.sh index 3878e02af31..44e18a15f96 100755 --- a/Modules/_hacl/refresh.sh +++ b/Modules/_hacl/refresh.sh @@ -22,7 +22,7 @@ fi # Update this when updating to a new version after verifying that the changes # the update brings in are good. -expected_hacl_star_rev=bb3d0dc8d9d15a5cd51094d5b69e70aa09005ff0 +expected_hacl_star_rev=a6a09496d9cff652b567d26f2c3ab012321b632a hacl_dir="$(realpath "$1")" cd "$(dirname "$0")" @@ -40,19 +40,35 @@ fi declare -a dist_files dist_files=( - Hacl_Hash_SHA2.h Hacl_Streaming_Types.h - Hacl_Hash_SHA1.h - internal/Hacl_Hash_SHA1.h Hacl_Hash_MD5.h + Hacl_Hash_SHA1.h + Hacl_Hash_SHA2.h Hacl_Hash_SHA3.h + Hacl_Hash_Blake2b.h + Hacl_Hash_Blake2s.h + Hacl_Hash_Blake2b_Simd256.h + Hacl_Hash_Blake2s_Simd128.h internal/Hacl_Hash_MD5.h - internal/Hacl_Hash_SHA3.h - Hacl_Hash_SHA2.c + internal/Hacl_Hash_SHA1.h internal/Hacl_Hash_SHA2.h - Hacl_Hash_SHA1.c + internal/Hacl_Hash_SHA3.h + internal/Hacl_Hash_Blake2b.h + internal/Hacl_Hash_Blake2s.h + internal/Hacl_Hash_Blake2b_Simd256.h + internal/Hacl_Hash_Blake2s_Simd128.h + internal/Hacl_Impl_Blake2_Constants.h Hacl_Hash_MD5.c + Hacl_Hash_SHA1.c + Hacl_Hash_SHA2.c Hacl_Hash_SHA3.c + Hacl_Hash_Blake2b.c + Hacl_Hash_Blake2s.c + Hacl_Hash_Blake2b_Simd256.c + Hacl_Hash_Blake2s_Simd128.c + libintvector.h + lib_memzero0.h + Lib_Memzero0.c ) declare -a include_files @@ -131,9 +147,13 @@ $sed -i -z 's!#include \n!#include \n#include "python_hacl_n # Finally, we remove a bunch of ifdefs from target.h that are, again, useful in # the general case, but not exercised by the subset of HACL* that we vendor. -$sed -z -i 's!#ifndef KRML_\(PRE_ALIGN\|POST_ALIGN\|ALIGNED_MALLOC\|ALIGNED_FREE\|HOST_TIME\)\n\(\n\|# [^\n]*\n\|[^#][^\n]*\n\)*#endif\n\n!!g' include/krml/internal/target.h +$sed -z -i 's!#ifndef KRML_\(HOST_TIME\)\n\(\n\|# [^\n]*\n\|[^#][^\n]*\n\)*#endif\n\n!!g' include/krml/internal/target.h $sed -z -i 's!\n\n\([^#][^\n]*\n\)*#define KRML_\(EABORT\|EXIT\)[^\n]*\(\n [^\n]*\)*!!g' include/krml/internal/target.h $sed -z -i 's!\n\n\([^#][^\n]*\n\)*#if [^\n]*\n\( [^\n]*\n\)*#define KRML_\(EABORT\|EXIT\|CHECK_SIZE\)[^\n]*\(\n [^\n]*\)*!!g' include/krml/internal/target.h $sed -z -i 's!\n\n\([^#][^\n]*\n\)*#if [^\n]*\n\( [^\n]*\n\)*# define _\?KRML_\(DEPRECATED\|HOST_SNPRINTF\)[^\n]*\n\([^#][^\n]*\n\|#el[^\n]*\n\|# [^\n]*\n\)*#endif!!g' include/krml/internal/target.h +# Step 3: trim whitespace (for the linter) + +find . -name '*.c' -or -name '*.h' | xargs $sed -i 's![[:space:]]\+$!!' + echo "Updated; verify all is okay using git diff and git status." diff --git a/Modules/blake2module.c b/Modules/blake2module.c new file mode 100644 index 00000000000..abe31a464a1 --- /dev/null +++ b/Modules/blake2module.c @@ -0,0 +1,930 @@ +/* + * Written in 2013 by Dmitry Chestnykh + * Modified for CPython by Christian Heimes + * Updated to use HACL* by Jonathan Protzenko + * + * To the extent possible under law, the author have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. http://creativecommons.org/publicdomain/zero/1.0/ + */ + +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 +#endif + +#include "pyconfig.h" +#include "Python.h" +#include "hashlib.h" +#include "pycore_strhex.h" // _Py_strhex() +#include "pycore_typeobject.h" +#include "pycore_moduleobject.h" + +// QUICK CPU AUTODETECTION +// +// See https://github.com/python/cpython/pull/119316 -- we only enable +// vectorized versions for Intel CPUs, even though HACL*'s "vec128" modules also +// run on ARM NEON. (We could enable them on POWER -- but I don't have access to +// a test machine to see if that speeds anything up.) +// +// Note that configure.ac and the rest of the build are written in such a way +// that if the configure script finds suitable flags to compile HACL's SIMD128 +// (resp. SIMD256) files, then Hacl_Hash_Blake2b_Simd128.c (resp. ...) will be +// pulled into the build automatically, and then only the CPU autodetection will +// need to be updated here. + +#if defined(__x86_64__) && defined(__GNUC__) +#include +#elif defined(_M_X64) +#include +#endif + +#include + +// ECX +#define ECX_SSE3 (1 << 0) +#define ECX_SSSE3 (1 << 9) +#define ECX_SSE4_1 (1 << 19) +#define ECX_SSE4_2 (1 << 20) +#define ECX_AVX (1 << 28) + +// EBX +#define EBX_AVX2 (1 << 5) + +// EDX +#define EDX_SSE (1 << 25) +#define EDX_SSE2 (1 << 26) +#define EDX_CMOV (1 << 15) + +// zero-initialized by default +typedef struct { + bool sse, sse2, sse3, sse41, sse42, cmov, avx, avx2; + bool done; +} cpu_flags; + +void detect_cpu_features(cpu_flags *flags) { + if (!flags->done) { + int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0; + int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0; +#if defined(__x86_64__) && defined(__GNUC__) + __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); + __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); +#elif defined(_M_X64) + int info1[4] = { 0 }; + int info7[4] = { 0 }; + __cpuidex(info1, 1, 0); + __cpuidex(info7, 7, 0); + eax1 = info1[0]; + ebx1 = info1[1]; + ecx1 = info1[2]; + edx1 = info1[3]; + eax7 = info7[0]; + ebx7 = info7[1]; + ecx7 = info7[2]; + edx7 = info7[3]; +#else + (void) eax1; (void) ebx1; (void) ecx1; (void) edx1; + (void) eax7; (void) ebx7; (void) ecx7; (void) edx7; +#endif + + flags->avx = (ecx1 & ECX_AVX) != 0; + + flags->avx2 = (ebx7 & EBX_AVX2) != 0; + + flags->sse = (edx1 & EDX_SSE) != 0; + flags->sse2 = (edx1 & EDX_SSE2) != 0; + flags->cmov = (edx1 & EDX_CMOV) != 0; + + flags->sse3 = (ecx1 & ECX_SSE3) != 0; + /* ssse3 = (ecx1 & ECX_SSSE3) != 0; */ + flags->sse41 = (ecx1 & ECX_SSE4_1) != 0; + flags->sse42 = (ecx1 & ECX_SSE4_2) != 0; + + flags->done = true; + } +} + +static inline bool has_simd128(cpu_flags *flags) { + // For now this is Intel-only, could conceivably be #ifdef'd to something + // else. + return flags->sse && flags->sse2 && flags->sse3 && flags->sse41 && flags->sse42 && flags->cmov; +} + +static inline bool has_simd256(cpu_flags *flags) { + return flags->avx && flags->avx2; +} + +// Small mismatch between the variable names Python defines as part of configure +// at the ones HACL* expects to be set in order to enable those headers. +#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128 +#define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256 + +#include "_hacl/Hacl_Hash_Blake2b.h" +#include "_hacl/Hacl_Hash_Blake2s.h" +#if HACL_CAN_COMPILE_SIMD256 +#include "_hacl/Hacl_Hash_Blake2b_Simd256.h" +#endif +#if HACL_CAN_COMPILE_SIMD128 +#include "_hacl/Hacl_Hash_Blake2s_Simd128.h" +#endif + +// MODULE TYPE SLOTS + +static PyType_Spec blake2b_type_spec; +static PyType_Spec blake2s_type_spec; + +PyDoc_STRVAR(blake2mod__doc__, +"_blake2b provides BLAKE2b for hashlib\n" +); + +typedef struct { + PyTypeObject* blake2b_type; + PyTypeObject* blake2s_type; + cpu_flags flags; +} Blake2State; + +static inline Blake2State* +blake2_get_state(PyObject *module) +{ + void *state = _PyModule_GetState(module); + assert(state != NULL); + return (Blake2State *)state; +} + +static inline Blake2State* +blake2_get_state_from_type(PyTypeObject *module) +{ + void *state = _PyType_GetModuleState(module); + assert(state != NULL); + return (Blake2State *)state; +} + +static struct PyMethodDef blake2mod_functions[] = { + {NULL, NULL} +}; + +static int +_blake2_traverse(PyObject *module, visitproc visit, void *arg) +{ + Blake2State *state = blake2_get_state(module); + Py_VISIT(state->blake2b_type); + Py_VISIT(state->blake2s_type); + return 0; +} + +static int +_blake2_clear(PyObject *module) +{ + Blake2State *state = blake2_get_state(module); + Py_CLEAR(state->blake2b_type); + Py_CLEAR(state->blake2s_type); + return 0; +} + +static void +_blake2_free(void *module) +{ + (void)_blake2_clear((PyObject *)module); +} + +#define ADD_INT(d, name, value) do { \ + PyObject *x = PyLong_FromLong(value); \ + if (!x) \ + return -1; \ + if (PyDict_SetItemString(d, name, x) < 0) { \ + Py_DECREF(x); \ + return -1; \ + } \ + Py_DECREF(x); \ +} while(0) + +#define ADD_INT_CONST(NAME, VALUE) do { \ + if (PyModule_AddIntConstant(m, NAME, VALUE) < 0) { \ + return -1; \ + } \ +} while (0) + +static int +blake2_exec(PyObject *m) +{ + Blake2State* st = blake2_get_state(m); + + // This is called at module initialization-time, and so appears to be as + // good a place as any to probe the CPU flags. + detect_cpu_features(&st->flags); + + st->blake2b_type = (PyTypeObject *)PyType_FromModuleAndSpec( + m, &blake2b_type_spec, NULL); + + if (st->blake2b_type == NULL) { + return -1; + } + /* BLAKE2b */ + if (PyModule_AddType(m, st->blake2b_type) < 0) { + return -1; + } + + PyObject *d = st->blake2b_type->tp_dict; + ADD_INT(d, "SALT_SIZE", HACL_HASH_BLAKE2B_SALT_BYTES); + ADD_INT(d, "PERSON_SIZE", HACL_HASH_BLAKE2B_PERSONAL_BYTES); + ADD_INT(d, "MAX_KEY_SIZE", HACL_HASH_BLAKE2B_KEY_BYTES); + ADD_INT(d, "MAX_DIGEST_SIZE", HACL_HASH_BLAKE2B_OUT_BYTES); + + ADD_INT_CONST("BLAKE2B_SALT_SIZE", HACL_HASH_BLAKE2B_SALT_BYTES); + ADD_INT_CONST("BLAKE2B_PERSON_SIZE", HACL_HASH_BLAKE2B_PERSONAL_BYTES); + ADD_INT_CONST("BLAKE2B_MAX_KEY_SIZE", HACL_HASH_BLAKE2B_KEY_BYTES); + ADD_INT_CONST("BLAKE2B_MAX_DIGEST_SIZE", HACL_HASH_BLAKE2B_OUT_BYTES); + + /* BLAKE2s */ + st->blake2s_type = (PyTypeObject *)PyType_FromModuleAndSpec( + m, &blake2s_type_spec, NULL); + + if (NULL == st->blake2s_type) + return -1; + + if (PyModule_AddType(m, st->blake2s_type) < 0) { + return -1; + } + + d = st->blake2s_type->tp_dict; + ADD_INT(d, "SALT_SIZE", HACL_HASH_BLAKE2S_SALT_BYTES); + ADD_INT(d, "PERSON_SIZE", HACL_HASH_BLAKE2S_PERSONAL_BYTES); + ADD_INT(d, "MAX_KEY_SIZE", HACL_HASH_BLAKE2S_KEY_BYTES); + ADD_INT(d, "MAX_DIGEST_SIZE", HACL_HASH_BLAKE2S_OUT_BYTES); + + ADD_INT_CONST("BLAKE2S_SALT_SIZE", HACL_HASH_BLAKE2S_SALT_BYTES); + ADD_INT_CONST("BLAKE2S_PERSON_SIZE", HACL_HASH_BLAKE2S_PERSONAL_BYTES); + ADD_INT_CONST("BLAKE2S_MAX_KEY_SIZE", HACL_HASH_BLAKE2S_KEY_BYTES); + ADD_INT_CONST("BLAKE2S_MAX_DIGEST_SIZE", HACL_HASH_BLAKE2S_OUT_BYTES); + + return 0; +} + +#undef ADD_INT +#undef ADD_INT_CONST + +static PyModuleDef_Slot _blake2_slots[] = { + {Py_mod_exec, blake2_exec}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, + {Py_mod_gil, Py_MOD_GIL_NOT_USED}, + {0, NULL} +}; + +static struct PyModuleDef blake2_module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "_blake2", + .m_doc = blake2mod__doc__, + .m_size = sizeof(Blake2State), + .m_methods = blake2mod_functions, + .m_slots = _blake2_slots, + .m_traverse = _blake2_traverse, + .m_clear = _blake2_clear, + .m_free = _blake2_free, +}; + +PyMODINIT_FUNC +PyInit__blake2(void) +{ + return PyModuleDef_Init(&blake2_module); +} + +// IMPLEMENTATION OF METHODS + +// The HACL* API does not offer an agile API that can deal with either Blake2S +// or Blake2B -- the reason is that the underlying states are optimized (uint32s +// for S, uint64s for B). Therefore, we use a tagged union in this module to +// correctly dispatch. Note that the previous incarnation of this code +// transformed the Blake2b implementation into the Blake2s one using a script, +// so this is an improvement. +// +// The 128 and 256 versions are only available if i) we were able to compile +// them, and ii) if the CPU we run on also happens to have the right instruction +// set. +typedef enum { Blake2s, Blake2b, Blake2s_128, Blake2b_256 } blake2_impl; + +static inline bool is_blake2b(blake2_impl impl) { + return impl == Blake2b || impl == Blake2b_256; +} + +static inline bool is_blake2s(blake2_impl impl) { + return !is_blake2b(impl); +} + +static inline blake2_impl type_to_impl(PyTypeObject *type) { + Blake2State* st = blake2_get_state_from_type(type); + if (!strcmp(type->tp_name, blake2b_type_spec.name)) { +#ifdef HACL_CAN_COMPILE_SIMD256 + if (has_simd256(&st->flags)) + return Blake2b_256; + else +#endif + return Blake2b; + } else if (!strcmp(type->tp_name, blake2s_type_spec.name)) { +#ifdef HACL_CAN_COMPILE_SIMD128 + if (has_simd128(&st->flags)) + return Blake2s_128; + else +#endif + return Blake2s; + } else { + Py_UNREACHABLE(); + } +} + +typedef struct { + PyObject_HEAD + union { + Hacl_Hash_Blake2s_state_t *blake2s_state; + Hacl_Hash_Blake2b_state_t *blake2b_state; +#ifdef HACL_CAN_COMPILE_SIMD128 + Hacl_Hash_Blake2s_Simd128_state_t *blake2s_128_state; +#endif +#ifdef HACL_CAN_COMPILE_SIMD256 + Hacl_Hash_Blake2b_Simd256_state_t *blake2b_256_state; +#endif + }; + blake2_impl impl; + bool use_mutex; + PyMutex mutex; +} Blake2Object; + +#include "clinic/blake2module.c.h" + +/*[clinic input] +module _blake2 +class _blake2.blake2b "Blake2Object *" "&PyBlake2_BLAKE2bType" +class _blake2.blake2s "Blake2Object *" "&PyBlake2_BLAKE2sType" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b7526666bd18af83]*/ + + +static Blake2Object * +new_Blake2Object(PyTypeObject *type) +{ + Blake2Object *self; + self = (Blake2Object *)type->tp_alloc(type, 0); + if (self == NULL) { + return NULL; + } + HASHLIB_INIT_MUTEX(self); + + return self; +} + +/* HACL* takes a uint32_t for the length of its parameter, but Py_ssize_t can be + * 64 bits so we loop in <4gig chunks when needed. */ + +#if PY_SSIZE_T_MAX > UINT32_MAX +#define HACL_UPDATE_LOOP(update,state,buf,len) \ + while (len > UINT32_MAX) { \ + update(state, buf, UINT32_MAX); \ + len -= UINT32_MAX; \ + buf += UINT32_MAX; \ + } +#else +#define HACL_UPDATE_LOOP(update,state,buf,len) +#endif + +#define HACL_UPDATE(update,state,buf,len) do { \ + /* Note: we explicitly ignore the error code on the basis that it would take > + * 1 billion years to overflow the maximum admissible length for SHA2-256 + * (namely, 2^61-1 bytes). */ \ + HACL_UPDATE_LOOP(update,state,buf,len) \ + /* Cast to uint32_t is safe: len <= UINT32_MAX at this point. */ \ + update(state, buf, (uint32_t) len); \ +} while (0) + +static void update(Blake2Object *self, uint8_t *buf, Py_ssize_t len) { + switch (self->impl) { + // These need to be ifdef'd out otherwise it's an unresolved symbol at + // link-time. +#ifdef HACL_CAN_COMPILE_SIMD256 + case Blake2b_256: + HACL_UPDATE(Hacl_Hash_Blake2b_Simd256_update,self->blake2b_256_state, buf, len); + return; +#endif +#ifdef HACL_CAN_COMPILE_SIMD128 + case Blake2s_128: + HACL_UPDATE(Hacl_Hash_Blake2s_Simd128_update,self->blake2s_128_state, buf, len); + return; +#endif + case Blake2b: + HACL_UPDATE(Hacl_Hash_Blake2b_update,self->blake2b_state, buf, len); + return; + case Blake2s: + HACL_UPDATE(Hacl_Hash_Blake2s_update,self->blake2s_state, buf, len); + return; + default: + Py_UNREACHABLE(); + } +} + +static PyObject * +py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, + Py_buffer *key, Py_buffer *salt, Py_buffer *person, + int fanout, int depth, unsigned long leaf_size, + unsigned long long node_offset, int node_depth, + int inner_size, int last_node, int usedforsecurity) + +{ + Blake2Object *self = NULL; + Py_buffer buf; + + self = new_Blake2Object(type); + if (self == NULL) { + goto error; + } + + self->impl = type_to_impl(type); + + // Using Blake2b because we statically know that these are greater than the + // Blake2s sizes -- this avoids a VLA. + uint8_t salt_[HACL_HASH_BLAKE2B_SALT_BYTES] = { 0 }; + uint8_t personal_[HACL_HASH_BLAKE2B_PERSONAL_BYTES] = { 0 }; + + /* Validate digest size. */ + if (digest_size <= 0 || + (unsigned) digest_size > (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_OUT_BYTES : HACL_HASH_BLAKE2S_OUT_BYTES)) + { + PyErr_Format(PyExc_ValueError, + "digest_size for %s must be between 1 and %d bytes, here it is %d", + is_blake2b(self->impl) ? "Blake2b" : "Blake2s", + is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_OUT_BYTES : HACL_HASH_BLAKE2S_OUT_BYTES, + digest_size); + goto error; + } + + /* Validate salt parameter. */ + if ((salt->obj != NULL) && salt->len) { + if (salt->len > (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_SALT_BYTES : HACL_HASH_BLAKE2S_SALT_BYTES)) { + PyErr_Format(PyExc_ValueError, + "maximum salt length is %d bytes", + (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_SALT_BYTES : HACL_HASH_BLAKE2S_SALT_BYTES)); + goto error; + } + memcpy(salt_, salt->buf, salt->len); + } + + /* Validate personalization parameter. */ + if ((person->obj != NULL) && person->len) { + if (person->len > (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_PERSONAL_BYTES : HACL_HASH_BLAKE2S_PERSONAL_BYTES)) { + PyErr_Format(PyExc_ValueError, + "maximum person length is %d bytes", + (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_PERSONAL_BYTES : HACL_HASH_BLAKE2S_PERSONAL_BYTES)); + goto error; + } + memcpy(personal_, person->buf, person->len); + } + + /* Validate tree parameters. */ + if (fanout < 0 || fanout > 255) { + PyErr_SetString(PyExc_ValueError, + "fanout must be between 0 and 255"); + goto error; + } + + if (depth <= 0 || depth > 255) { + PyErr_SetString(PyExc_ValueError, + "depth must be between 1 and 255"); + goto error; + } + + if (leaf_size > 0xFFFFFFFFU) { + PyErr_SetString(PyExc_OverflowError, "leaf_size is too large"); + goto error; + } + + if (is_blake2s(self->impl) && node_offset > 0xFFFFFFFFFFFFULL) { + /* maximum 2**48 - 1 */ + PyErr_SetString(PyExc_OverflowError, "node_offset is too large"); + goto error; + } + + if (node_depth < 0 || node_depth > 255) { + PyErr_SetString(PyExc_ValueError, + "node_depth must be between 0 and 255"); + goto error; + } + + if (inner_size < 0 || + (unsigned) inner_size > (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_OUT_BYTES : HACL_HASH_BLAKE2S_OUT_BYTES)) { + PyErr_Format(PyExc_ValueError, + "inner_size must be between 0 and is %d", + (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_OUT_BYTES : HACL_HASH_BLAKE2S_OUT_BYTES)); + goto error; + } + + /* Set key length. */ + if ((key->obj != NULL) && key->len) { + if (key->len > (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_KEY_BYTES : HACL_HASH_BLAKE2S_KEY_BYTES)) { + PyErr_Format(PyExc_ValueError, + "maximum key length is %d bytes", + (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_KEY_BYTES : HACL_HASH_BLAKE2S_KEY_BYTES)); + goto error; + } + } + + // Unlike the state types, the parameters share a single (client-friendly) + // structure. + + Hacl_Hash_Blake2b_blake2_params params = { + .digest_length = digest_size, + .key_length = (uint8_t)key->len, + .fanout = fanout, + .depth = depth, + .leaf_length = leaf_size, + .node_offset = node_offset, + .node_depth = node_depth, + .inner_length = inner_size, + .salt = salt_, + .personal = personal_ + }; + + switch (self->impl) { +#if HACL_CAN_COMPILE_SIMD256 + case Blake2b_256: + self->blake2b_256_state = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(¶ms, last_node, key->buf); + break; +#endif +#if HACL_CAN_COMPILE_SIMD128 + case Blake2s_128: + self->blake2s_128_state = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(¶ms, last_node, key->buf); + break; +#endif + case Blake2b: + self->blake2b_state = Hacl_Hash_Blake2b_malloc_with_params_and_key(¶ms, last_node, key->buf); + break; + case Blake2s: + self->blake2s_state = Hacl_Hash_Blake2s_malloc_with_params_and_key(¶ms, last_node, key->buf); + break; + default: + Py_UNREACHABLE(); + } + + /* Process initial data if any. */ + if (data != NULL) { + GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); + + if (buf.len >= HASHLIB_GIL_MINSIZE) { + Py_BEGIN_ALLOW_THREADS + update(self, buf.buf, buf.len); + Py_END_ALLOW_THREADS + } else { + update(self, buf.buf, buf.len); + } + PyBuffer_Release(&buf); + } + + return (PyObject *)self; +error: + Py_XDECREF(self); + return NULL; +} + +/*[clinic input] +@classmethod +_blake2.blake2b.__new__ as py_blake2b_new + data: object(c_default="NULL") = b'' + / + * + digest_size: int(c_default="HACL_HASH_BLAKE2B_OUT_BYTES") = _blake2.blake2b.MAX_DIGEST_SIZE + key: Py_buffer(c_default="NULL", py_default="b''") = None + salt: Py_buffer(c_default="NULL", py_default="b''") = None + person: Py_buffer(c_default="NULL", py_default="b''") = None + fanout: int = 1 + depth: int = 1 + leaf_size: unsigned_long = 0 + node_offset: unsigned_long_long = 0 + node_depth: int = 0 + inner_size: int = 0 + last_node: bool = False + usedforsecurity: bool = True + +Return a new BLAKE2b hash object. +[clinic start generated code]*/ + +static PyObject * +py_blake2b_new_impl(PyTypeObject *type, PyObject *data, int digest_size, + Py_buffer *key, Py_buffer *salt, Py_buffer *person, + int fanout, int depth, unsigned long leaf_size, + unsigned long long node_offset, int node_depth, + int inner_size, int last_node, int usedforsecurity) +/*[clinic end generated code: output=32bfd8f043c6896f input=8fee2b7b11428b2d]*/ +{ + return py_blake2b_or_s_new(type, data, digest_size, key, salt, person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); +} + +/*[clinic input] +@classmethod +_blake2.blake2s.__new__ as py_blake2s_new + data: object(c_default="NULL") = b'' + / + * + digest_size: int(c_default="HACL_HASH_BLAKE2S_OUT_BYTES") = _blake2.blake2s.MAX_DIGEST_SIZE + key: Py_buffer(c_default="NULL", py_default="b''") = None + salt: Py_buffer(c_default="NULL", py_default="b''") = None + person: Py_buffer(c_default="NULL", py_default="b''") = None + fanout: int = 1 + depth: int = 1 + leaf_size: unsigned_long = 0 + node_offset: unsigned_long_long = 0 + node_depth: int = 0 + inner_size: int = 0 + last_node: bool = False + usedforsecurity: bool = True + +Return a new BLAKE2s hash object. +[clinic start generated code]*/ + +static PyObject * +py_blake2s_new_impl(PyTypeObject *type, PyObject *data, int digest_size, + Py_buffer *key, Py_buffer *salt, Py_buffer *person, + int fanout, int depth, unsigned long leaf_size, + unsigned long long node_offset, int node_depth, + int inner_size, int last_node, int usedforsecurity) +/*[clinic end generated code: output=556181f73905c686 input=8165a11980eac7f3]*/ +{ + return py_blake2b_or_s_new(type, data, digest_size, key, salt, person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); +} + +/*[clinic input] +_blake2.blake2b.copy + +Return a copy of the hash object. +[clinic start generated code]*/ + +static PyObject * +_blake2_blake2b_copy_impl(Blake2Object *self) +/*[clinic end generated code: output=622d1c56b91c50d8 input=e383c2d199fd8a2e]*/ +{ + Blake2Object *cpy; + + if ((cpy = new_Blake2Object(Py_TYPE(self))) == NULL) + return NULL; + + ENTER_HASHLIB(self); + switch (self->impl) { +#if HACL_CAN_COMPILE_SIMD256 + case Blake2b_256: + cpy->blake2b_256_state = Hacl_Hash_Blake2b_Simd256_copy(self->blake2b_256_state); + break; +#endif +#if HACL_CAN_COMPILE_SIMD128 + case Blake2s_128: + cpy->blake2s_128_state = Hacl_Hash_Blake2s_Simd128_copy(self->blake2s_128_state); + break; +#endif + case Blake2b: + cpy->blake2b_state = Hacl_Hash_Blake2b_copy(self->blake2b_state); + break; + case Blake2s: + cpy->blake2s_state = Hacl_Hash_Blake2s_copy(self->blake2s_state); + break; + default: + Py_UNREACHABLE(); + } + cpy->impl = self->impl; + LEAVE_HASHLIB(self); + return (PyObject *)cpy; +} + +/*[clinic input] +_blake2.blake2b.update + + data: object + / + +Update this hash object's state with the provided bytes-like object. +[clinic start generated code]*/ + +static PyObject * +_blake2_blake2b_update(Blake2Object *self, PyObject *data) +/*[clinic end generated code: output=e6d1ac88471df308 input=ffc4aa6a6a225d31]*/ +{ + Py_buffer buf; + + GET_BUFFER_VIEW_OR_ERROUT(data, &buf); + + if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { + self->use_mutex = true; + } + if (self->use_mutex) { + Py_BEGIN_ALLOW_THREADS + PyMutex_Lock(&self->mutex); + update(self, buf.buf, buf.len); + PyMutex_Unlock(&self->mutex); + Py_END_ALLOW_THREADS + } else { + update(self, buf.buf, buf.len); + } + + PyBuffer_Release(&buf); + + Py_RETURN_NONE; +} + +/*[clinic input] +_blake2.blake2b.digest + +Return the digest value as a bytes object. +[clinic start generated code]*/ + +static PyObject * +_blake2_blake2b_digest_impl(Blake2Object *self) +/*[clinic end generated code: output=31ab8ad477f4a2f7 input=7d21659e9c5fff02]*/ +{ + uint8_t digest[HACL_HASH_BLAKE2B_OUT_BYTES]; + + ENTER_HASHLIB(self); + uint8_t digest_length = 0; + switch (self->impl) { +#if HACL_CAN_COMPILE_SIMD256 + case Blake2b_256: + digest_length = Hacl_Hash_Blake2b_Simd256_digest(self->blake2b_256_state, digest); + break; +#endif +#if HACL_CAN_COMPILE_SIMD128 + case Blake2s_128: + digest_length = Hacl_Hash_Blake2s_Simd128_digest(self->blake2s_128_state, digest); + break; +#endif + case Blake2b: + digest_length = Hacl_Hash_Blake2b_digest(self->blake2b_state, digest); + break; + case Blake2s: + digest_length = Hacl_Hash_Blake2s_digest(self->blake2s_state, digest); + break; + default: + Py_UNREACHABLE(); + } + LEAVE_HASHLIB(self); + return PyBytes_FromStringAndSize((const char *)digest, digest_length); +} + +/*[clinic input] +_blake2.blake2b.hexdigest + +Return the digest value as a string of hexadecimal digits. +[clinic start generated code]*/ + +static PyObject * +_blake2_blake2b_hexdigest_impl(Blake2Object *self) +/*[clinic end generated code: output=5ef54b138db6610a input=76930f6946351f56]*/ +{ + uint8_t digest[HACL_HASH_BLAKE2B_OUT_BYTES]; + + ENTER_HASHLIB(self); + uint8_t digest_length = 0; + switch (self->impl) { +#if HACL_CAN_COMPILE_SIMD256 + case Blake2b_256: + digest_length = Hacl_Hash_Blake2b_Simd256_digest(self->blake2b_256_state, digest); + break; +#endif +#if HACL_CAN_COMPILE_SIMD128 + case Blake2s_128: + digest_length = Hacl_Hash_Blake2s_Simd128_digest(self->blake2s_128_state, digest); + break; +#endif + case Blake2b: + digest_length = Hacl_Hash_Blake2b_digest(self->blake2b_state, digest); + break; + case Blake2s: + digest_length = Hacl_Hash_Blake2s_digest(self->blake2s_state, digest); + break; + default: + Py_UNREACHABLE(); + } + LEAVE_HASHLIB(self); + return _Py_strhex((const char *)digest, digest_length); +} + + +static PyMethodDef py_blake2b_methods[] = { + _BLAKE2_BLAKE2B_COPY_METHODDEF + _BLAKE2_BLAKE2B_DIGEST_METHODDEF + _BLAKE2_BLAKE2B_HEXDIGEST_METHODDEF + _BLAKE2_BLAKE2B_UPDATE_METHODDEF + {NULL, NULL} +}; + + +static PyObject * +py_blake2b_get_name(Blake2Object *self, void *closure) +{ + return PyUnicode_FromString(is_blake2b(self->impl) ? "blake2b" : "blake2s"); +} + + + +static PyObject * +py_blake2b_get_block_size(Blake2Object *self, void *closure) +{ + return PyLong_FromLong(is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_BLOCK_BYTES : HACL_HASH_BLAKE2S_BLOCK_BYTES); +} + + + +static PyObject * +py_blake2b_get_digest_size(Blake2Object *self, void *closure) +{ + switch (self->impl) { +#if HACL_CAN_COMPILE_SIMD256 + case Blake2b_256: + return PyLong_FromLong(Hacl_Hash_Blake2b_Simd256_info(self->blake2b_256_state).digest_length); +#endif +#if HACL_CAN_COMPILE_SIMD128 + case Blake2s_128: + return PyLong_FromLong(Hacl_Hash_Blake2s_Simd128_info(self->blake2s_128_state).digest_length); +#endif + case Blake2b: + return PyLong_FromLong(Hacl_Hash_Blake2b_info(self->blake2b_state).digest_length); + case Blake2s: + return PyLong_FromLong(Hacl_Hash_Blake2s_info(self->blake2s_state).digest_length); + default: + Py_UNREACHABLE(); + } +} + + +static PyGetSetDef py_blake2b_getsetters[] = { + {"name", (getter)py_blake2b_get_name, + NULL, NULL, NULL}, + {"block_size", (getter)py_blake2b_get_block_size, + NULL, NULL, NULL}, + {"digest_size", (getter)py_blake2b_get_digest_size, + NULL, NULL, NULL}, + {NULL} +}; + + +static void +py_blake2b_dealloc(Blake2Object *self) +{ + switch (self->impl) { +#if HACL_CAN_COMPILE_SIMD256 + case Blake2b_256: + if (self->blake2b_256_state != NULL) + Hacl_Hash_Blake2b_Simd256_free(self->blake2b_256_state); + break; +#endif +#if HACL_CAN_COMPILE_SIMD128 + case Blake2s_128: + if (self->blake2s_128_state != NULL) + Hacl_Hash_Blake2s_Simd128_free(self->blake2s_128_state); + break; +#endif + case Blake2b: + // This happens if we hit "goto error" in the middle of the + // initialization function. We leverage the fact that tp_alloc + // guarantees that the contents of the object are NULL-initialized + // (see documentation for PyType_GenericAlloc) to detect this case. + if (self->blake2b_state != NULL) + Hacl_Hash_Blake2b_free(self->blake2b_state); + break; + case Blake2s: + if (self->blake2s_state != NULL) + Hacl_Hash_Blake2s_free(self->blake2s_state); + break; + default: + Py_UNREACHABLE(); + } + + PyTypeObject *type = Py_TYPE(self); + PyObject_Free(self); + Py_DECREF(type); +} + +static PyType_Slot blake2b_type_slots[] = { + {Py_tp_dealloc, py_blake2b_dealloc}, + {Py_tp_doc, (char *)py_blake2b_new__doc__}, + {Py_tp_methods, py_blake2b_methods}, + {Py_tp_getset, py_blake2b_getsetters}, + {Py_tp_new, py_blake2b_new}, + {0,0} +}; + +static PyType_Slot blake2s_type_slots[] = { + {Py_tp_dealloc, py_blake2b_dealloc}, + {Py_tp_doc, (char *)py_blake2s_new__doc__}, + {Py_tp_methods, py_blake2b_methods}, + {Py_tp_getset, py_blake2b_getsetters}, + // only the constructor differs, so that it can receive a clinic-generated + // default digest length suitable for blake2s + {Py_tp_new, py_blake2s_new}, + {0,0} +}; + +static PyType_Spec blake2b_type_spec = { + .name = "_blake2.blake2b", + .basicsize = sizeof(Blake2Object), + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE, + .slots = blake2b_type_slots +}; + +static PyType_Spec blake2s_type_spec = { + .name = "_blake2.blake2s", + .basicsize = sizeof(Blake2Object), + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE, + .slots = blake2s_type_slots +}; diff --git a/Modules/_blake2/clinic/blake2b_impl.c.h b/Modules/clinic/blake2module.c.h similarity index 54% rename from Modules/_blake2/clinic/blake2b_impl.c.h rename to Modules/clinic/blake2module.c.h index 47d62717eb7..50478bcbecf 100644 --- a/Modules/_blake2/clinic/blake2b_impl.c.h +++ b/Modules/clinic/blake2module.c.h @@ -59,7 +59,7 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; PyObject *data = NULL; - int digest_size = BLAKE2B_OUTBYTES; + int digest_size = HACL_HASH_BLAKE2B_OUT_BYTES; Py_buffer key = {NULL, NULL}; Py_buffer salt = {NULL, NULL}; Py_buffer person = {NULL, NULL}; @@ -203,6 +203,200 @@ exit: return return_value; } +PyDoc_STRVAR(py_blake2s_new__doc__, +"blake2s(data=b\'\', /, *, digest_size=_blake2.blake2s.MAX_DIGEST_SIZE,\n" +" key=b\'\', salt=b\'\', person=b\'\', fanout=1, depth=1, leaf_size=0,\n" +" node_offset=0, node_depth=0, inner_size=0, last_node=False,\n" +" usedforsecurity=True)\n" +"--\n" +"\n" +"Return a new BLAKE2s hash object."); + +static PyObject * +py_blake2s_new_impl(PyTypeObject *type, PyObject *data, int digest_size, + Py_buffer *key, Py_buffer *salt, Py_buffer *person, + int fanout, int depth, unsigned long leaf_size, + unsigned long long node_offset, int node_depth, + int inner_size, int last_node, int usedforsecurity); + +static PyObject * +py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 12 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(digest_size), &_Py_ID(key), &_Py_ID(salt), &_Py_ID(person), &_Py_ID(fanout), &_Py_ID(depth), &_Py_ID(leaf_size), &_Py_ID(node_offset), &_Py_ID(node_depth), &_Py_ID(inner_size), &_Py_ID(last_node), &_Py_ID(usedforsecurity), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", "usedforsecurity", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "blake2s", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[13]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; + PyObject *data = NULL; + int digest_size = HACL_HASH_BLAKE2S_OUT_BYTES; + Py_buffer key = {NULL, NULL}; + Py_buffer salt = {NULL, NULL}; + Py_buffer person = {NULL, NULL}; + int fanout = 1; + int depth = 1; + unsigned long leaf_size = 0; + unsigned long long node_offset = 0; + int node_depth = 0; + int inner_size = 0; + int last_node = 0; + int usedforsecurity = 1; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 0, 1, 0, argsbuf); + if (!fastargs) { + goto exit; + } + if (nargs < 1) { + goto skip_optional_posonly; + } + noptargs--; + data = fastargs[0]; +skip_optional_posonly: + if (!noptargs) { + goto skip_optional_kwonly; + } + if (fastargs[1]) { + digest_size = PyLong_AsInt(fastargs[1]); + if (digest_size == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[2]) { + if (PyObject_GetBuffer(fastargs[2], &key, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[3]) { + if (PyObject_GetBuffer(fastargs[3], &salt, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[4]) { + if (PyObject_GetBuffer(fastargs[4], &person, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[5]) { + fanout = PyLong_AsInt(fastargs[5]); + if (fanout == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[6]) { + depth = PyLong_AsInt(fastargs[6]); + if (depth == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[7]) { + if (!_PyLong_UnsignedLong_Converter(fastargs[7], &leaf_size)) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[8]) { + if (!_PyLong_UnsignedLongLong_Converter(fastargs[8], &node_offset)) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[9]) { + node_depth = PyLong_AsInt(fastargs[9]); + if (node_depth == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[10]) { + inner_size = PyLong_AsInt(fastargs[10]); + if (inner_size == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[11]) { + last_node = PyObject_IsTrue(fastargs[11]); + if (last_node < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + usedforsecurity = PyObject_IsTrue(fastargs[12]); + if (usedforsecurity < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = py_blake2s_new_impl(type, data, digest_size, &key, &salt, &person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); + +exit: + /* Cleanup for key */ + if (key.obj) { + PyBuffer_Release(&key); + } + /* Cleanup for salt */ + if (salt.obj) { + PyBuffer_Release(&salt); + } + /* Cleanup for person */ + if (person.obj) { + PyBuffer_Release(&person); + } + + return return_value; +} + PyDoc_STRVAR(_blake2_blake2b_copy__doc__, "copy($self, /)\n" "--\n" @@ -213,10 +407,10 @@ PyDoc_STRVAR(_blake2_blake2b_copy__doc__, {"copy", (PyCFunction)_blake2_blake2b_copy, METH_NOARGS, _blake2_blake2b_copy__doc__}, static PyObject * -_blake2_blake2b_copy_impl(BLAKE2bObject *self); +_blake2_blake2b_copy_impl(Blake2Object *self); static PyObject * -_blake2_blake2b_copy(BLAKE2bObject *self, PyObject *Py_UNUSED(ignored)) +_blake2_blake2b_copy(Blake2Object *self, PyObject *Py_UNUSED(ignored)) { return _blake2_blake2b_copy_impl(self); } @@ -240,10 +434,10 @@ PyDoc_STRVAR(_blake2_blake2b_digest__doc__, {"digest", (PyCFunction)_blake2_blake2b_digest, METH_NOARGS, _blake2_blake2b_digest__doc__}, static PyObject * -_blake2_blake2b_digest_impl(BLAKE2bObject *self); +_blake2_blake2b_digest_impl(Blake2Object *self); static PyObject * -_blake2_blake2b_digest(BLAKE2bObject *self, PyObject *Py_UNUSED(ignored)) +_blake2_blake2b_digest(Blake2Object *self, PyObject *Py_UNUSED(ignored)) { return _blake2_blake2b_digest_impl(self); } @@ -258,11 +452,11 @@ PyDoc_STRVAR(_blake2_blake2b_hexdigest__doc__, {"hexdigest", (PyCFunction)_blake2_blake2b_hexdigest, METH_NOARGS, _blake2_blake2b_hexdigest__doc__}, static PyObject * -_blake2_blake2b_hexdigest_impl(BLAKE2bObject *self); +_blake2_blake2b_hexdigest_impl(Blake2Object *self); static PyObject * -_blake2_blake2b_hexdigest(BLAKE2bObject *self, PyObject *Py_UNUSED(ignored)) +_blake2_blake2b_hexdigest(Blake2Object *self, PyObject *Py_UNUSED(ignored)) { return _blake2_blake2b_hexdigest_impl(self); } -/*[clinic end generated code: output=e18eeaee40623bfc input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d1a351f44e20e273 input=a9049054013a1b77]*/ diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 7991eb93aa2..dbb18ba96d6 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -417,9 +417,10 @@ - - - + + HACL_CAN_COMPILE_SIMD128;%(PreprocessorDefinitions) + HACL_CAN_COMPILE_SIMD256;%(PreprocessorDefinitions) + @@ -429,6 +430,17 @@ + + + + + HACL_CAN_COMPILE_VEC256;%(PreprocessorDefinitions) + /arch:AVX2 + + + HACL_CAN_COMPILE_VEC128;%(PreprocessorDefinitions) + /arch:AVX + diff --git a/Tools/build/generate_sbom.py b/Tools/build/generate_sbom.py index 1b000c3b16a..88f311bf6b4 100644 --- a/Tools/build/generate_sbom.py +++ b/Tools/build/generate_sbom.py @@ -69,9 +69,6 @@ PACKAGE_TO_FILES = { "Lib/ctypes/macholib/fetch_macholib.bat", ], ), - "libb2": PackageFiles( - include=["Modules/_blake2/impl/**"] - ), "hacl-star": PackageFiles( include=["Modules/_hacl/**"], exclude=[ diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 4623f2c8d67..4b75dca86ed 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -120,11 +120,14 @@ glob dirname Modules/_decimal/**/*.c Modules/_decimal/libmpdec Modules/_elementtree.c Modules/expat Modules/_hacl/*.c Modules/_hacl/include +Modules/_hacl/*.c Modules/_hacl/ Modules/_hacl/*.h Modules/_hacl/include +Modules/_hacl/*.h Modules/_hacl/ Modules/md5module.c Modules/_hacl/include Modules/sha1module.c Modules/_hacl/include Modules/sha2module.c Modules/_hacl/include Modules/sha3module.c Modules/_hacl/include +Modules/blake2module.c Modules/_hacl/include Objects/stringlib/*.h Objects # possible system-installed headers, just in case diff --git a/configure b/configure index a0fbebcb044..66312fc9832 100755 --- a/configure +++ b/configure @@ -713,6 +713,10 @@ MODULE__CURSES_FALSE MODULE__CURSES_TRUE MODULE__CTYPES_FALSE MODULE__CTYPES_TRUE +LIBHACL_SIMD256_OBJS +LIBHACL_SIMD256_FLAGS +LIBHACL_SIMD128_OBJS +LIBHACL_SIMD128_FLAGS MODULE__BLAKE2_FALSE MODULE__BLAKE2_TRUE MODULE__SHA3_FALSE @@ -817,8 +821,6 @@ MODULE__IO_FALSE MODULE__IO_TRUE MODULE_BUILDTYPE TEST_MODULES -LIBB2_LIBS -LIBB2_CFLAGS OPENSSL_LDFLAGS OPENSSL_LIBS OPENSSL_INCLUDES @@ -1175,9 +1177,7 @@ LIBEDIT_LIBS CURSES_CFLAGS CURSES_LIBS PANEL_CFLAGS -PANEL_LIBS -LIBB2_CFLAGS -LIBB2_LIBS' +PANEL_LIBS' # Initialize some variables set by options. @@ -2022,9 +2022,6 @@ Some influential environment variables: PANEL_CFLAGS C compiler flags for PANEL, overriding pkg-config PANEL_LIBS linker flags for PANEL, overriding pkg-config - LIBB2_CFLAGS - C compiler flags for LIBB2, overriding pkg-config - LIBB2_LIBS linker flags for LIBB2, overriding pkg-config Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. @@ -28749,88 +28746,6 @@ esac done IFS=$as_save_IFS -if test "x$with_builtin_blake2" = xyes -then : - - -pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for libb2" >&5 -printf %s "checking for libb2... " >&6; } - -if test -n "$LIBB2_CFLAGS"; then - pkg_cv_LIBB2_CFLAGS="$LIBB2_CFLAGS" - elif test -n "$PKG_CONFIG"; then - if test -n "$PKG_CONFIG" && \ - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libb2\""; } >&5 - ($PKG_CONFIG --exists --print-errors "libb2") 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then - pkg_cv_LIBB2_CFLAGS=`$PKG_CONFIG --cflags "libb2" 2>/dev/null` - test "x$?" != "x0" && pkg_failed=yes -else - pkg_failed=yes -fi - else - pkg_failed=untried -fi -if test -n "$LIBB2_LIBS"; then - pkg_cv_LIBB2_LIBS="$LIBB2_LIBS" - elif test -n "$PKG_CONFIG"; then - if test -n "$PKG_CONFIG" && \ - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libb2\""; } >&5 - ($PKG_CONFIG --exists --print-errors "libb2") 2>&5 - ac_status=$? - printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then - pkg_cv_LIBB2_LIBS=`$PKG_CONFIG --libs "libb2" 2>/dev/null` - test "x$?" != "x0" && pkg_failed=yes -else - pkg_failed=yes -fi - else - pkg_failed=untried -fi - - - -if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } - -if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then - _pkg_short_errors_supported=yes -else - _pkg_short_errors_supported=no -fi - if test $_pkg_short_errors_supported = yes; then - LIBB2_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libb2" 2>&1` - else - LIBB2_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libb2" 2>&1` - fi - # Put the nasty error message in config.log where it belongs - echo "$LIBB2_PKG_ERRORS" >&5 - - have_libb2=no -elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 -printf "%s\n" "no" >&6; } - have_libb2=no -else - LIBB2_CFLAGS=$pkg_cv_LIBB2_CFLAGS - LIBB2_LIBS=$pkg_cv_LIBB2_LIBS - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -printf "%s\n" "yes" >&6; } - - have_libb2=yes - -printf "%s\n" "#define HAVE_LIBB2 1" >>confdefs.h - - -fi - -fi - # Check whether to disable test modules. Once set, setup.py will not build # test extension modules and "make install" will not install test suites. { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for --disable-test-modules" >&5 @@ -30340,7 +30255,7 @@ fi if test "x$py_cv_module__md5" = xyes then : - as_fn_append MODULE_BLOCK "MODULE__MD5_CFLAGS=-I\$(srcdir)/Modules/_hacl/include -I\$(srcdir)/Modules/_hacl/internal -D_BSD_SOURCE -D_DEFAULT_SOURCE$as_nl" + fi @@ -30378,7 +30293,7 @@ fi if test "x$py_cv_module__sha1" = xyes then : - as_fn_append MODULE_BLOCK "MODULE__SHA1_CFLAGS=-I\$(srcdir)/Modules/_hacl/include -I\$(srcdir)/Modules/_hacl/internal -D_BSD_SOURCE -D_DEFAULT_SOURCE$as_nl" + fi @@ -30416,7 +30331,7 @@ fi if test "x$py_cv_module__sha2" = xyes then : - as_fn_append MODULE_BLOCK "MODULE__SHA2_CFLAGS=-I\$(srcdir)/Modules/_hacl/include -I\$(srcdir)/Modules/_hacl/internal -D_BSD_SOURCE -D_DEFAULT_SOURCE$as_nl" + fi @@ -30492,8 +30407,8 @@ fi if test "x$py_cv_module__blake2" = xyes then : - as_fn_append MODULE_BLOCK "MODULE__BLAKE2_CFLAGS=$LIBB2_CFLAGS$as_nl" - as_fn_append MODULE_BLOCK "MODULE__BLAKE2_LDFLAGS=$LIBB2_LIBS$as_nl" + + fi if test "$py_cv_module__blake2" = yes; then @@ -30508,6 +30423,102 @@ fi printf "%s\n" "$py_cv_module__blake2" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse -msse2 -msse3 -msse4.1 -msse4.2" >&5 +printf %s "checking whether C compiler accepts -msse -msse2 -msse3 -msse4.1 -msse4.2... " >&6; } +if test ${ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -msse -msse2 -msse3 -msse4.1 -msse4.2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2=yes +else $as_nop + ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2" = xyes +then : + + LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2" + LIBHACL_SIMD128_OBJS="Modules/_hacl/Hacl_Hash_Blake2s_Simd128.o" + +printf "%s\n" "#define HACL_CAN_COMPILE_SIMD128 1" >>confdefs.h + + +else $as_nop + : +fi + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 +printf %s "checking whether C compiler accepts -mavx2... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx2+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx2=yes +else $as_nop + ax_cv_check_cflags__Werror__mavx2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx2" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx2" = xyes +then : + + LIBHACL_SIMD256_FLAGS="-mavx2" + LIBHACL_SIMD256_OBJS="Modules/_hacl/Hacl_Hash_Blake2b_Simd256.o" + +printf "%s\n" "#define HACL_CAN_COMPILE_SIMD256 1" >>confdefs.h + + +else $as_nop + : +fi + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for stdlib extension module _ctypes" >&5 printf %s "checking for stdlib extension module _ctypes... " >&6; } diff --git a/configure.ac b/configure.ac index 9a17fc279e5..6df55eabad7 100644 --- a/configure.ac +++ b/configure.ac @@ -7438,15 +7438,6 @@ for builtin_hash in $with_builtin_hashlib_hashes; do done IFS=$as_save_IFS -dnl libb2 for blake2. _blake2 module falls back to vendored copy. -AS_VAR_IF([with_builtin_blake2], [yes], [ - PKG_CHECK_MODULES([LIBB2], [libb2], [ - have_libb2=yes - AC_DEFINE([HAVE_LIBB2], [1], - [Define to 1 if you want to build _blake2 module with libb2]) - ], [have_libb2=no]) -]) - # Check whether to disable test modules. Once set, setup.py will not build # test extension modules and "make install" will not install test suites. AC_MSG_CHECKING([for --disable-test-modules]) @@ -7767,19 +7758,30 @@ PY_STDLIB_MOD_SIMPLE([unicodedata]) dnl By default we always compile these even when OpenSSL is available dnl (issue #14693). The modules are small. -PY_STDLIB_MOD([_md5], - [test "$with_builtin_md5" = yes], [], - [-I\$(srcdir)/Modules/_hacl/include -I\$(srcdir)/Modules/_hacl/internal -D_BSD_SOURCE -D_DEFAULT_SOURCE]) -PY_STDLIB_MOD([_sha1], - [test "$with_builtin_sha1" = yes], [], - [-I\$(srcdir)/Modules/_hacl/include -I\$(srcdir)/Modules/_hacl/internal -D_BSD_SOURCE -D_DEFAULT_SOURCE]) -PY_STDLIB_MOD([_sha2], - [test "$with_builtin_sha2" = yes], [], - [-I\$(srcdir)/Modules/_hacl/include -I\$(srcdir)/Modules/_hacl/internal -D_BSD_SOURCE -D_DEFAULT_SOURCE]) +PY_STDLIB_MOD([_md5], [test "$with_builtin_md5" = yes]) +PY_STDLIB_MOD([_sha1], [test "$with_builtin_sha1" = yes]) +PY_STDLIB_MOD([_sha2], [test "$with_builtin_sha2" = yes]) PY_STDLIB_MOD([_sha3], [test "$with_builtin_sha3" = yes]) -PY_STDLIB_MOD([_blake2], - [test "$with_builtin_blake2" = yes], [], - [$LIBB2_CFLAGS], [$LIBB2_LIBS]) +PY_STDLIB_MOD([_blake2], [test "$with_builtin_blake2" = yes]) + +dnl This can be extended here to detect e.g. Power8, which HACL* should also support. +AX_CHECK_COMPILE_FLAG([-msse -msse2 -msse3 -msse4.1 -msse4.2],[ + [LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2"] + [LIBHACL_SIMD128_OBJS="Modules/_hacl/Hacl_Hash_Blake2s_Simd128.o"] + AC_DEFINE([HACL_CAN_COMPILE_SIMD128], [1], [HACL* library can compile SIMD128 implementations]) +], [], [-Werror]) + +AC_SUBST([LIBHACL_SIMD128_FLAGS]) +AC_SUBST([LIBHACL_SIMD128_OBJS]) + +AX_CHECK_COMPILE_FLAG([-mavx2],[ + [LIBHACL_SIMD256_FLAGS="-mavx2"] + [LIBHACL_SIMD256_OBJS="Modules/_hacl/Hacl_Hash_Blake2b_Simd256.o"] + AC_DEFINE([HACL_CAN_COMPILE_SIMD256], [1], [HACL* library can compile SIMD256 implementations]) +], [], [-Werror]) + +AC_SUBST([LIBHACL_SIMD256_FLAGS]) +AC_SUBST([LIBHACL_SIMD256_OBJS]) PY_STDLIB_MOD([_ctypes], [], [test "$have_libffi" = yes], diff --git a/pyconfig.h.in b/pyconfig.h.in index 8fbba7ed3b9..39978d11e8c 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -54,6 +54,12 @@ /* Define if getpgrp() must be called as getpgrp(0). */ #undef GETPGRP_HAVE_ARG +/* HACL* library can compile SIMD128 implementations */ +#undef HACL_CAN_COMPILE_SIMD128 + +/* HACL* library can compile SIMD256 implementations */ +#undef HACL_CAN_COMPILE_SIMD256 + /* Define if you have the 'accept' function. */ #undef HAVE_ACCEPT @@ -670,9 +676,6 @@ /* Define to 1 if you have the `lchown' function. */ #undef HAVE_LCHOWN -/* Define to 1 if you want to build _blake2 module with libb2 */ -#undef HAVE_LIBB2 - /* Define to 1 if you have the `db' library (-ldb). */ #undef HAVE_LIBDB