From 528bbab96feadbfabb798547e5bb2ad52070fb73 Mon Sep 17 00:00:00 2001 From: Jonathan Protzenko Date: Thu, 17 Oct 2024 08:08:43 -0700 Subject: [PATCH] GH-99108: Make vectorized versions of Blake2 available on x86, too (#125244) Accomplished by updating HACL* vendored code from hacl-star/hacl-star@a6a09496d9cff652b567d26f2c3ab012321b632a to hacl-star/hacl-star@315a9e491d2bc347b9dae99e0ea506995ea84d9d Co-authored-by: Victor Stinner Co-authored-by: Zachary Ware --- Misc/sbom.spdx.json | 48 ++-- Modules/_hacl/Hacl_Hash_Blake2b.c | 260 +++++++++++------- Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c | 225 +++++++++------ Modules/_hacl/Hacl_Hash_Blake2s.c | 231 ++++++++++------ Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c | 196 ++++++++----- .../include/krml/FStar_UInt128_Verified.h | 2 +- .../include/krml/FStar_UInt_8_16_32_64.h | 2 +- .../krml/fstar_uint128_struct_endianness.h | 2 +- Modules/_hacl/include/krml/internal/target.h | 6 +- .../_hacl/include/krml/lowstar_endianness.h | 2 +- Modules/_hacl/libintvector.h | 2 +- Modules/_hacl/refresh.sh | 2 +- 12 files changed, 619 insertions(+), 359 deletions(-) diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index f07ad9423d9..cc73e93009b 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -300,11 +300,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "c96cba53034348537ac423a220803b06cd9f0a43" + "checksumValue": "a34e821b68ef5334eccf4f729b28bb7bb65b965e" }, { "algorithm": "SHA256", - "checksumValue": "9f4fb5c70678638cfd163cc990be1def356cf7b65b75faa4666db8c5f8593530" + "checksumValue": "4582db9143c0810b98838a5357c577e0b32ae77f3018486159df4e0dfd3fce3c" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2b.c" @@ -328,11 +328,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "e11e2d1771e56c0afbdb0673906898b3a67e0cc3" + "checksumValue": "0ffe60c6d5eed5dd222515e820d461d319d16b1f" }, { "algorithm": "SHA256", - "checksumValue": "d5bf29d995f7cb9861841b813aa01206664895a1c5aa166a4796785c02117bf4" + "checksumValue": "4804cb3ce68bfdcf98853d6f1d77b4a844a3c2796f776b39770ba327e400d402" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c" @@ -370,11 +370,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "5422517af799cf74b194821fb2a1f39e3b02c54d" + "checksumValue": "cf035ffeff875bc74345a47373ce25dc408ea9dc" }, { "algorithm": "SHA256", - "checksumValue": "c66adab0259f2c2229e010cd635a982e8c2b8836e59e43e7867992d4148e4d9a" + "checksumValue": "579059b002c45fab0fed6381e85c3f5eaf1d959400ca64b103542ac6c35bade3" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2s.c" @@ -398,11 +398,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "7822db8e7c2f60dd64a18e112a1bc369e7f7a0ff" + "checksumValue": "9bb53022d158a9c349edb52a8def8aac7d098a4e" }, { "algorithm": "SHA256", - "checksumValue": "94b0cd3cf1f7385325ee878d2ef06affc8d6412af9302ca47d1aa6d858182050" + "checksumValue": "2abde0c6b5da0402e91b4bedfe786c24b908fbdc04e08e74651c7624729254d9" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c" @@ -580,11 +580,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "12c0c680c93b8112b97cc575faacbb3cbbd315b1" + "checksumValue": "7665829b9396f72e7f8098080d6d6773565468e9" }, { "algorithm": "SHA256", - "checksumValue": "455e94f24a0900deda7e6e36f4714e4253d32cea077f97e23f90c569a717bc48" + "checksumValue": "ca7357ee70365c690664a44f6522e526636151d9ed2da8d0d29da15bb8556530" } ], "fileName": "Modules/_hacl/include/krml/FStar_UInt128_Verified.h" @@ -594,11 +594,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "62b44acbbdc77b749c36c242cda027bacf7679f8" + "checksumValue": "a2db924d0e8f7df3139e9a20355ffa520aded479" }, { "algorithm": "SHA256", - "checksumValue": "65decdb74c24049aa19430462a51219250cfc65d8c162778e42df88b3142fa42" + "checksumValue": "f1de79fb4c763b215c823f44471bbae6b65e6bb533eb52a5863d551d5e2e6748" } ], "fileName": "Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h" @@ -608,11 +608,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "1987119a563a8fdc5966286e274f716dbcea77ee" + "checksumValue": "7f23693151d5409623cbe886e5b45a0e4f0d3c72" }, { "algorithm": "SHA256", - "checksumValue": "fe57e1bc5ce3224d106e36cb8829b5399c63a68a70b0ccd0c91d82a4565c8869" + "checksumValue": "1c9bee7ac4b987c73cc3aba6b7ceed8ec7e75c9a741810e4411f35602490e0d8" } ], "fileName": "Modules/_hacl/include/krml/fstar_uint128_struct_endianness.h" @@ -622,11 +622,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "81872ecdbd39b09cd813dee6e1dbed113a81aa4a" + "checksumValue": "9881567f43deb32bae77a84b2d349858a24b6685" }, { "algorithm": "SHA256", - "checksumValue": "1eef18295d412129007816fe65b7f15c0be8ad32840ef5e3dfaa5b67317e1b51" + "checksumValue": "3382156e32fcb376009177d3d2dc9712ff7c8c02afb97b3e16d98b41a2114f84" } ], "fileName": "Modules/_hacl/include/krml/internal/target.h" @@ -636,11 +636,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "964e09bd99ff2366afd6193b59863fc925e7fb05" + "checksumValue": "e18efc9239a5df0f222b5f7b0a65f72509d7e304" }, { "algorithm": "SHA256", - "checksumValue": "3734c7942bec9a434e16df069fa45bdcb84b130f14417bc5f7bfe8546272d9f5" + "checksumValue": "47dd5a7d21b5302255f9fff28884f65d3056fc3f54471ed62ec85fa1904f8aa5" } ], "fileName": "Modules/_hacl/include/krml/lowstar_endianness.h" @@ -804,11 +804,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "d5d85ee8f0bd52781fe470d0bf73ec388ddb3999" + "checksumValue": "f4a33ad535768b860362ab0bd033a70da0b524b7" }, { "algorithm": "SHA256", - "checksumValue": "9a421b998add98fe366374641c4edb27617ff539a59f0963879f345065d3d39d" + "checksumValue": "433cdf4ba80bc72e0cea5d4b420ff18676baeafdb5ba19adf5b7fb33e90b424b" } ], "fileName": "Modules/_hacl/libintvector.h" @@ -1640,14 +1640,14 @@ "checksums": [ { "algorithm": "SHA256", - "checksumValue": "988a74f5fbb59baca2d54e41447997ada92f4ebc59888dfb717438013f859117" + "checksumValue": "935ae51d0ff0bf1403f0ecc1ff02b8f685d09053618558c07fbe4bd2abbc5dd1" } ], - "downloadLocation": "https://github.com/hacl-star/hacl-star/archive/a6a09496d9cff652b567d26f2c3ab012321b632a.zip", + "downloadLocation": "https://github.com/hacl-star/hacl-star/archive/315a9e491d2bc347b9dae99e0ea506995ea84d9d.zip", "externalRefs": [ { "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:hacl-star:hacl-star:a6a09496d9cff652b567d26f2c3ab012321b632a:*:*:*:*:*:*:*", + "referenceLocator": "cpe:2.3:a:hacl-star:hacl-star:315a9e491d2bc347b9dae99e0ea506995ea84d9d:*:*:*:*:*:*:*", "referenceType": "cpe23Type" } ], @@ -1655,7 +1655,7 @@ "name": "hacl-star", "originator": "Organization: HACL* Developers", "primaryPackagePurpose": "SOURCE", - "versionInfo": "a6a09496d9cff652b567d26f2c3ab012321b632a" + "versionInfo": "315a9e491d2bc347b9dae99e0ea506995ea84d9d" }, { "SPDXID": "SPDXRef-PACKAGE-macholib", diff --git a/Modules/_hacl/Hacl_Hash_Blake2b.c b/Modules/_hacl/Hacl_Hash_Blake2b.c index e13f16fd971..cd3b9777e09 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2b.c +++ b/Modules/_hacl/Hacl_Hash_Blake2b.c @@ -575,86 +575,6 @@ void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn) r1[3U] = iv7_; } -static void init_with_params(uint64_t *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint64_t tmp[8U] = { 0U }; - uint64_t *r0 = hash; - uint64_t *r1 = hash + 4U; - uint64_t *r2 = hash + 8U; - uint64_t *r3 = hash + 12U; - uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; - uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; - uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; - uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; - uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; - uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; - uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; - uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; - r2[0U] = iv0; - r2[1U] = iv1; - r2[2U] = iv2; - r2[3U] = iv3; - r3[0U] = iv4; - r3[1U] = iv5; - r3[2U] = iv6; - r3[3U] = iv7; - uint8_t kk = p.key_length; - uint8_t nn = p.digest_length; - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - tmp[0U] = - (uint64_t)nn - ^ - ((uint64_t)kk - << 8U - ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); - tmp[1U] = p.node_offset; - tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; - tmp[3U] = 0ULL; - uint64_t tmp0 = tmp[0U]; - uint64_t tmp1 = tmp[1U]; - uint64_t tmp2 = tmp[2U]; - uint64_t tmp3 = tmp[3U]; - uint64_t tmp4 = tmp[4U]; - uint64_t tmp5 = tmp[5U]; - uint64_t tmp6 = tmp[6U]; - uint64_t tmp7 = tmp[7U]; - uint64_t iv0_ = iv0 ^ tmp0; - uint64_t iv1_ = iv1 ^ tmp1; - uint64_t iv2_ = iv2 ^ tmp2; - uint64_t iv3_ = iv3 ^ tmp3; - uint64_t iv4_ = iv4 ^ tmp4; - uint64_t iv5_ = iv5 ^ tmp5; - uint64_t iv6_ = iv6 ^ tmp6; - uint64_t iv7_ = iv7 ^ tmp7; - r0[0U] = iv0_; - r0[1U] = iv1_; - r0[2U] = iv2_; - r0[3U] = iv3_; - r1[0U] = iv4_; - r1[1U] = iv5_; - r1[2U] = iv6_; - r1[3U] = iv7_; -} - static void update_key(uint64_t *wv, uint64_t *hash, uint32_t kk, uint8_t *k, uint32_t ll) { FStar_UInt128_uint128 lb = FStar_UInt128_uint64_to_uint128((uint64_t)128U); @@ -811,16 +731,92 @@ static Hacl_Hash_Blake2b_state_t uint8_t nn = p1->digest_length; bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; - uint32_t kk2 = (uint32_t)i.key_length; + uint64_t *h = block_state.f3.snd; + uint32_t kk20 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.f3.snd, pv); + uint64_t tmp[8U] = { 0U }; + uint64_t *r0 = h; + uint64_t *r1 = h + 4U; + uint64_t *r2 = h + 8U; + uint64_t *r3 = h + 12U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + os[i0] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; return p; } @@ -918,16 +914,92 @@ static void reset_raw(Hacl_Hash_Blake2b_state_t *state, Hacl_Hash_Blake2b_params bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; - uint32_t kk2 = (uint32_t)i1.key_length; + uint64_t *h = block_state.f3.snd; + uint32_t kk20 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.f3.snd, pv); + uint64_t tmp[8U] = { 0U }; + uint64_t *r0 = h; + uint64_t *r1 = h + 4U; + uint64_t *r2 = h + 8U; + uint64_t *r3 = h + 12U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i0] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -939,8 +1011,8 @@ static void reset_raw(Hacl_Hash_Blake2b_state_t *state, Hacl_Hash_Blake2b_params ite = 0U; } Hacl_Hash_Blake2b_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + tmp8 = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp8; } /** diff --git a/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c index 35608aea71a..92b2e8f5390 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c +++ b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c @@ -298,75 +298,6 @@ Hacl_Hash_Blake2b_Simd256_init(Lib_IntVector_Intrinsics_vec256 *hash, uint32_t k r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); } -static void -init_with_params(Lib_IntVector_Intrinsics_vec256 *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint64_t tmp[8U] = { 0U }; - Lib_IntVector_Intrinsics_vec256 *r0 = hash; - Lib_IntVector_Intrinsics_vec256 *r1 = hash + 1U; - Lib_IntVector_Intrinsics_vec256 *r2 = hash + 2U; - Lib_IntVector_Intrinsics_vec256 *r3 = hash + 3U; - uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; - uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; - uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; - uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; - uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; - uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; - uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; - uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; - r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); - r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); - uint8_t kk = p.key_length; - uint8_t nn = p.digest_length; - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint64_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x;); - tmp[0U] = - (uint64_t)nn - ^ - ((uint64_t)kk - << 8U - ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); - tmp[1U] = p.node_offset; - tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; - tmp[3U] = 0ULL; - uint64_t tmp0 = tmp[0U]; - uint64_t tmp1 = tmp[1U]; - uint64_t tmp2 = tmp[2U]; - uint64_t tmp3 = tmp[3U]; - uint64_t tmp4 = tmp[4U]; - uint64_t tmp5 = tmp[5U]; - uint64_t tmp6 = tmp[6U]; - uint64_t tmp7 = tmp[7U]; - uint64_t iv0_ = iv0 ^ tmp0; - uint64_t iv1_ = iv1 ^ tmp1; - uint64_t iv2_ = iv2 ^ tmp2; - uint64_t iv3_ = iv3 ^ tmp3; - uint64_t iv4_ = iv4 ^ tmp4; - uint64_t iv5_ = iv5 ^ tmp5; - uint64_t iv6_ = iv6 ^ tmp6; - uint64_t iv7_ = iv7 ^ tmp7; - r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); - r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); -} - static void update_key( Lib_IntVector_Intrinsics_vec256 *wv, @@ -647,16 +578,80 @@ static Hacl_Hash_Blake2b_Simd256_state_t uint8_t nn = p1->digest_length; bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; - uint32_t kk2 = (uint32_t)i.key_length; + Lib_IntVector_Intrinsics_vec256 *h = block_state.f3.snd; + uint32_t kk20 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.f3.snd, pv); + uint64_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = h; + Lib_IntVector_Intrinsics_vec256 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = h + 3U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r4 = u; + uint64_t x = r4; + os[i0] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); return p; } @@ -757,16 +752,80 @@ reset_raw(Hacl_Hash_Blake2b_Simd256_state_t *state, Hacl_Hash_Blake2b_params_and bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; - uint32_t kk2 = (uint32_t)i1.key_length; + Lib_IntVector_Intrinsics_vec256 *h = block_state.f3.snd; + uint32_t kk20 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; - if (!(kk2 == 0U)) + if (!(kk20 == 0U)) { - uint8_t *sub_b = buf + kk2; - memset(sub_b, 0U, (128U - kk2) * sizeof (uint8_t)); - memcpy(buf, k_1, kk2 * sizeof (uint8_t)); + uint8_t *sub_b = buf + kk20; + memset(sub_b, 0U, (128U - kk20) * sizeof (uint8_t)); + memcpy(buf, k_1, kk20 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.f3.snd, pv); + uint64_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec256 *r0 = h; + Lib_IntVector_Intrinsics_vec256 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec256 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec256 *r3 = h + 3U; + uint64_t iv0 = Hacl_Hash_Blake2b_ivTable_B[0U]; + uint64_t iv1 = Hacl_Hash_Blake2b_ivTable_B[1U]; + uint64_t iv2 = Hacl_Hash_Blake2b_ivTable_B[2U]; + uint64_t iv3 = Hacl_Hash_Blake2b_ivTable_B[3U]; + uint64_t iv4 = Hacl_Hash_Blake2b_ivTable_B[4U]; + uint64_t iv5 = Hacl_Hash_Blake2b_ivTable_B[5U]; + uint64_t iv6 = Hacl_Hash_Blake2b_ivTable_B[6U]; + uint64_t iv7 = Hacl_Hash_Blake2b_ivTable_B[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4, iv5, iv6, iv7); + uint8_t kk2 = pv.key_length; + uint8_t nn1 = pv.digest_length; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint64_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i0] = x;); + tmp[0U] = + (uint64_t)nn1 + ^ + ((uint64_t)kk2 + << 8U + ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + tmp[1U] = pv.node_offset; + tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; + tmp[3U] = 0ULL; + uint64_t tmp0 = tmp[0U]; + uint64_t tmp1 = tmp[1U]; + uint64_t tmp2 = tmp[2U]; + uint64_t tmp3 = tmp[3U]; + uint64_t tmp4 = tmp[4U]; + uint64_t tmp5 = tmp[5U]; + uint64_t tmp6 = tmp[6U]; + uint64_t tmp7 = tmp[7U]; + uint64_t iv0_ = iv0 ^ tmp0; + uint64_t iv1_ = iv1 ^ tmp1; + uint64_t iv2_ = iv2 ^ tmp2; + uint64_t iv3_ = iv3 ^ tmp3; + uint64_t iv4_ = iv4 ^ tmp4; + uint64_t iv5_ = iv5 ^ tmp5; + uint64_t iv6_ = iv6 ^ tmp6; + uint64_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec256_load64s(iv4_, iv5_, iv6_, iv7_); uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -778,8 +837,8 @@ reset_raw(Hacl_Hash_Blake2b_Simd256_state_t *state, Hacl_Hash_Blake2b_params_and ite = 0U; } Hacl_Hash_Blake2b_Simd256_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + tmp8 = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp8; } /** diff --git a/Modules/_hacl/Hacl_Hash_Blake2s.c b/Modules/_hacl/Hacl_Hash_Blake2s.c index 167f38fbd1c..e5e0ecd0bfd 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2s.c +++ b/Modules/_hacl/Hacl_Hash_Blake2s.c @@ -573,83 +573,6 @@ void Hacl_Hash_Blake2s_init(uint32_t *hash, uint32_t kk, uint32_t nn) r1[3U] = iv7_; } -static void init_with_params(uint32_t *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint32_t tmp[8U] = { 0U }; - uint32_t *r0 = hash; - uint32_t *r1 = hash + 4U; - uint32_t *r2 = hash + 8U; - uint32_t *r3 = hash + 12U; - uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; - uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; - uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; - uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; - uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; - uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; - uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; - uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; - r2[0U] = iv0; - r2[1U] = iv1; - r2[2U] = iv2; - r2[3U] = iv3; - r3[0U] = iv4; - r3[1U] = iv5; - r3[2U] = iv6; - r3[3U] = iv7; - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - tmp[0U] = - (uint32_t)p.digest_length - ^ ((uint32_t)p.key_length << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); - tmp[1U] = p.leaf_length; - tmp[2U] = (uint32_t)p.node_offset; - tmp[3U] = - (uint32_t)(p.node_offset >> 32U) - ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); - uint32_t tmp0 = tmp[0U]; - uint32_t tmp1 = tmp[1U]; - uint32_t tmp2 = tmp[2U]; - uint32_t tmp3 = tmp[3U]; - uint32_t tmp4 = tmp[4U]; - uint32_t tmp5 = tmp[5U]; - uint32_t tmp6 = tmp[6U]; - uint32_t tmp7 = tmp[7U]; - uint32_t iv0_ = iv0 ^ tmp0; - uint32_t iv1_ = iv1 ^ tmp1; - uint32_t iv2_ = iv2 ^ tmp2; - uint32_t iv3_ = iv3 ^ tmp3; - uint32_t iv4_ = iv4 ^ tmp4; - uint32_t iv5_ = iv5 ^ tmp5; - uint32_t iv6_ = iv6 ^ tmp6; - uint32_t iv7_ = iv7 ^ tmp7; - r0[0U] = iv0_; - r0[1U] = iv1_; - r0[2U] = iv2_; - r0[3U] = iv3_; - r1[0U] = iv4_; - r1[1U] = iv5_; - r1[2U] = iv6_; - r1[3U] = iv7_; -} - static void update_key(uint32_t *wv, uint32_t *hash, uint32_t kk, uint8_t *k, uint32_t ll) { uint64_t lb = (uint64_t)64U; @@ -796,6 +719,7 @@ static Hacl_Hash_Blake2s_state_t uint8_t nn = p1->digest_length; bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t *h = block_state.f3.snd; uint32_t kk2 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -805,7 +729,79 @@ static Hacl_Hash_Blake2s_state_t memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.f3.snd, pv); + uint32_t tmp[8U] = { 0U }; + uint32_t *r0 = h; + uint32_t *r1 = h + 4U; + uint32_t *r2 = h + 8U; + uint32_t *r3 = h + 12U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + os[i0] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; return p; } @@ -903,6 +899,7 @@ static void reset_raw(Hacl_Hash_Blake2s_state_t *state, Hacl_Hash_Blake2b_params bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + uint32_t *h = block_state.f3.snd; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -912,7 +909,79 @@ static void reset_raw(Hacl_Hash_Blake2s_state_t *state, Hacl_Hash_Blake2b_params memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.f3.snd, pv); + uint32_t tmp[8U] = { 0U }; + uint32_t *r0 = h; + uint32_t *r1 = h + 4U; + uint32_t *r2 = h + 8U; + uint32_t *r3 = h + 12U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = iv0; + r2[1U] = iv1; + r2[2U] = iv2; + r2[3U] = iv3; + r3[0U] = iv4; + r3[1U] = iv5; + r3[2U] = iv6; + r3[3U] = iv7; + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i0] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = iv0_; + r0[1U] = iv1_; + r0[2U] = iv2_; + r0[3U] = iv3_; + r1[0U] = iv4_; + r1[1U] = iv5_; + r1[2U] = iv6_; + r1[3U] = iv7_; uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -924,8 +993,8 @@ static void reset_raw(Hacl_Hash_Blake2s_state_t *state, Hacl_Hash_Blake2b_params ite = 0U; } Hacl_Hash_Blake2s_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + tmp8 = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp8; } /** diff --git a/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c index a85b18a4d29..f675a7f14f1 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c +++ b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c @@ -295,72 +295,6 @@ Hacl_Hash_Blake2s_Simd128_init(Lib_IntVector_Intrinsics_vec128 *hash, uint32_t k r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); } -static void -init_with_params(Lib_IntVector_Intrinsics_vec128 *hash, Hacl_Hash_Blake2b_blake2_params p) -{ - uint32_t tmp[8U] = { 0U }; - Lib_IntVector_Intrinsics_vec128 *r0 = hash; - Lib_IntVector_Intrinsics_vec128 *r1 = hash + 1U; - Lib_IntVector_Intrinsics_vec128 *r2 = hash + 2U; - Lib_IntVector_Intrinsics_vec128 *r3 = hash + 3U; - uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; - uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; - uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; - uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; - uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; - uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; - uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; - uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; - r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); - r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 4U; - uint8_t *bj = p.salt + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - KRML_MAYBE_FOR2(i, - 0U, - 2U, - 1U, - uint32_t *os = tmp + 6U; - uint8_t *bj = p.personal + i * 4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x;); - tmp[0U] = - (uint32_t)p.digest_length - ^ ((uint32_t)p.key_length << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); - tmp[1U] = p.leaf_length; - tmp[2U] = (uint32_t)p.node_offset; - tmp[3U] = - (uint32_t)(p.node_offset >> 32U) - ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); - uint32_t tmp0 = tmp[0U]; - uint32_t tmp1 = tmp[1U]; - uint32_t tmp2 = tmp[2U]; - uint32_t tmp3 = tmp[3U]; - uint32_t tmp4 = tmp[4U]; - uint32_t tmp5 = tmp[5U]; - uint32_t tmp6 = tmp[6U]; - uint32_t tmp7 = tmp[7U]; - uint32_t iv0_ = iv0 ^ tmp0; - uint32_t iv1_ = iv1 ^ tmp1; - uint32_t iv2_ = iv2 ^ tmp2; - uint32_t iv3_ = iv3 ^ tmp3; - uint32_t iv4_ = iv4 ^ tmp4; - uint32_t iv5_ = iv5 ^ tmp5; - uint32_t iv6_ = iv6 ^ tmp6; - uint32_t iv7_ = iv7 ^ tmp7; - r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); - r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); -} - static void update_key( Lib_IntVector_Intrinsics_vec128 *wv, @@ -637,6 +571,7 @@ static Hacl_Hash_Blake2s_Simd128_state_t uint8_t nn = p1->digest_length; bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + Lib_IntVector_Intrinsics_vec128 *h = block_state.f3.snd; uint32_t kk2 = (uint32_t)i.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -646,7 +581,67 @@ static Hacl_Hash_Blake2s_Simd128_state_t memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p1[0U]; - init_with_params(block_state.f3.snd, pv); + uint32_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = h; + Lib_IntVector_Intrinsics_vec128 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = h + 3U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r4 = u; + uint32_t x = r4; + os[i0] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); return p; } @@ -747,6 +742,7 @@ reset_raw(Hacl_Hash_Blake2s_Simd128_state_t *state, Hacl_Hash_Blake2b_params_and bool last_node = block_state.thd; Hacl_Hash_Blake2b_index i1 = { .key_length = kk1, .digest_length = nn, .last_node = last_node }; + Lib_IntVector_Intrinsics_vec128 *h = block_state.f3.snd; uint32_t kk2 = (uint32_t)i1.key_length; uint8_t *k_1 = key.snd; if (!(kk2 == 0U)) @@ -756,7 +752,67 @@ reset_raw(Hacl_Hash_Blake2s_Simd128_state_t *state, Hacl_Hash_Blake2b_params_and memcpy(buf, k_1, kk2 * sizeof (uint8_t)); } Hacl_Hash_Blake2b_blake2_params pv = p[0U]; - init_with_params(block_state.f3.snd, pv); + uint32_t tmp[8U] = { 0U }; + Lib_IntVector_Intrinsics_vec128 *r0 = h; + Lib_IntVector_Intrinsics_vec128 *r1 = h + 1U; + Lib_IntVector_Intrinsics_vec128 *r2 = h + 2U; + Lib_IntVector_Intrinsics_vec128 *r3 = h + 3U; + uint32_t iv0 = Hacl_Hash_Blake2b_ivTable_S[0U]; + uint32_t iv1 = Hacl_Hash_Blake2b_ivTable_S[1U]; + uint32_t iv2 = Hacl_Hash_Blake2b_ivTable_S[2U]; + uint32_t iv3 = Hacl_Hash_Blake2b_ivTable_S[3U]; + uint32_t iv4 = Hacl_Hash_Blake2b_ivTable_S[4U]; + uint32_t iv5 = Hacl_Hash_Blake2b_ivTable_S[5U]; + uint32_t iv6 = Hacl_Hash_Blake2b_ivTable_S[6U]; + uint32_t iv7 = Hacl_Hash_Blake2b_ivTable_S[7U]; + r2[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0, iv1, iv2, iv3); + r3[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4, iv5, iv6, iv7); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 4U; + uint8_t *bj = pv.salt + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i0] = x;); + KRML_MAYBE_FOR2(i0, + 0U, + 2U, + 1U, + uint32_t *os = tmp + 6U; + uint8_t *bj = pv.personal + i0 * 4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i0] = x;); + tmp[0U] = + (uint32_t)pv.digest_length + ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + tmp[1U] = pv.leaf_length; + tmp[2U] = (uint32_t)pv.node_offset; + tmp[3U] = + (uint32_t)(pv.node_offset >> 32U) + ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + uint32_t tmp0 = tmp[0U]; + uint32_t tmp1 = tmp[1U]; + uint32_t tmp2 = tmp[2U]; + uint32_t tmp3 = tmp[3U]; + uint32_t tmp4 = tmp[4U]; + uint32_t tmp5 = tmp[5U]; + uint32_t tmp6 = tmp[6U]; + uint32_t tmp7 = tmp[7U]; + uint32_t iv0_ = iv0 ^ tmp0; + uint32_t iv1_ = iv1 ^ tmp1; + uint32_t iv2_ = iv2 ^ tmp2; + uint32_t iv3_ = iv3 ^ tmp3; + uint32_t iv4_ = iv4 ^ tmp4; + uint32_t iv5_ = iv5 ^ tmp5; + uint32_t iv6_ = iv6 ^ tmp6; + uint32_t iv7_ = iv7 ^ tmp7; + r0[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv0_, iv1_, iv2_, iv3_); + r1[0U] = Lib_IntVector_Intrinsics_vec128_load32s(iv4_, iv5_, iv6_, iv7_); uint8_t kk11 = i.key_length; uint32_t ite; if (kk11 != 0U) @@ -768,8 +824,8 @@ reset_raw(Hacl_Hash_Blake2s_Simd128_state_t *state, Hacl_Hash_Blake2b_params_and ite = 0U; } Hacl_Hash_Blake2s_Simd128_state_t - tmp = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; - state[0U] = tmp; + tmp8 = { .block_state = block_state, .buf = buf, .total_len = (uint64_t)ite }; + state[0U] = tmp8; } /** diff --git a/Modules/_hacl/include/krml/FStar_UInt128_Verified.h b/Modules/_hacl/include/krml/FStar_UInt128_Verified.h index bdf25898f2b..659745b2426 100644 --- a/Modules/_hacl/include/krml/FStar_UInt128_Verified.h +++ b/Modules/_hacl/include/krml/FStar_UInt128_Verified.h @@ -1,6 +1,6 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. + Licensed under the Apache 2.0 and MIT Licenses. */ diff --git a/Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h b/Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h index 1bdec972a2f..68bac0b3f0a 100644 --- a/Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h +++ b/Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h @@ -1,6 +1,6 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. + Licensed under the Apache 2.0 and MIT Licenses. */ diff --git a/Modules/_hacl/include/krml/fstar_uint128_struct_endianness.h b/Modules/_hacl/include/krml/fstar_uint128_struct_endianness.h index e2b6d62859a..bb736add318 100644 --- a/Modules/_hacl/include/krml/fstar_uint128_struct_endianness.h +++ b/Modules/_hacl/include/krml/fstar_uint128_struct_endianness.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. */ #ifndef FSTAR_UINT128_STRUCT_ENDIANNESS_H #define FSTAR_UINT128_STRUCT_ENDIANNESS_H diff --git a/Modules/_hacl/include/krml/internal/target.h b/Modules/_hacl/include/krml/internal/target.h index 292adc14235..fd74d3da684 100644 --- a/Modules/_hacl/include/krml/internal/target.h +++ b/Modules/_hacl/include/krml/internal/target.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. */ #ifndef __KRML_TARGET_H #define __KRML_TARGET_H @@ -82,6 +82,8 @@ # define KRML_NOINLINE __declspec(noinline) # elif defined (__GNUC__) # define KRML_NOINLINE __attribute__((noinline,unused)) +# elif defined (__SUNPRO_C) +# define KRML_NOINLINE __attribute__((noinline)) # else # define KRML_NOINLINE # warning "The KRML_NOINLINE macro is not defined for this toolchain!" @@ -95,6 +97,8 @@ # define KRML_MUSTINLINE inline __forceinline # elif defined (__GNUC__) # define KRML_MUSTINLINE inline __attribute__((always_inline)) +# elif defined (__SUNPRO_C) +# define KRML_MUSTINLINE inline __attribute__((always_inline)) # else # define KRML_MUSTINLINE inline # warning "The KRML_MUSTINLINE macro defaults to plain inline for this toolchain!" diff --git a/Modules/_hacl/include/krml/lowstar_endianness.h b/Modules/_hacl/include/krml/lowstar_endianness.h index 1aa2ccd644c..af6b882cf25 100644 --- a/Modules/_hacl/include/krml/lowstar_endianness.h +++ b/Modules/_hacl/include/krml/lowstar_endianness.h @@ -1,5 +1,5 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ + Licensed under the Apache 2.0 and MIT Licenses. */ #ifndef __LOWSTAR_ENDIANNESS_H #define __LOWSTAR_ENDIANNESS_H diff --git a/Modules/_hacl/libintvector.h b/Modules/_hacl/libintvector.h index 99d11336942..11e914f7e16 100644 --- a/Modules/_hacl/libintvector.h +++ b/Modules/_hacl/libintvector.h @@ -19,7 +19,7 @@ #define Lib_IntVector_Intrinsics_bit_mask64(x) -((x) & 1) -#if defined(__x86_64__) || defined(_M_X64) +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) #if defined(HACL_CAN_COMPILE_VEC128) diff --git a/Modules/_hacl/refresh.sh b/Modules/_hacl/refresh.sh index 44e18a15f96..6234fea9f17 100755 --- a/Modules/_hacl/refresh.sh +++ b/Modules/_hacl/refresh.sh @@ -22,7 +22,7 @@ fi # Update this when updating to a new version after verifying that the changes # the update brings in are good. -expected_hacl_star_rev=a6a09496d9cff652b567d26f2c3ab012321b632a +expected_hacl_star_rev=315a9e491d2bc347b9dae99e0ea506995ea84d9d hacl_dir="$(realpath "$1")" cd "$(dirname "$0")"