libbb/yescrypt: more efficient byteswapping

function                                             old     new   delta
static.smix1                                         631     604     -27
static.smix2                                         452     420     -32
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-59)             Total: -59 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
author: Denys Vlasenko <vda.linux@googlemail.com> 2025-07-06 18:27:16 +0200
committer: Denys Vlasenko <vda.linux@googlemail.com> 2025-07-06 18:27:16 +0200
commit: b8f76001662ad4b3073945589a0cb270b627c994 (patch)
tree: 2e2c11206cd48e9b56b760846e0f259c965e9fc1 /libbb/yescrypt
parent: 9c4cd75d12894cd9139d549dfe445f18572ee7d1 (diff)
3 files changed, 8 insertions, 132 deletions
diff --git a/libbb/yescrypt/alg-sha256.c b/libbb/yescrypt/alg-sha256.c
index a17028b6b..28d8c5296 100644
--- a/libbb/yescrypt/alg-sha256.c
+++ b/libbb/yescrypt/alg-sha256.c
@@ -114,16 +114,12 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen,
 {
 	HMAC_SHA256_CTX Phctx, PShctx, hctx;
 	size_t i;
-	uint8_t ivec[4];
 	uint8_t U[32];
 	uint8_t T[32];
 	uint64_t j;
 	int k;
 	size_t clen;
 
-	/* Sanity-check. */
-	assert(dkLen <= 32 * (size_t)(UINT32_MAX));
-
 	/* Compute HMAC state after processing P. */
 	HMAC_SHA256_Init(&Phctx, passwd, passwdlen);
 
@@ -133,12 +129,14 @@ PBKDF2_SHA256(const uint8_t *passwd, size_t passwdlen,
 
 	/* Iterate through the blocks. */
 	for (i = 0; i * 32 < dkLen; i++) {
+		uint32_t ivec;
+
 		/* Generate INT(i + 1). */
-		be32enc(ivec, (uint32_t)(i + 1));
+		ivec = SWAP_BE32((uint32_t)(i + 1));
 
 		/* Compute U_1 = PRF(P, S || INT(i)). */
 		memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX));
-		HMAC_SHA256_Update(&hctx, ivec, 4);
+		HMAC_SHA256_Update(&hctx, &ivec, 4);
 		HMAC_SHA256_Final(&hctx, T);
 
 		if (c > 1) {
diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c
index 5c1f1006a..ee8fb408e 100644
--- a/libbb/yescrypt/alg-yescrypt-kdf.c
+++ b/libbb/yescrypt/alg-yescrypt-kdf.c
@@ -501,7 +501,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N, yescrypt_flags_t flags,
 		salsa20_blk_t *dst = &X[i];
 		size_t k;
 		for (k = 0; k < 16; k++)
-			tmp->w[k] = le32dec((const uint8_t *) &src->w[k]);
+			tmp->w[k] = SWAP_LE32(src->w[k]);
 		salsa20_simd_shuffle(tmp, dst);
 	}
 
@@ -591,7 +591,7 @@ static void smix1(uint8_t *B, size_t r, uint32_t N, yescrypt_flags_t flags,
 		salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
 		size_t k;
 		for (k = 0; k < 16; k++)
-			le32enc((uint8_t *)&tmp->w[k], src->w[k]);
+			tmp->w[k] = SWAP_LE32(src->w[k]);
 		salsa20_simd_unshuffle(tmp, dst);
 	}
 }
@@ -621,7 +621,7 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint64_t Nloop,
 		salsa20_blk_t *dst = &X[i];
 		size_t k;
 		for (k = 0; k < 16; k++)
-			tmp->w[k] = le32dec((const uint8_t *)&src->w[k]);
+			tmp->w[k] = SWAP_LE32(src->w[k]);
 		salsa20_simd_shuffle(tmp, dst);
 	}
 
@@ -676,7 +676,7 @@ static void smix2(uint8_t *B, size_t r, uint32_t N, uint64_t Nloop,
 		salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64];
 		size_t k;
 		for (k = 0; k < 16; k++)
-			le32enc((uint8_t *)&tmp->w[k], src->w[k]);
+			tmp->w[k] = SWAP_LE32(src->w[k]);
 		salsa20_simd_unshuffle(tmp, dst);
 	}
 }
diff --git a/libbb/yescrypt/y.c b/libbb/yescrypt/y.c
index 2c6afd4f8..92c6eb7a8 100644
--- a/libbb/yescrypt/y.c
+++ b/libbb/yescrypt/y.c
@@ -2,128 +2,6 @@
 
 #include <libbb.h>
 
-#include <assert.h>
-
-static inline void
-cpu_to_le32 (unsigned char *buf, uint32_t n)
-{
-  buf[0] = (unsigned char)((n & 0x000000FFu) >>  0);
-  buf[1] = (unsigned char)((n & 0x0000FF00u) >>  8);
-  buf[2] = (unsigned char)((n & 0x00FF0000u) >> 16);
-  buf[3] = (unsigned char)((n & 0xFF000000u) >> 24);
-}
-static inline uint32_t
-le32_to_cpu (const unsigned char *buf)
-{
-  return ((((uint32_t)buf[0]) <<  0) |
-          (((uint32_t)buf[1]) <<  8) |
-          (((uint32_t)buf[2]) << 16) |
-          (((uint32_t)buf[3]) << 24) );
-}
-
-/* Alternative names used in code derived from Colin Percival's
-   cryptography libraries.  */
-#define le32enc cpu_to_le32
-#define le32dec le32_to_cpu
-#define le64enc cpu_to_le64
-#define le64dec le64_to_cpu
-
-#define be32enc cpu_to_be32
-#define be32dec be32_to_cpu
-#define be64enc cpu_to_be64
-#define be64dec be64_to_cpu
-
-#define be32enc_vect cpu_to_be32_vect
-#define be32dec_vect be32_to_cpu_vect
-#define be64enc_vect cpu_to_be64_vect
-#define be64dec_vect be64_to_cpu_vect
-
-
-//USED ONY BY SHA256 for be32_to_cpu_vect():
-static inline void
-cpu_to_be32(unsigned char *buf, uint32_t n)
-{
-  buf[0] = (unsigned char)((n & 0xFF000000u) >> 24);
-  buf[1] = (unsigned char)((n & 0x00FF0000u) >> 16);
-  buf[2] = (unsigned char)((n & 0x0000FF00u) >>  8);
-  buf[3] = (unsigned char)((n & 0x000000FFu) >>  0);
-}
-static inline void
-cpu_to_be64 (unsigned char *buf, uint64_t n)
-{
-  buf[0] = (unsigned char)((n & 0xFF00000000000000ull) >> 56);
-  buf[1] = (unsigned char)((n & 0x00FF000000000000ull) >> 48);
-  buf[2] = (unsigned char)((n & 0x0000FF0000000000ull) >> 40);
-  buf[3] = (unsigned char)((n & 0x000000FF00000000ull) >> 32);
-  buf[4] = (unsigned char)((n & 0x00000000FF000000ull) >> 24);
-  buf[5] = (unsigned char)((n & 0x0000000000FF0000ull) >> 16);
-  buf[6] = (unsigned char)((n & 0x000000000000FF00ull) >>  8);
-  buf[7] = (unsigned char)((n & 0x00000000000000FFull) >>  0);
-}
-static inline uint32_t
-be32_to_cpu (const unsigned char *buf)
-{
-  return ((((uint32_t)buf[0]) << 24) |
-          (((uint32_t)buf[1]) << 16) |
-          (((uint32_t)buf[2]) <<  8) |
-          (((uint32_t)buf[3]) <<  0) );
-}
-static inline uint64_t
-be64_to_cpu (const unsigned char *buf)
-{
-  return ((((uint64_t)buf[0]) << 56) |
-          (((uint64_t)buf[1]) << 48) |
-          (((uint64_t)buf[2]) << 40) |
-          (((uint64_t)buf[3]) << 32) |
-          (((uint64_t)buf[4]) << 24) |
-          (((uint64_t)buf[5]) << 16) |
-          (((uint64_t)buf[6]) <<  8) |
-          (((uint64_t)buf[7]) <<  0) );
-}
-/* Template: Define a function named cpu_to_<END><BITS>_vect that
-   takes a vector SRC of LEN integers, each of type uint<BITS>_t, and
-   writes them to the buffer DST in the endianness defined by END.
-   Caution: LEN is the number of vector elements, not the total size
-   of the buffers.  */
-#define VECTOR_CPU_TO(end, bits) VECTOR_CPU_TO_(end##bits, uint##bits##_t)
-#define VECTOR_CPU_TO_(prim, stype)                                     \
-  static inline void                                                    \
-  cpu_to_##prim##_vect(uint8_t *dst, const stype *src, size_t len)      \
-  {                                                                     \
-    while (len)                                                         \
-      {                                                                 \
-        cpu_to_##prim(dst, *src);                                       \
-        src += 1;                                                       \
-        dst += sizeof(stype);                                           \
-        len -= 1;                                                       \
-      }                                                                 \
-  } struct _swallow_semicolon
-/* Template: Define a function named <END><BITS>_to_cpu_vect that
-   reads a vector of LEN integers, each of type uint<BITS>_t, from the
-   buffer SRC, in the endianness defined by END, and writes them to
-   the vector DST.  Caution: LEN is the number of vector elements, not
-   the total size of the buffers.  */
-#define VECTOR_TO_CPU(end, bits) VECTOR_TO_CPU_(end##bits, uint##bits##_t)
-#define VECTOR_TO_CPU_(prim, dtype)                                     \
-  static inline void                                                    \
-  prim##_to_cpu_vect(dtype *dst, const uint8_t *src, size_t len)        \
-  {                                                                     \
-    while (len)                                                         \
-      {                                                                 \
-        *dst = prim##_to_cpu(src);                                      \
-        src += sizeof(dtype);                                           \
-        dst += 1;                                                       \
-        len -= 1;                                                       \
-      }                                                                 \
-  } struct _swallow_semicolon
-/* These are the vectorized endianness-conversion functions that are
-   presently used.  Add more as necessary.  */
-VECTOR_CPU_TO(be,32);
-VECTOR_CPU_TO(be,64);
-VECTOR_TO_CPU(be,32);
-VECTOR_TO_CPU(be,64);
-
-
 #define YESCRYPT_INTERNAL
 #include "alg-sha256.h"
 #include "alg-yescrypt.h"
author	Denys Vlasenko <vda.linux@googlemail.com>	2025-07-06 18:27:16 +0200
committer	Denys Vlasenko <vda.linux@googlemail.com>	2025-07-06 18:27:16 +0200
commit	b8f76001662ad4b3073945589a0cb270b627c994 (patch)
tree	2e2c11206cd48e9b56b760846e0f259c965e9fc1 /libbb/yescrypt
parent	9c4cd75d12894cd9139d549dfe445f18572ee7d1 (diff)