[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

fast hash



Since there's some interest, I'd like to contribute the following code
for comment.  It was an exercise I set myself after timing MD5 on a
64-bit architecture and playing around with optimizing the byte
loading.  First I modified the algorithm to use ALU operations on
64-bits at a time, so the constants became 64 bits long, etc.  Then, I
began removing computation from the MD5Transform a little at a time,
trying to get it to run fast.  Most of it disappeared.  Then I tried
adding in more operations, looking for ways to mix the data in quickly.
The result is something that is only vaguely related to the original
algorithm.  It runs at 400 Mbps on a 175 MHz DEC Alpha if the byte order
is not important.  Half that speed if the bytes must be rearranged
(code for this is not provided in this note).

I've got no idea how well this resists cryptanalysis, and I realize
that answering that question is probably much harder than coming up
with the algorithm.  It has the basic property of each output bit
being statistically sensitive to each input bit.  I present it in the
hope that it might provoke some discussion about design or analysis
methods for a class of algorithms that aspire to sit on the very brink
of insecurity in exchange for acceptable speed.

=======================C Code below this line============================

typedef unsigned long int UINT8;

/* Constants for HKOTransform routine.
 */
#define S11 7
#define S12 12
#define S13 17
#define S14 22
#define S21 5
#define S22 9
#define S23 14
#define S24 20
#define S31 4
#define S32 11
#define S33 16
#define S34 23
#define S41 6
#define S42 10
#define S43 15
#define S44 21

#define U11 29
#define U12 41
#define U13 39
#define U14 61
#define U21 14
#define U22 23
#define U23 34
#define U24 54
#define U31 15
#define U32 26
#define U33 39
#define U34 62
#define U41 16
#define U42 26
#define U43 36
#define U44 57

/* ROTATE_LEFT rotates x left n bits.
 */
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (64-(n))))

/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.

Rotation is separate from addition to prevent recomputation.
 */
#define FF(a, y, z1, z2, z3, z4, b, c, d, x, s, t, u, ac) { \
 (a) ^= y; \
 (a) += F ((b), (c), (d)) + ROTATE_LEFT(x, u) + (UINT8)(ac); \
 (a) = ROTATE_LEFT ((a), (s)) + (b); \
 (d) += (b) ^ (~(z1)) + ~(ROTATE_LEFT ((a), (t))); \
 tmp = (a) ^ (x); \
 (c) -= (d) ^ (~(z2)) ^ ~((z4) + (ROTATE_LEFT (tmp, (t+11)))); \
 tmp = (a) ^ (d); \
 (b) ^= (c) + (~(z3)) + ~(ROTATE_LEFT (tmp, (t+23))); \
  }
#define GG(a, y, z1, z2, z3, z4, b, c, d, x, s, t, u, ac) { \
 (a) ^= y; \
 (a) += G ((b), (c), (d)) + ROTATE_LEFT(x, u) + (UINT8)(ac); \
 (a) = ROTATE_LEFT ((a), (s)) + (b); \
 (d) ^= (b) + ((z1) ^ ((z4) + ~(ROTATE_LEFT ((a), (t))))); \
 tmp = (a) ^ (x); \
 (c) += (d) ^ (z2) ^ ~(ROTATE_LEFT (tmp, (t+11))); \
 tmp = (a) ^ (d); \
 (b) -= (c) ^ (z3) ^ ~(ROTATE_LEFT (tmp, (t+23))); \
  }
#define HH(a, y, z1, z2, z3, z4, b, c, d, x, s, t, u, ac) { \
 (a) ^= y; \
 (a) += H ((b), (c), (d)) + ROTATE_LEFT(x, u) + (UINT8)(ac); \
 (a) = ROTATE_LEFT ((a), (s)) + (b); \
 (d) -= (b) ^ ((~(z1)) + ((z4) ^ ~(ROTATE_LEFT ((a), (t))))); \
 tmp = (a) ^ (x); \
 (c) ^= (d) + (~(z2)) + ~(ROTATE_LEFT (tmp, (t+11))); \
 tmp = (a) ^ (d); \
 (b) += (c) ^ (~(z3)) ^ ~(ROTATE_LEFT (tmp, (t+23))); \
  }
#define II(a, y, z1, z2, z3, z4, b, c, d, x, s, t, u, ac) { \
 (a) ^= y; \
 (a) += I ((b), (c), (d)) +  ROTATE_LEFT(x, u) + (UINT8)(ac); \
 (a) = ROTATE_LEFT ((a), (s)) + (b); \
 (d) ^= (~b) + (z1) + ((z4) ^ ~(ROTATE_LEFT ((a), (t)))); \
 tmp = (a) ^ (x); \
 (c) += (~d) ^ (z2) ^ ~(ROTATE_LEFT (tmp, (t+11))); \
 tmp = (a) ^ (d); \
 (b) -= (~c) ^ (z3) ^ ~(ROTATE_LEFT (tmp, (t+23))); \
  }


static void HKOTransform (state, block)
UINT4 state[4];
unsigned char block[64];
{
  register UINT8 x01, x23, x45, x67, x89, x1011, x1213, x1415;
  register UINT8 a = state[0], b = state[1], c = state[2], d = state[3];
  register UINT8 tmp;

  x01 = *(UINT8 *) ( &block[0]    );
  x23 = *(UINT8 *) (&block[8]);
  x45 = *(UINT8 *) (&block[16]);
  x67 = *(UINT8 *) (&block[24]);
  x89 = *(UINT8 *) (&block[32]);
  x1011 = *(UINT8 *) (&block[40]);
  x1213 = *(UINT8 *) (&block[48]);
  x1415 = *(UINT8 *) (&block[56]);

  /* Round 1 */
  FF (a, x89, x23, x67, x45, x1011, b, c, d, x01, S11, S12, U11, 0xd76aa478e8c7b756UL); /* 1 */

 /* Round 2 */

  GG (b, x45, x01, x23, x1415, x89, c, d, a, x1213, S21, S22, U21, 0xd62f105d2441453UL); /* 21 */

  /* Round 3 */
  HH (c, x1011, x67, x1415, x1213, x01, d, a, b, x23, S33, S34, U32, 0x6d9d6122fde5380cUL); /* 35 */

  /* Round 4 */

  II (d, x67, x45, x89, x1011, x67, a, b, c, x1415, S43, S44, U42, 0xffeff47d85845dd1UL); /* 55 */

  state[0] += (a ^ (b>>32));
  state[1] += (b ^ (c>>32));
  state[2] += (c ^ (d>>32));
  state[3] += (d ^ (a>>32));
}