Snippets Project Page
Author: maroon
Added: 6y
Updated: Never
mIRC: used 6.35 & 7.52
Hits: 1,427
Downloads: 8
Review: entropy
Size: 11.6KB
1 0
Login to vote.
Hash functions: lookup3 & One-at-a-Time
v1.0
Non-Crypto Hash Functions - Bob Jenkins' "lookup3" and 2 ways for "One at a Time" to handle unicode 128+ characters.
Not much to say that isn't in comments in the script.
Download
JSON
▲ Review
▼ Source
; $lookup3() $oat() and $oat2() are hashes designed to avoid collisions and avalanche the message bits across the hash ; If you need a 1-way crypto hash, try using a hash from the SHA2 family, such as $sha256() ; ; Bob Jenkins' non-crypto hashlittle2 by maroon, from http://burtleburtle.net/bob/c/lookup3.c (little means little-endian) ; This hash is designed for languages supporting circular rotate, so is slightly cumbersome in mSL ; Appears intended for long strings or needing hashes with 64 bits, but has slightly better qualities than $oat(). ; $OAT should be faster than $LOOKUP3, but benchmark test shows that in mSL it's actually slower for a 12-byte string ; If speed is important and 32-bit hash is long enough, consider Bob Jenkins' "one at a time" hash in $oat() or $oat2() ; Syntax: $lookup3(text string [[[,bits][h] ,pc] ,pb] ) ; Syntax: $lookup3(&binvar , b[bits][h] [[,pc] ,pb] ) ; $2 is for optional parameters for the hash output ; bits = 1-64 (default=32 when not used) length of hash returned ; b = $1 is a &binary variable not a string beginning with ampersand ; h = output should be hexadecimal not numeric. Output isn't padded with 0's unless enough leftmost bits of hash are 0. ; //bset -t &v 1 abc | echo -a $lookup3(&v,b40) same as $lookup3(abc,40) $base($lookup3(abc,40),10,16) $lookup3(&v,40bh) $lookup3(&v,32bh) ; $3 = *pc and/or $4 = *pb. Both would be 32bit numbers 0-4294967295 to 'key' or 'seed' the hash. ; If you have only 1 value, best to use it as pc in $3 not pb in $4 ; //echo -a $lookup3(abc,64h) $lookup3(abc,h) $lookup3(abc,h,12345678) $lookup3(abc,h,12345678,901234567) ; ; hash lengths above 32 obtained by returning all/part of %b as bits 33-64. ; bits 33-53 returns ((lowest N-32 bits of %b) multiplied by 2^32) plus %c ; mIRC's math is inaccurate above 2^53, so it uses my $bigint_mul() and $bigint_add() snippets to calculate N=54-64 bits. ; alternatively, you can request the hash as hexadecimal, which doesn't require math above 2^32 ; ; Instead of extra code to handle lengths not exact multiple of 12, padded the input with 12 0x00's after obtaining the length. ; Note that initial values are altered by $1's length, preventing additional 0x00's at end of string producing an identical hash alias lookup3 { if (b isin $2) { if (!$bvar($1,0)) return | var %bits $remove($2,b) | bcopy -tc &lookup3_k 1 $1 1 -1 } else { var %bits $2 | bset -tc &lookup3_k 1 $1 } ;a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc; ;c += *pb; (don't need mod2^32 here because next is addition followed by mod2^32) var %len = $bvar(&lookup3_k,0) , %i = 1 , %a = $calc(3735928559 + %len + $3) , %b = %a , %c = %a + 0 $+ $4 bset &lookup3_k $calc(1+%len) 0 0 0 0 0 0 0 0 0 0 0 0 while (%len > 12) { ; a += k[i]; b+=k[i+1]; c+=k[i+2]; (tbd: might not need all mod2^32's) (for bigendian hash, change .long's to .nlong's) var %a = %a + $bvar(&lookup3_k,%i).long | var %a = %a % 4294967296 | inc %i 4 var %b = %b + $bvar(&lookup3_k,%i).long | var %b = %b % 4294967296 | inc %i 4 var %c = %c + $bvar(&lookup3_k,%i).long | var %c = %c % 4294967296 | inc %i 4 ;mix ;a -= c; a ^= rot(c, 4); c += b; var %a = %a - %c | if (%a < 0) inc %a 4294967296 | var %t $calc((%c % 268435456) * 16) + $int($calc(%c / 268435456)) | var %a = $xor(%a,%t) | var %c = %c + %b | var %c = %c % 4294967296 ;b -= a; b ^= rot(a, 6); a += c; var %b = %b - %a | if (%b < 0) inc %b 4294967296 | var %t $calc((%a % 67108864) * 64) + $int($calc(%a / 67108864)) | var %b = $xor(%b,%t) | var %a = %a + %c | var %a = %a % 4294967296 ;c -= b; c ^= rot(b, 8); b += a; var %c = %c - %b | if (%c < 0) inc %c 4294967296 | var %t $calc((%b % 16777216) * 256) + $int($calc(%b / 16777216)) | var %c = $xor(%c,%t) | var %b = %b + %a | var %b = %b % 4294967296 ;a -= c; a ^= rot(c,16); c += b; var %a = %a - %c | if (%a < 0) inc %a 4294967296 | var %t $calc((%c % 65536) * 65536) + $int($calc(%c / 65536)) | var %a = $xor(%a,%t) | var %c = %c + %b | var %c = %c % 4294967296 ;b -= a; b ^= rot(a,19); a += c; var %b = %b - %a | if (%b < 0) inc %b 4294967296 | var %t $calc((%a % 8192) * 524288) + $int($calc(%a / 8192)) | var %b = $xor(%b,%t) | var %a = %a + %c | var %a = %a % 4294967296 ;c -= b; c ^= rot(b, 4); b += a; var %c = %c - %b | if (%c < 0) inc %c 4294967296 | var %t $calc((%b % 268435456) * 16) + $int($calc(%b / 268435456)) | var %c = $xor(%c,%t) | var %b = %b + %a | var %b = %b % 4294967296 dec %len 12 } ; a += k[i]; b+=k[i+1]; c+=k[i+2]; var %a = %a + $bvar(&lookup3_k,%i).long | var %a = %a % 4294967296 | inc %i 4 var %b = %b + $bvar(&lookup3_k,%i).long | var %b = %b % 4294967296 | inc %i 4 var %c = %c + $bvar(&lookup3_k,%i).long | var %c = %c % 4294967296 ;final ;c ^= b; c -= rot(b,14); var %c $xor(%c,%b) | var %t $calc((%b % 262144) * 16384) + $int($calc(%b / 262144)) | var %c = %c - %t | if (%c < 0) inc %c 4294967296 ;a ^= c; a -= rot(c,11); var %a $xor(%a,%c) | var %t $calc((%c % 2097152) * 2048) + $int($calc(%c / 2097152)) | var %a = %a - %t | if (%a < 0) inc %a 4294967296 ;b ^= a; b -= rot(a,25); var %b $xor(%b,%a) | var %t $calc((%a % 128) * 33554432) + $int($calc(%a / 128)) | var %b = %b - %t | if (%b < 0) inc %b 4294967296 ;c ^= b; c -= rot(b,16); var %c $xor(%c,%b) | var %t $calc((%b % 65536) * 65536) + $int($calc(%b / 65536)) | var %c = %c - %t | if (%c < 0) inc %c 4294967296 ;a ^= c; a -= rot(c,4); var %a $xor(%a,%c) | var %t $calc((%c % 268435456) * 16) + $int($calc(%c / 268435456)) | var %a = %a - %t | if (%a < 0) inc %a 4294967296 ;b ^= a; b -= rot(a,14); var %b $xor(%b,%a) | var %t $calc((%a % 262144) * 16384) + $int($calc(%a / 262144)) | var %b = %b - %t | if (%b < 0) inc %b 4294967296 ;c ^= b; c -= rot(b,24); var %c $xor(%c,%b) | var %t $calc((%b % 256) * 16777216) + $int($calc(%b / 256)) | var %c = %c - %t | if (%c < 0) inc %c 4294967296 if (h isin %bits) { var %bits $remove(%bits,h) | if ((!%bits) || (%bits == 32)) return $base(%c,10,16,8) var %p $ceil($calc(%bits /4)) if (%bits isnum 1-32) return $base($and(%c, $calc(2^%bits -1)),10,16,%p) dec %p 8 | dec %bits 32 if (%bits isnum 1-32) return $base($and(%b, $calc(2^%bits -1)),10,16,%p) $+ $base(%c,10,16,8) } else { if ((!%bits) || (%bits == 32)) return %c if (%bits isnum 1-32) return $and(%c, $calc(2^%bits -1)) var %i = $and(%b, $calc(2^ (%bits -32) -1)) if (%bits isnum 33-53) { return $calc(%i * 4294967296 + %c) } if (%bits isnum 54-64) { return $bigint_add($bigint_mul(%i,4294967296),%c) } } } alias benchlookup3long { var %bench $ticks | var %x 1 | var %a $str(a,100) , %j 1000 | while (%j) { noop $lookup3(%a) | dec %j } | echo -ag time $calc($ticks - %bench) $+ ms } alias bench_oat_long { var %bench $ticks | var %x 1 | var %a $str(a,100) , %j 1000 | while (%j) { noop $oat(%a) | dec %j } | echo -ag time $calc($ticks - %bench) $+ ms } alias benchlookup3short { var %bench $ticks | var %x 1 | var %j 1000 | while (%j) { noop $lookup3(abcabcabcabc) | dec %j } | echo -ag time $calc($ticks - %bench) $+ ms } alias bench_oat_short { var %bench $ticks | var %x 1 | var %j 1000 | while (%j) { noop $oat(abcabcabcabc) | dec %j } | echo -ag time $calc($ticks - %bench) $+ ms } ; Bob Jenkins' non-crypto "One at a time" hash by maroon, as described at https://en.wikipedia.org/wiki/Jenkins_hash_function ; It hashes a string of arbitrary length to a 1-32 bit hash. ; One usage includes reducing memory by storing hash of entries for a dictionary that's too bulky to keep in memory. ; If $oat(string) isn't in the array of hash values, then string isn't in disk dictionary. ; (Though obviously the presence of $oat(string) in the array isn't a guarantee that the string *is* in the ; disk dictionary, but it's a way to filter strings to reduce disk accesses.) ; Syntax: $oat(text string [[,bits],initial_val]) ; Syntax: $oat(&binvar ,[bits] b [,initial_val]) Requires enough free memory to make a clone of the &binvar ; $1 is the string being hashed, ; $2 is for optional parameters for the hash output ; bits = 1-32 (default=32 when not used) length of hash returned ; b = $1 is a &binary variable not a string beginning with ampersand ; $3 is optional initial value for hash constant, for 'keying' or 'seeding' the hash ; ; $oat2(text string [[,bits],initial_val]) ; Similar to $oat() except does not accept binary strings, because it uses unicode characters above 127 as ; numbers in the rang 128-65535 instead of multiple bytes. ; //bset -t &v 1 $chr(10004) | echo -a $bvar(&v,1-) vs $asc($chr(10004)) ; Otherwise, $oat2() returns identical hashes to $oat() for 7-bit strings ; (mIRC 6.35 doesn't recognize codepoints 256+, and doesn't handle codepoints 128-255 the same without changing 'bset -t' into 'bset -ta') ; ; Hashes stored as decimal use up to 10 digits. As hex, hash can be stored as 8 digits max, i.e. $base(%h,10,16,8). ; As base36, can shorten to 7 digits, or 6 digits if limited to 31 bits (or 12 digits for 62 bits) ; //bset -t &v 1 abc | echo -a $oat(abc) $oat(&v,b) $oat(&v) $base($oat(abc,32),10,16,8) $base($oat(&v,b31),10,36) ; As Mime-64, if a 24-bit hash is enough, can store 24 bits as 4 encoded text characters, but the decimal value ; would need to be translated to a binary format which allows it to be stored in a binary variable as 4 bytes. ; //bset &a 1 $regsubex($base(4123456789,10,16,8),/(\S)(\S)/g,$base(\1\2,16,10) $chr(32)) | echo -a $bvar(&a,1-) | noop $encode(&a,bm) | echo -a $left($bvar(&a,1-).text,4) ; You can determine how many digits your hash takes when translated to BASE-36 by changing the first number of ; //echo -a $calc( 62 * $log(2) / $log(36) ) ... then round up any decimals ; Because a string of 1-or-more 0x00's always has a hash of zero, if that is of concern to you, ; either use a $3 'seed' value or alter the hash to initialize '%h = %len' instead of '%h = $3'. ; (extra 0x00's do affect the hash when the string is not entirely 0x00's) alias oat { if (b isin $2) { if (!$bvar($1,0)) return | var %bits $remove($2,b) | bcopy -tc &oat_key 1 $1 1 -1 } else { var %bits $2 | bset -tc &oat_key 1 $1 } var %len = $bvar(&oat_key,0) , %i = 0 , %h = $3 while (%i < %len) { !inc %i var %h = $calc(1025 * (%h + $bvar(&oat_key,%i)) % 4294967296) var %h = $xor(%h, $calc(%h / 64)) } var %h = $calc((%h * 9) % 4294967296) var %h = $calc(($xor(%h, $calc(%h / 2048)) * 32769) % 4294967296) if (%bits !isnum 1-31) return %h | else return $and(%h, $calc(2^ %bits - 1)) } alias oat2 { var %len = $len($1) , %i = 0 , %h = $3 while (%i < %len) { !inc %i var %h = $calc(1025 * (%h + $asc($mid($1,%i,1))) % 4294967296) var %h = $xor(%h, $calc(%h / 64)) } var %h = $calc((%h * 9) % 4294967296) var %h = $calc(($xor(%h, $calc(%h / 2048)) * 32769) % 4294967296) if ($2 !isnum 1-31) return %h | else return $and(%h, $calc(2^ $2 - 1)) }
Changelog:
0
0