-/* ------------------------------------------------
-Copyright 2014 AT&T Intellectual Property
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ------------------------------------------- */
-
-#ifndef _STRINGHASH_H
-#define _STRINGHASH_H
-#include <stdlib.h>
-#include <stdio.h>
-#include <limits.h>
-#include <stdint.h>
-#include <string.h>
-
-//Provides 5-universal hash functions.
-//Both a direct polynomial based one,
-//and the much faster tabulation based one.
-//
-//It also provides 2-universal string hashing.
-//For most applicatoins, introdcing a slight error,
-//if you want almost 5-uninversal hashing for strings,
-//you can do a 2-universal hashing into 32-bit integer
-//getting very few collissoins, and then use the above
-//5-universal one.
-
-
-
-typedef gs_uint8_t INT8;
-typedef gs_uint16_t INT16;
-typedef gs_uint32_t INT32;
-typedef gs_uint64_t INT64;
-
-typedef INT64 * Hash61bit5univID;
-typedef INT32** HashTab32bit5univID;
-typedef INT32** HashTab32bitLPID;
-typedef INT64** HashTab64bitLPID;
-typedef INT64 * Hash32bit2univID;
-typedef INT32 Hash32bitUnivID;
-
-const INT32 MAX_INT32 = UINT_MAX;
-
-typedef struct {
- INT64 str_compr_hid[16];
- INT64 string_hid;
- INT64 length_factor;
- // INT64 last_compr_hid;
-} VISHID;
-
-typedef VISHID * VarINT32StringHash32bit2univID;
-
-
-typedef struct {
- INT32 int_str[16];
- INT64 str_compr_hid[16];
- INT64 string_hid;
- INT64 length_factor;
- // INT64 last_compr_hid;
-} VSHID;
-
-typedef VSHID * VarStringHash32bit2univID;
-
-
-
-
-
-// different views of a 64-bit double word
-typedef union {
- INT64 as_int64;
- INT32 as_int32s[2];
- INT16 as_int16s[4];
-} int64views;
-
-const INT64 LowOnes = (((INT64)1)<<32)-1;
-
-#define LOW(x) ((x)&LowOnes) // extract lower 32 bits from INT64
-#define HIGH(x) ((x)>>32) // extract higher 32 bits from INT64
-
-const INT64 Prime61 = (((INT64)1)<<61) - 1;
-
-
-//Random numbers from random.org based on atmostpheric noise.
-INT64 Random64[200]= {0x285eb7722a62ce6eull,
- 0xa84c7463e2b7856bull,
- 0xb29100d6abcc8666ull,
- 0xf9bfca5b7461fb1full,
- 0x51c8dafc30c88dadull,
- 0x0687468c365ec51dull,
- 0x2bf2cd3ad64b6218ull,
- 0xc20565a4d1f00f9eull,
- 0x7d533575d313c658ull,
- 0xbf2fba6b00725b85ull,
- 0x4cfc2f6557e722beull,
- 0xedbce818556dfb2bull,
- 0xd9df508027db1bbeull,
- 0x21f2d922f712f48bull,
- 0xcd8b289d83a65804ull,
- 0x4a19cfb02445a6d2ull,
- 0xc95e56b1e19a4f94ull,
- 0xcfeeeaccaf477248ull,
- 0x4eec3378b73bb25cull,
- 0x18a3f38d1c48b2beull,
- 0x71b79ab5cb1e3730ull,
- 0x79cdb30e2f38309dull,
- 0xb41d4983bdbc8d6full,
- 0xba9f57c01b66b7e3ull,
- 0xe400503c95c16568ull,
- 0x5977bfd4630294f1ull,
- 0x57d4d7940099676full,
- 0xd945de9268f4b191ull,
- 0x4034711421eaf806ull,
- 0x6d8108a4a6d58c22ull,
- 0x5c421818ddbdd4eeull,
- 0xbd9b7e4071713c13ull,
- 0xa60d1d6e793e5eb2ull,
- 0x7443fb031b8ec6c7ull,
- 0xd8290c7120e05d4aull,
- 0x797fb1d9a6a8d27full,
- 0x543ec268ab1f2e45ull,
- 0xcaf2a6139701f320ull,
- 0x9519611d130bee47ull,
- 0x19bbc533f018be1aull,
- 0xdbfacdfeb573133dull,
- 0x3255dacc4c7bfe12ull,
- 0xbc6c9228e5518f6eull,
- 0xb5c1681037347178ull,
- 0xbaaa2cfef186bfadull,
- 0x1834b8ab0f9e876eull,
- 0x9d7b7f228433e0f7ull,
- 0xa99cc292d003dd09ull,
- 0xc0cb8037046b5295ull,
- 0xa6ffa3d4671aa3d2ull,
- 0xc27023fbee2862e6ull,
- 0x5a9877bcc4bd3172ull,
- 0xfcb0da3caf9fcfe0ull,
- 0xc35ef57e1866ceaeull,
- 0xd4f7c927d169a115ull,
- 0x699054518fc74756ull,
- 0xa75cbf617fc9db8dull,
- 0x7f3adf4369665a9cull,
- 0x6b98eeeb4c517f42ull,
- 0xa12e44f5de954f24ull,
- 0x5789ded4dced978eull,
- 0xe4dd20ed27cd3567ull,
- 0x9b4e90c365b8790bull,
- 0xd486ed6e099f499bull,
- 0x3f3d0ccfeaa0c0dcull,
- 0x548c746cdb192adaull,
- 0x8ce636d8469fe2dcull,
- 0xca59ec929549a6a1ull,
- 0x647b9878deaba1f0ull,
- 0xebbb4b2641c54d34ull,
- 0x7be6d2918b680abdull,
- 0x02ad265fb4733490ull,
- 0xfe1053044faf3486ull,
- 0x539ea358ff6b6df3ull,
- 0x025d73224a2b5826ull,
- 0x7daad302451f41b3ull,
- 0x6038455ddb535976ull,
- 0x8d6d00a9a728a067ull,
- 0xe9f03d61d4965d59ull,
- 0x38314b8102daff3bull,
- 0x56b335e7893a76f1ull,
- 0x1048ca2f415712abull,
- 0xa9bc989a891dc173ull,
- 0xb741df3ae02836c2ull,
- 0x7711e6c6f5830783ull,
- 0x8edbf2be9226e24bull,
- 0xe4a4b8ba310fc2e2ull,
- 0xbc7b67f4a02f23c8ull,
- 0x5669b1a9d6d8df17ull,
- 0xdd3ebf2e3c516e26ull,
- 0x77bdd6def5236c4full,
- 0x9aeb54bdffacd65eull,
- 0xab676483404a21b8ull,
- 0xf7270f77a9d1b3a3ull,
- 0x3794e1cdcc7de433ull,
- 0x8e2b74d3a06aa56aull,
- 0x572698d05b901d40ull,
- 0x7bd6c265c1dd5cdfull,
- 0xd2f68a53970db82eull,
- 0x0e1d5f5dd9bd23bdull,
- 0x48814c6813505051ull,
- 0x8f2d21a7d4e4a481ull,
- 0x144531c871920cf3ull,
- 0x45c6f81c7fbc3b2full,
- 0xc562f1fc7f07a944ull,
- 0xaabdf3d0aa9872e4ull,
- 0x8db1f9d827c8df98ull,
- 0xea787210e13c16c7ull,
- 0xd43f6f582629ff39ull,
- 0x6ec7599da4cb298dull,
- 0xfa99d7196097dd94ull,
- 0xbe3a8a172a62f40dull,
- 0x07477bc03d9d5471ull,
- 0x03777d1ee44c0fa6ull,
- 0x5c6df847b0ae6fd1ull,
- 0xc1fc3bc2352d8125ull,
- 0x6800e3321d35b697ull,
- 0x51cc735e1b0920a8ull,
- 0x93dc60a2430c11acull,
- 0x13b6bf8ffc4e9e30ull,
- 0xada08114e6a01701ull,
- 0x1459b7a4254da4cdull,
- 0xf7575cd7ededcdfdull,
- 0x1e43675ed5ed33eeull,
- 0x639f5ff579cc30d4ull,
- 0x8f4f75d7eea7e300ull,
- 0x518939accd43dadeull,
- 0x989a77577fac24e2ull,
- 0x86c5e3998c819d51ull,
- 0xb84770f9cc15d139ull,
- 0x11544010174c2c99ull,
- 0xfb238b962405a579ull,
- 0xca5bddde9b80cb7bull,
- 0xbbe71928190dfab4ull,
- 0x0ec620294742b8c8ull,
- 0x90bc7a703ed63900ull,
- 0x2c8a80e1e85f72bbull,
- 0xf19ab262ed2ad914ull,
- 0xa233e89fab33793cull,
- 0x75b6f59568a958f5ull,
- 0x31fb15af9a41aca2ull,
- 0xf89db05aa21b3d1cull,
- 0x0023d08c52147f63ull,
- 0x8e0e3c2aba81e8fdull,
- 0xdb2efd057c157e71ull,
- 0x1a1797a83e39dfe4ull,
- 0x3afc43e1979f507dull,
- 0x0e647a8d8abe0936ull,
- 0xc8e03f27082eb0daull,
- 0xc1b0c11db52da528ull,
- 0xadeb52a144f497ceull,
- 0x046fc2f53bd97c9bull,
- 0x2d587fdf5fae80d5ull,
- 0x14036c3fb77fd73eull,
- 0x883f768734d14676ull,
- 0x212df988a83caffeull,
- 0x36b41dac32387e17ull,
- 0x9dbee8d7cf804bccull,
- 0x34d9a58fb9a35794ull,
- 0xda82beba879e3576ull,
- 0x0366148280e5adf1ull,
- 0x634085dcdba7b174ull,
- 0x8451523252446534ull,
- 0xc74ce8b4d5175b9aull,
- 0x40afebd30a9a4837ull,
- 0x7232952ce84e90beull,
- 0x7443a6e37b87992dull,
- 0xace8a33649218d9aull,
- 0x9ad8af9614b75655ull,
- 0xd518d9a700179ac2ull,
- 0xc7f906d90fd36259ull,
- 0xe2c47ec3abd8a12dull,
- 0xca05e96fcbbb76c1ull,
- 0x7186661c9ddf4973ull,
- 0x0146f5f47d5f42c2ull,
- 0x74a7c485608ea178ull,
- 0x8cb5e3e5f84f3040ull,
- 0xb330869dc366037dull,
- 0x0dbc4d22f932bbd4ull,
- 0xa8460b43946a5ecbull,
- 0xcee72c72fe5ddea6ull,
- 0x9f953cc7859b2a4eull,
- 0x95ea396619e60182ull,
- 0xfe9890efa6aa8c3eull,
- 0x3c9a5691d1e25799ull,
- 0xa54eeb19aa718b62ull,
- 0x69907b2ec11d82b7ull,
- 0x408c5405984caa65ull,
- 0x6f11a21711640470ull,
- 0x20b9227a8f53f9aeull,
- 0x42df63b7ec442178ull,
- 0xbec247f79407d00aull,
- 0x2946c31558eab2bcull,
- 0x84d045ffa174048eull,
- 0x28bad532ba4a450cull,
- 0x93ccf0cbaa274d7aull,
- 0xff30008411d25158ull,
- 0x3845ce6dcc30080aull,
- 0xcf5925239b6f3dbaull,
- 0x1f148649ed53660dull};
-
-
-
-
-
-
-gs_int32_t nextrandom = 0;
-gs_int32_t endrandom = 100;
-
-
-// The first "endrandom" numbers returned are truely random INT64.
-// They are perfect for bases of new hash functions, and currently
-// we have just enough such random numbers for tabulation based
-// 5-universal hashing, or one of the other schemes provided.
-// The remaining ones are based on the pseudo-random mrand48() which
-// returns a signed 32-bit number. We do not use the unsigned lrand48()
-// which only returns 31-bit.
-inline static INT64 RandINT64() {
- INT64 r,r1;
- if (nextrandom<endrandom) r=Random64[nextrandom++];
- else {
- if (nextrandom==endrandom)
- fprintf(stderr,"Switching from random to pseudo-random in Rand64");
- r =(INT64) mrand48();
- r1 = (INT64) mrand48();
- r=r<<32;
- r=r^r1;
- }
- return r;
-}
-
-
-/* Computes ax+b mod Prime, possibly plus Prime,
- exploiting the structure of Prime. */
-inline static INT64 MultAddPrime(INT32 x, INT64 a, INT64 b) {
- INT64 a0,a1,c0,c1,c;
- a0 = LOW(a)*x;
- a1 = HIGH(a)*x;
- c0 = a0+(a1<<32);
- c1 = (a0>>32)+a1;
- c = (c0&Prime61)+(c1>>29)+b;
- return c;
-} // 12 instructions
-
-/* CWtrick for 32-bit key x with prime 2^61-1 */
-inline static INT64 Hash61bit5univ(INT32 x, Hash61bit5univID index) {
- INT64 h;
- gs_int32_t i;
-
- h = index[0];
- for (i=1;i<5;i++) h = MultAddPrime(x,h,index[i]);
- h = (h&Prime61)+(h>>61);
- if (h>=Prime61) h-=Prime61;
- return h;
-}
-
-
-
-inline static INT64* InitHash61bit5univ() {
- gs_int32_t i;
-
- Hash61bit5univID hid = (INT64*) malloc(5*sizeof(INT64));
- for (i=0;i<5;i++) {
- hid[i]=RandINT64();
- hid[i] = (hid[i]&Prime61)+(hid[i]>>61);
- if (hid[i]>=Prime61) hid[i]-=Prime61;
- }
- return hid;
-}
-
-inline static HashTab32bit5univID InitHashTab32bit5univ() {
- gs_uint32_t i,j;
- HashTab32bit5univID htab;
- Hash61bit5univID h61id;
-
-
- htab = (HashTab32bit5univID) malloc(3*sizeof(INT32*));
- for (i=0;i<3;i++) {
- htab[i] = (INT32*) malloc(65538*sizeof(INT32));
- h61id = InitHash61bit5univ();
- for (j=0;j<65538;j++) htab[i][j]=Hash61bit5univ(j,h61id);
- }
- return htab;
-}
-
-/* tabulation based hashing for 32-bit key x using 16-bit characters.*/
-inline INT64 HashTab32bit5univ(INT32 x, HashTab32bit5univID htab) {
- INT32 x0, x1, x2;
- x0 = x&65535;
- x1 = x>>16;
- x2 = x0 + x1;
- x2 = 2 - (x2>>16) + (x2&65535); // optional compression
- return htab[0][x0]^htab[1][x1]^htab[2][x2];
-} // 8 + 4 = 12 instructions
-
-
-/* Tabulation based hashing for 32-bit keys using 16-bit characters.*/
-/* Uses 500KB tables and makes 2 lookups per hash. */
-
-inline static HashTab32bitLPID InitHashShortTab32bitLP() {
- gs_uint32_t i,j;
- HashTab32bitLPID htab;
- Hash61bit5univID h61id;
-
-
- htab = (HashTab32bitLPID) malloc(2*sizeof(INT32*));
- for (i=0;i<2;i++) {
- htab[i] = (INT32*) malloc(65536*sizeof(INT32));
- h61id = InitHash61bit5univ();
- for (j=0;j<65536;j++) htab[i][j]=Hash61bit5univ(j,h61id);
- }
- return htab;
-}
-
-/* tabulation based hashing for 32-bit key x using 16-bit characters.*/
-inline INT64 HashShortTab32bitLP(INT32 x, HashTab32bitLPID htab) {
- INT32 x0, x1;
- x0 = x&65535;
- x1 = x>>16;
- return htab[0][x0]^htab[1][x1];
-}
-
-
-/* Tabulation based hashing for 32-bit keys using 8-bit characters.*/
-/* Uses 4KB tables and makes 4 lookups per hash. */
-
-inline static HashTab32bitLPID InitHashCharTab32bitLP() {
- gs_uint32_t i,j;
- HashTab32bitLPID htab;
- Hash61bit5univID h61id;
-
-
- htab = (HashTab32bitLPID) malloc(4*sizeof(INT32*));
- for (i=0;i<4;i++) {
- htab[i] = (INT32*) malloc(256*sizeof(INT32));
- h61id = InitHash61bit5univ();
- for (j=0;j<256;j++) htab[i][j]=Hash61bit5univ(j,h61id);
- }
- return htab;
-}
-
-inline INT32 HashCharTab32bitLP(INT32 x, HashTab32bitLPID htab) {
- INT8 c;
- INT32 h;
- gs_int32_t i;
-
- c=x;
- h=htab[0][c];
- for (i=1;i<4;i++) {
- x>>=8;
- c=x;
- h=h^htab[i][c];
- }
- return h;
-}
-
-
-/* Tabulation based hashing for 64-bit keys using 16-bit characters.*/
-/* Uses 2MB tables and makes 4 lookups per hash. */
-
-
-inline static HashTab64bitLPID InitHashShortTab64bitLP() {
- gs_int32_t i,j;
- HashTab64bitLPID htab;
- Hash61bit5univID h61id;
-
-
- htab = (HashTab64bitLPID) malloc(4*sizeof(INT64*));
- for (i=0;i<4;i++) {
- htab[i] = (INT64*) malloc(65536*sizeof(INT64));
- h61id = InitHash61bit5univ();
- for (j=0;j<65536;j++) htab[i][j]=Hash61bit5univ(j,h61id);
- h61id = InitHash61bit5univ();
- for (j=0;j<65536;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;
- }
- return htab;
-}
-
-inline INT64 HashShortTab64bitLP(INT64 x, HashTab64bitLPID htab) {
- INT16 c;
- INT64 h;
- gs_int32_t i;
-
- c=x;
- h=htab[0][c];
- for (i=1;i<4;i++) {
- x>>=16;
- c=x;
- h=h^htab[i][c];
- }
- return h;
-}
-
-/* Tabulation based hashing for 64-bit keys using 8-bit characters.*/
-/* Uses 14KB tables and makes 8 lookups per hash. */
-
-inline static HashTab64bitLPID InitHashCharTab64bitLP() {
- gs_int32_t i,j;
- HashTab64bitLPID htab;
- Hash61bit5univID h61id;
-
-
- htab = (HashTab64bitLPID) malloc(8*sizeof(INT64*));
- for (i=0;i<8;i++) {
- htab[i] = (INT64*) malloc(256*sizeof(INT64));
- h61id = InitHash61bit5univ();
- for (j=0;j<256;j++) htab[i][j]=Hash61bit5univ(j,h61id);
- h61id = InitHash61bit5univ();
- for (j=0;j<256;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;
- }
- return htab;
-}
-
-inline INT64 HashCharTab64bitLP(INT64 x, HashTab64bitLPID htab) {
- INT8 c;
- INT64 h;
- gs_int32_t i;
-
- c=x;
- h=htab[0][c];
- for (i=1;i<8;i++) {
- x>>=8;
- c=x;
- h=h^htab[i][c];
- }
- return h;
-}
-
-
-/* Tabulation based hashing for 34-bit keys using 10-11-bit characters.*/
-/* Uses 20KB tables and makes 3 lookups per hash. */
-
-inline static HashTab32bitLPID InitHashMipTab32bitLP() {
- gs_int32_t i,j;
- HashTab32bitLPID htab;
- Hash61bit5univID h61id;
-
-
- htab = (HashTab32bitLPID) malloc(6*sizeof(INT32*));
- for (i=0;i<2;i++) {
- htab[i] = (INT32*) malloc(0x800*sizeof(INT32));
- h61id = InitHash61bit5univ();
- for (j=0;j<0x800;j++) htab[i][j]=(INT32) Hash61bit5univ(j,h61id);
- }
- for (i=2;i<3;i++) {
- htab[i] = (INT32*) malloc(0x400*sizeof(INT32));
- h61id = InitHash61bit5univ();
- for (j=0;j<0x400;j++) htab[i][j]=(INT32) Hash61bit5univ(j,h61id);
- }
- return htab;
-}
-
-inline INT32 HashMipTab32bitLP(INT32 x, HashTab32bitLPID htab) {
- INT32 c;
- INT32 h;
- gs_int32_t i;
-
- h=0;
- for (i=0;i<2;i++) {
- c= x & 0x7FFull;
- h=h^htab[i][c];
- x>>=11;
- }
- c= x & 0x3FFull;
- h=h^htab[2][c];
- return h;
-}
-
-/* Tabulation based hashing for 64-bit keys using 10-11-bit characters.*/
-/* Uses 80KB tables and makes 6 lookups per hash. */
-
-inline static HashTab64bitLPID InitHashMipTab64bitLP() {
- gs_int32_t i,j;
- HashTab64bitLPID htab;
- Hash61bit5univID h61id;
-
-
- htab = (HashTab64bitLPID) malloc(6*sizeof(INT64*));
- for (i=0;i<4;i++) {
- htab[i] = (INT64*) malloc(0x800*sizeof(INT64));
- h61id = InitHash61bit5univ();
- for (j=0;j<0x800;j++) htab[i][j]=Hash61bit5univ(j,h61id);
- h61id = InitHash61bit5univ();
- for (j=0;j<0x800;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;
- }
- for (i=4;i<6;i++) {
- htab[i] = (INT64*) malloc(0x400*sizeof(INT64));
- h61id = InitHash61bit5univ();
- for (j=0;j<0x400;j++) htab[i][j]=Hash61bit5univ(j,h61id);
- h61id = InitHash61bit5univ();
- for (j=0;j<0x400;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;
- }
- return htab;
-}
-
-inline INT64 HashMipTab64bitLP(INT64 x, HashTab64bitLPID htab) {
- INT64 c;
- INT64 h;
- gs_int32_t i;
-
- h=0;
- for (i=0;i<4;i++) {
- c= x & 0x7FFull;
- h=h^htab[i][c];
- x>>=11;
- }
- for (i=4;i<6;i++) {
- c= x & 0x3FFull;
- h=h^htab[i][c];
- x>>=10;
- }
- return h;
-}
-
-
-
-inline static Hash32bit2univID InitHash32bit2univ() {
- Hash32bit2univID hid = (INT64*) malloc(2*sizeof(INT64));
- hid[0]=RandINT64();
- hid[1]=RandINT64();
- return hid;
-}
-
-inline INT32 Hash32bit2univ(INT32 x, Hash32bit2univID hid) {
- INT64 H;
- INT32 h;
- H = (x*hid[0])+hid[1];
- h = H >> 32;
- return h;
-}
-
-
-// For the string hashing below is for string of 32bit integers, and
-// the output is a 2-universal 32-bit number.
-
-inline static Hash32bit2univID InitStringHash32bit2univ(gs_int32_t length) {
- gs_int32_t i;
- Hash32bit2univID hid = (INT64*) malloc((length+1)*sizeof(INT64));
- for (i=0;i<=length;i++) hid[i]=RandINT64();
- return hid;
-}
-
-
-
-//assumes string length even
-inline INT32 EvenStringHash32bit2univ(INT32 * x, gs_int32_t length,
- Hash32bit2univID hid) {
- gs_int32_t i;
- INT64 H;
- INT32 h;
- INT64 xh1,xh2,y;
-
- H=0;
- for (i=0;i<length;i+=2) {
- xh1=x[i]+hid[i+1];
- xh2=x[i+1]+hid[i];
- y=xh1*xh2;
- H = H^y;
- }
- H=H^hid[length];
- h = H >> 32;
- return h;
-}
-
-//assumes string length odd
-inline INT32 OddStringHash32bit2univ(INT32 * x, gs_int32_t length,
- Hash32bit2univID hid) {
- gs_int32_t i;
- INT64 H;
- INT32 h;
- INT64 xh1,xh2,y;
-
- H = x[0]*hid[0];
- for (i=1;i<length;i+=2) {
- xh1=x[i]+hid[i+1];
- xh2=x[i+1]+hid[i];
- y=xh1*xh2;
- H = H^y;
- }
- H=H^hid[length];
- h = H >> 32;
- return h;
-}
-
-//Below we have the generic algorithm for fixed length string hashing
-//For shorter strings of length < 6, it is worthwhile using
-//specialized versions with fewer tests.
-//All versions rely on the some initialization above.
-inline INT32 StringHash32bit2univ(INT32 * x, gs_int32_t length, Hash32bit2univID hid) {
-// gs_int32_t i;
-// INT64 H=0;
-// INT32 h;
-// for (i=0;i<length;i++) H = H^(x[i]*hid[i]);
-// H=H^hid[length];
-// h = H >> 32;
-// return h;
- if (length&1) return OddStringHash32bit2univ(x,length,hid);
- else return EvenStringHash32bit2univ(x,length,hid);
-}
-
-
-//string of length 2
-inline INT32 String2Hash32bit2univ(INT32 * x,Hash32bit2univID hid) {
- INT64 H;
- INT32 h;
- INT64 xh1,xh2;
-
- xh1=x[0]+hid[1];
- xh2=x[1]+hid[0];
- H=xh1*xh2;
- H=H^hid[2];
- h = H >> 32;
- return h;
-}
-
-//string of length 3
-inline INT32 String3Hash32bit2univ(INT32 * x,Hash32bit2univID hid) {
- INT64 H;
- INT32 h;
- INT64 xh1,xh2,y;
-
- H = x[0]*hid[0];
- xh1=x[1]+hid[2];
- xh2=x[2]+hid[1];
- y=xh1*xh2;
- H = H^y;
- H=H^hid[3];
- h = H >> 32;
- return h;
-}
-
-
-inline static VarINT32StringHash32bit2univID InitVarINT32StringHash32bitUniv() {
- gs_int32_t i;
- VarINT32StringHash32bit2univID hid;
-
- hid = (VarINT32StringHash32bit2univID) malloc(sizeof(VISHID));
- for (i=0;i<16;i++) hid->str_compr_hid[i]=RandINT64();
- hid->string_hid = RandINT64() & Prime61;
- // hid->last_compr_hid = RandINT64() | 0x1ull;
- return hid;
- }
-
-inline INT32 VarINT32StringHash32bitUniv(INT32 * x, gs_int32_t length, VarINT32StringHash32bit2univID hid) {
-
- INT32 h,i,j,d;
- gs_int32_t e;
- INT64 C,H,M;
- INT64 xh1,xh2,y;
- INT64 HxM00,HxM01,HxM10,HxM11,C0,C1,C2,CC;
-
- if (length<16) return StringHash32bit2univ(x,length,hid->str_compr_hid);
- H=0;
- i=0;
- for (;;) {
- d=length-i;
- C = 0;
- j=0;
- if (d>16) d=16;
- else if (d&1) {
- C = x[i]*hid->str_compr_hid[j];
- i++;
- j++;
- }
- for (e=((int) (d>>1));e>0;e--) {
- xh1=x[i]+hid->str_compr_hid[j+1];
- xh2=x[i+1]+hid->str_compr_hid[j];
- y=xh1*xh2;
- C = C^y;
- i+=2;
- j+=2;
- }
- if (i==(INT32) length) {
- H<<=4;
- H+=C;
- H+=((INT64) length)*hid->length_factor;
- h=(H>>32);
- return h;
- }
- C>>=4;
- H+=C;
- M=hid->string_hid;
- //Multiply H*M mod Prime61, possibly plus Prime,
- //exploiting the structure of Prime61.
- //We assume H<2*Prime61 and M<=Prime61, e.g.,
- //H*M <2^123.
- HxM00 = LOW(H)*LOW(M);
- HxM01 = LOW(H)*HIGH(M);
- HxM10 = HIGH(H)*LOW(M);
- HxM11 = HIGH(H)*HIGH(M); //has at most 60 bits
- C0 = HxM00+(HxM01<<32)+(HxM10<<32);//overflow doesn't matter here.
- C0 = C0&Prime61; //Has at most 61 bits.
- C1 = (HxM00>>32)+HxM01+HxM10; //Overflow impossible each <=62 bits
- C1 = C1>>29; //Has at most 33 bits.
- C2 = HxM11<<3; //Has at most 123-64+3=62 bits.
- CC = C0+C1+C2; //At most 63 bits.
- H = (CC&Prime61)+(CC>>61); //<2*Prime
- }
-}
-
-inline static VarStringHash32bit2univID InitVarStringHash32bitUniv() {
- gs_int32_t i;
- VarStringHash32bit2univID hid;
-
- hid = (VarStringHash32bit2univID) malloc(sizeof(VSHID));
- for (i=0;i<16;i++) {
- hid->int_str[i]=0;
- hid->str_compr_hid[i]=RandINT64();
- }
- hid->string_hid = RandINT64() & Prime61;
- // hid->last_compr_hid = RandINT64() | 0x1ull;
- return hid;
- }
-
-inline INT32 VarStringHash32bitUniv(char * x, gs_int32_t length,
- VarStringHash32bit2univID hid) {
-
- INT32 h,i,l,d,str_ix;
- INT64 C,H,M;
- INT64 xh1,xh2,y;
- INT64 HxM00,HxM01,HxM10,HxM11,C0,C1,C2,CC;
-
- // Assumes hid->int_str[*]==0
-
- if (length<=60) {
- memcpy(hid->int_str,x,length);
- d=(length+3)>>2; //<16
- h=StringHash32bit2univ(hid->int_str,d,hid->str_compr_hid);
- for (i=0;i<d;i++) hid->int_str[i]=0;
- return h;
- }
- H=0;
- str_ix=0;
- for (;;) {
- C = 0;
- l=length-str_ix;
- if (l>64) l=64;
- memcpy(hid->int_str,x+str_ix,l);
- str_ix+=l;
- d=(l+3)>>2;
- i=0;
- if (d&1) {
- C = hid->int_str[i]*hid->str_compr_hid[i];
- i++;
- }
- while (i<d) {
- xh1=hid->int_str[i]+hid->str_compr_hid[i+1];
- xh2=hid->int_str[i+1]+hid->str_compr_hid[i];
- y=xh1*xh2;
- C = C^y;
- i+=2;
- }
- for (i=0;i<d;i++) hid->int_str[i]=0;
- if (str_ix==(INT32) length) {
- H<<=4;
- H+=C;
- H+=((INT64) length)*hid->length_factor;
- h=(H>>32);
- return h;
- }
- C>>=4;
- H+=C;
- M=hid->string_hid;
- //Multiply H*M mod Prime61, possibly plus Prime,
- //exploiting the structure of Prime61.
- //We assume H<2*Prime61 and M<=Prime61, e.g.,
- //H*M <2^123.
- HxM00 = LOW(H)*LOW(M);
- HxM01 = LOW(H)*HIGH(M);
- HxM10 = HIGH(H)*LOW(M);
- HxM11 = HIGH(H)*HIGH(M); //has at most 60 bits
- C0 = HxM00+(HxM01<<32)+(HxM10<<32);//overflow doesn't matter here.
- C0 = C0&Prime61; //Has at most 61 bits.
- C1 = (HxM00>>32)+HxM01+HxM10; //Overflow impossible each <=62 bits
- C1 = C1>>29; //Has at most 33 bits.
- C2 = HxM11<<3; //Has at most 123-64+3=62 bits.
- CC = C0+C1+C2; //At most 63 bits.
- H = (CC&Prime61)+(CC>>61); //<2*Prime
- }
-}
-
-
-#endif
-
+/* ------------------------------------------------\r
+Copyright 2014 AT&T Intellectual Property\r
+ Licensed under the Apache License, Version 2.0 (the "License");\r
+ you may not use this file except in compliance with the License.\r
+ You may obtain a copy of the License at\r
+\r
+ http://www.apache.org/licenses/LICENSE-2.0\r
+\r
+ Unless required by applicable law or agreed to in writing, software\r
+ distributed under the License is distributed on an "AS IS" BASIS,\r
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+ See the License for the specific language governing permissions and\r
+ limitations under the License.\r
+ ------------------------------------------- */\r
+\r
+#ifndef _STRINGHASH_H\r
+#define _STRINGHASH_H\r
+#include <stdlib.h>\r
+#include <stdio.h>\r
+#include <limits.h>\r
+#include <stdint.h>\r
+#include <string.h>\r
+\r
+//Provides 5-universal hash functions.\r
+//Both a direct polynomial based one,\r
+//and the much faster tabulation based one.\r
+//\r
+//It also provides 2-universal string hashing.\r
+//For most applicatoins, introdcing a slight error,\r
+//if you want almost 5-uninversal hashing for strings,\r
+//you can do a 2-universal hashing into 32-bit integer\r
+//getting very few collissoins, and then use the above\r
+//5-universal one.\r
+\r
+\r
+\r
+typedef gs_uint8_t INT8;\r
+typedef gs_uint16_t INT16;\r
+typedef gs_uint32_t INT32;\r
+typedef gs_uint64_t INT64;\r
+\r
+typedef INT64 * Hash61bit5univID;\r
+typedef INT32** HashTab32bit5univID;\r
+typedef INT32** HashTab32bitLPID;\r
+typedef INT64** HashTab64bitLPID;\r
+typedef INT64 * Hash32bit2univID;\r
+typedef INT32 Hash32bitUnivID;\r
+\r
+const INT32 MAX_INT32 = UINT_MAX;\r
+\r
+typedef struct {\r
+ INT64 str_compr_hid[16];\r
+ INT64 string_hid;\r
+ INT64 length_factor;\r
+ // INT64 last_compr_hid;\r
+} VISHID;\r
+\r
+typedef VISHID * VarINT32StringHash32bit2univID;\r
+\r
+\r
+typedef struct {\r
+ INT32 int_str[16];\r
+ INT64 str_compr_hid[16];\r
+ INT64 string_hid;\r
+ INT64 length_factor;\r
+ // INT64 last_compr_hid;\r
+} VSHID;\r
+\r
+typedef VSHID * VarStringHash32bit2univID;\r
+\r
+\r
+\r
+\r
+\r
+// different views of a 64-bit double word\r
+typedef union {\r
+ INT64 as_int64;\r
+ INT32 as_int32s[2];\r
+ INT16 as_int16s[4];\r
+} int64views;\r
+\r
+const INT64 LowOnes = (((INT64)1)<<32)-1;\r
+\r
+#define LOW(x) ((x)&LowOnes) // extract lower 32 bits from INT64\r
+#define HIGH(x) ((x)>>32) // extract higher 32 bits from INT64\r
+\r
+const INT64 Prime61 = (((INT64)1)<<61) - 1;\r
+\r
+\r
+//Random numbers from random.org based on atmostpheric noise.\r
+INT64 Random64[200]= {0x285eb7722a62ce6eull,\r
+ 0xa84c7463e2b7856bull,\r
+ 0xb29100d6abcc8666ull,\r
+ 0xf9bfca5b7461fb1full,\r
+ 0x51c8dafc30c88dadull,\r
+ 0x0687468c365ec51dull,\r
+ 0x2bf2cd3ad64b6218ull,\r
+ 0xc20565a4d1f00f9eull,\r
+ 0x7d533575d313c658ull,\r
+ 0xbf2fba6b00725b85ull,\r
+ 0x4cfc2f6557e722beull,\r
+ 0xedbce818556dfb2bull,\r
+ 0xd9df508027db1bbeull,\r
+ 0x21f2d922f712f48bull,\r
+ 0xcd8b289d83a65804ull,\r
+ 0x4a19cfb02445a6d2ull,\r
+ 0xc95e56b1e19a4f94ull,\r
+ 0xcfeeeaccaf477248ull,\r
+ 0x4eec3378b73bb25cull,\r
+ 0x18a3f38d1c48b2beull,\r
+ 0x71b79ab5cb1e3730ull,\r
+ 0x79cdb30e2f38309dull,\r
+ 0xb41d4983bdbc8d6full,\r
+ 0xba9f57c01b66b7e3ull,\r
+ 0xe400503c95c16568ull,\r
+ 0x5977bfd4630294f1ull,\r
+ 0x57d4d7940099676full,\r
+ 0xd945de9268f4b191ull,\r
+ 0x4034711421eaf806ull,\r
+ 0x6d8108a4a6d58c22ull,\r
+ 0x5c421818ddbdd4eeull,\r
+ 0xbd9b7e4071713c13ull,\r
+ 0xa60d1d6e793e5eb2ull,\r
+ 0x7443fb031b8ec6c7ull,\r
+ 0xd8290c7120e05d4aull,\r
+ 0x797fb1d9a6a8d27full,\r
+ 0x543ec268ab1f2e45ull,\r
+ 0xcaf2a6139701f320ull,\r
+ 0x9519611d130bee47ull,\r
+ 0x19bbc533f018be1aull,\r
+ 0xdbfacdfeb573133dull,\r
+ 0x3255dacc4c7bfe12ull,\r
+ 0xbc6c9228e5518f6eull,\r
+ 0xb5c1681037347178ull,\r
+ 0xbaaa2cfef186bfadull,\r
+ 0x1834b8ab0f9e876eull,\r
+ 0x9d7b7f228433e0f7ull,\r
+ 0xa99cc292d003dd09ull,\r
+ 0xc0cb8037046b5295ull,\r
+ 0xa6ffa3d4671aa3d2ull,\r
+ 0xc27023fbee2862e6ull,\r
+ 0x5a9877bcc4bd3172ull,\r
+ 0xfcb0da3caf9fcfe0ull,\r
+ 0xc35ef57e1866ceaeull,\r
+ 0xd4f7c927d169a115ull,\r
+ 0x699054518fc74756ull,\r
+ 0xa75cbf617fc9db8dull,\r
+ 0x7f3adf4369665a9cull,\r
+ 0x6b98eeeb4c517f42ull,\r
+ 0xa12e44f5de954f24ull,\r
+ 0x5789ded4dced978eull,\r
+ 0xe4dd20ed27cd3567ull,\r
+ 0x9b4e90c365b8790bull,\r
+ 0xd486ed6e099f499bull,\r
+ 0x3f3d0ccfeaa0c0dcull,\r
+ 0x548c746cdb192adaull,\r
+ 0x8ce636d8469fe2dcull,\r
+ 0xca59ec929549a6a1ull,\r
+ 0x647b9878deaba1f0ull,\r
+ 0xebbb4b2641c54d34ull,\r
+ 0x7be6d2918b680abdull,\r
+ 0x02ad265fb4733490ull,\r
+ 0xfe1053044faf3486ull,\r
+ 0x539ea358ff6b6df3ull,\r
+ 0x025d73224a2b5826ull,\r
+ 0x7daad302451f41b3ull,\r
+ 0x6038455ddb535976ull,\r
+ 0x8d6d00a9a728a067ull,\r
+ 0xe9f03d61d4965d59ull,\r
+ 0x38314b8102daff3bull,\r
+ 0x56b335e7893a76f1ull,\r
+ 0x1048ca2f415712abull,\r
+ 0xa9bc989a891dc173ull,\r
+ 0xb741df3ae02836c2ull,\r
+ 0x7711e6c6f5830783ull,\r
+ 0x8edbf2be9226e24bull,\r
+ 0xe4a4b8ba310fc2e2ull,\r
+ 0xbc7b67f4a02f23c8ull,\r
+ 0x5669b1a9d6d8df17ull,\r
+ 0xdd3ebf2e3c516e26ull,\r
+ 0x77bdd6def5236c4full,\r
+ 0x9aeb54bdffacd65eull,\r
+ 0xab676483404a21b8ull,\r
+ 0xf7270f77a9d1b3a3ull,\r
+ 0x3794e1cdcc7de433ull,\r
+ 0x8e2b74d3a06aa56aull,\r
+ 0x572698d05b901d40ull,\r
+ 0x7bd6c265c1dd5cdfull,\r
+ 0xd2f68a53970db82eull,\r
+ 0x0e1d5f5dd9bd23bdull,\r
+ 0x48814c6813505051ull,\r
+ 0x8f2d21a7d4e4a481ull,\r
+ 0x144531c871920cf3ull,\r
+ 0x45c6f81c7fbc3b2full,\r
+ 0xc562f1fc7f07a944ull,\r
+ 0xaabdf3d0aa9872e4ull,\r
+ 0x8db1f9d827c8df98ull,\r
+ 0xea787210e13c16c7ull,\r
+ 0xd43f6f582629ff39ull,\r
+ 0x6ec7599da4cb298dull,\r
+ 0xfa99d7196097dd94ull,\r
+ 0xbe3a8a172a62f40dull,\r
+ 0x07477bc03d9d5471ull,\r
+ 0x03777d1ee44c0fa6ull,\r
+ 0x5c6df847b0ae6fd1ull,\r
+ 0xc1fc3bc2352d8125ull,\r
+ 0x6800e3321d35b697ull,\r
+ 0x51cc735e1b0920a8ull,\r
+ 0x93dc60a2430c11acull,\r
+ 0x13b6bf8ffc4e9e30ull,\r
+ 0xada08114e6a01701ull,\r
+ 0x1459b7a4254da4cdull,\r
+ 0xf7575cd7ededcdfdull,\r
+ 0x1e43675ed5ed33eeull,\r
+ 0x639f5ff579cc30d4ull,\r
+ 0x8f4f75d7eea7e300ull,\r
+ 0x518939accd43dadeull,\r
+ 0x989a77577fac24e2ull,\r
+ 0x86c5e3998c819d51ull,\r
+ 0xb84770f9cc15d139ull,\r
+ 0x11544010174c2c99ull,\r
+ 0xfb238b962405a579ull,\r
+ 0xca5bddde9b80cb7bull,\r
+ 0xbbe71928190dfab4ull,\r
+ 0x0ec620294742b8c8ull,\r
+ 0x90bc7a703ed63900ull,\r
+ 0x2c8a80e1e85f72bbull,\r
+ 0xf19ab262ed2ad914ull,\r
+ 0xa233e89fab33793cull,\r
+ 0x75b6f59568a958f5ull,\r
+ 0x31fb15af9a41aca2ull,\r
+ 0xf89db05aa21b3d1cull,\r
+ 0x0023d08c52147f63ull,\r
+ 0x8e0e3c2aba81e8fdull,\r
+ 0xdb2efd057c157e71ull,\r
+ 0x1a1797a83e39dfe4ull,\r
+ 0x3afc43e1979f507dull,\r
+ 0x0e647a8d8abe0936ull,\r
+ 0xc8e03f27082eb0daull,\r
+ 0xc1b0c11db52da528ull,\r
+ 0xadeb52a144f497ceull,\r
+ 0x046fc2f53bd97c9bull,\r
+ 0x2d587fdf5fae80d5ull,\r
+ 0x14036c3fb77fd73eull,\r
+ 0x883f768734d14676ull,\r
+ 0x212df988a83caffeull,\r
+ 0x36b41dac32387e17ull,\r
+ 0x9dbee8d7cf804bccull,\r
+ 0x34d9a58fb9a35794ull,\r
+ 0xda82beba879e3576ull,\r
+ 0x0366148280e5adf1ull,\r
+ 0x634085dcdba7b174ull,\r
+ 0x8451523252446534ull,\r
+ 0xc74ce8b4d5175b9aull,\r
+ 0x40afebd30a9a4837ull,\r
+ 0x7232952ce84e90beull,\r
+ 0x7443a6e37b87992dull,\r
+ 0xace8a33649218d9aull,\r
+ 0x9ad8af9614b75655ull,\r
+ 0xd518d9a700179ac2ull,\r
+ 0xc7f906d90fd36259ull,\r
+ 0xe2c47ec3abd8a12dull,\r
+ 0xca05e96fcbbb76c1ull,\r
+ 0x7186661c9ddf4973ull,\r
+ 0x0146f5f47d5f42c2ull,\r
+ 0x74a7c485608ea178ull,\r
+ 0x8cb5e3e5f84f3040ull,\r
+ 0xb330869dc366037dull,\r
+ 0x0dbc4d22f932bbd4ull,\r
+ 0xa8460b43946a5ecbull,\r
+ 0xcee72c72fe5ddea6ull,\r
+ 0x9f953cc7859b2a4eull,\r
+ 0x95ea396619e60182ull,\r
+ 0xfe9890efa6aa8c3eull,\r
+ 0x3c9a5691d1e25799ull,\r
+ 0xa54eeb19aa718b62ull,\r
+ 0x69907b2ec11d82b7ull,\r
+ 0x408c5405984caa65ull,\r
+ 0x6f11a21711640470ull,\r
+ 0x20b9227a8f53f9aeull,\r
+ 0x42df63b7ec442178ull,\r
+ 0xbec247f79407d00aull,\r
+ 0x2946c31558eab2bcull,\r
+ 0x84d045ffa174048eull,\r
+ 0x28bad532ba4a450cull,\r
+ 0x93ccf0cbaa274d7aull,\r
+ 0xff30008411d25158ull,\r
+ 0x3845ce6dcc30080aull,\r
+ 0xcf5925239b6f3dbaull,\r
+ 0x1f148649ed53660dull};\r
+\r
+\r
+\r
+\r
+\r
+\r
+gs_int32_t nextrandom = 0;\r
+gs_int32_t endrandom = 100;\r
+\r
+\r
+// The first "endrandom" numbers returned are truely random INT64.\r
+// They are perfect for bases of new hash functions, and currently\r
+// we have just enough such random numbers for tabulation based\r
+// 5-universal hashing, or one of the other schemes provided.\r
+// The remaining ones are based on the pseudo-random mrand48() which\r
+// returns a signed 32-bit number. We do not use the unsigned lrand48()\r
+// which only returns 31-bit.\r
+inline static INT64 RandINT64() {\r
+ INT64 r,r1;\r
+ if (nextrandom<endrandom) r=Random64[nextrandom++];\r
+ else {\r
+ if (nextrandom==endrandom)\r
+ fprintf(stderr,"Switching from random to pseudo-random in Rand64");\r
+ r =(INT64) mrand48();\r
+ r1 = (INT64) mrand48();\r
+ r=r<<32;\r
+ r=r^r1;\r
+ }\r
+ return r;\r
+}\r
+\r
+\r
+/* Computes ax+b mod Prime, possibly plus Prime,\r
+ exploiting the structure of Prime. */\r
+inline static INT64 MultAddPrime(INT32 x, INT64 a, INT64 b) {\r
+ INT64 a0,a1,c0,c1,c;\r
+ a0 = LOW(a)*x;\r
+ a1 = HIGH(a)*x;\r
+ c0 = a0+(a1<<32);\r
+ c1 = (a0>>32)+a1;\r
+ c = (c0&Prime61)+(c1>>29)+b;\r
+ return c;\r
+} // 12 instructions\r
+\r
+/* CWtrick for 32-bit key x with prime 2^61-1 */\r
+inline static INT64 Hash61bit5univ(INT32 x, Hash61bit5univID index) {\r
+ INT64 h;\r
+ gs_int32_t i;\r
+\r
+ h = index[0];\r
+ for (i=1;i<5;i++) h = MultAddPrime(x,h,index[i]);\r
+ h = (h&Prime61)+(h>>61);\r
+ if (h>=Prime61) h-=Prime61;\r
+ return h;\r
+}\r
+\r
+\r
+\r
+inline static INT64* InitHash61bit5univ() {\r
+ gs_int32_t i;\r
+\r
+ Hash61bit5univID hid = (INT64*) malloc(5*sizeof(INT64));\r
+ for (i=0;i<5;i++) {\r
+ hid[i]=RandINT64();\r
+ hid[i] = (hid[i]&Prime61)+(hid[i]>>61);\r
+ if (hid[i]>=Prime61) hid[i]-=Prime61;\r
+ }\r
+ return hid;\r
+}\r
+\r
+inline static HashTab32bit5univID InitHashTab32bit5univ() {\r
+ gs_uint32_t i,j;\r
+ HashTab32bit5univID htab;\r
+ Hash61bit5univID h61id;\r
+\r
+\r
+ htab = (HashTab32bit5univID) malloc(3*sizeof(INT32*));\r
+ for (i=0;i<3;i++) {\r
+ htab[i] = (INT32*) malloc(65538*sizeof(INT32));\r
+ h61id = InitHash61bit5univ();\r
+ for (j=0;j<65538;j++) htab[i][j]=Hash61bit5univ(j,h61id);\r
+ }\r
+ return htab;\r
+}\r
+\r
+/* tabulation based hashing for 32-bit key x using 16-bit characters.*/\r
+inline INT64 HashTab32bit5univ(INT32 x, HashTab32bit5univID htab) {\r
+ INT32 x0, x1, x2;\r
+ x0 = x&65535;\r
+ x1 = x>>16;\r
+ x2 = x0 + x1;\r
+ x2 = 2 - (x2>>16) + (x2&65535); // optional compression\r
+ return htab[0][x0]^htab[1][x1]^htab[2][x2];\r
+} // 8 + 4 = 12 instructions\r
+\r
+\r
+/* Tabulation based hashing for 32-bit keys using 16-bit characters.*/\r
+/* Uses 500KB tables and makes 2 lookups per hash. */\r
+\r
+inline static HashTab32bitLPID InitHashShortTab32bitLP() {\r
+ gs_uint32_t i,j;\r
+ HashTab32bitLPID htab;\r
+ Hash61bit5univID h61id;\r
+\r
+\r
+ htab = (HashTab32bitLPID) malloc(2*sizeof(INT32*));\r
+ for (i=0;i<2;i++) {\r
+ htab[i] = (INT32*) malloc(65536*sizeof(INT32));\r
+ h61id = InitHash61bit5univ();\r
+ for (j=0;j<65536;j++) htab[i][j]=Hash61bit5univ(j,h61id);\r
+ }\r
+ return htab;\r
+}\r
+\r
+/* tabulation based hashing for 32-bit key x using 16-bit characters.*/\r
+inline INT64 HashShortTab32bitLP(INT32 x, HashTab32bitLPID htab) {\r
+ INT32 x0, x1;\r
+ x0 = x&65535;\r
+ x1 = x>>16;\r
+ return htab[0][x0]^htab[1][x1];\r
+}\r
+\r
+\r
+/* Tabulation based hashing for 32-bit keys using 8-bit characters.*/\r
+/* Uses 4KB tables and makes 4 lookups per hash. */\r
+\r
+inline static HashTab32bitLPID InitHashCharTab32bitLP() {\r
+ gs_uint32_t i,j;\r
+ HashTab32bitLPID htab;\r
+ Hash61bit5univID h61id;\r
+\r
+\r
+ htab = (HashTab32bitLPID) malloc(4*sizeof(INT32*));\r
+ for (i=0;i<4;i++) {\r
+ htab[i] = (INT32*) malloc(256*sizeof(INT32));\r
+ h61id = InitHash61bit5univ();\r
+ for (j=0;j<256;j++) htab[i][j]=Hash61bit5univ(j,h61id);\r
+ }\r
+ return htab;\r
+}\r
+\r
+inline INT32 HashCharTab32bitLP(INT32 x, HashTab32bitLPID htab) {\r
+ INT8 c;\r
+ INT32 h;\r
+ gs_int32_t i;\r
+\r
+ c=x;\r
+ h=htab[0][c];\r
+ for (i=1;i<4;i++) {\r
+ x>>=8;\r
+ c=x;\r
+ h=h^htab[i][c];\r
+ }\r
+ return h;\r
+}\r
+\r
+\r
+/* Tabulation based hashing for 64-bit keys using 16-bit characters.*/\r
+/* Uses 2MB tables and makes 4 lookups per hash. */\r
+\r
+\r
+inline static HashTab64bitLPID InitHashShortTab64bitLP() {\r
+ gs_int32_t i,j;\r
+ HashTab64bitLPID htab;\r
+ Hash61bit5univID h61id;\r
+\r
+\r
+ htab = (HashTab64bitLPID) malloc(4*sizeof(INT64*));\r
+ for (i=0;i<4;i++) {\r
+ htab[i] = (INT64*) malloc(65536*sizeof(INT64));\r
+ h61id = InitHash61bit5univ();\r
+ for (j=0;j<65536;j++) htab[i][j]=Hash61bit5univ(j,h61id);\r
+ h61id = InitHash61bit5univ();\r
+ for (j=0;j<65536;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;\r
+ }\r
+ return htab;\r
+}\r
+\r
+inline INT64 HashShortTab64bitLP(INT64 x, HashTab64bitLPID htab) {\r
+ INT16 c;\r
+ INT64 h;\r
+ gs_int32_t i;\r
+\r
+ c=x;\r
+ h=htab[0][c];\r
+ for (i=1;i<4;i++) {\r
+ x>>=16;\r
+ c=x;\r
+ h=h^htab[i][c];\r
+ }\r
+ return h;\r
+}\r
+\r
+/* Tabulation based hashing for 64-bit keys using 8-bit characters.*/\r
+/* Uses 14KB tables and makes 8 lookups per hash. */\r
+\r
+inline static HashTab64bitLPID InitHashCharTab64bitLP() {\r
+ gs_int32_t i,j;\r
+ HashTab64bitLPID htab;\r
+ Hash61bit5univID h61id;\r
+\r
+\r
+ htab = (HashTab64bitLPID) malloc(8*sizeof(INT64*));\r
+ for (i=0;i<8;i++) {\r
+ htab[i] = (INT64*) malloc(256*sizeof(INT64));\r
+ h61id = InitHash61bit5univ();\r
+ for (j=0;j<256;j++) htab[i][j]=Hash61bit5univ(j,h61id);\r
+ h61id = InitHash61bit5univ();\r
+ for (j=0;j<256;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;\r
+ }\r
+ return htab;\r
+}\r
+\r
+inline INT64 HashCharTab64bitLP(INT64 x, HashTab64bitLPID htab) {\r
+ INT8 c;\r
+ INT64 h;\r
+ gs_int32_t i;\r
+\r
+ c=x;\r
+ h=htab[0][c];\r
+ for (i=1;i<8;i++) {\r
+ x>>=8;\r
+ c=x;\r
+ h=h^htab[i][c];\r
+ }\r
+ return h;\r
+}\r
+\r
+\r
+/* Tabulation based hashing for 34-bit keys using 10-11-bit characters.*/\r
+/* Uses 20KB tables and makes 3 lookups per hash. */\r
+\r
+inline static HashTab32bitLPID InitHashMipTab32bitLP() {\r
+ gs_int32_t i,j;\r
+ HashTab32bitLPID htab;\r
+ Hash61bit5univID h61id;\r
+\r
+\r
+ htab = (HashTab32bitLPID) malloc(6*sizeof(INT32*));\r
+ for (i=0;i<2;i++) {\r
+ htab[i] = (INT32*) malloc(0x800*sizeof(INT32));\r
+ h61id = InitHash61bit5univ();\r
+ for (j=0;j<0x800;j++) htab[i][j]=(INT32) Hash61bit5univ(j,h61id);\r
+ }\r
+ for (i=2;i<3;i++) {\r
+ htab[i] = (INT32*) malloc(0x400*sizeof(INT32));\r
+ h61id = InitHash61bit5univ();\r
+ for (j=0;j<0x400;j++) htab[i][j]=(INT32) Hash61bit5univ(j,h61id);\r
+ }\r
+ return htab;\r
+}\r
+\r
+inline INT32 HashMipTab32bitLP(INT32 x, HashTab32bitLPID htab) {\r
+ INT32 c;\r
+ INT32 h;\r
+ gs_int32_t i;\r
+\r
+ h=0;\r
+ for (i=0;i<2;i++) {\r
+ c= x & 0x7FFull;\r
+ h=h^htab[i][c];\r
+ x>>=11;\r
+ }\r
+ c= x & 0x3FFull;\r
+ h=h^htab[2][c];\r
+ return h;\r
+}\r
+\r
+/* Tabulation based hashing for 64-bit keys using 10-11-bit characters.*/\r
+/* Uses 80KB tables and makes 6 lookups per hash. */\r
+\r
+inline static HashTab64bitLPID InitHashMipTab64bitLP() {\r
+ gs_int32_t i,j;\r
+ HashTab64bitLPID htab;\r
+ Hash61bit5univID h61id;\r
+\r
+\r
+ htab = (HashTab64bitLPID) malloc(6*sizeof(INT64*));\r
+ for (i=0;i<4;i++) {\r
+ htab[i] = (INT64*) malloc(0x800*sizeof(INT64));\r
+ h61id = InitHash61bit5univ();\r
+ for (j=0;j<0x800;j++) htab[i][j]=Hash61bit5univ(j,h61id);\r
+ h61id = InitHash61bit5univ();\r
+ for (j=0;j<0x800;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;\r
+ }\r
+ for (i=4;i<6;i++) {\r
+ htab[i] = (INT64*) malloc(0x400*sizeof(INT64));\r
+ h61id = InitHash61bit5univ();\r
+ for (j=0;j<0x400;j++) htab[i][j]=Hash61bit5univ(j,h61id);\r
+ h61id = InitHash61bit5univ();\r
+ for (j=0;j<0x400;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;\r
+ }\r
+ return htab;\r
+}\r
+\r
+inline INT64 HashMipTab64bitLP(INT64 x, HashTab64bitLPID htab) {\r
+ INT64 c;\r
+ INT64 h;\r
+ gs_int32_t i;\r
+\r
+ h=0;\r
+ for (i=0;i<4;i++) {\r
+ c= x & 0x7FFull;\r
+ h=h^htab[i][c];\r
+ x>>=11;\r
+ }\r
+ for (i=4;i<6;i++) {\r
+ c= x & 0x3FFull;\r
+ h=h^htab[i][c];\r
+ x>>=10;\r
+ }\r
+ return h;\r
+}\r
+\r
+\r
+\r
+inline static Hash32bit2univID InitHash32bit2univ() {\r
+ Hash32bit2univID hid = (INT64*) malloc(2*sizeof(INT64));\r
+ hid[0]=RandINT64();\r
+ hid[1]=RandINT64();\r
+ return hid;\r
+}\r
+\r
+inline INT32 Hash32bit2univ(INT32 x, Hash32bit2univID hid) {\r
+ INT64 H;\r
+ INT32 h;\r
+ H = (x*hid[0])+hid[1];\r
+ h = H >> 32;\r
+ return h;\r
+}\r
+\r
+\r
+// For the string hashing below is for string of 32bit integers, and\r
+// the output is a 2-universal 32-bit number.\r
+\r
+inline static Hash32bit2univID InitStringHash32bit2univ(gs_int32_t length) {\r
+ gs_int32_t i;\r
+ Hash32bit2univID hid = (INT64*) malloc((length+1)*sizeof(INT64));\r
+ for (i=0;i<=length;i++) hid[i]=RandINT64();\r
+ return hid;\r
+}\r
+\r
+\r
+\r
+//assumes string length even\r
+inline INT32 EvenStringHash32bit2univ(INT32 * x, gs_int32_t length,\r
+ Hash32bit2univID hid) {\r
+ gs_int32_t i;\r
+ INT64 H;\r
+ INT32 h;\r
+ INT64 xh1,xh2,y;\r
+\r
+ H=0;\r
+ for (i=0;i<length;i+=2) {\r
+ xh1=x[i]+hid[i+1];\r
+ xh2=x[i+1]+hid[i];\r
+ y=xh1*xh2;\r
+ H = H^y;\r
+ }\r
+ H=H^hid[length];\r
+ h = H >> 32;\r
+ return h;\r
+}\r
+\r
+//assumes string length odd\r
+inline INT32 OddStringHash32bit2univ(INT32 * x, gs_int32_t length,\r
+ Hash32bit2univID hid) {\r
+ gs_int32_t i;\r
+ INT64 H;\r
+ INT32 h;\r
+ INT64 xh1,xh2,y;\r
+\r
+ H = x[0]*hid[0];\r
+ for (i=1;i<length;i+=2) {\r
+ xh1=x[i]+hid[i+1];\r
+ xh2=x[i+1]+hid[i];\r
+ y=xh1*xh2;\r
+ H = H^y;\r
+ }\r
+ H=H^hid[length];\r
+ h = H >> 32;\r
+ return h;\r
+}\r
+\r
+//Below we have the generic algorithm for fixed length string hashing\r
+//For shorter strings of length < 6, it is worthwhile using\r
+//specialized versions with fewer tests.\r
+//All versions rely on the some initialization above.\r
+inline INT32 StringHash32bit2univ(INT32 * x, gs_int32_t length, Hash32bit2univID hid) {\r
+// gs_int32_t i;\r
+// INT64 H=0;\r
+// INT32 h;\r
+// for (i=0;i<length;i++) H = H^(x[i]*hid[i]);\r
+// H=H^hid[length];\r
+// h = H >> 32;\r
+// return h;\r
+ if (length&1) return OddStringHash32bit2univ(x,length,hid);\r
+ else return EvenStringHash32bit2univ(x,length,hid);\r
+}\r
+\r
+\r
+//string of length 2\r
+inline INT32 String2Hash32bit2univ(INT32 * x,Hash32bit2univID hid) {\r
+ INT64 H;\r
+ INT32 h;\r
+ INT64 xh1,xh2;\r
+\r
+ xh1=x[0]+hid[1];\r
+ xh2=x[1]+hid[0];\r
+ H=xh1*xh2;\r
+ H=H^hid[2];\r
+ h = H >> 32;\r
+ return h;\r
+}\r
+\r
+//string of length 3\r
+inline INT32 String3Hash32bit2univ(INT32 * x,Hash32bit2univID hid) {\r
+ INT64 H;\r
+ INT32 h;\r
+ INT64 xh1,xh2,y;\r
+\r
+ H = x[0]*hid[0];\r
+ xh1=x[1]+hid[2];\r
+ xh2=x[2]+hid[1];\r
+ y=xh1*xh2;\r
+ H = H^y;\r
+ H=H^hid[3];\r
+ h = H >> 32;\r
+ return h;\r
+}\r
+\r
+\r
+inline static VarINT32StringHash32bit2univID InitVarINT32StringHash32bitUniv() {\r
+ gs_int32_t i;\r
+ VarINT32StringHash32bit2univID hid;\r
+\r
+ hid = (VarINT32StringHash32bit2univID) malloc(sizeof(VISHID));\r
+ for (i=0;i<16;i++) hid->str_compr_hid[i]=RandINT64();\r
+ hid->string_hid = RandINT64() & Prime61;\r
+ // hid->last_compr_hid = RandINT64() | 0x1ull;\r
+ return hid;\r
+ }\r
+\r
+inline INT32 VarINT32StringHash32bitUniv(INT32 * x, gs_int32_t length, VarINT32StringHash32bit2univID hid) {\r
+\r
+ INT32 h,i,j,d;\r
+ gs_int32_t e;\r
+ INT64 C,H,M;\r
+ INT64 xh1,xh2,y;\r
+ INT64 HxM00,HxM01,HxM10,HxM11,C0,C1,C2,CC;\r
+\r
+ if (length<16) return StringHash32bit2univ(x,length,hid->str_compr_hid);\r
+ H=0;\r
+ i=0;\r
+ for (;;) {\r
+ d=length-i;\r
+ C = 0;\r
+ j=0;\r
+ if (d>16) d=16;\r
+ else if (d&1) {\r
+ C = x[i]*hid->str_compr_hid[j];\r
+ i++;\r
+ j++;\r
+ }\r
+ for (e=((int) (d>>1));e>0;e--) {\r
+ xh1=x[i]+hid->str_compr_hid[j+1];\r
+ xh2=x[i+1]+hid->str_compr_hid[j];\r
+ y=xh1*xh2;\r
+ C = C^y;\r
+ i+=2;\r
+ j+=2;\r
+ }\r
+ if (i==(INT32) length) {\r
+ H<<=4;\r
+ H+=C;\r
+ H+=((INT64) length)*hid->length_factor;\r
+ h=(H>>32);\r
+ return h;\r
+ }\r
+ C>>=4;\r
+ H+=C;\r
+ M=hid->string_hid;\r
+ //Multiply H*M mod Prime61, possibly plus Prime,\r
+ //exploiting the structure of Prime61.\r
+ //We assume H<2*Prime61 and M<=Prime61, e.g.,\r
+ //H*M <2^123.\r
+ HxM00 = LOW(H)*LOW(M);\r
+ HxM01 = LOW(H)*HIGH(M);\r
+ HxM10 = HIGH(H)*LOW(M);\r
+ HxM11 = HIGH(H)*HIGH(M); //has at most 60 bits\r
+ C0 = HxM00+(HxM01<<32)+(HxM10<<32);//overflow doesn't matter here.\r
+ C0 = C0&Prime61; //Has at most 61 bits.\r
+ C1 = (HxM00>>32)+HxM01+HxM10; //Overflow impossible each <=62 bits\r
+ C1 = C1>>29; //Has at most 33 bits.\r
+ C2 = HxM11<<3; //Has at most 123-64+3=62 bits.\r
+ CC = C0+C1+C2; //At most 63 bits.\r
+ H = (CC&Prime61)+(CC>>61); //<2*Prime\r
+ }\r
+}\r
+\r
+inline static VarStringHash32bit2univID InitVarStringHash32bitUniv() {\r
+ gs_int32_t i;\r
+ VarStringHash32bit2univID hid;\r
+\r
+ hid = (VarStringHash32bit2univID) malloc(sizeof(VSHID));\r
+ for (i=0;i<16;i++) {\r
+ hid->int_str[i]=0;\r
+ hid->str_compr_hid[i]=RandINT64();\r
+ }\r
+ hid->string_hid = RandINT64() & Prime61;\r
+ // hid->last_compr_hid = RandINT64() | 0x1ull;\r
+ return hid;\r
+ }\r
+\r
+inline INT32 VarStringHash32bitUniv(char * x, gs_int32_t length,\r
+ VarStringHash32bit2univID hid) {\r
+\r
+ INT32 h,i,l,d,str_ix;\r
+ INT64 C,H,M;\r
+ INT64 xh1,xh2,y;\r
+ INT64 HxM00,HxM01,HxM10,HxM11,C0,C1,C2,CC;\r
+\r
+ // Assumes hid->int_str[*]==0\r
+\r
+ if (length<=60) {\r
+ memcpy(hid->int_str,x,length);\r
+ d=(length+3)>>2; //<16\r
+ h=StringHash32bit2univ(hid->int_str,d,hid->str_compr_hid);\r
+ for (i=0;i<d;i++) hid->int_str[i]=0;\r
+ return h;\r
+ }\r
+ H=0;\r
+ str_ix=0;\r
+ for (;;) {\r
+ C = 0;\r
+ l=length-str_ix;\r
+ if (l>64) l=64;\r
+ memcpy(hid->int_str,x+str_ix,l);\r
+ str_ix+=l;\r
+ d=(l+3)>>2;\r
+ i=0;\r
+ if (d&1) {\r
+ C = hid->int_str[i]*hid->str_compr_hid[i];\r
+ i++;\r
+ }\r
+ while (i<d) {\r
+ xh1=hid->int_str[i]+hid->str_compr_hid[i+1];\r
+ xh2=hid->int_str[i+1]+hid->str_compr_hid[i];\r
+ y=xh1*xh2;\r
+ C = C^y;\r
+ i+=2;\r
+ }\r
+ for (i=0;i<d;i++) hid->int_str[i]=0;\r
+ if (str_ix==(INT32) length) {\r
+ H<<=4;\r
+ H+=C;\r
+ H+=((INT64) length)*hid->length_factor;\r
+ h=(H>>32);\r
+ return h;\r
+ }\r
+ C>>=4;\r
+ H+=C;\r
+ M=hid->string_hid;\r
+ //Multiply H*M mod Prime61, possibly plus Prime,\r
+ //exploiting the structure of Prime61.\r
+ //We assume H<2*Prime61 and M<=Prime61, e.g.,\r
+ //H*M <2^123.\r
+ HxM00 = LOW(H)*LOW(M);\r
+ HxM01 = LOW(H)*HIGH(M);\r
+ HxM10 = HIGH(H)*LOW(M);\r
+ HxM11 = HIGH(H)*HIGH(M); //has at most 60 bits\r
+ C0 = HxM00+(HxM01<<32)+(HxM10<<32);//overflow doesn't matter here.\r
+ C0 = C0&Prime61; //Has at most 61 bits.\r
+ C1 = (HxM00>>32)+HxM01+HxM10; //Overflow impossible each <=62 bits\r
+ C1 = C1>>29; //Has at most 33 bits.\r
+ C2 = HxM11<<3; //Has at most 123-64+3=62 bits.\r
+ CC = C0+C1+C2; //At most 63 bits.\r
+ H = (CC&Prime61)+(CC>>61); //<2*Prime\r
+ }\r
+}\r
+\r
+\r
+#endif\r
+\r