-/* ------------------------------------------------\r
-Copyright 2014 AT&T Intellectual Property\r
- Licensed under the Apache License, Version 2.0 (the "License");\r
- you may not use this file except in compliance with the License.\r
- You may obtain a copy of the License at\r
-\r
- http://www.apache.org/licenses/LICENSE-2.0\r
-\r
- Unless required by applicable law or agreed to in writing, software\r
- distributed under the License is distributed on an "AS IS" BASIS,\r
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
- See the License for the specific language governing permissions and\r
- limitations under the License.\r
- ------------------------------------------- */\r
-\r
-#ifndef _STRINGHASH_H\r
-#define _STRINGHASH_H\r
-#include <stdlib.h>\r
-#include <stdio.h>\r
-#include <limits.h>\r
-#include <stdint.h>\r
-#include <string.h>\r
-\r
-//Provides 5-universal hash functions.\r
-//Both a direct polynomial based one,\r
-//and the much faster tabulation based one.\r
-//\r
-//It also provides 2-universal string hashing.\r
-//For most applicatoins, introdcing a slight error,\r
-//if you want almost 5-uninversal hashing for strings,\r
-//you can do a 2-universal hashing into 32-bit integer\r
-//getting very few collissoins, and then use the above\r
-//5-universal one.\r
-\r
-\r
-\r
-typedef gs_uint8_t INT8;\r
-typedef gs_uint16_t INT16;\r
-typedef gs_uint32_t INT32;\r
-typedef gs_uint64_t INT64;\r
-\r
-typedef INT64 * Hash61bit5univID;\r
-typedef INT32** HashTab32bit5univID;\r
-typedef INT32** HashTab32bitLPID;\r
-typedef INT64** HashTab64bitLPID;\r
-typedef INT64 * Hash32bit2univID;\r
-typedef INT32 Hash32bitUnivID;\r
-\r
-const INT32 MAX_INT32 = UINT_MAX;\r
-\r
-typedef struct {\r
- INT64 str_compr_hid[16];\r
- INT64 string_hid;\r
- INT64 length_factor;\r
- // INT64 last_compr_hid;\r
-} VISHID;\r
-\r
-typedef VISHID * VarINT32StringHash32bit2univID;\r
-\r
-\r
-typedef struct {\r
- INT32 int_str[16];\r
- INT64 str_compr_hid[16];\r
- INT64 string_hid;\r
- INT64 length_factor;\r
- // INT64 last_compr_hid;\r
-} VSHID;\r
-\r
-typedef VSHID * VarStringHash32bit2univID;\r
-\r
-\r
-\r
-\r
-\r
-// different views of a 64-bit double word\r
-typedef union {\r
- INT64 as_int64;\r
- INT32 as_int32s[2];\r
- INT16 as_int16s[4];\r
-} int64views;\r
-\r
-const INT64 LowOnes = (((INT64)1)<<32)-1;\r
-\r
-#define LOW(x) ((x)&LowOnes) // extract lower 32 bits from INT64\r
-#define HIGH(x) ((x)>>32) // extract higher 32 bits from INT64\r
-\r
-const INT64 Prime61 = (((INT64)1)<<61) - 1;\r
-\r
-\r
-//Random numbers from random.org based on atmostpheric noise.\r
-INT64 Random64[200]= {0x285eb7722a62ce6eull,\r
- 0xa84c7463e2b7856bull,\r
- 0xb29100d6abcc8666ull,\r
- 0xf9bfca5b7461fb1full,\r
- 0x51c8dafc30c88dadull,\r
- 0x0687468c365ec51dull,\r
- 0x2bf2cd3ad64b6218ull,\r
- 0xc20565a4d1f00f9eull,\r
- 0x7d533575d313c658ull,\r
- 0xbf2fba6b00725b85ull,\r
- 0x4cfc2f6557e722beull,\r
- 0xedbce818556dfb2bull,\r
- 0xd9df508027db1bbeull,\r
- 0x21f2d922f712f48bull,\r
- 0xcd8b289d83a65804ull,\r
- 0x4a19cfb02445a6d2ull,\r
- 0xc95e56b1e19a4f94ull,\r
- 0xcfeeeaccaf477248ull,\r
- 0x4eec3378b73bb25cull,\r
- 0x18a3f38d1c48b2beull,\r
- 0x71b79ab5cb1e3730ull,\r
- 0x79cdb30e2f38309dull,\r
- 0xb41d4983bdbc8d6full,\r
- 0xba9f57c01b66b7e3ull,\r
- 0xe400503c95c16568ull,\r
- 0x5977bfd4630294f1ull,\r
- 0x57d4d7940099676full,\r
- 0xd945de9268f4b191ull,\r
- 0x4034711421eaf806ull,\r
- 0x6d8108a4a6d58c22ull,\r
- 0x5c421818ddbdd4eeull,\r
- 0xbd9b7e4071713c13ull,\r
- 0xa60d1d6e793e5eb2ull,\r
- 0x7443fb031b8ec6c7ull,\r
- 0xd8290c7120e05d4aull,\r
- 0x797fb1d9a6a8d27full,\r
- 0x543ec268ab1f2e45ull,\r
- 0xcaf2a6139701f320ull,\r
- 0x9519611d130bee47ull,\r
- 0x19bbc533f018be1aull,\r
- 0xdbfacdfeb573133dull,\r
- 0x3255dacc4c7bfe12ull,\r
- 0xbc6c9228e5518f6eull,\r
- 0xb5c1681037347178ull,\r
- 0xbaaa2cfef186bfadull,\r
- 0x1834b8ab0f9e876eull,\r
- 0x9d7b7f228433e0f7ull,\r
- 0xa99cc292d003dd09ull,\r
- 0xc0cb8037046b5295ull,\r
- 0xa6ffa3d4671aa3d2ull,\r
- 0xc27023fbee2862e6ull,\r
- 0x5a9877bcc4bd3172ull,\r
- 0xfcb0da3caf9fcfe0ull,\r
- 0xc35ef57e1866ceaeull,\r
- 0xd4f7c927d169a115ull,\r
- 0x699054518fc74756ull,\r
- 0xa75cbf617fc9db8dull,\r
- 0x7f3adf4369665a9cull,\r
- 0x6b98eeeb4c517f42ull,\r
- 0xa12e44f5de954f24ull,\r
- 0x5789ded4dced978eull,\r
- 0xe4dd20ed27cd3567ull,\r
- 0x9b4e90c365b8790bull,\r
- 0xd486ed6e099f499bull,\r
- 0x3f3d0ccfeaa0c0dcull,\r
- 0x548c746cdb192adaull,\r
- 0x8ce636d8469fe2dcull,\r
- 0xca59ec929549a6a1ull,\r
- 0x647b9878deaba1f0ull,\r
- 0xebbb4b2641c54d34ull,\r
- 0x7be6d2918b680abdull,\r
- 0x02ad265fb4733490ull,\r
- 0xfe1053044faf3486ull,\r
- 0x539ea358ff6b6df3ull,\r
- 0x025d73224a2b5826ull,\r
- 0x7daad302451f41b3ull,\r
- 0x6038455ddb535976ull,\r
- 0x8d6d00a9a728a067ull,\r
- 0xe9f03d61d4965d59ull,\r
- 0x38314b8102daff3bull,\r
- 0x56b335e7893a76f1ull,\r
- 0x1048ca2f415712abull,\r
- 0xa9bc989a891dc173ull,\r
- 0xb741df3ae02836c2ull,\r
- 0x7711e6c6f5830783ull,\r
- 0x8edbf2be9226e24bull,\r
- 0xe4a4b8ba310fc2e2ull,\r
- 0xbc7b67f4a02f23c8ull,\r
- 0x5669b1a9d6d8df17ull,\r
- 0xdd3ebf2e3c516e26ull,\r
- 0x77bdd6def5236c4full,\r
- 0x9aeb54bdffacd65eull,\r
- 0xab676483404a21b8ull,\r
- 0xf7270f77a9d1b3a3ull,\r
- 0x3794e1cdcc7de433ull,\r
- 0x8e2b74d3a06aa56aull,\r
- 0x572698d05b901d40ull,\r
- 0x7bd6c265c1dd5cdfull,\r
- 0xd2f68a53970db82eull,\r
- 0x0e1d5f5dd9bd23bdull,\r
- 0x48814c6813505051ull,\r
- 0x8f2d21a7d4e4a481ull,\r
- 0x144531c871920cf3ull,\r
- 0x45c6f81c7fbc3b2full,\r
- 0xc562f1fc7f07a944ull,\r
- 0xaabdf3d0aa9872e4ull,\r
- 0x8db1f9d827c8df98ull,\r
- 0xea787210e13c16c7ull,\r
- 0xd43f6f582629ff39ull,\r
- 0x6ec7599da4cb298dull,\r
- 0xfa99d7196097dd94ull,\r
- 0xbe3a8a172a62f40dull,\r
- 0x07477bc03d9d5471ull,\r
- 0x03777d1ee44c0fa6ull,\r
- 0x5c6df847b0ae6fd1ull,\r
- 0xc1fc3bc2352d8125ull,\r
- 0x6800e3321d35b697ull,\r
- 0x51cc735e1b0920a8ull,\r
- 0x93dc60a2430c11acull,\r
- 0x13b6bf8ffc4e9e30ull,\r
- 0xada08114e6a01701ull,\r
- 0x1459b7a4254da4cdull,\r
- 0xf7575cd7ededcdfdull,\r
- 0x1e43675ed5ed33eeull,\r
- 0x639f5ff579cc30d4ull,\r
- 0x8f4f75d7eea7e300ull,\r
- 0x518939accd43dadeull,\r
- 0x989a77577fac24e2ull,\r
- 0x86c5e3998c819d51ull,\r
- 0xb84770f9cc15d139ull,\r
- 0x11544010174c2c99ull,\r
- 0xfb238b962405a579ull,\r
- 0xca5bddde9b80cb7bull,\r
- 0xbbe71928190dfab4ull,\r
- 0x0ec620294742b8c8ull,\r
- 0x90bc7a703ed63900ull,\r
- 0x2c8a80e1e85f72bbull,\r
- 0xf19ab262ed2ad914ull,\r
- 0xa233e89fab33793cull,\r
- 0x75b6f59568a958f5ull,\r
- 0x31fb15af9a41aca2ull,\r
- 0xf89db05aa21b3d1cull,\r
- 0x0023d08c52147f63ull,\r
- 0x8e0e3c2aba81e8fdull,\r
- 0xdb2efd057c157e71ull,\r
- 0x1a1797a83e39dfe4ull,\r
- 0x3afc43e1979f507dull,\r
- 0x0e647a8d8abe0936ull,\r
- 0xc8e03f27082eb0daull,\r
- 0xc1b0c11db52da528ull,\r
- 0xadeb52a144f497ceull,\r
- 0x046fc2f53bd97c9bull,\r
- 0x2d587fdf5fae80d5ull,\r
- 0x14036c3fb77fd73eull,\r
- 0x883f768734d14676ull,\r
- 0x212df988a83caffeull,\r
- 0x36b41dac32387e17ull,\r
- 0x9dbee8d7cf804bccull,\r
- 0x34d9a58fb9a35794ull,\r
- 0xda82beba879e3576ull,\r
- 0x0366148280e5adf1ull,\r
- 0x634085dcdba7b174ull,\r
- 0x8451523252446534ull,\r
- 0xc74ce8b4d5175b9aull,\r
- 0x40afebd30a9a4837ull,\r
- 0x7232952ce84e90beull,\r
- 0x7443a6e37b87992dull,\r
- 0xace8a33649218d9aull,\r
- 0x9ad8af9614b75655ull,\r
- 0xd518d9a700179ac2ull,\r
- 0xc7f906d90fd36259ull,\r
- 0xe2c47ec3abd8a12dull,\r
- 0xca05e96fcbbb76c1ull,\r
- 0x7186661c9ddf4973ull,\r
- 0x0146f5f47d5f42c2ull,\r
- 0x74a7c485608ea178ull,\r
- 0x8cb5e3e5f84f3040ull,\r
- 0xb330869dc366037dull,\r
- 0x0dbc4d22f932bbd4ull,\r
- 0xa8460b43946a5ecbull,\r
- 0xcee72c72fe5ddea6ull,\r
- 0x9f953cc7859b2a4eull,\r
- 0x95ea396619e60182ull,\r
- 0xfe9890efa6aa8c3eull,\r
- 0x3c9a5691d1e25799ull,\r
- 0xa54eeb19aa718b62ull,\r
- 0x69907b2ec11d82b7ull,\r
- 0x408c5405984caa65ull,\r
- 0x6f11a21711640470ull,\r
- 0x20b9227a8f53f9aeull,\r
- 0x42df63b7ec442178ull,\r
- 0xbec247f79407d00aull,\r
- 0x2946c31558eab2bcull,\r
- 0x84d045ffa174048eull,\r
- 0x28bad532ba4a450cull,\r
- 0x93ccf0cbaa274d7aull,\r
- 0xff30008411d25158ull,\r
- 0x3845ce6dcc30080aull,\r
- 0xcf5925239b6f3dbaull,\r
- 0x1f148649ed53660dull};\r
-\r
-\r
-\r
-\r
-\r
-\r
-gs_int32_t nextrandom = 0;\r
-gs_int32_t endrandom = 100;\r
-\r
-\r
-// The first "endrandom" numbers returned are truely random INT64.\r
-// They are perfect for bases of new hash functions, and currently\r
-// we have just enough such random numbers for tabulation based\r
-// 5-universal hashing, or one of the other schemes provided.\r
-// The remaining ones are based on the pseudo-random mrand48() which\r
-// returns a signed 32-bit number. We do not use the unsigned lrand48()\r
-// which only returns 31-bit.\r
-inline static INT64 RandINT64() {\r
- INT64 r,r1;\r
- if (nextrandom<endrandom) r=Random64[nextrandom++];\r
- else {\r
- if (nextrandom==endrandom)\r
- fprintf(stderr,"Switching from random to pseudo-random in Rand64");\r
- r =(INT64) mrand48();\r
- r1 = (INT64) mrand48();\r
- r=r<<32;\r
- r=r^r1;\r
- }\r
- return r;\r
-}\r
-\r
-\r
-/* Computes ax+b mod Prime, possibly plus Prime,\r
- exploiting the structure of Prime. */\r
-inline static INT64 MultAddPrime(INT32 x, INT64 a, INT64 b) {\r
- INT64 a0,a1,c0,c1,c;\r
- a0 = LOW(a)*x;\r
- a1 = HIGH(a)*x;\r
- c0 = a0+(a1<<32);\r
- c1 = (a0>>32)+a1;\r
- c = (c0&Prime61)+(c1>>29)+b;\r
- return c;\r
-} // 12 instructions\r
-\r
-/* CWtrick for 32-bit key x with prime 2^61-1 */\r
-inline static INT64 Hash61bit5univ(INT32 x, Hash61bit5univID index) {\r
- INT64 h;\r
- gs_int32_t i;\r
-\r
- h = index[0];\r
- for (i=1;i<5;i++) h = MultAddPrime(x,h,index[i]);\r
- h = (h&Prime61)+(h>>61);\r
- if (h>=Prime61) h-=Prime61;\r
- return h;\r
-}\r
-\r
-\r
-\r
-inline static INT64* InitHash61bit5univ() {\r
- gs_int32_t i;\r
-\r
- Hash61bit5univID hid = (INT64*) malloc(5*sizeof(INT64));\r
- for (i=0;i<5;i++) {\r
- hid[i]=RandINT64();\r
- hid[i] = (hid[i]&Prime61)+(hid[i]>>61);\r
- if (hid[i]>=Prime61) hid[i]-=Prime61;\r
- }\r
- return hid;\r
-}\r
-\r
-inline static HashTab32bit5univID InitHashTab32bit5univ() {\r
- gs_uint32_t i,j;\r
- HashTab32bit5univID htab;\r
- Hash61bit5univID h61id;\r
-\r
-\r
- htab = (HashTab32bit5univID) malloc(3*sizeof(INT32*));\r
- for (i=0;i<3;i++) {\r
- htab[i] = (INT32*) malloc(65538*sizeof(INT32));\r
- h61id = InitHash61bit5univ();\r
- for (j=0;j<65538;j++) htab[i][j]=Hash61bit5univ(j,h61id);\r
- }\r
- return htab;\r
-}\r
-\r
-/* tabulation based hashing for 32-bit key x using 16-bit characters.*/\r
-inline INT64 HashTab32bit5univ(INT32 x, HashTab32bit5univID htab) {\r
- INT32 x0, x1, x2;\r
- x0 = x&65535;\r
- x1 = x>>16;\r
- x2 = x0 + x1;\r
- x2 = 2 - (x2>>16) + (x2&65535); // optional compression\r
- return htab[0][x0]^htab[1][x1]^htab[2][x2];\r
-} // 8 + 4 = 12 instructions\r
-\r
-\r
-/* Tabulation based hashing for 32-bit keys using 16-bit characters.*/\r
-/* Uses 500KB tables and makes 2 lookups per hash. */\r
-\r
-inline static HashTab32bitLPID InitHashShortTab32bitLP() {\r
- gs_uint32_t i,j;\r
- HashTab32bitLPID htab;\r
- Hash61bit5univID h61id;\r
-\r
-\r
- htab = (HashTab32bitLPID) malloc(2*sizeof(INT32*));\r
- for (i=0;i<2;i++) {\r
- htab[i] = (INT32*) malloc(65536*sizeof(INT32));\r
- h61id = InitHash61bit5univ();\r
- for (j=0;j<65536;j++) htab[i][j]=Hash61bit5univ(j,h61id);\r
- }\r
- return htab;\r
-}\r
-\r
-/* tabulation based hashing for 32-bit key x using 16-bit characters.*/\r
-inline INT64 HashShortTab32bitLP(INT32 x, HashTab32bitLPID htab) {\r
- INT32 x0, x1;\r
- x0 = x&65535;\r
- x1 = x>>16;\r
- return htab[0][x0]^htab[1][x1];\r
-}\r
-\r
-\r
-/* Tabulation based hashing for 32-bit keys using 8-bit characters.*/\r
-/* Uses 4KB tables and makes 4 lookups per hash. */\r
-\r
-inline static HashTab32bitLPID InitHashCharTab32bitLP() {\r
- gs_uint32_t i,j;\r
- HashTab32bitLPID htab;\r
- Hash61bit5univID h61id;\r
-\r
-\r
- htab = (HashTab32bitLPID) malloc(4*sizeof(INT32*));\r
- for (i=0;i<4;i++) {\r
- htab[i] = (INT32*) malloc(256*sizeof(INT32));\r
- h61id = InitHash61bit5univ();\r
- for (j=0;j<256;j++) htab[i][j]=Hash61bit5univ(j,h61id);\r
- }\r
- return htab;\r
-}\r
-\r
-inline INT32 HashCharTab32bitLP(INT32 x, HashTab32bitLPID htab) {\r
- INT8 c;\r
- INT32 h;\r
- gs_int32_t i;\r
-\r
- c=x;\r
- h=htab[0][c];\r
- for (i=1;i<4;i++) {\r
- x>>=8;\r
- c=x;\r
- h=h^htab[i][c];\r
- }\r
- return h;\r
-}\r
-\r
-\r
-/* Tabulation based hashing for 64-bit keys using 16-bit characters.*/\r
-/* Uses 2MB tables and makes 4 lookups per hash. */\r
-\r
-\r
-inline static HashTab64bitLPID InitHashShortTab64bitLP() {\r
- gs_int32_t i,j;\r
- HashTab64bitLPID htab;\r
- Hash61bit5univID h61id;\r
-\r
-\r
- htab = (HashTab64bitLPID) malloc(4*sizeof(INT64*));\r
- for (i=0;i<4;i++) {\r
- htab[i] = (INT64*) malloc(65536*sizeof(INT64));\r
- h61id = InitHash61bit5univ();\r
- for (j=0;j<65536;j++) htab[i][j]=Hash61bit5univ(j,h61id);\r
- h61id = InitHash61bit5univ();\r
- for (j=0;j<65536;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;\r
- }\r
- return htab;\r
-}\r
-\r
-inline INT64 HashShortTab64bitLP(INT64 x, HashTab64bitLPID htab) {\r
- INT16 c;\r
- INT64 h;\r
- gs_int32_t i;\r
-\r
- c=x;\r
- h=htab[0][c];\r
- for (i=1;i<4;i++) {\r
- x>>=16;\r
- c=x;\r
- h=h^htab[i][c];\r
- }\r
- return h;\r
-}\r
-\r
-/* Tabulation based hashing for 64-bit keys using 8-bit characters.*/\r
-/* Uses 14KB tables and makes 8 lookups per hash. */\r
-\r
-inline static HashTab64bitLPID InitHashCharTab64bitLP() {\r
- gs_int32_t i,j;\r
- HashTab64bitLPID htab;\r
- Hash61bit5univID h61id;\r
-\r
-\r
- htab = (HashTab64bitLPID) malloc(8*sizeof(INT64*));\r
- for (i=0;i<8;i++) {\r
- htab[i] = (INT64*) malloc(256*sizeof(INT64));\r
- h61id = InitHash61bit5univ();\r
- for (j=0;j<256;j++) htab[i][j]=Hash61bit5univ(j,h61id);\r
- h61id = InitHash61bit5univ();\r
- for (j=0;j<256;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;\r
- }\r
- return htab;\r
-}\r
-\r
-inline INT64 HashCharTab64bitLP(INT64 x, HashTab64bitLPID htab) {\r
- INT8 c;\r
- INT64 h;\r
- gs_int32_t i;\r
-\r
- c=x;\r
- h=htab[0][c];\r
- for (i=1;i<8;i++) {\r
- x>>=8;\r
- c=x;\r
- h=h^htab[i][c];\r
- }\r
- return h;\r
-}\r
-\r
-\r
-/* Tabulation based hashing for 34-bit keys using 10-11-bit characters.*/\r
-/* Uses 20KB tables and makes 3 lookups per hash. */\r
-\r
-inline static HashTab32bitLPID InitHashMipTab32bitLP() {\r
- gs_int32_t i,j;\r
- HashTab32bitLPID htab;\r
- Hash61bit5univID h61id;\r
-\r
-\r
- htab = (HashTab32bitLPID) malloc(6*sizeof(INT32*));\r
- for (i=0;i<2;i++) {\r
- htab[i] = (INT32*) malloc(0x800*sizeof(INT32));\r
- h61id = InitHash61bit5univ();\r
- for (j=0;j<0x800;j++) htab[i][j]=(INT32) Hash61bit5univ(j,h61id);\r
- }\r
- for (i=2;i<3;i++) {\r
- htab[i] = (INT32*) malloc(0x400*sizeof(INT32));\r
- h61id = InitHash61bit5univ();\r
- for (j=0;j<0x400;j++) htab[i][j]=(INT32) Hash61bit5univ(j,h61id);\r
- }\r
- return htab;\r
-}\r
-\r
-inline INT32 HashMipTab32bitLP(INT32 x, HashTab32bitLPID htab) {\r
- INT32 c;\r
- INT32 h;\r
- gs_int32_t i;\r
-\r
- h=0;\r
- for (i=0;i<2;i++) {\r
- c= x & 0x7FFull;\r
- h=h^htab[i][c];\r
- x>>=11;\r
- }\r
- c= x & 0x3FFull;\r
- h=h^htab[2][c];\r
- return h;\r
-}\r
-\r
-/* Tabulation based hashing for 64-bit keys using 10-11-bit characters.*/\r
-/* Uses 80KB tables and makes 6 lookups per hash. */\r
-\r
-inline static HashTab64bitLPID InitHashMipTab64bitLP() {\r
- gs_int32_t i,j;\r
- HashTab64bitLPID htab;\r
- Hash61bit5univID h61id;\r
-\r
-\r
- htab = (HashTab64bitLPID) malloc(6*sizeof(INT64*));\r
- for (i=0;i<4;i++) {\r
- htab[i] = (INT64*) malloc(0x800*sizeof(INT64));\r
- h61id = InitHash61bit5univ();\r
- for (j=0;j<0x800;j++) htab[i][j]=Hash61bit5univ(j,h61id);\r
- h61id = InitHash61bit5univ();\r
- for (j=0;j<0x800;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;\r
- }\r
- for (i=4;i<6;i++) {\r
- htab[i] = (INT64*) malloc(0x400*sizeof(INT64));\r
- h61id = InitHash61bit5univ();\r
- for (j=0;j<0x400;j++) htab[i][j]=Hash61bit5univ(j,h61id);\r
- h61id = InitHash61bit5univ();\r
- for (j=0;j<0x400;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;\r
- }\r
- return htab;\r
-}\r
-\r
-inline INT64 HashMipTab64bitLP(INT64 x, HashTab64bitLPID htab) {\r
- INT64 c;\r
- INT64 h;\r
- gs_int32_t i;\r
-\r
- h=0;\r
- for (i=0;i<4;i++) {\r
- c= x & 0x7FFull;\r
- h=h^htab[i][c];\r
- x>>=11;\r
- }\r
- for (i=4;i<6;i++) {\r
- c= x & 0x3FFull;\r
- h=h^htab[i][c];\r
- x>>=10;\r
- }\r
- return h;\r
-}\r
-\r
-\r
-\r
-inline static Hash32bit2univID InitHash32bit2univ() {\r
- Hash32bit2univID hid = (INT64*) malloc(2*sizeof(INT64));\r
- hid[0]=RandINT64();\r
- hid[1]=RandINT64();\r
- return hid;\r
-}\r
-\r
-inline INT32 Hash32bit2univ(INT32 x, Hash32bit2univID hid) {\r
- INT64 H;\r
- INT32 h;\r
- H = (x*hid[0])+hid[1];\r
- h = H >> 32;\r
- return h;\r
-}\r
-\r
-\r
-// For the string hashing below is for string of 32bit integers, and\r
-// the output is a 2-universal 32-bit number.\r
-\r
-inline static Hash32bit2univID InitStringHash32bit2univ(gs_int32_t length) {\r
- gs_int32_t i;\r
- Hash32bit2univID hid = (INT64*) malloc((length+1)*sizeof(INT64));\r
- for (i=0;i<=length;i++) hid[i]=RandINT64();\r
- return hid;\r
-}\r
-\r
-\r
-\r
-//assumes string length even\r
-inline INT32 EvenStringHash32bit2univ(INT32 * x, gs_int32_t length,\r
- Hash32bit2univID hid) {\r
- gs_int32_t i;\r
- INT64 H;\r
- INT32 h;\r
- INT64 xh1,xh2,y;\r
-\r
- H=0;\r
- for (i=0;i<length;i+=2) {\r
- xh1=x[i]+hid[i+1];\r
- xh2=x[i+1]+hid[i];\r
- y=xh1*xh2;\r
- H = H^y;\r
- }\r
- H=H^hid[length];\r
- h = H >> 32;\r
- return h;\r
-}\r
-\r
-//assumes string length odd\r
-inline INT32 OddStringHash32bit2univ(INT32 * x, gs_int32_t length,\r
- Hash32bit2univID hid) {\r
- gs_int32_t i;\r
- INT64 H;\r
- INT32 h;\r
- INT64 xh1,xh2,y;\r
-\r
- H = x[0]*hid[0];\r
- for (i=1;i<length;i+=2) {\r
- xh1=x[i]+hid[i+1];\r
- xh2=x[i+1]+hid[i];\r
- y=xh1*xh2;\r
- H = H^y;\r
- }\r
- H=H^hid[length];\r
- h = H >> 32;\r
- return h;\r
-}\r
-\r
-//Below we have the generic algorithm for fixed length string hashing\r
-//For shorter strings of length < 6, it is worthwhile using\r
-//specialized versions with fewer tests.\r
-//All versions rely on the some initialization above.\r
-inline INT32 StringHash32bit2univ(INT32 * x, gs_int32_t length, Hash32bit2univID hid) {\r
-// gs_int32_t i;\r
-// INT64 H=0;\r
-// INT32 h;\r
-// for (i=0;i<length;i++) H = H^(x[i]*hid[i]);\r
-// H=H^hid[length];\r
-// h = H >> 32;\r
-// return h;\r
- if (length&1) return OddStringHash32bit2univ(x,length,hid);\r
- else return EvenStringHash32bit2univ(x,length,hid);\r
-}\r
-\r
-\r
-//string of length 2\r
-inline INT32 String2Hash32bit2univ(INT32 * x,Hash32bit2univID hid) {\r
- INT64 H;\r
- INT32 h;\r
- INT64 xh1,xh2;\r
-\r
- xh1=x[0]+hid[1];\r
- xh2=x[1]+hid[0];\r
- H=xh1*xh2;\r
- H=H^hid[2];\r
- h = H >> 32;\r
- return h;\r
-}\r
-\r
-//string of length 3\r
-inline INT32 String3Hash32bit2univ(INT32 * x,Hash32bit2univID hid) {\r
- INT64 H;\r
- INT32 h;\r
- INT64 xh1,xh2,y;\r
-\r
- H = x[0]*hid[0];\r
- xh1=x[1]+hid[2];\r
- xh2=x[2]+hid[1];\r
- y=xh1*xh2;\r
- H = H^y;\r
- H=H^hid[3];\r
- h = H >> 32;\r
- return h;\r
-}\r
-\r
-\r
-inline static VarINT32StringHash32bit2univID InitVarINT32StringHash32bitUniv() {\r
- gs_int32_t i;\r
- VarINT32StringHash32bit2univID hid;\r
-\r
- hid = (VarINT32StringHash32bit2univID) malloc(sizeof(VISHID));\r
- for (i=0;i<16;i++) hid->str_compr_hid[i]=RandINT64();\r
- hid->string_hid = RandINT64() & Prime61;\r
- // hid->last_compr_hid = RandINT64() | 0x1ull;\r
- return hid;\r
- }\r
-\r
-inline INT32 VarINT32StringHash32bitUniv(INT32 * x, gs_int32_t length, VarINT32StringHash32bit2univID hid) {\r
-\r
- INT32 h,i,j,d;\r
- gs_int32_t e;\r
- INT64 C,H,M;\r
- INT64 xh1,xh2,y;\r
- INT64 HxM00,HxM01,HxM10,HxM11,C0,C1,C2,CC;\r
-\r
- if (length<16) return StringHash32bit2univ(x,length,hid->str_compr_hid);\r
- H=0;\r
- i=0;\r
- for (;;) {\r
- d=length-i;\r
- C = 0;\r
- j=0;\r
- if (d>16) d=16;\r
- else if (d&1) {\r
- C = x[i]*hid->str_compr_hid[j];\r
- i++;\r
- j++;\r
- }\r
- for (e=((int) (d>>1));e>0;e--) {\r
- xh1=x[i]+hid->str_compr_hid[j+1];\r
- xh2=x[i+1]+hid->str_compr_hid[j];\r
- y=xh1*xh2;\r
- C = C^y;\r
- i+=2;\r
- j+=2;\r
- }\r
- if (i==(INT32) length) {\r
- H<<=4;\r
- H+=C;\r
- H+=((INT64) length)*hid->length_factor;\r
- h=(H>>32);\r
- return h;\r
- }\r
- C>>=4;\r
- H+=C;\r
- M=hid->string_hid;\r
- //Multiply H*M mod Prime61, possibly plus Prime,\r
- //exploiting the structure of Prime61.\r
- //We assume H<2*Prime61 and M<=Prime61, e.g.,\r
- //H*M <2^123.\r
- HxM00 = LOW(H)*LOW(M);\r
- HxM01 = LOW(H)*HIGH(M);\r
- HxM10 = HIGH(H)*LOW(M);\r
- HxM11 = HIGH(H)*HIGH(M); //has at most 60 bits\r
- C0 = HxM00+(HxM01<<32)+(HxM10<<32);//overflow doesn't matter here.\r
- C0 = C0&Prime61; //Has at most 61 bits.\r
- C1 = (HxM00>>32)+HxM01+HxM10; //Overflow impossible each <=62 bits\r
- C1 = C1>>29; //Has at most 33 bits.\r
- C2 = HxM11<<3; //Has at most 123-64+3=62 bits.\r
- CC = C0+C1+C2; //At most 63 bits.\r
- H = (CC&Prime61)+(CC>>61); //<2*Prime\r
- }\r
-}\r
-\r
-inline static VarStringHash32bit2univID InitVarStringHash32bitUniv() {\r
- gs_int32_t i;\r
- VarStringHash32bit2univID hid;\r
-\r
- hid = (VarStringHash32bit2univID) malloc(sizeof(VSHID));\r
- for (i=0;i<16;i++) {\r
- hid->int_str[i]=0;\r
- hid->str_compr_hid[i]=RandINT64();\r
- }\r
- hid->string_hid = RandINT64() & Prime61;\r
- // hid->last_compr_hid = RandINT64() | 0x1ull;\r
- return hid;\r
- }\r
-\r
-inline INT32 VarStringHash32bitUniv(char * x, gs_int32_t length,\r
- VarStringHash32bit2univID hid) {\r
-\r
- INT32 h,i,l,d,str_ix;\r
- INT64 C,H,M;\r
- INT64 xh1,xh2,y;\r
- INT64 HxM00,HxM01,HxM10,HxM11,C0,C1,C2,CC;\r
-\r
- // Assumes hid->int_str[*]==0\r
-\r
- if (length<=60) {\r
- memcpy(hid->int_str,x,length);\r
- d=(length+3)>>2; //<16\r
- h=StringHash32bit2univ(hid->int_str,d,hid->str_compr_hid);\r
- for (i=0;i<d;i++) hid->int_str[i]=0;\r
- return h;\r
- }\r
- H=0;\r
- str_ix=0;\r
- for (;;) {\r
- C = 0;\r
- l=length-str_ix;\r
- if (l>64) l=64;\r
- memcpy(hid->int_str,x+str_ix,l);\r
- str_ix+=l;\r
- d=(l+3)>>2;\r
- i=0;\r
- if (d&1) {\r
- C = hid->int_str[i]*hid->str_compr_hid[i];\r
- i++;\r
- }\r
- while (i<d) {\r
- xh1=hid->int_str[i]+hid->str_compr_hid[i+1];\r
- xh2=hid->int_str[i+1]+hid->str_compr_hid[i];\r
- y=xh1*xh2;\r
- C = C^y;\r
- i+=2;\r
- }\r
- for (i=0;i<d;i++) hid->int_str[i]=0;\r
- if (str_ix==(INT32) length) {\r
- H<<=4;\r
- H+=C;\r
- H+=((INT64) length)*hid->length_factor;\r
- h=(H>>32);\r
- return h;\r
- }\r
- C>>=4;\r
- H+=C;\r
- M=hid->string_hid;\r
- //Multiply H*M mod Prime61, possibly plus Prime,\r
- //exploiting the structure of Prime61.\r
- //We assume H<2*Prime61 and M<=Prime61, e.g.,\r
- //H*M <2^123.\r
- HxM00 = LOW(H)*LOW(M);\r
- HxM01 = LOW(H)*HIGH(M);\r
- HxM10 = HIGH(H)*LOW(M);\r
- HxM11 = HIGH(H)*HIGH(M); //has at most 60 bits\r
- C0 = HxM00+(HxM01<<32)+(HxM10<<32);//overflow doesn't matter here.\r
- C0 = C0&Prime61; //Has at most 61 bits.\r
- C1 = (HxM00>>32)+HxM01+HxM10; //Overflow impossible each <=62 bits\r
- C1 = C1>>29; //Has at most 33 bits.\r
- C2 = HxM11<<3; //Has at most 123-64+3=62 bits.\r
- CC = C0+C1+C2; //At most 63 bits.\r
- H = (CC&Prime61)+(CC>>61); //<2*Prime\r
- }\r
-}\r
-\r
-\r
-#endif\r
-\r
+/* ------------------------------------------------
+Copyright 2014 AT&T Intellectual Property
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ ------------------------------------------- */
+
+#ifndef _STRINGHASH_H
+#define _STRINGHASH_H
+#include <stdlib.h>
+#include <stdio.h>
+#include <limits.h>
+#include <stdint.h>
+#include <string.h>
+
+//Provides 5-universal hash functions.
+//Both a direct polynomial based one,
+//and the much faster tabulation based one.
+//
+//It also provides 2-universal string hashing.
+//For most applicatoins, introdcing a slight error,
+//if you want almost 5-uninversal hashing for strings,
+//you can do a 2-universal hashing into 32-bit integer
+//getting very few collissoins, and then use the above
+//5-universal one.
+
+
+
+typedef gs_uint8_t INT8;
+typedef gs_uint16_t INT16;
+typedef gs_uint32_t INT32;
+typedef gs_uint64_t INT64;
+
+typedef INT64 * Hash61bit5univID;
+typedef INT32** HashTab32bit5univID;
+typedef INT32** HashTab32bitLPID;
+typedef INT64** HashTab64bitLPID;
+typedef INT64 * Hash32bit2univID;
+typedef INT32 Hash32bitUnivID;
+
+const INT32 MAX_INT32 = UINT_MAX;
+
+typedef struct {
+ INT64 str_compr_hid[16];
+ INT64 string_hid;
+ INT64 length_factor;
+ // INT64 last_compr_hid;
+} VISHID;
+
+typedef VISHID * VarINT32StringHash32bit2univID;
+
+
+typedef struct {
+ INT32 int_str[16];
+ INT64 str_compr_hid[16];
+ INT64 string_hid;
+ INT64 length_factor;
+ // INT64 last_compr_hid;
+} VSHID;
+
+typedef VSHID * VarStringHash32bit2univID;
+
+
+
+
+
+// different views of a 64-bit double word
+typedef union {
+ INT64 as_int64;
+ INT32 as_int32s[2];
+ INT16 as_int16s[4];
+} int64views;
+
+const INT64 LowOnes = (((INT64)1)<<32)-1;
+
+#define LOW(x) ((x)&LowOnes) // extract lower 32 bits from INT64
+#define HIGH(x) ((x)>>32) // extract higher 32 bits from INT64
+
+const INT64 Prime61 = (((INT64)1)<<61) - 1;
+
+
+//Random numbers from random.org based on atmostpheric noise.
+INT64 Random64[200]= {0x285eb7722a62ce6eull,
+ 0xa84c7463e2b7856bull,
+ 0xb29100d6abcc8666ull,
+ 0xf9bfca5b7461fb1full,
+ 0x51c8dafc30c88dadull,
+ 0x0687468c365ec51dull,
+ 0x2bf2cd3ad64b6218ull,
+ 0xc20565a4d1f00f9eull,
+ 0x7d533575d313c658ull,
+ 0xbf2fba6b00725b85ull,
+ 0x4cfc2f6557e722beull,
+ 0xedbce818556dfb2bull,
+ 0xd9df508027db1bbeull,
+ 0x21f2d922f712f48bull,
+ 0xcd8b289d83a65804ull,
+ 0x4a19cfb02445a6d2ull,
+ 0xc95e56b1e19a4f94ull,
+ 0xcfeeeaccaf477248ull,
+ 0x4eec3378b73bb25cull,
+ 0x18a3f38d1c48b2beull,
+ 0x71b79ab5cb1e3730ull,
+ 0x79cdb30e2f38309dull,
+ 0xb41d4983bdbc8d6full,
+ 0xba9f57c01b66b7e3ull,
+ 0xe400503c95c16568ull,
+ 0x5977bfd4630294f1ull,
+ 0x57d4d7940099676full,
+ 0xd945de9268f4b191ull,
+ 0x4034711421eaf806ull,
+ 0x6d8108a4a6d58c22ull,
+ 0x5c421818ddbdd4eeull,
+ 0xbd9b7e4071713c13ull,
+ 0xa60d1d6e793e5eb2ull,
+ 0x7443fb031b8ec6c7ull,
+ 0xd8290c7120e05d4aull,
+ 0x797fb1d9a6a8d27full,
+ 0x543ec268ab1f2e45ull,
+ 0xcaf2a6139701f320ull,
+ 0x9519611d130bee47ull,
+ 0x19bbc533f018be1aull,
+ 0xdbfacdfeb573133dull,
+ 0x3255dacc4c7bfe12ull,
+ 0xbc6c9228e5518f6eull,
+ 0xb5c1681037347178ull,
+ 0xbaaa2cfef186bfadull,
+ 0x1834b8ab0f9e876eull,
+ 0x9d7b7f228433e0f7ull,
+ 0xa99cc292d003dd09ull,
+ 0xc0cb8037046b5295ull,
+ 0xa6ffa3d4671aa3d2ull,
+ 0xc27023fbee2862e6ull,
+ 0x5a9877bcc4bd3172ull,
+ 0xfcb0da3caf9fcfe0ull,
+ 0xc35ef57e1866ceaeull,
+ 0xd4f7c927d169a115ull,
+ 0x699054518fc74756ull,
+ 0xa75cbf617fc9db8dull,
+ 0x7f3adf4369665a9cull,
+ 0x6b98eeeb4c517f42ull,
+ 0xa12e44f5de954f24ull,
+ 0x5789ded4dced978eull,
+ 0xe4dd20ed27cd3567ull,
+ 0x9b4e90c365b8790bull,
+ 0xd486ed6e099f499bull,
+ 0x3f3d0ccfeaa0c0dcull,
+ 0x548c746cdb192adaull,
+ 0x8ce636d8469fe2dcull,
+ 0xca59ec929549a6a1ull,
+ 0x647b9878deaba1f0ull,
+ 0xebbb4b2641c54d34ull,
+ 0x7be6d2918b680abdull,
+ 0x02ad265fb4733490ull,
+ 0xfe1053044faf3486ull,
+ 0x539ea358ff6b6df3ull,
+ 0x025d73224a2b5826ull,
+ 0x7daad302451f41b3ull,
+ 0x6038455ddb535976ull,
+ 0x8d6d00a9a728a067ull,
+ 0xe9f03d61d4965d59ull,
+ 0x38314b8102daff3bull,
+ 0x56b335e7893a76f1ull,
+ 0x1048ca2f415712abull,
+ 0xa9bc989a891dc173ull,
+ 0xb741df3ae02836c2ull,
+ 0x7711e6c6f5830783ull,
+ 0x8edbf2be9226e24bull,
+ 0xe4a4b8ba310fc2e2ull,
+ 0xbc7b67f4a02f23c8ull,
+ 0x5669b1a9d6d8df17ull,
+ 0xdd3ebf2e3c516e26ull,
+ 0x77bdd6def5236c4full,
+ 0x9aeb54bdffacd65eull,
+ 0xab676483404a21b8ull,
+ 0xf7270f77a9d1b3a3ull,
+ 0x3794e1cdcc7de433ull,
+ 0x8e2b74d3a06aa56aull,
+ 0x572698d05b901d40ull,
+ 0x7bd6c265c1dd5cdfull,
+ 0xd2f68a53970db82eull,
+ 0x0e1d5f5dd9bd23bdull,
+ 0x48814c6813505051ull,
+ 0x8f2d21a7d4e4a481ull,
+ 0x144531c871920cf3ull,
+ 0x45c6f81c7fbc3b2full,
+ 0xc562f1fc7f07a944ull,
+ 0xaabdf3d0aa9872e4ull,
+ 0x8db1f9d827c8df98ull,
+ 0xea787210e13c16c7ull,
+ 0xd43f6f582629ff39ull,
+ 0x6ec7599da4cb298dull,
+ 0xfa99d7196097dd94ull,
+ 0xbe3a8a172a62f40dull,
+ 0x07477bc03d9d5471ull,
+ 0x03777d1ee44c0fa6ull,
+ 0x5c6df847b0ae6fd1ull,
+ 0xc1fc3bc2352d8125ull,
+ 0x6800e3321d35b697ull,
+ 0x51cc735e1b0920a8ull,
+ 0x93dc60a2430c11acull,
+ 0x13b6bf8ffc4e9e30ull,
+ 0xada08114e6a01701ull,
+ 0x1459b7a4254da4cdull,
+ 0xf7575cd7ededcdfdull,
+ 0x1e43675ed5ed33eeull,
+ 0x639f5ff579cc30d4ull,
+ 0x8f4f75d7eea7e300ull,
+ 0x518939accd43dadeull,
+ 0x989a77577fac24e2ull,
+ 0x86c5e3998c819d51ull,
+ 0xb84770f9cc15d139ull,
+ 0x11544010174c2c99ull,
+ 0xfb238b962405a579ull,
+ 0xca5bddde9b80cb7bull,
+ 0xbbe71928190dfab4ull,
+ 0x0ec620294742b8c8ull,
+ 0x90bc7a703ed63900ull,
+ 0x2c8a80e1e85f72bbull,
+ 0xf19ab262ed2ad914ull,
+ 0xa233e89fab33793cull,
+ 0x75b6f59568a958f5ull,
+ 0x31fb15af9a41aca2ull,
+ 0xf89db05aa21b3d1cull,
+ 0x0023d08c52147f63ull,
+ 0x8e0e3c2aba81e8fdull,
+ 0xdb2efd057c157e71ull,
+ 0x1a1797a83e39dfe4ull,
+ 0x3afc43e1979f507dull,
+ 0x0e647a8d8abe0936ull,
+ 0xc8e03f27082eb0daull,
+ 0xc1b0c11db52da528ull,
+ 0xadeb52a144f497ceull,
+ 0x046fc2f53bd97c9bull,
+ 0x2d587fdf5fae80d5ull,
+ 0x14036c3fb77fd73eull,
+ 0x883f768734d14676ull,
+ 0x212df988a83caffeull,
+ 0x36b41dac32387e17ull,
+ 0x9dbee8d7cf804bccull,
+ 0x34d9a58fb9a35794ull,
+ 0xda82beba879e3576ull,
+ 0x0366148280e5adf1ull,
+ 0x634085dcdba7b174ull,
+ 0x8451523252446534ull,
+ 0xc74ce8b4d5175b9aull,
+ 0x40afebd30a9a4837ull,
+ 0x7232952ce84e90beull,
+ 0x7443a6e37b87992dull,
+ 0xace8a33649218d9aull,
+ 0x9ad8af9614b75655ull,
+ 0xd518d9a700179ac2ull,
+ 0xc7f906d90fd36259ull,
+ 0xe2c47ec3abd8a12dull,
+ 0xca05e96fcbbb76c1ull,
+ 0x7186661c9ddf4973ull,
+ 0x0146f5f47d5f42c2ull,
+ 0x74a7c485608ea178ull,
+ 0x8cb5e3e5f84f3040ull,
+ 0xb330869dc366037dull,
+ 0x0dbc4d22f932bbd4ull,
+ 0xa8460b43946a5ecbull,
+ 0xcee72c72fe5ddea6ull,
+ 0x9f953cc7859b2a4eull,
+ 0x95ea396619e60182ull,
+ 0xfe9890efa6aa8c3eull,
+ 0x3c9a5691d1e25799ull,
+ 0xa54eeb19aa718b62ull,
+ 0x69907b2ec11d82b7ull,
+ 0x408c5405984caa65ull,
+ 0x6f11a21711640470ull,
+ 0x20b9227a8f53f9aeull,
+ 0x42df63b7ec442178ull,
+ 0xbec247f79407d00aull,
+ 0x2946c31558eab2bcull,
+ 0x84d045ffa174048eull,
+ 0x28bad532ba4a450cull,
+ 0x93ccf0cbaa274d7aull,
+ 0xff30008411d25158ull,
+ 0x3845ce6dcc30080aull,
+ 0xcf5925239b6f3dbaull,
+ 0x1f148649ed53660dull};
+
+
+
+
+
+
+gs_int32_t nextrandom = 0;
+gs_int32_t endrandom = 100;
+
+
+// The first "endrandom" numbers returned are truely random INT64.
+// They are perfect for bases of new hash functions, and currently
+// we have just enough such random numbers for tabulation based
+// 5-universal hashing, or one of the other schemes provided.
+// The remaining ones are based on the pseudo-random mrand48() which
+// returns a signed 32-bit number. We do not use the unsigned lrand48()
+// which only returns 31-bit.
+inline static INT64 RandINT64() {
+ INT64 r,r1;
+ if (nextrandom<endrandom) r=Random64[nextrandom++];
+ else {
+ if (nextrandom==endrandom)
+ fprintf(stderr,"Switching from random to pseudo-random in Rand64");
+ r =(INT64) mrand48();
+ r1 = (INT64) mrand48();
+ r=r<<32;
+ r=r^r1;
+ }
+ return r;
+}
+
+
+/* Computes ax+b mod Prime, possibly plus Prime,
+ exploiting the structure of Prime. */
+inline static INT64 MultAddPrime(INT32 x, INT64 a, INT64 b) {
+ INT64 a0,a1,c0,c1,c;
+ a0 = LOW(a)*x;
+ a1 = HIGH(a)*x;
+ c0 = a0+(a1<<32);
+ c1 = (a0>>32)+a1;
+ c = (c0&Prime61)+(c1>>29)+b;
+ return c;
+} // 12 instructions
+
+/* CWtrick for 32-bit key x with prime 2^61-1 */
+inline static INT64 Hash61bit5univ(INT32 x, Hash61bit5univID index) {
+ INT64 h;
+ gs_int32_t i;
+
+ h = index[0];
+ for (i=1;i<5;i++) h = MultAddPrime(x,h,index[i]);
+ h = (h&Prime61)+(h>>61);
+ if (h>=Prime61) h-=Prime61;
+ return h;
+}
+
+
+
+inline static INT64* InitHash61bit5univ() {
+ gs_int32_t i;
+
+ Hash61bit5univID hid = (INT64*) malloc(5*sizeof(INT64));
+ for (i=0;i<5;i++) {
+ hid[i]=RandINT64();
+ hid[i] = (hid[i]&Prime61)+(hid[i]>>61);
+ if (hid[i]>=Prime61) hid[i]-=Prime61;
+ }
+ return hid;
+}
+
+inline static HashTab32bit5univID InitHashTab32bit5univ() {
+ gs_uint32_t i,j;
+ HashTab32bit5univID htab;
+ Hash61bit5univID h61id;
+
+
+ htab = (HashTab32bit5univID) malloc(3*sizeof(INT32*));
+ for (i=0;i<3;i++) {
+ htab[i] = (INT32*) malloc(65538*sizeof(INT32));
+ h61id = InitHash61bit5univ();
+ for (j=0;j<65538;j++) htab[i][j]=Hash61bit5univ(j,h61id);
+ }
+ return htab;
+}
+
+/* tabulation based hashing for 32-bit key x using 16-bit characters.*/
+inline INT64 HashTab32bit5univ(INT32 x, HashTab32bit5univID htab) {
+ INT32 x0, x1, x2;
+ x0 = x&65535;
+ x1 = x>>16;
+ x2 = x0 + x1;
+ x2 = 2 - (x2>>16) + (x2&65535); // optional compression
+ return htab[0][x0]^htab[1][x1]^htab[2][x2];
+} // 8 + 4 = 12 instructions
+
+
+/* Tabulation based hashing for 32-bit keys using 16-bit characters.*/
+/* Uses 500KB tables and makes 2 lookups per hash. */
+
+inline static HashTab32bitLPID InitHashShortTab32bitLP() {
+ gs_uint32_t i,j;
+ HashTab32bitLPID htab;
+ Hash61bit5univID h61id;
+
+
+ htab = (HashTab32bitLPID) malloc(2*sizeof(INT32*));
+ for (i=0;i<2;i++) {
+ htab[i] = (INT32*) malloc(65536*sizeof(INT32));
+ h61id = InitHash61bit5univ();
+ for (j=0;j<65536;j++) htab[i][j]=Hash61bit5univ(j,h61id);
+ }
+ return htab;
+}
+
+/* tabulation based hashing for 32-bit key x using 16-bit characters.*/
+inline INT64 HashShortTab32bitLP(INT32 x, HashTab32bitLPID htab) {
+ INT32 x0, x1;
+ x0 = x&65535;
+ x1 = x>>16;
+ return htab[0][x0]^htab[1][x1];
+}
+
+
+/* Tabulation based hashing for 32-bit keys using 8-bit characters.*/
+/* Uses 4KB tables and makes 4 lookups per hash. */
+
+inline static HashTab32bitLPID InitHashCharTab32bitLP() {
+ gs_uint32_t i,j;
+ HashTab32bitLPID htab;
+ Hash61bit5univID h61id;
+
+
+ htab = (HashTab32bitLPID) malloc(4*sizeof(INT32*));
+ for (i=0;i<4;i++) {
+ htab[i] = (INT32*) malloc(256*sizeof(INT32));
+ h61id = InitHash61bit5univ();
+ for (j=0;j<256;j++) htab[i][j]=Hash61bit5univ(j,h61id);
+ }
+ return htab;
+}
+
+inline INT32 HashCharTab32bitLP(INT32 x, HashTab32bitLPID htab) {
+ INT8 c;
+ INT32 h;
+ gs_int32_t i;
+
+ c=x;
+ h=htab[0][c];
+ for (i=1;i<4;i++) {
+ x>>=8;
+ c=x;
+ h=h^htab[i][c];
+ }
+ return h;
+}
+
+
+/* Tabulation based hashing for 64-bit keys using 16-bit characters.*/
+/* Uses 2MB tables and makes 4 lookups per hash. */
+
+
+inline static HashTab64bitLPID InitHashShortTab64bitLP() {
+ gs_int32_t i,j;
+ HashTab64bitLPID htab;
+ Hash61bit5univID h61id;
+
+
+ htab = (HashTab64bitLPID) malloc(4*sizeof(INT64*));
+ for (i=0;i<4;i++) {
+ htab[i] = (INT64*) malloc(65536*sizeof(INT64));
+ h61id = InitHash61bit5univ();
+ for (j=0;j<65536;j++) htab[i][j]=Hash61bit5univ(j,h61id);
+ h61id = InitHash61bit5univ();
+ for (j=0;j<65536;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;
+ }
+ return htab;
+}
+
+inline INT64 HashShortTab64bitLP(INT64 x, HashTab64bitLPID htab) {
+ INT16 c;
+ INT64 h;
+ gs_int32_t i;
+
+ c=x;
+ h=htab[0][c];
+ for (i=1;i<4;i++) {
+ x>>=16;
+ c=x;
+ h=h^htab[i][c];
+ }
+ return h;
+}
+
+/* Tabulation based hashing for 64-bit keys using 8-bit characters.*/
+/* Uses 14KB tables and makes 8 lookups per hash. */
+
+inline static HashTab64bitLPID InitHashCharTab64bitLP() {
+ gs_int32_t i,j;
+ HashTab64bitLPID htab;
+ Hash61bit5univID h61id;
+
+
+ htab = (HashTab64bitLPID) malloc(8*sizeof(INT64*));
+ for (i=0;i<8;i++) {
+ htab[i] = (INT64*) malloc(256*sizeof(INT64));
+ h61id = InitHash61bit5univ();
+ for (j=0;j<256;j++) htab[i][j]=Hash61bit5univ(j,h61id);
+ h61id = InitHash61bit5univ();
+ for (j=0;j<256;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;
+ }
+ return htab;
+}
+
+inline INT64 HashCharTab64bitLP(INT64 x, HashTab64bitLPID htab) {
+ INT8 c;
+ INT64 h;
+ gs_int32_t i;
+
+ c=x;
+ h=htab[0][c];
+ for (i=1;i<8;i++) {
+ x>>=8;
+ c=x;
+ h=h^htab[i][c];
+ }
+ return h;
+}
+
+
+/* Tabulation based hashing for 34-bit keys using 10-11-bit characters.*/
+/* Uses 20KB tables and makes 3 lookups per hash. */
+
+inline static HashTab32bitLPID InitHashMipTab32bitLP() {
+ gs_int32_t i,j;
+ HashTab32bitLPID htab;
+ Hash61bit5univID h61id;
+
+
+ htab = (HashTab32bitLPID) malloc(6*sizeof(INT32*));
+ for (i=0;i<2;i++) {
+ htab[i] = (INT32*) malloc(0x800*sizeof(INT32));
+ h61id = InitHash61bit5univ();
+ for (j=0;j<0x800;j++) htab[i][j]=(INT32) Hash61bit5univ(j,h61id);
+ }
+ for (i=2;i<3;i++) {
+ htab[i] = (INT32*) malloc(0x400*sizeof(INT32));
+ h61id = InitHash61bit5univ();
+ for (j=0;j<0x400;j++) htab[i][j]=(INT32) Hash61bit5univ(j,h61id);
+ }
+ return htab;
+}
+
+inline INT32 HashMipTab32bitLP(INT32 x, HashTab32bitLPID htab) {
+ INT32 c;
+ INT32 h;
+ gs_int32_t i;
+
+ h=0;
+ for (i=0;i<2;i++) {
+ c= x & 0x7FFull;
+ h=h^htab[i][c];
+ x>>=11;
+ }
+ c= x & 0x3FFull;
+ h=h^htab[2][c];
+ return h;
+}
+
+/* Tabulation based hashing for 64-bit keys using 10-11-bit characters.*/
+/* Uses 80KB tables and makes 6 lookups per hash. */
+
+inline static HashTab64bitLPID InitHashMipTab64bitLP() {
+ gs_int32_t i,j;
+ HashTab64bitLPID htab;
+ Hash61bit5univID h61id;
+
+
+ htab = (HashTab64bitLPID) malloc(6*sizeof(INT64*));
+ for (i=0;i<4;i++) {
+ htab[i] = (INT64*) malloc(0x800*sizeof(INT64));
+ h61id = InitHash61bit5univ();
+ for (j=0;j<0x800;j++) htab[i][j]=Hash61bit5univ(j,h61id);
+ h61id = InitHash61bit5univ();
+ for (j=0;j<0x800;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;
+ }
+ for (i=4;i<6;i++) {
+ htab[i] = (INT64*) malloc(0x400*sizeof(INT64));
+ h61id = InitHash61bit5univ();
+ for (j=0;j<0x400;j++) htab[i][j]=Hash61bit5univ(j,h61id);
+ h61id = InitHash61bit5univ();
+ for (j=0;j<0x400;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;
+ }
+ return htab;
+}
+
+inline INT64 HashMipTab64bitLP(INT64 x, HashTab64bitLPID htab) {
+ INT64 c;
+ INT64 h;
+ gs_int32_t i;
+
+ h=0;
+ for (i=0;i<4;i++) {
+ c= x & 0x7FFull;
+ h=h^htab[i][c];
+ x>>=11;
+ }
+ for (i=4;i<6;i++) {
+ c= x & 0x3FFull;
+ h=h^htab[i][c];
+ x>>=10;
+ }
+ return h;
+}
+
+
+
+inline static Hash32bit2univID InitHash32bit2univ() {
+ Hash32bit2univID hid = (INT64*) malloc(2*sizeof(INT64));
+ hid[0]=RandINT64();
+ hid[1]=RandINT64();
+ return hid;
+}
+
+inline INT32 Hash32bit2univ(INT32 x, Hash32bit2univID hid) {
+ INT64 H;
+ INT32 h;
+ H = (x*hid[0])+hid[1];
+ h = H >> 32;
+ return h;
+}
+
+
+// For the string hashing below is for string of 32bit integers, and
+// the output is a 2-universal 32-bit number.
+
+inline static Hash32bit2univID InitStringHash32bit2univ(gs_int32_t length) {
+ gs_int32_t i;
+ Hash32bit2univID hid = (INT64*) malloc((length+1)*sizeof(INT64));
+ for (i=0;i<=length;i++) hid[i]=RandINT64();
+ return hid;
+}
+
+
+
+//assumes string length even
+inline INT32 EvenStringHash32bit2univ(INT32 * x, gs_int32_t length,
+ Hash32bit2univID hid) {
+ gs_int32_t i;
+ INT64 H;
+ INT32 h;
+ INT64 xh1,xh2,y;
+
+ H=0;
+ for (i=0;i<length;i+=2) {
+ xh1=x[i]+hid[i+1];
+ xh2=x[i+1]+hid[i];
+ y=xh1*xh2;
+ H = H^y;
+ }
+ H=H^hid[length];
+ h = H >> 32;
+ return h;
+}
+
+//assumes string length odd
+inline INT32 OddStringHash32bit2univ(INT32 * x, gs_int32_t length,
+ Hash32bit2univID hid) {
+ gs_int32_t i;
+ INT64 H;
+ INT32 h;
+ INT64 xh1,xh2,y;
+
+ H = x[0]*hid[0];
+ for (i=1;i<length;i+=2) {
+ xh1=x[i]+hid[i+1];
+ xh2=x[i+1]+hid[i];
+ y=xh1*xh2;
+ H = H^y;
+ }
+ H=H^hid[length];
+ h = H >> 32;
+ return h;
+}
+
+//Below we have the generic algorithm for fixed length string hashing
+//For shorter strings of length < 6, it is worthwhile using
+//specialized versions with fewer tests.
+//All versions rely on the some initialization above.
+inline INT32 StringHash32bit2univ(INT32 * x, gs_int32_t length, Hash32bit2univID hid) {
+// gs_int32_t i;
+// INT64 H=0;
+// INT32 h;
+// for (i=0;i<length;i++) H = H^(x[i]*hid[i]);
+// H=H^hid[length];
+// h = H >> 32;
+// return h;
+ if (length&1) return OddStringHash32bit2univ(x,length,hid);
+ else return EvenStringHash32bit2univ(x,length,hid);
+}
+
+
+//string of length 2
+inline INT32 String2Hash32bit2univ(INT32 * x,Hash32bit2univID hid) {
+ INT64 H;
+ INT32 h;
+ INT64 xh1,xh2;
+
+ xh1=x[0]+hid[1];
+ xh2=x[1]+hid[0];
+ H=xh1*xh2;
+ H=H^hid[2];
+ h = H >> 32;
+ return h;
+}
+
+//string of length 3
+inline INT32 String3Hash32bit2univ(INT32 * x,Hash32bit2univID hid) {
+ INT64 H;
+ INT32 h;
+ INT64 xh1,xh2,y;
+
+ H = x[0]*hid[0];
+ xh1=x[1]+hid[2];
+ xh2=x[2]+hid[1];
+ y=xh1*xh2;
+ H = H^y;
+ H=H^hid[3];
+ h = H >> 32;
+ return h;
+}
+
+
+inline static VarINT32StringHash32bit2univID InitVarINT32StringHash32bitUniv() {
+ gs_int32_t i;
+ VarINT32StringHash32bit2univID hid;
+
+ hid = (VarINT32StringHash32bit2univID) malloc(sizeof(VISHID));
+ for (i=0;i<16;i++) hid->str_compr_hid[i]=RandINT64();
+ hid->string_hid = RandINT64() & Prime61;
+ // hid->last_compr_hid = RandINT64() | 0x1ull;
+ return hid;
+ }
+
+inline INT32 VarINT32StringHash32bitUniv(INT32 * x, gs_int32_t length, VarINT32StringHash32bit2univID hid) {
+
+ INT32 h,i,j,d;
+ gs_int32_t e;
+ INT64 C,H,M;
+ INT64 xh1,xh2,y;
+ INT64 HxM00,HxM01,HxM10,HxM11,C0,C1,C2,CC;
+
+ if (length<16) return StringHash32bit2univ(x,length,hid->str_compr_hid);
+ H=0;
+ i=0;
+ for (;;) {
+ d=length-i;
+ C = 0;
+ j=0;
+ if (d>16) d=16;
+ else if (d&1) {
+ C = x[i]*hid->str_compr_hid[j];
+ i++;
+ j++;
+ }
+ for (e=((int) (d>>1));e>0;e--) {
+ xh1=x[i]+hid->str_compr_hid[j+1];
+ xh2=x[i+1]+hid->str_compr_hid[j];
+ y=xh1*xh2;
+ C = C^y;
+ i+=2;
+ j+=2;
+ }
+ if (i==(INT32) length) {
+ H<<=4;
+ H+=C;
+ H+=((INT64) length)*hid->length_factor;
+ h=(H>>32);
+ return h;
+ }
+ C>>=4;
+ H+=C;
+ M=hid->string_hid;
+ //Multiply H*M mod Prime61, possibly plus Prime,
+ //exploiting the structure of Prime61.
+ //We assume H<2*Prime61 and M<=Prime61, e.g.,
+ //H*M <2^123.
+ HxM00 = LOW(H)*LOW(M);
+ HxM01 = LOW(H)*HIGH(M);
+ HxM10 = HIGH(H)*LOW(M);
+ HxM11 = HIGH(H)*HIGH(M); //has at most 60 bits
+ C0 = HxM00+(HxM01<<32)+(HxM10<<32);//overflow doesn't matter here.
+ C0 = C0&Prime61; //Has at most 61 bits.
+ C1 = (HxM00>>32)+HxM01+HxM10; //Overflow impossible each <=62 bits
+ C1 = C1>>29; //Has at most 33 bits.
+ C2 = HxM11<<3; //Has at most 123-64+3=62 bits.
+ CC = C0+C1+C2; //At most 63 bits.
+ H = (CC&Prime61)+(CC>>61); //<2*Prime
+ }
+}
+
+inline static VarStringHash32bit2univID InitVarStringHash32bitUniv() {
+ gs_int32_t i;
+ VarStringHash32bit2univID hid;
+
+ hid = (VarStringHash32bit2univID) malloc(sizeof(VSHID));
+ for (i=0;i<16;i++) {
+ hid->int_str[i]=0;
+ hid->str_compr_hid[i]=RandINT64();
+ }
+ hid->string_hid = RandINT64() & Prime61;
+ // hid->last_compr_hid = RandINT64() | 0x1ull;
+ return hid;
+ }
+
+inline INT32 VarStringHash32bitUniv(char * x, gs_int32_t length,
+ VarStringHash32bit2univID hid) {
+
+ INT32 h,i,l,d,str_ix;
+ INT64 C,H,M;
+ INT64 xh1,xh2,y;
+ INT64 HxM00,HxM01,HxM10,HxM11,C0,C1,C2,CC;
+
+ // Assumes hid->int_str[*]==0
+
+ if (length<=60) {
+ memcpy(hid->int_str,x,length);
+ d=(length+3)>>2; //<16
+ h=StringHash32bit2univ(hid->int_str,d,hid->str_compr_hid);
+ for (i=0;i<d;i++) hid->int_str[i]=0;
+ return h;
+ }
+ H=0;
+ str_ix=0;
+ for (;;) {
+ C = 0;
+ l=length-str_ix;
+ if (l>64) l=64;
+ memcpy(hid->int_str,x+str_ix,l);
+ str_ix+=l;
+ d=(l+3)>>2;
+ i=0;
+ if (d&1) {
+ C = hid->int_str[i]*hid->str_compr_hid[i];
+ i++;
+ }
+ while (i<d) {
+ xh1=hid->int_str[i]+hid->str_compr_hid[i+1];
+ xh2=hid->int_str[i+1]+hid->str_compr_hid[i];
+ y=xh1*xh2;
+ C = C^y;
+ i+=2;
+ }
+ for (i=0;i<d;i++) hid->int_str[i]=0;
+ if (str_ix==(INT32) length) {
+ H<<=4;
+ H+=C;
+ H+=((INT64) length)*hid->length_factor;
+ h=(H>>32);
+ return h;
+ }
+ C>>=4;
+ H+=C;
+ M=hid->string_hid;
+ //Multiply H*M mod Prime61, possibly plus Prime,
+ //exploiting the structure of Prime61.
+ //We assume H<2*Prime61 and M<=Prime61, e.g.,
+ //H*M <2^123.
+ HxM00 = LOW(H)*LOW(M);
+ HxM01 = LOW(H)*HIGH(M);
+ HxM10 = HIGH(H)*LOW(M);
+ HxM11 = HIGH(H)*HIGH(M); //has at most 60 bits
+ C0 = HxM00+(HxM01<<32)+(HxM10<<32);//overflow doesn't matter here.
+ C0 = C0&Prime61; //Has at most 61 bits.
+ C1 = (HxM00>>32)+HxM01+HxM10; //Overflow impossible each <=62 bits
+ C1 = C1>>29; //Has at most 33 bits.
+ C2 = HxM11<<3; //Has at most 123-64+3=62 bits.
+ CC = C0+C1+C2; //At most 63 bits.
+ H = (CC&Prime61)+(CC>>61); //<2*Prime
+ }
+}
+
+
+#endif
+