Add to_hex_string UDAF
[com/gs-lite.git] / include / stringhash.h
1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7      http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License.
14  ------------------------------------------- */
15
16 #ifndef _STRINGHASH_H
17 #define _STRINGHASH_H
18 #include <stdlib.h>
19 #include <stdio.h>
20 #include <limits.h>
21 #include <stdint.h>
22 #include <string.h>
23
24 //Provides 5-universal hash functions.
25 //Both a direct polynomial based one,
26 //and the much faster tabulation based one.
27 //
28 //It also provides 2-universal string hashing.
29 //For most applicatoins, introdcing a slight error,
30 //if you want almost 5-uninversal hashing for strings,
31 //you can do a 2-universal hashing into 32-bit integer
32 //getting very few collissoins, and then use the above
33 //5-universal one.
34
35
36
37 typedef gs_uint8_t INT8;
38 typedef gs_uint16_t INT16;
39 typedef gs_uint32_t INT32;
40 typedef gs_uint64_t INT64;
41
42 typedef INT64 * Hash61bit5univID;
43 typedef INT32** HashTab32bit5univID;
44 typedef INT32** HashTab32bitLPID;
45 typedef INT64** HashTab64bitLPID;
46 typedef INT64 * Hash32bit2univID;
47 typedef INT32 Hash32bitUnivID;
48
49 const INT32 MAX_INT32 = UINT_MAX;
50
51 typedef struct {
52   INT64 str_compr_hid[16];
53   INT64 string_hid;
54   INT64 length_factor;
55   //  INT64 last_compr_hid;
56 } VISHID;
57
58 typedef VISHID * VarINT32StringHash32bit2univID;
59
60
61 typedef struct {
62   INT32 int_str[16];
63   INT64 str_compr_hid[16];
64   INT64 string_hid;
65   INT64 length_factor;
66   //  INT64 last_compr_hid;
67 } VSHID;
68
69 typedef VSHID * VarStringHash32bit2univID;
70
71
72
73
74
75 // different views of a 64-bit double word
76 typedef union {
77   INT64 as_int64;
78   INT32 as_int32s[2];
79   INT16 as_int16s[4];
80 } int64views;
81
82 const INT64 LowOnes = (((INT64)1)<<32)-1;
83
84 #define LOW(x)  ((x)&LowOnes)    // extract lower 32 bits from INT64
85 #define HIGH(x) ((x)>>32)        // extract higher 32 bits from INT64
86
87 const INT64 Prime61 = (((INT64)1)<<61) - 1;
88
89
90 //Random numbers from random.org based on atmostpheric noise.
91 INT64 Random64[200]= {0x285eb7722a62ce6eull,
92                       0xa84c7463e2b7856bull,
93                       0xb29100d6abcc8666ull,
94                       0xf9bfca5b7461fb1full,
95                       0x51c8dafc30c88dadull,
96                       0x0687468c365ec51dull,
97                       0x2bf2cd3ad64b6218ull,
98                       0xc20565a4d1f00f9eull,
99                       0x7d533575d313c658ull,
100                       0xbf2fba6b00725b85ull,
101                       0x4cfc2f6557e722beull,
102                       0xedbce818556dfb2bull,
103                       0xd9df508027db1bbeull,
104                       0x21f2d922f712f48bull,
105                       0xcd8b289d83a65804ull,
106                       0x4a19cfb02445a6d2ull,
107                       0xc95e56b1e19a4f94ull,
108                       0xcfeeeaccaf477248ull,
109                       0x4eec3378b73bb25cull,
110                       0x18a3f38d1c48b2beull,
111                       0x71b79ab5cb1e3730ull,
112                       0x79cdb30e2f38309dull,
113                       0xb41d4983bdbc8d6full,
114                       0xba9f57c01b66b7e3ull,
115                       0xe400503c95c16568ull,
116                       0x5977bfd4630294f1ull,
117                       0x57d4d7940099676full,
118                       0xd945de9268f4b191ull,
119                       0x4034711421eaf806ull,
120                       0x6d8108a4a6d58c22ull,
121                       0x5c421818ddbdd4eeull,
122                       0xbd9b7e4071713c13ull,
123                       0xa60d1d6e793e5eb2ull,
124                       0x7443fb031b8ec6c7ull,
125                       0xd8290c7120e05d4aull,
126                       0x797fb1d9a6a8d27full,
127                       0x543ec268ab1f2e45ull,
128                       0xcaf2a6139701f320ull,
129                       0x9519611d130bee47ull,
130                       0x19bbc533f018be1aull,
131                       0xdbfacdfeb573133dull,
132                       0x3255dacc4c7bfe12ull,
133                       0xbc6c9228e5518f6eull,
134                       0xb5c1681037347178ull,
135                       0xbaaa2cfef186bfadull,
136                       0x1834b8ab0f9e876eull,
137                       0x9d7b7f228433e0f7ull,
138                       0xa99cc292d003dd09ull,
139                       0xc0cb8037046b5295ull,
140                       0xa6ffa3d4671aa3d2ull,
141                       0xc27023fbee2862e6ull,
142                       0x5a9877bcc4bd3172ull,
143                       0xfcb0da3caf9fcfe0ull,
144                       0xc35ef57e1866ceaeull,
145                       0xd4f7c927d169a115ull,
146                       0x699054518fc74756ull,
147                       0xa75cbf617fc9db8dull,
148                       0x7f3adf4369665a9cull,
149                       0x6b98eeeb4c517f42ull,
150                       0xa12e44f5de954f24ull,
151                       0x5789ded4dced978eull,
152                       0xe4dd20ed27cd3567ull,
153                       0x9b4e90c365b8790bull,
154                       0xd486ed6e099f499bull,
155                       0x3f3d0ccfeaa0c0dcull,
156                       0x548c746cdb192adaull,
157                       0x8ce636d8469fe2dcull,
158                       0xca59ec929549a6a1ull,
159                       0x647b9878deaba1f0ull,
160                       0xebbb4b2641c54d34ull,
161                       0x7be6d2918b680abdull,
162                       0x02ad265fb4733490ull,
163                       0xfe1053044faf3486ull,
164                       0x539ea358ff6b6df3ull,
165                       0x025d73224a2b5826ull,
166                       0x7daad302451f41b3ull,
167                       0x6038455ddb535976ull,
168                       0x8d6d00a9a728a067ull,
169                       0xe9f03d61d4965d59ull,
170                       0x38314b8102daff3bull,
171                       0x56b335e7893a76f1ull,
172                       0x1048ca2f415712abull,
173                       0xa9bc989a891dc173ull,
174                       0xb741df3ae02836c2ull,
175                       0x7711e6c6f5830783ull,
176                       0x8edbf2be9226e24bull,
177                       0xe4a4b8ba310fc2e2ull,
178                       0xbc7b67f4a02f23c8ull,
179                       0x5669b1a9d6d8df17ull,
180                       0xdd3ebf2e3c516e26ull,
181                       0x77bdd6def5236c4full,
182                       0x9aeb54bdffacd65eull,
183                       0xab676483404a21b8ull,
184                       0xf7270f77a9d1b3a3ull,
185                       0x3794e1cdcc7de433ull,
186                       0x8e2b74d3a06aa56aull,
187                       0x572698d05b901d40ull,
188                       0x7bd6c265c1dd5cdfull,
189                       0xd2f68a53970db82eull,
190                       0x0e1d5f5dd9bd23bdull,
191                       0x48814c6813505051ull,
192                       0x8f2d21a7d4e4a481ull,
193                       0x144531c871920cf3ull,
194                       0x45c6f81c7fbc3b2full,
195                       0xc562f1fc7f07a944ull,
196                       0xaabdf3d0aa9872e4ull,
197                       0x8db1f9d827c8df98ull,
198                       0xea787210e13c16c7ull,
199                       0xd43f6f582629ff39ull,
200                       0x6ec7599da4cb298dull,
201                       0xfa99d7196097dd94ull,
202                       0xbe3a8a172a62f40dull,
203                       0x07477bc03d9d5471ull,
204                       0x03777d1ee44c0fa6ull,
205                       0x5c6df847b0ae6fd1ull,
206                       0xc1fc3bc2352d8125ull,
207                       0x6800e3321d35b697ull,
208                       0x51cc735e1b0920a8ull,
209                       0x93dc60a2430c11acull,
210                       0x13b6bf8ffc4e9e30ull,
211                       0xada08114e6a01701ull,
212                       0x1459b7a4254da4cdull,
213                       0xf7575cd7ededcdfdull,
214                       0x1e43675ed5ed33eeull,
215                       0x639f5ff579cc30d4ull,
216                       0x8f4f75d7eea7e300ull,
217                       0x518939accd43dadeull,
218                       0x989a77577fac24e2ull,
219                       0x86c5e3998c819d51ull,
220                       0xb84770f9cc15d139ull,
221                       0x11544010174c2c99ull,
222                       0xfb238b962405a579ull,
223                       0xca5bddde9b80cb7bull,
224                       0xbbe71928190dfab4ull,
225                       0x0ec620294742b8c8ull,
226                       0x90bc7a703ed63900ull,
227                       0x2c8a80e1e85f72bbull,
228                       0xf19ab262ed2ad914ull,
229                       0xa233e89fab33793cull,
230                       0x75b6f59568a958f5ull,
231                       0x31fb15af9a41aca2ull,
232                       0xf89db05aa21b3d1cull,
233                       0x0023d08c52147f63ull,
234                       0x8e0e3c2aba81e8fdull,
235                       0xdb2efd057c157e71ull,
236                       0x1a1797a83e39dfe4ull,
237                       0x3afc43e1979f507dull,
238                       0x0e647a8d8abe0936ull,
239                       0xc8e03f27082eb0daull,
240                       0xc1b0c11db52da528ull,
241                       0xadeb52a144f497ceull,
242                       0x046fc2f53bd97c9bull,
243                       0x2d587fdf5fae80d5ull,
244                       0x14036c3fb77fd73eull,
245                       0x883f768734d14676ull,
246                       0x212df988a83caffeull,
247                       0x36b41dac32387e17ull,
248                       0x9dbee8d7cf804bccull,
249                       0x34d9a58fb9a35794ull,
250                       0xda82beba879e3576ull,
251                       0x0366148280e5adf1ull,
252                       0x634085dcdba7b174ull,
253                       0x8451523252446534ull,
254                       0xc74ce8b4d5175b9aull,
255                       0x40afebd30a9a4837ull,
256                       0x7232952ce84e90beull,
257                       0x7443a6e37b87992dull,
258                       0xace8a33649218d9aull,
259                       0x9ad8af9614b75655ull,
260                       0xd518d9a700179ac2ull,
261                       0xc7f906d90fd36259ull,
262                       0xe2c47ec3abd8a12dull,
263                       0xca05e96fcbbb76c1ull,
264                       0x7186661c9ddf4973ull,
265                       0x0146f5f47d5f42c2ull,
266                       0x74a7c485608ea178ull,
267                       0x8cb5e3e5f84f3040ull,
268                       0xb330869dc366037dull,
269                       0x0dbc4d22f932bbd4ull,
270                       0xa8460b43946a5ecbull,
271                       0xcee72c72fe5ddea6ull,
272                       0x9f953cc7859b2a4eull,
273                       0x95ea396619e60182ull,
274                       0xfe9890efa6aa8c3eull,
275                       0x3c9a5691d1e25799ull,
276                       0xa54eeb19aa718b62ull,
277                       0x69907b2ec11d82b7ull,
278                       0x408c5405984caa65ull,
279                       0x6f11a21711640470ull,
280                       0x20b9227a8f53f9aeull,
281                       0x42df63b7ec442178ull,
282                       0xbec247f79407d00aull,
283                       0x2946c31558eab2bcull,
284                       0x84d045ffa174048eull,
285                       0x28bad532ba4a450cull,
286                       0x93ccf0cbaa274d7aull,
287                       0xff30008411d25158ull,
288                       0x3845ce6dcc30080aull,
289                       0xcf5925239b6f3dbaull,
290                       0x1f148649ed53660dull};
291
292
293
294
295
296
297 gs_int32_t nextrandom = 0;
298 gs_int32_t endrandom = 100;
299
300
301 // The first "endrandom" numbers returned are truely random INT64.
302 // They are perfect for bases of new hash functions, and currently
303 // we have just enough such random numbers for tabulation based
304 // 5-universal hashing, or one of the other schemes provided.
305 // The remaining ones are based on the pseudo-random mrand48() which
306 // returns a signed 32-bit number. We do not use the unsigned lrand48()
307 // which only returns 31-bit.
308 inline static INT64 RandINT64() {
309   INT64 r,r1;
310   if (nextrandom<endrandom) r=Random64[nextrandom++];
311   else {
312     if (nextrandom==endrandom)
313       fprintf(stderr,"Switching from random to pseudo-random in Rand64");
314     r =(INT64) mrand48();
315     r1 = (INT64) mrand48();
316     r=r<<32;
317     r=r^r1;
318   }
319   return r;
320 }
321
322
323 /* Computes ax+b mod Prime, possibly plus Prime,
324    exploiting the structure of Prime.  */
325 inline static INT64 MultAddPrime(INT32 x, INT64 a, INT64 b) {
326   INT64 a0,a1,c0,c1,c;
327   a0 = LOW(a)*x;
328   a1 = HIGH(a)*x;
329   c0 = a0+(a1<<32);
330   c1 = (a0>>32)+a1;
331   c  = (c0&Prime61)+(c1>>29)+b;
332   return c;
333 } // 12 instructions
334
335 /* CWtrick for 32-bit key x with prime 2^61-1 */
336 inline static INT64 Hash61bit5univ(INT32 x, Hash61bit5univID index) {
337   INT64 h;
338   gs_int32_t i;
339
340   h = index[0];
341   for (i=1;i<5;i++) h = MultAddPrime(x,h,index[i]);
342   h = (h&Prime61)+(h>>61);
343   if (h>=Prime61) h-=Prime61;
344   return h;
345 }
346
347
348
349 inline static INT64* InitHash61bit5univ() {
350   gs_int32_t i;
351
352   Hash61bit5univID hid = (INT64*) malloc(5*sizeof(INT64));
353   for (i=0;i<5;i++) {
354     hid[i]=RandINT64();
355     hid[i] = (hid[i]&Prime61)+(hid[i]>>61);
356     if (hid[i]>=Prime61) hid[i]-=Prime61;
357   }
358   return hid;
359 }
360
361 inline static HashTab32bit5univID InitHashTab32bit5univ() {
362   gs_uint32_t i,j;
363   HashTab32bit5univID htab;
364   Hash61bit5univID h61id;
365
366
367   htab = (HashTab32bit5univID) malloc(3*sizeof(INT32*));
368   for (i=0;i<3;i++) {
369      htab[i] = (INT32*) malloc(65538*sizeof(INT32));
370      h61id = InitHash61bit5univ();
371      for (j=0;j<65538;j++) htab[i][j]=Hash61bit5univ(j,h61id);
372   }
373   return htab;
374 }
375
376 /* tabulation based hashing for 32-bit key x using 16-bit characters.*/
377 inline INT64 HashTab32bit5univ(INT32 x, HashTab32bit5univID htab) {
378   INT32 x0, x1, x2;
379   x0 = x&65535;
380   x1 = x>>16;
381   x2 = x0 + x1;
382   x2 = 2 - (x2>>16) + (x2&65535);  // optional compression
383   return htab[0][x0]^htab[1][x1]^htab[2][x2];
384 } // 8 + 4 = 12 instructions
385
386
387 /* Tabulation based hashing for 32-bit keys using 16-bit characters.*/
388 /* Uses 500KB tables and makes 2 lookups per hash. */
389
390 inline static HashTab32bitLPID InitHashShortTab32bitLP() {
391   gs_uint32_t i,j;
392   HashTab32bitLPID htab;
393   Hash61bit5univID h61id;
394
395
396   htab = (HashTab32bitLPID) malloc(2*sizeof(INT32*));
397   for (i=0;i<2;i++) {
398      htab[i] = (INT32*) malloc(65536*sizeof(INT32));
399      h61id = InitHash61bit5univ();
400      for (j=0;j<65536;j++) htab[i][j]=Hash61bit5univ(j,h61id);
401   }
402   return htab;
403 }
404
405 /* tabulation based hashing for 32-bit key x using 16-bit characters.*/
406 inline INT64 HashShortTab32bitLP(INT32 x, HashTab32bitLPID htab) {
407   INT32 x0, x1;
408   x0 = x&65535;
409   x1 = x>>16;
410   return htab[0][x0]^htab[1][x1];
411 }
412
413
414 /* Tabulation based hashing for 32-bit keys using 8-bit characters.*/
415 /* Uses 4KB tables and makes 4 lookups per hash. */
416
417 inline static HashTab32bitLPID InitHashCharTab32bitLP() {
418   gs_uint32_t i,j;
419   HashTab32bitLPID htab;
420   Hash61bit5univID h61id;
421
422
423   htab = (HashTab32bitLPID) malloc(4*sizeof(INT32*));
424   for (i=0;i<4;i++) {
425      htab[i] = (INT32*) malloc(256*sizeof(INT32));
426      h61id = InitHash61bit5univ();
427      for (j=0;j<256;j++) htab[i][j]=Hash61bit5univ(j,h61id);
428   }
429   return htab;
430 }
431
432 inline INT32 HashCharTab32bitLP(INT32 x, HashTab32bitLPID htab) {
433   INT8 c;
434   INT32 h;
435   gs_int32_t i;
436
437   c=x;
438   h=htab[0][c];
439   for (i=1;i<4;i++) {
440     x>>=8;
441     c=x;
442     h=h^htab[i][c];
443   }
444   return h;
445 }
446
447
448 /* Tabulation based hashing for 64-bit keys using 16-bit characters.*/
449 /* Uses 2MB tables and makes 4 lookups per hash. */
450
451
452 inline static HashTab64bitLPID InitHashShortTab64bitLP() {
453   gs_int32_t i,j;
454   HashTab64bitLPID htab;
455   Hash61bit5univID h61id;
456
457
458   htab = (HashTab64bitLPID) malloc(4*sizeof(INT64*));
459   for (i=0;i<4;i++) {
460      htab[i] = (INT64*) malloc(65536*sizeof(INT64));
461      h61id = InitHash61bit5univ();
462      for (j=0;j<65536;j++) htab[i][j]=Hash61bit5univ(j,h61id);
463      h61id = InitHash61bit5univ();
464      for (j=0;j<65536;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;
465   }
466   return htab;
467 }
468
469 inline INT64 HashShortTab64bitLP(INT64 x, HashTab64bitLPID htab) {
470   INT16 c;
471   INT64 h;
472   gs_int32_t i;
473
474   c=x;
475   h=htab[0][c];
476   for (i=1;i<4;i++) {
477     x>>=16;
478     c=x;
479     h=h^htab[i][c];
480   }
481   return h;
482 }
483
484 /* Tabulation based hashing for 64-bit keys using 8-bit characters.*/
485 /* Uses 14KB tables and makes 8 lookups per hash. */
486
487 inline static HashTab64bitLPID InitHashCharTab64bitLP() {
488   gs_int32_t i,j;
489   HashTab64bitLPID htab;
490   Hash61bit5univID h61id;
491
492
493   htab = (HashTab64bitLPID) malloc(8*sizeof(INT64*));
494   for (i=0;i<8;i++) {
495      htab[i] = (INT64*) malloc(256*sizeof(INT64));
496      h61id = InitHash61bit5univ();
497      for (j=0;j<256;j++) htab[i][j]=Hash61bit5univ(j,h61id);
498      h61id = InitHash61bit5univ();
499      for (j=0;j<256;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;
500   }
501   return htab;
502 }
503
504 inline INT64 HashCharTab64bitLP(INT64 x, HashTab64bitLPID htab) {
505   INT8 c;
506   INT64 h;
507   gs_int32_t i;
508
509   c=x;
510   h=htab[0][c];
511   for (i=1;i<8;i++) {
512     x>>=8;
513     c=x;
514     h=h^htab[i][c];
515   }
516   return h;
517 }
518
519
520 /* Tabulation based hashing for 34-bit keys using 10-11-bit characters.*/
521 /* Uses 20KB tables and makes 3 lookups per hash. */
522
523 inline static HashTab32bitLPID InitHashMipTab32bitLP() {
524   gs_int32_t i,j;
525   HashTab32bitLPID htab;
526   Hash61bit5univID h61id;
527
528
529   htab = (HashTab32bitLPID) malloc(6*sizeof(INT32*));
530   for (i=0;i<2;i++) {
531      htab[i] = (INT32*) malloc(0x800*sizeof(INT32));
532      h61id = InitHash61bit5univ();
533      for (j=0;j<0x800;j++) htab[i][j]=(INT32) Hash61bit5univ(j,h61id);
534   }
535   for (i=2;i<3;i++) {
536      htab[i] = (INT32*) malloc(0x400*sizeof(INT32));
537      h61id = InitHash61bit5univ();
538      for (j=0;j<0x400;j++) htab[i][j]=(INT32) Hash61bit5univ(j,h61id);
539   }
540   return htab;
541 }
542
543 inline INT32 HashMipTab32bitLP(INT32 x, HashTab32bitLPID htab) {
544   INT32 c;
545   INT32 h;
546   gs_int32_t i;
547
548   h=0;
549   for (i=0;i<2;i++) {
550     c= x & 0x7FFull;
551     h=h^htab[i][c];
552     x>>=11;
553   }
554   c= x & 0x3FFull;
555   h=h^htab[2][c];
556   return h;
557 }
558
559 /* Tabulation based hashing for 64-bit keys using 10-11-bit characters.*/
560 /* Uses 80KB tables and makes 6 lookups per hash. */
561
562 inline static HashTab64bitLPID InitHashMipTab64bitLP() {
563   gs_int32_t i,j;
564   HashTab64bitLPID htab;
565   Hash61bit5univID h61id;
566
567
568   htab = (HashTab64bitLPID) malloc(6*sizeof(INT64*));
569   for (i=0;i<4;i++) {
570      htab[i] = (INT64*) malloc(0x800*sizeof(INT64));
571      h61id = InitHash61bit5univ();
572      for (j=0;j<0x800;j++) htab[i][j]=Hash61bit5univ(j,h61id);
573      h61id = InitHash61bit5univ();
574      for (j=0;j<0x800;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;
575   }
576   for (i=4;i<6;i++) {
577      htab[i] = (INT64*) malloc(0x400*sizeof(INT64));
578      h61id = InitHash61bit5univ();
579      for (j=0;j<0x400;j++) htab[i][j]=Hash61bit5univ(j,h61id);
580      h61id = InitHash61bit5univ();
581      for (j=0;j<0x400;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;
582   }
583   return htab;
584 }
585
586 inline INT64 HashMipTab64bitLP(INT64 x, HashTab64bitLPID htab) {
587   INT64 c;
588   INT64 h;
589   gs_int32_t i;
590
591   h=0;
592   for (i=0;i<4;i++) {
593     c= x & 0x7FFull;
594     h=h^htab[i][c];
595     x>>=11;
596   }
597   for (i=4;i<6;i++) {
598     c= x & 0x3FFull;
599     h=h^htab[i][c];
600     x>>=10;
601   }
602   return h;
603 }
604
605
606
607 inline static Hash32bit2univID InitHash32bit2univ() {
608   Hash32bit2univID hid = (INT64*) malloc(2*sizeof(INT64));
609   hid[0]=RandINT64();
610   hid[1]=RandINT64();
611   return hid;
612 }
613
614 inline INT32 Hash32bit2univ(INT32 x, Hash32bit2univID hid) {
615   INT64 H;
616   INT32 h;
617   H = (x*hid[0])+hid[1];
618   h = H >> 32;
619   return h;
620 }
621
622
623 // For the string hashing below is for string of 32bit integers, and
624 // the output is a 2-universal 32-bit number.
625
626 inline static Hash32bit2univID InitStringHash32bit2univ(gs_int32_t length) {
627   gs_int32_t i;
628   Hash32bit2univID hid = (INT64*) malloc((length+1)*sizeof(INT64));
629   for (i=0;i<=length;i++) hid[i]=RandINT64();
630   return hid;
631 }
632
633
634
635 //assumes string length even
636 inline INT32 EvenStringHash32bit2univ(INT32 * x, gs_int32_t length,
637                                       Hash32bit2univID hid) {
638   gs_int32_t i;
639   INT64 H;
640   INT32 h;
641   INT64 xh1,xh2,y;
642
643   H=0;
644   for (i=0;i<length;i+=2) {
645     xh1=x[i]+hid[i+1];
646     xh2=x[i+1]+hid[i];
647     y=xh1*xh2;
648     H = H^y;
649   }
650   H=H^hid[length];
651   h = H >> 32;
652   return h;
653 }
654
655 //assumes string length odd
656 inline INT32 OddStringHash32bit2univ(INT32 * x, gs_int32_t length,
657                                       Hash32bit2univID hid) {
658   gs_int32_t i;
659   INT64 H;
660   INT32 h;
661   INT64 xh1,xh2,y;
662
663   H = x[0]*hid[0];
664   for (i=1;i<length;i+=2) {
665     xh1=x[i]+hid[i+1];
666     xh2=x[i+1]+hid[i];
667     y=xh1*xh2;
668     H = H^y;
669   }
670   H=H^hid[length];
671   h = H >> 32;
672   return h;
673 }
674
675 //Below we have the generic algorithm for fixed length string hashing
676 //For shorter strings of length < 6, it is worthwhile using
677 //specialized versions with fewer tests.
678 //All versions rely on the some initialization above.
679 inline INT32 StringHash32bit2univ(INT32 * x, gs_int32_t length, Hash32bit2univID hid) {
680 //  gs_int32_t i;
681 //  INT64 H=0;
682 //  INT32 h;
683 //  for (i=0;i<length;i++) H = H^(x[i]*hid[i]);
684 //  H=H^hid[length];
685 //  h = H >> 32;
686 //  return h;
687   if (length&1) return OddStringHash32bit2univ(x,length,hid);
688   else return EvenStringHash32bit2univ(x,length,hid);
689 }
690
691
692 //string of length 2
693 inline INT32 String2Hash32bit2univ(INT32 * x,Hash32bit2univID hid) {
694   INT64 H;
695   INT32 h;
696   INT64 xh1,xh2;
697
698   xh1=x[0]+hid[1];
699   xh2=x[1]+hid[0];
700   H=xh1*xh2;
701   H=H^hid[2];
702   h = H >> 32;
703   return h;
704 }
705
706 //string of length 3
707 inline INT32 String3Hash32bit2univ(INT32 * x,Hash32bit2univID hid) {
708   INT64 H;
709   INT32 h;
710   INT64 xh1,xh2,y;
711
712   H = x[0]*hid[0];
713   xh1=x[1]+hid[2];
714   xh2=x[2]+hid[1];
715   y=xh1*xh2;
716   H = H^y;
717   H=H^hid[3];
718   h = H >> 32;
719   return h;
720 }
721
722
723 inline static VarINT32StringHash32bit2univID InitVarINT32StringHash32bitUniv() {
724   gs_int32_t i;
725   VarINT32StringHash32bit2univID hid;
726
727   hid = (VarINT32StringHash32bit2univID) malloc(sizeof(VISHID));
728   for (i=0;i<16;i++) hid->str_compr_hid[i]=RandINT64();
729   hid->string_hid = RandINT64() & Prime61;
730   //  hid->last_compr_hid = RandINT64() | 0x1ull;
731   return hid;
732   }
733
734 inline INT32 VarINT32StringHash32bitUniv(INT32 * x, gs_int32_t length, VarINT32StringHash32bit2univID hid) {
735
736   INT32 h,i,j,d;
737   gs_int32_t e;
738   INT64 C,H,M;
739   INT64 xh1,xh2,y;
740   INT64 HxM00,HxM01,HxM10,HxM11,C0,C1,C2,CC;
741
742   if (length<16) return StringHash32bit2univ(x,length,hid->str_compr_hid);
743   H=0;
744   i=0;
745   for (;;) {
746      d=length-i;
747      C = 0;
748      j=0;
749      if (d>16) d=16;
750      else if (d&1) {
751         C = x[i]*hid->str_compr_hid[j];
752         i++;
753         j++;
754      }
755      for (e=((int) (d>>1));e>0;e--) {
756          xh1=x[i]+hid->str_compr_hid[j+1];
757          xh2=x[i+1]+hid->str_compr_hid[j];
758          y=xh1*xh2;
759          C = C^y;
760          i+=2;
761          j+=2;
762      }
763      if (i==(INT32) length) {
764         H<<=4;
765         H+=C;
766         H+=((INT64) length)*hid->length_factor;
767         h=(H>>32);
768         return h;
769      }
770      C>>=4;
771      H+=C;
772      M=hid->string_hid;
773      //Multiply H*M mod Prime61, possibly plus Prime,
774      //exploiting the structure of Prime61.
775      //We assume H<2*Prime61 and M<=Prime61, e.g.,
776      //H*M <2^123.
777      HxM00 = LOW(H)*LOW(M);
778      HxM01 = LOW(H)*HIGH(M);
779      HxM10 = HIGH(H)*LOW(M);
780      HxM11 = HIGH(H)*HIGH(M); //has at most 60 bits
781      C0 = HxM00+(HxM01<<32)+(HxM10<<32);//overflow doesn't matter here.
782      C0 = C0&Prime61;              //Has at most 61 bits.
783      C1 = (HxM00>>32)+HxM01+HxM10; //Overflow impossible each <=62 bits
784      C1 = C1>>29;                  //Has at most 33 bits.
785      C2 = HxM11<<3;                //Has at most 123-64+3=62 bits.
786      CC = C0+C1+C2;                //At most 63 bits.
787      H = (CC&Prime61)+(CC>>61);    //<2*Prime
788   }
789 }
790
791 inline static VarStringHash32bit2univID InitVarStringHash32bitUniv() {
792   gs_int32_t i;
793   VarStringHash32bit2univID hid;
794
795   hid = (VarStringHash32bit2univID) malloc(sizeof(VSHID));
796   for (i=0;i<16;i++) {
797     hid->int_str[i]=0;
798     hid->str_compr_hid[i]=RandINT64();
799   }
800   hid->string_hid = RandINT64() & Prime61;
801   //  hid->last_compr_hid = RandINT64() | 0x1ull;
802   return hid;
803   }
804
805 inline INT32 VarStringHash32bitUniv(char * x, gs_int32_t length,
806                                          VarStringHash32bit2univID hid) {
807
808   INT32 h,i,l,d,str_ix;
809   INT64 C,H,M;
810   INT64 xh1,xh2,y;
811   INT64 HxM00,HxM01,HxM10,HxM11,C0,C1,C2,CC;
812
813   // Assumes hid->int_str[*]==0
814
815   if (length<=60) {
816     memcpy(hid->int_str,x,length);
817     d=(length+3)>>2; //<16
818     h=StringHash32bit2univ(hid->int_str,d,hid->str_compr_hid);
819     for (i=0;i<d;i++) hid->int_str[i]=0;
820     return h;
821   }
822   H=0;
823   str_ix=0;
824   for (;;) {
825      C = 0;
826      l=length-str_ix;
827      if (l>64) l=64;
828      memcpy(hid->int_str,x+str_ix,l);
829      str_ix+=l;
830      d=(l+3)>>2;
831      i=0;
832      if (d&1) {
833         C = hid->int_str[i]*hid->str_compr_hid[i];
834         i++;
835      }
836      while (i<d) {
837          xh1=hid->int_str[i]+hid->str_compr_hid[i+1];
838          xh2=hid->int_str[i+1]+hid->str_compr_hid[i];
839          y=xh1*xh2;
840          C = C^y;
841          i+=2;
842      }
843      for (i=0;i<d;i++) hid->int_str[i]=0;
844      if (str_ix==(INT32) length) {
845         H<<=4;
846         H+=C;
847         H+=((INT64) length)*hid->length_factor;
848         h=(H>>32);
849         return h;
850      }
851      C>>=4;
852      H+=C;
853      M=hid->string_hid;
854      //Multiply H*M mod Prime61, possibly plus Prime,
855      //exploiting the structure of Prime61.
856      //We assume H<2*Prime61 and M<=Prime61, e.g.,
857      //H*M <2^123.
858      HxM00 = LOW(H)*LOW(M);
859      HxM01 = LOW(H)*HIGH(M);
860      HxM10 = HIGH(H)*LOW(M);
861      HxM11 = HIGH(H)*HIGH(M); //has at most 60 bits
862      C0 = HxM00+(HxM01<<32)+(HxM10<<32);//overflow doesn't matter here.
863      C0 = C0&Prime61;              //Has at most 61 bits.
864      C1 = (HxM00>>32)+HxM01+HxM10; //Overflow impossible each <=62 bits
865      C1 = C1>>29;                  //Has at most 33 bits.
866      C2 = HxM11<<3;                //Has at most 123-64+3=62 bits.
867      CC = C0+C1+C2;                //At most 63 bits.
868      H = (CC&Prime61)+(CC>>61);    //<2*Prime
869   }
870 }
871
872
873 #endif
874