Add new udafs and RMR support to gsprintconsole_ves
[com/gs-lite.git] / include / stringhash.h
1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7      http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License.
14  ------------------------------------------- */
15
16 #ifndef _STRINGHASH_H
17 #define _STRINGHASH_H
18 #include <stdlib.h>
19 #include <stdio.h>
20 #include <limits.h>
21 #include <stdint.h>
22 #include <string.h>
23
24 //Provides 5-universal hash functions.
25 //Both a direct polynomial based one,
26 //and the much faster tabulation based one.
27 //
28 //It also provides 2-universal string hashing.
29 //For most applicatoins, introdcing a slight error,
30 //if you want almost 5-uninversal hashing for strings,
31 //you can do a 2-universal hashing into 32-bit integer
32 //getting very few collissoins, and then use the above
33 //5-universal one.
34
35
36
37 typedef gs_uint8_t INT8;
38 typedef gs_uint16_t INT16;
39 typedef gs_uint32_t INT32;
40 typedef gs_uint64_t INT64;
41
42 typedef INT64 * Hash61bit5univID;
43 typedef INT32** HashTab32bit5univID;
44 typedef INT32** HashTab32bitLPID;
45 typedef INT64** HashTab64bitLPID;
46 typedef INT64 * Hash32bit2univID;
47 typedef INT32 Hash32bitUnivID;
48
49 const INT32 MAX_INT32 = UINT_MAX;
50
51 typedef struct {
52   INT64 str_compr_hid[16];
53   INT64 string_hid;
54   INT64 length_factor;
55   //  INT64 last_compr_hid;
56 } VISHID;
57
58 typedef VISHID * VarINT32StringHash32bit2univID;
59
60
61 typedef struct {
62   INT32 int_str[16];
63   INT64 str_compr_hid[16];
64   INT64 string_hid;
65   INT64 length_factor;
66   //  INT64 last_compr_hid;
67 } VSHID;
68
69 typedef VSHID * VarStringHash32bit2univID;
70
71
72
73
74
75 // different views of a 64-bit double word
76 typedef union {
77   INT64 as_int64;
78   INT32 as_int32s[2];
79   INT16 as_int16s[4];
80 } int64views;
81
82 const INT64 LowOnes = (((INT64)1)<<32)-1;
83
84 #define LOW(x)  ((x)&LowOnes)    // extract lower 32 bits from INT64
85 #define HIGH(x) ((x)>>32)        // extract higher 32 bits from INT64
86
87 const INT64 Prime61 = (((INT64)1)<<61) - 1;
88
89
90 //Random numbers from random.org based on atmostpheric noise.
91 static INT64 Random64[200]= {0x285eb7722a62ce6eull,
92                       0xa84c7463e2b7856bull,
93                       0xb29100d6abcc8666ull,
94                       0xf9bfca5b7461fb1full,
95                       0x51c8dafc30c88dadull,
96                       0x0687468c365ec51dull,
97                       0x2bf2cd3ad64b6218ull,
98                       0xc20565a4d1f00f9eull,
99                       0x7d533575d313c658ull,
100                       0xbf2fba6b00725b85ull,
101                       0x4cfc2f6557e722beull,
102                       0xedbce818556dfb2bull,
103                       0xd9df508027db1bbeull,
104                       0x21f2d922f712f48bull,
105                       0xcd8b289d83a65804ull,
106                       0x4a19cfb02445a6d2ull,
107                       0xc95e56b1e19a4f94ull,
108                       0xcfeeeaccaf477248ull,
109                       0x4eec3378b73bb25cull,
110                       0x18a3f38d1c48b2beull,
111                       0x71b79ab5cb1e3730ull,
112                       0x79cdb30e2f38309dull,
113                       0xb41d4983bdbc8d6full,
114                       0xba9f57c01b66b7e3ull,
115                       0xe400503c95c16568ull,
116                       0x5977bfd4630294f1ull,
117                       0x57d4d7940099676full,
118                       0xd945de9268f4b191ull,
119                       0x4034711421eaf806ull,
120                       0x6d8108a4a6d58c22ull,
121                       0x5c421818ddbdd4eeull,
122                       0xbd9b7e4071713c13ull,
123                       0xa60d1d6e793e5eb2ull,
124                       0x7443fb031b8ec6c7ull,
125                       0xd8290c7120e05d4aull,
126                       0x797fb1d9a6a8d27full,
127                       0x543ec268ab1f2e45ull,
128                       0xcaf2a6139701f320ull,
129                       0x9519611d130bee47ull,
130                       0x19bbc533f018be1aull,
131                       0xdbfacdfeb573133dull,
132                       0x3255dacc4c7bfe12ull,
133                       0xbc6c9228e5518f6eull,
134                       0xb5c1681037347178ull,
135                       0xbaaa2cfef186bfadull,
136                       0x1834b8ab0f9e876eull,
137                       0x9d7b7f228433e0f7ull,
138                       0xa99cc292d003dd09ull,
139                       0xc0cb8037046b5295ull,
140                       0xa6ffa3d4671aa3d2ull,
141                       0xc27023fbee2862e6ull,
142                       0x5a9877bcc4bd3172ull,
143                       0xfcb0da3caf9fcfe0ull,
144                       0xc35ef57e1866ceaeull,
145                       0xd4f7c927d169a115ull,
146                       0x699054518fc74756ull,
147                       0xa75cbf617fc9db8dull,
148                       0x7f3adf4369665a9cull,
149                       0x6b98eeeb4c517f42ull,
150                       0xa12e44f5de954f24ull,
151                       0x5789ded4dced978eull,
152                       0xe4dd20ed27cd3567ull,
153                       0x9b4e90c365b8790bull,
154                       0xd486ed6e099f499bull,
155                       0x3f3d0ccfeaa0c0dcull,
156                       0x548c746cdb192adaull,
157                       0x8ce636d8469fe2dcull,
158                       0xca59ec929549a6a1ull,
159                       0x647b9878deaba1f0ull,
160                       0xebbb4b2641c54d34ull,
161                       0x7be6d2918b680abdull,
162                       0x02ad265fb4733490ull,
163                       0xfe1053044faf3486ull,
164                       0x539ea358ff6b6df3ull,
165                       0x025d73224a2b5826ull,
166                       0x7daad302451f41b3ull,
167                       0x6038455ddb535976ull,
168                       0x8d6d00a9a728a067ull,
169                       0xe9f03d61d4965d59ull,
170                       0x38314b8102daff3bull,
171                       0x56b335e7893a76f1ull,
172                       0x1048ca2f415712abull,
173                       0xa9bc989a891dc173ull,
174                       0xb741df3ae02836c2ull,
175                       0x7711e6c6f5830783ull,
176                       0x8edbf2be9226e24bull,
177                       0xe4a4b8ba310fc2e2ull,
178                       0xbc7b67f4a02f23c8ull,
179                       0x5669b1a9d6d8df17ull,
180                       0xdd3ebf2e3c516e26ull,
181                       0x77bdd6def5236c4full,
182                       0x9aeb54bdffacd65eull,
183                       0xab676483404a21b8ull,
184                       0xf7270f77a9d1b3a3ull,
185                       0x3794e1cdcc7de433ull,
186                       0x8e2b74d3a06aa56aull,
187                       0x572698d05b901d40ull,
188                       0x7bd6c265c1dd5cdfull,
189                       0xd2f68a53970db82eull,
190                       0x0e1d5f5dd9bd23bdull,
191                       0x48814c6813505051ull,
192                       0x8f2d21a7d4e4a481ull,
193                       0x144531c871920cf3ull,
194                       0x45c6f81c7fbc3b2full,
195                       0xc562f1fc7f07a944ull,
196                       0xaabdf3d0aa9872e4ull,
197                       0x8db1f9d827c8df98ull,
198                       0xea787210e13c16c7ull,
199                       0xd43f6f582629ff39ull,
200                       0x6ec7599da4cb298dull,
201                       0xfa99d7196097dd94ull,
202                       0xbe3a8a172a62f40dull,
203                       0x07477bc03d9d5471ull,
204                       0x03777d1ee44c0fa6ull,
205                       0x5c6df847b0ae6fd1ull,
206                       0xc1fc3bc2352d8125ull,
207                       0x6800e3321d35b697ull,
208                       0x51cc735e1b0920a8ull,
209                       0x93dc60a2430c11acull,
210                       0x13b6bf8ffc4e9e30ull,
211                       0xada08114e6a01701ull,
212                       0x1459b7a4254da4cdull,
213                       0xf7575cd7ededcdfdull,
214                       0x1e43675ed5ed33eeull,
215                       0x639f5ff579cc30d4ull,
216                       0x8f4f75d7eea7e300ull,
217                       0x518939accd43dadeull,
218                       0x989a77577fac24e2ull,
219                       0x86c5e3998c819d51ull,
220                       0xb84770f9cc15d139ull,
221                       0x11544010174c2c99ull,
222                       0xfb238b962405a579ull,
223                       0xca5bddde9b80cb7bull,
224                       0xbbe71928190dfab4ull,
225                       0x0ec620294742b8c8ull,
226                       0x90bc7a703ed63900ull,
227                       0x2c8a80e1e85f72bbull,
228                       0xf19ab262ed2ad914ull,
229                       0xa233e89fab33793cull,
230                       0x75b6f59568a958f5ull,
231                       0x31fb15af9a41aca2ull,
232                       0xf89db05aa21b3d1cull,
233                       0x0023d08c52147f63ull,
234                       0x8e0e3c2aba81e8fdull,
235                       0xdb2efd057c157e71ull,
236                       0x1a1797a83e39dfe4ull,
237                       0x3afc43e1979f507dull,
238                       0x0e647a8d8abe0936ull,
239                       0xc8e03f27082eb0daull,
240                       0xc1b0c11db52da528ull,
241                       0xadeb52a144f497ceull,
242                       0x046fc2f53bd97c9bull,
243                       0x2d587fdf5fae80d5ull,
244                       0x14036c3fb77fd73eull,
245                       0x883f768734d14676ull,
246                       0x212df988a83caffeull,
247                       0x36b41dac32387e17ull,
248                       0x9dbee8d7cf804bccull,
249                       0x34d9a58fb9a35794ull,
250                       0xda82beba879e3576ull,
251                       0x0366148280e5adf1ull,
252                       0x634085dcdba7b174ull,
253                       0x8451523252446534ull,
254                       0xc74ce8b4d5175b9aull,
255                       0x40afebd30a9a4837ull,
256                       0x7232952ce84e90beull,
257                       0x7443a6e37b87992dull,
258                       0xace8a33649218d9aull,
259                       0x9ad8af9614b75655ull,
260                       0xd518d9a700179ac2ull,
261                       0xc7f906d90fd36259ull,
262                       0xe2c47ec3abd8a12dull,
263                       0xca05e96fcbbb76c1ull,
264                       0x7186661c9ddf4973ull,
265                       0x0146f5f47d5f42c2ull,
266                       0x74a7c485608ea178ull,
267                       0x8cb5e3e5f84f3040ull,
268                       0xb330869dc366037dull,
269                       0x0dbc4d22f932bbd4ull,
270                       0xa8460b43946a5ecbull,
271                       0xcee72c72fe5ddea6ull,
272                       0x9f953cc7859b2a4eull,
273                       0x95ea396619e60182ull,
274                       0xfe9890efa6aa8c3eull,
275                       0x3c9a5691d1e25799ull,
276                       0xa54eeb19aa718b62ull,
277                       0x69907b2ec11d82b7ull,
278                       0x408c5405984caa65ull,
279                       0x6f11a21711640470ull,
280                       0x20b9227a8f53f9aeull,
281                       0x42df63b7ec442178ull,
282                       0xbec247f79407d00aull,
283                       0x2946c31558eab2bcull,
284                       0x84d045ffa174048eull,
285                       0x28bad532ba4a450cull,
286                       0x93ccf0cbaa274d7aull,
287                       0xff30008411d25158ull,
288                       0x3845ce6dcc30080aull,
289                       0xcf5925239b6f3dbaull,
290                       0x1f148649ed53660dull};
291
292
293
294
295
296
297 static gs_int32_t nextrandom = 0;
298 static gs_int32_t endrandom = 100;
299
300
301 // The first "endrandom" numbers returned are truely random INT64.
302 // They are perfect for bases of new hash functions, and currently
303 // we have just enough such random numbers for tabulation based
304 // 5-universal hashing, or one of the other schemes provided.
305 // The remaining ones are based on the pseudo-random mrand48() which
306 // returns a signed 32-bit number. We do not use the unsigned lrand48()
307 // which only returns 31-bit.
308 inline static INT64 RandINT64() {
309   INT64 r,r1;
310   if (nextrandom<endrandom) r=Random64[nextrandom++];
311   else {
312 //    if (nextrandom==endrandom)
313 //      fprintf(stderr,"Switching from random to pseudo-random in Rand64");
314     r =(INT64) mrand48();
315     r1 = (INT64) mrand48();
316     r=r<<32;
317     r=r^r1;
318   }
319   return r;
320 }
321
322
323 /* Computes ax+b mod Prime, possibly plus Prime,
324    exploiting the structure of Prime.  */
325 inline static INT64 MultAddPrime(INT32 x, INT64 a, INT64 b) {
326   INT64 a0,a1,c0,c1,c;
327   a0 = LOW(a)*x;
328   a1 = HIGH(a)*x;
329   c0 = a0+(a1<<32);
330   c1 = (a0>>32)+a1;
331   c  = (c0&Prime61)+(c1>>29)+b;
332   return c;
333 } // 12 instructions
334
335 /* CWtrick for 32-bit key x with prime 2^61-1 */
336 inline static INT64 Hash61bit5univ(INT32 x, Hash61bit5univID index) {
337   INT64 h;
338   gs_int32_t i;
339
340   h = index[0];
341   for (i=1;i<5;i++) h = MultAddPrime(x,h,index[i]);
342   h = (h&Prime61)+(h>>61);
343   if (h>=Prime61) h-=Prime61;
344   return h;
345 }
346
347
348
349 inline static INT64* InitHash61bit5univ() {
350   gs_int32_t i;
351
352   Hash61bit5univID hid = (INT64*) malloc(5*sizeof(INT64));
353   for (i=0;i<5;i++) {
354     hid[i]=RandINT64();
355     hid[i] = (hid[i]&Prime61)+(hid[i]>>61);
356     if (hid[i]>=Prime61) hid[i]-=Prime61;
357   }
358   return hid;
359 }
360
361 inline static HashTab32bit5univID InitHashTab32bit5univ() {
362   gs_uint32_t i,j;
363   HashTab32bit5univID htab;
364   Hash61bit5univID h61id;
365
366
367   htab = (HashTab32bit5univID) malloc(3*sizeof(INT32*));
368   for (i=0;i<3;i++) {
369      htab[i] = (INT32*) malloc(65538*sizeof(INT32));
370      h61id = InitHash61bit5univ();
371      for (j=0;j<65538;j++) htab[i][j]=Hash61bit5univ(j,h61id);
372   }
373   return htab;
374 }
375
376 /* tabulation based hashing for 32-bit key x using 16-bit characters.*/
377 inline INT64 HashTab32bit5univ(INT32 x, HashTab32bit5univID htab) {
378   INT32 x0, x1, x2;
379   x0 = x&65535;
380   x1 = x>>16;
381   x2 = x0 + x1;
382   x2 = 2 - (x2>>16) + (x2&65535);  // optional compression
383   return htab[0][x0]^htab[1][x1]^htab[2][x2];
384 } // 8 + 4 = 12 instructions
385
386
387 /* Tabulation based hashing for 32-bit keys using 16-bit characters.*/
388 /* Uses 500KB tables and makes 2 lookups per hash. */
389
390 inline static HashTab32bitLPID InitHashShortTab32bitLP() {
391   gs_uint32_t i,j;
392   HashTab32bitLPID htab;
393   Hash61bit5univID h61id;
394
395
396   htab = (HashTab32bitLPID) malloc(2*sizeof(INT32*));
397   for (i=0;i<2;i++) {
398      htab[i] = (INT32*) malloc(65536*sizeof(INT32));
399      h61id = InitHash61bit5univ();
400      for (j=0;j<65536;j++) htab[i][j]=Hash61bit5univ(j,h61id);
401   }
402   return htab;
403 }
404
405 /* tabulation based hashing for 32-bit key x using 16-bit characters.*/
406 inline INT64 HashShortTab32bitLP(INT32 x, HashTab32bitLPID htab) {
407   INT32 x0, x1;
408   x0 = x&65535;
409   x1 = x>>16;
410   return htab[0][x0]^htab[1][x1];
411 }
412
413
414 /* Tabulation based hashing for 32-bit keys using 8-bit characters.*/
415 /* Uses 4KB tables and makes 4 lookups per hash. */
416
417 inline static HashTab32bitLPID InitHashCharTab32bitLP() {
418   gs_uint32_t i,j;
419   HashTab32bitLPID htab;
420   Hash61bit5univID h61id;
421
422
423   htab = (HashTab32bitLPID) malloc(4*sizeof(INT32*));
424   for (i=0;i<4;i++) {
425      htab[i] = (INT32*) malloc(256*sizeof(INT32));
426      h61id = InitHash61bit5univ();
427      for (j=0;j<256;j++) htab[i][j]=Hash61bit5univ(j,h61id);
428   }
429   return htab;
430 }
431
432 inline INT32 HashCharTab32bitLP(INT32 x, HashTab32bitLPID htab) {
433   INT8 c;
434   INT32 h;
435   gs_int32_t i;
436
437   c=x;
438   h=htab[0][c];
439   for (i=1;i<4;i++) {
440     x>>=8;
441     c=x;
442     h=h^htab[i][c];
443   }
444   return h;
445 }
446
447
448 /* Tabulation based hashing for 64-bit keys using 16-bit characters.*/
449 /* Uses 2MB tables and makes 4 lookups per hash. */
450
451
452 inline static HashTab64bitLPID InitHashShortTab64bitLP() {
453   gs_int32_t i,j;
454   HashTab64bitLPID htab;
455   Hash61bit5univID h61id;
456
457
458   htab = (HashTab64bitLPID) malloc(4*sizeof(INT64*));
459   for (i=0;i<4;i++) {
460      htab[i] = (INT64*) malloc(65536*sizeof(INT64));
461      h61id = InitHash61bit5univ();
462      for (j=0;j<65536;j++) htab[i][j]=Hash61bit5univ(j,h61id);
463      h61id = InitHash61bit5univ();
464      for (j=0;j<65536;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;
465   }
466   return htab;
467 }
468
469 inline INT64 HashShortTab64bitLP(INT64 x, HashTab64bitLPID htab) {
470   INT16 c;
471   INT64 h;
472   gs_int32_t i;
473
474   c=x;
475   h=htab[0][c];
476   for (i=1;i<4;i++) {
477     x>>=16;
478     c=x;
479     h=h^htab[i][c];
480   }
481   return h;
482 }
483
484 /* Tabulation based hashing for 64-bit keys using 8-bit characters.*/
485 /* Uses 14KB tables and makes 8 lookups per hash. */
486
487 inline static HashTab64bitLPID InitHashCharTab64bitLP() {
488   gs_int32_t i,j;
489   HashTab64bitLPID htab;
490   Hash61bit5univID h61id;
491
492
493   htab = (HashTab64bitLPID) malloc(8*sizeof(INT64*));
494   for (i=0;i<8;i++) {
495      htab[i] = (INT64*) malloc(256*sizeof(INT64));
496      h61id = InitHash61bit5univ();
497      for (j=0;j<256;j++) htab[i][j]=Hash61bit5univ(j,h61id);
498      h61id = InitHash61bit5univ();
499      for (j=0;j<256;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;
500   }
501   return htab;
502 }
503
504 inline INT64 HashCharTab64bitLP(INT64 x, HashTab64bitLPID htab) {
505   INT8 c;
506   INT64 h;
507   gs_int32_t i;
508
509   c=x;
510   h=htab[0][c];
511   for (i=1;i<8;i++) {
512     x>>=8;
513     c=x;
514     h=h^htab[i][c];
515   }
516   return h;
517 }
518
519
520 /* Tabulation based hashing for 34-bit keys using 10-11-bit characters.*/
521 /* Uses 20KB tables and makes 3 lookups per hash. */
522
523 inline static HashTab32bitLPID InitHashMipTab32bitLP() {
524   gs_int32_t i,j;
525   HashTab32bitLPID htab;
526   Hash61bit5univID h61id;
527
528
529   htab = (HashTab32bitLPID) malloc(6*sizeof(INT32*));
530   for (i=0;i<2;i++) {
531      htab[i] = (INT32*) malloc(0x800*sizeof(INT32));
532      h61id = InitHash61bit5univ();
533      for (j=0;j<0x800;j++) htab[i][j]=(INT32) Hash61bit5univ(j,h61id);
534   }
535   for (i=2;i<3;i++) {
536      htab[i] = (INT32*) malloc(0x400*sizeof(INT32));
537      h61id = InitHash61bit5univ();
538      for (j=0;j<0x400;j++) htab[i][j]=(INT32) Hash61bit5univ(j,h61id);
539   }
540   return htab;
541 }
542
543 inline INT32 HashMipTab32bitLP(INT32 x, HashTab32bitLPID htab) {
544   INT32 c;
545   INT32 h;
546   gs_int32_t i;
547
548   h=0;
549   for (i=0;i<2;i++) {
550     c= x & 0x7FFull;
551     h=h^htab[i][c];
552     x>>=11;
553   }
554   c= x & 0x3FFull;
555   h=h^htab[2][c];
556   return h;
557 }
558
559 /* Tabulation based hashing for 64-bit keys using 10-11-bit characters.*/
560 /* Uses 80KB tables and makes 6 lookups per hash. */
561
562 inline static HashTab64bitLPID InitHashMipTab64bitLP() {
563   gs_int32_t i,j;
564   HashTab64bitLPID htab;
565   Hash61bit5univID h61id;
566
567
568   htab = (HashTab64bitLPID) malloc(6*sizeof(INT64*));
569   for (i=0;i<4;i++) {
570      htab[i] = (INT64*) malloc(0x800*sizeof(INT64));
571      h61id = InitHash61bit5univ();
572      for (j=0;j<0x800;j++) htab[i][j]=Hash61bit5univ(j,h61id);
573      h61id = InitHash61bit5univ();
574      for (j=0;j<0x800;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;
575   }
576   for (i=4;i<6;i++) {
577      htab[i] = (INT64*) malloc(0x400*sizeof(INT64));
578      h61id = InitHash61bit5univ();
579      for (j=0;j<0x400;j++) htab[i][j]=Hash61bit5univ(j,h61id);
580      h61id = InitHash61bit5univ();
581      for (j=0;j<0x400;j++) htab[i][j]+=Hash61bit5univ(j,h61id)<<32;
582   }
583   return htab;
584 }
585
586 inline INT64 HashMipTab64bitLP(INT64 x, HashTab64bitLPID htab) {
587   INT64 c;
588   INT64 h;
589   gs_int32_t i;
590
591   h=0;
592   for (i=0;i<4;i++) {
593     c= x & 0x7FFull;
594     h=h^htab[i][c];
595     x>>=11;
596   }
597   for (i=4;i<6;i++) {
598     c= x & 0x3FFull;
599     h=h^htab[i][c];
600     x>>=10;
601   }
602   return h;
603 }
604
605
606
607 inline static Hash32bit2univID InitHash32bit2univ() {
608   Hash32bit2univID hid = (INT64*) malloc(2*sizeof(INT64));
609   hid[0]=RandINT64();
610   hid[1]=RandINT64();
611   return hid;
612 }
613
614 inline INT32 Hash32bit2univ(INT32 x, Hash32bit2univID hid) {
615   INT64 H;
616   INT32 h;
617   H = (x*hid[0])+hid[1];
618   h = H >> 32;
619   return h;
620 }
621
622
623 // For the string hashing below is for string of 32bit integers, and
624 // the output is a 2-universal 32-bit number.
625
626 inline static Hash32bit2univID InitStringHash32bit2univ(gs_int32_t length) {
627   gs_int32_t i;
628   Hash32bit2univID hid = (INT64*) malloc((length+1)*sizeof(INT64));
629   for (i=0;i<=length;i++) hid[i]=RandINT64();
630   return hid;
631 }
632
633
634 // ---------------------------------------------
635 //              Mikkel's string hashing code, 250M hashes/sec
636
637 //assumes string length even
638 inline INT32 EvenStringHash32bit2univ(INT32 * x, gs_int32_t length,
639                                       Hash32bit2univID hid) {
640   gs_int32_t i;
641   INT64 H;
642   INT32 h;
643   INT64 xh1,xh2,y;
644
645   H=0;
646   for (i=0;i<length;i+=2) {
647     xh1=x[i]+hid[i+1];
648     xh2=x[i+1]+hid[i];
649     y=xh1*xh2;
650     H = H^y;
651   }
652   H=H^hid[length];
653   h = H >> 32;
654   return h;
655 }
656
657 //assumes string length odd
658 inline INT32 OddStringHash32bit2univ(INT32 * x, gs_int32_t length,
659                                       Hash32bit2univID hid) {
660   gs_int32_t i;
661   INT64 H;
662   INT32 h;
663   INT64 xh1,xh2,y;
664
665   H = x[0]*hid[0];
666   for (i=1;i<length;i+=2) {
667     xh1=x[i]+hid[i+1];
668     xh2=x[i+1]+hid[i];
669     y=xh1*xh2;
670     H = H^y;
671   }
672   H=H^hid[length];
673   h = H >> 32;
674   return h;
675 }
676
677 //Below we have the generic algorithm for fixed length string hashing
678 //For shorter strings of length < 6, it is worthwhile using
679 //specialized versions with fewer tests.
680 //All versions rely on the some initialization above.
681 inline INT32 StringHash32bit2univ(INT32 * x, gs_int32_t length, Hash32bit2univID hid) {
682 //  gs_int32_t i;
683 //  INT64 H=0;
684 //  INT32 h;
685 //  for (i=0;i<length;i++) H = H^(x[i]*hid[i]);
686 //  H=H^hid[length];
687 //  h = H >> 32;
688 //  return h;
689   if (length&1) return OddStringHash32bit2univ(x,length,hid);
690   else return EvenStringHash32bit2univ(x,length,hid);
691 }
692
693 // ---------------------------------------------
694
695 // ---------------------------------------------
696 //              modification to Mikkel's string hashing code
697 //              Return the full 64 bits ... low order bits are not random
698 //              but we can squeeze a few more bits out.
699
700 //assumes string length even
701 inline INT64 EvenStringHash64bit(INT32 * x, gs_int32_t length,
702                                       Hash32bit2univID hid) {
703   gs_int32_t i;
704   INT64 H;
705   INT64 xh1,xh2,y;
706
707   H=0;
708   for (i=0;i<length;i+=2) {
709     xh1=x[i]+hid[i+1];
710     xh2=x[i+1]+hid[i];
711     y=xh1*xh2;
712     H = H^y;
713   }
714   H=H^hid[length];
715   return H;
716 }
717
718 //assumes string length odd
719 inline INT64 OddStringHash64bit(INT32 * x, gs_int32_t length,
720                                       Hash32bit2univID hid) {
721   gs_int32_t i;
722   INT64 H;
723   INT64 xh1,xh2,y;
724
725   H = x[0]*hid[0];
726   for (i=1;i<length;i+=2) {
727     xh1=x[i]+hid[i+1];
728     xh2=x[i+1]+hid[i];
729     y=xh1*xh2;
730     H = H^y;
731   }
732   H=H^hid[length];
733   return H;
734 }
735
736 //Below we have the generic algorithm for fixed length string hashing
737 //For shorter strings of length < 6, it is worthwhile using
738 //specialized versions with fewer tests.
739 //All versions rely on the some initialization above.
740 inline INT64 StringHash64bit(INT32 * x, gs_int32_t length, Hash32bit2univID hid) {
741 //  gs_int32_t i;
742 //  INT64 H=0;
743 //  INT32 h;
744 //  for (i=0;i<length;i++) H = H^(x[i]*hid[i]);
745 //  H=H^hid[length];
746 //  h = H >> 32;
747 //  return h;
748   if (length&1) return OddStringHash64bit(x,length,hid);
749   else return EvenStringHash64bit(x,length,hid);
750 }
751
752 // ---------------------------------------------
753
754
755
756 //string of length 2
757 inline INT32 String2Hash32bit2univ(INT32 * x,Hash32bit2univID hid) {
758   INT64 H;
759   INT32 h;
760   INT64 xh1,xh2;
761
762   xh1=x[0]+hid[1];
763   xh2=x[1]+hid[0];
764   H=xh1*xh2;
765   H=H^hid[2];
766   h = H >> 32;
767   return h;
768 }
769
770 //string of length 3
771 inline INT32 String3Hash32bit2univ(INT32 * x,Hash32bit2univID hid) {
772   INT64 H;
773   INT32 h;
774   INT64 xh1,xh2,y;
775
776   H = x[0]*hid[0];
777   xh1=x[1]+hid[2];
778   xh2=x[2]+hid[1];
779   y=xh1*xh2;
780   H = H^y;
781   H=H^hid[3];
782   h = H >> 32;
783   return h;
784 }
785
786
787 inline static VarINT32StringHash32bit2univID InitVarINT32StringHash32bitUniv() {
788   gs_int32_t i;
789   VarINT32StringHash32bit2univID hid;
790
791   hid = (VarINT32StringHash32bit2univID) malloc(sizeof(VISHID));
792   for (i=0;i<16;i++) hid->str_compr_hid[i]=RandINT64();
793   hid->string_hid = RandINT64() & Prime61;
794   //  hid->last_compr_hid = RandINT64() | 0x1ull;
795   return hid;
796   }
797
798 inline INT32 VarINT32StringHash32bitUniv(INT32 * x, gs_int32_t length, VarINT32StringHash32bit2univID hid) {
799
800   INT32 h,i,j,d;
801   gs_int32_t e;
802   INT64 C,H,M;
803   INT64 xh1,xh2,y;
804   INT64 HxM00,HxM01,HxM10,HxM11,C0,C1,C2,CC;
805
806   if (length<16) return StringHash32bit2univ(x,length,hid->str_compr_hid);
807   H=0;
808   i=0;
809   for (;;) {
810      d=length-i;
811      C = 0;
812      j=0;
813      if (d>16) d=16;
814      else if (d&1) {
815         C = x[i]*hid->str_compr_hid[j];
816         i++;
817         j++;
818      }
819      for (e=((int) (d>>1));e>0;e--) {
820          xh1=x[i]+hid->str_compr_hid[j+1];
821          xh2=x[i+1]+hid->str_compr_hid[j];
822          y=xh1*xh2;
823          C = C^y;
824          i+=2;
825          j+=2;
826      }
827      if (i==(INT32) length) {
828         H<<=4;
829         H+=C;
830         H+=((INT64) length)*hid->length_factor;
831         h=(H>>32);
832         return h;
833      }
834      C>>=4;
835      H+=C;
836      M=hid->string_hid;
837      //Multiply H*M mod Prime61, possibly plus Prime,
838      //exploiting the structure of Prime61.
839      //We assume H<2*Prime61 and M<=Prime61, e.g.,
840      //H*M <2^123.
841      HxM00 = LOW(H)*LOW(M);
842      HxM01 = LOW(H)*HIGH(M);
843      HxM10 = HIGH(H)*LOW(M);
844      HxM11 = HIGH(H)*HIGH(M); //has at most 60 bits
845      C0 = HxM00+(HxM01<<32)+(HxM10<<32);//overflow doesn't matter here.
846      C0 = C0&Prime61;              //Has at most 61 bits.
847      C1 = (HxM00>>32)+HxM01+HxM10; //Overflow impossible each <=62 bits
848      C1 = C1>>29;                  //Has at most 33 bits.
849      C2 = HxM11<<3;                //Has at most 123-64+3=62 bits.
850      CC = C0+C1+C2;                //At most 63 bits.
851      H = (CC&Prime61)+(CC>>61);    //<2*Prime
852   }
853 }
854
855 inline static VarStringHash32bit2univID InitVarStringHash32bitUniv() {
856   gs_int32_t i;
857   VarStringHash32bit2univID hid;
858
859   hid = (VarStringHash32bit2univID) malloc(sizeof(VSHID));
860   for (i=0;i<16;i++) {
861     hid->int_str[i]=0;
862     hid->str_compr_hid[i]=RandINT64();
863   }
864   hid->string_hid = RandINT64() & Prime61;
865   //  hid->last_compr_hid = RandINT64() | 0x1ull;
866   return hid;
867   }
868
869 inline INT32 VarStringHash32bitUniv(char * x, gs_int32_t length,
870                                          VarStringHash32bit2univID hid) {
871
872   INT32 h,i,l,d,str_ix;
873   INT64 C,H,M;
874   INT64 xh1,xh2,y;
875   INT64 HxM00,HxM01,HxM10,HxM11,C0,C1,C2,CC;
876
877   // Assumes hid->int_str[*]==0
878
879   if (length<=60) {
880     memcpy(hid->int_str,x,length);
881     d=(length+3)>>2; //<16
882     h=StringHash32bit2univ(hid->int_str,d,hid->str_compr_hid);
883     for (i=0;i<d;i++) hid->int_str[i]=0;
884     return h;
885   }
886   H=0;
887   str_ix=0;
888   for (;;) {
889      C = 0;
890      l=length-str_ix;
891      if (l>64) l=64;
892      memcpy(hid->int_str,x+str_ix,l);
893      str_ix+=l;
894      d=(l+3)>>2;
895      i=0;
896      if (d&1) {
897         C = hid->int_str[i]*hid->str_compr_hid[i];
898         i++;
899      }
900      while (i<d) {
901          xh1=hid->int_str[i]+hid->str_compr_hid[i+1];
902          xh2=hid->int_str[i+1]+hid->str_compr_hid[i];
903          y=xh1*xh2;
904          C = C^y;
905          i+=2;
906      }
907      for (i=0;i<d;i++) hid->int_str[i]=0;
908      if (str_ix==(INT32) length) {
909         H<<=4;
910         H+=C;
911         H+=((INT64) length)*hid->length_factor;
912         h=(H>>32);
913         return h;
914      }
915      C>>=4;
916      H+=C;
917      M=hid->string_hid;
918      //Multiply H*M mod Prime61, possibly plus Prime,
919      //exploiting the structure of Prime61.
920      //We assume H<2*Prime61 and M<=Prime61, e.g.,
921      //H*M <2^123.
922      HxM00 = LOW(H)*LOW(M);
923      HxM01 = LOW(H)*HIGH(M);
924      HxM10 = HIGH(H)*LOW(M);
925      HxM11 = HIGH(H)*HIGH(M); //has at most 60 bits
926      C0 = HxM00+(HxM01<<32)+(HxM10<<32);//overflow doesn't matter here.
927      C0 = C0&Prime61;              //Has at most 61 bits.
928      C1 = (HxM00>>32)+HxM01+HxM10; //Overflow impossible each <=62 bits
929      C1 = C1>>29;                  //Has at most 33 bits.
930      C2 = HxM11<<3;                //Has at most 123-64+3=62 bits.
931      CC = C0+C1+C2;                //At most 63 bits.
932      H = (CC&Prime61)+(CC>>61);    //<2*Prime
933   }
934 }
935
936
937 #endif
938