Fix quantiling issues 82/5282/1 cherry
author vlad shkapenyuk <vshkap@research.att.com>
Tue, 8 Dec 2020 20:39:07 +0000 (15:39 -0500)
committer vlad shkapenyuk <vshkap@research.att.com>
Tue, 8 Dec 2020 20:39:07 +0000 (15:39 -0500)
Signed-off-by: vlad shkapenyuk <vshkap@research.att.com>
Change-Id: I6f5f26055c19fe758e4260b5393208293e6803eb

cfg/external_fcns.def
include/hfta/hfta_udaf.h
include/lfta/rts_udaf.h
include/udaf_common.h [new file with mode: 0644]
src/lib/gscphftaaux/flip_udaf.cc
src/lib/gscplftaaux/Makefile
src/lib/gscplftaaux/flip_udaf.cc [new file with mode: 0644]

index f8afc61..b506319 100644 (file)
@@ -619,15 +619,51 @@ uint FUN [LFTA_LEGAL, COST EXPENSIVE]
 //         Flips approximate quantile
 ///////////////////////////////////////////////////////////
 
-        uint EXTR quantile_of quant_udaf_hfta0 extr_quant_hfta0_fcn (uint, float);
-        uint  FUN [COST LOW] extr_quant_hfta0_fcn(string, float);
-        uint EXTR median_of quant_udaf_hfta0 extr_med_hfta0_fcn (uint);
-        uint  FUN [COST LOW] extr_med_hfta0_fcn(string);
-        uint EXTR qspace_of quant_udaf_hfta0 extr_quant_hfta0_space (uint);
-        uint  FUN [COST LOW] extr_quant_hfta0_space(string);
-        string UDAF [SUBAGGR quant_udaf_lfta3, SUPERAGGR quant_udaf_hfta3] quant_udaf_hfta0 fstring24 (uint);
-        string UDAF quant_udaf_hfta3 fstring24 (string);
-        string UDAF quant_udaf_lfta3 fstring6600 (uint);
+// uint
+        uint EXTR quantile_of quant_ui_udaf_hftaZ extr_quant_ui_hftaZ_fcn (uint, float);
+        uint  FUN [COST LOW] extr_quant_ui_hftaZ_fcn(string, float);
+        uint EXTR median_of quant_ui_udaf_hftaZ extr_med_ui_hftaZ_fcn (uint);
+        uint  FUN [COST LOW] extr_med_ui_hftaZ_fcn(string);
+
+        string UDAF [SUBAGGR quant_ui_udaf_lfta3, SUPERAGGR quant_ui_udaf_hfta3] quant_ui_udaf_hftaZ fstring32 (uint);
+        string UDAF quant_ui_udaf_hfta3 fstring32 (string);
+        string UDAF quant_ui_udaf_lfta3 fstring2088 (uint);
+// int
+        int EXTR quantile_of quant_i_udaf_hftaZ extr_quant_i_hftaZ_fcn (int, float);
+        int  FUN [COST LOW] extr_quant_i_hftaZ_fcn(string, float);
+        int EXTR median_of quant_i_udaf_hftaZ extr_med_i_hftaZ_fcn (int);
+        int  FUN [COST LOW] extr_med_i_hftaZ_fcn(string);
+
+        string UDAF [SUBAGGR quant_i_udaf_lfta3, SUPERAGGR quant_i_udaf_hfta3] quant_i_udaf_hftaZ fstring32 (int);
+        string UDAF quant_i_udaf_hfta3 fstring32 (string);
+        string UDAF quant_i_udaf_lfta3 fstring2088 (int);
+// ullong
+        ullong EXTR quantile_of quant_ul_udaf_hftaZ extr_quant_ul_hftaZ_fcn (ullong, float);
+        ullong  FUN [COST LOW] extr_quant_ul_hftaZ_fcn(string, float);
+        ullong EXTR median_of quant_ul_udaf_hftaZ extr_med_ul_hftaZ_fcn (ullong);
+        ullong  FUN [COST LOW] extr_med_ul_hftaZ_fcn(string);
+
+        string UDAF [SUBAGGR quant_ul_udaf_lfta3, SUPERAGGR quant_ul_udaf_hfta3] quant_ul_udaf_hftaZ fstring32 (ullong);
+        string UDAF quant_ul_udaf_hfta3 fstring32 (string);
+        string UDAF quant_ul_udaf_lfta3 fstring2904 (ullong);
+// llong
+        llong EXTR quantile_of quant_l_udaf_hftaZ extr_quant_l_hftaZ_fcn (llong, float);
+        llong  FUN [COST LOW] extr_quant_l_hftaZ_fcn(string, float);
+        llong EXTR median_of quant_l_udaf_hftaZ extr_med_l_hftaZ_fcn (llong);
+        llong  FUN [COST LOW] extr_med_l_hftaZ_fcn(string);
+
+        string UDAF [SUBAGGR quant_l_udaf_lfta3, SUPERAGGR quant_l_udaf_hfta3] quant_l_udaf_hftaZ fstring32 (llong);
+        string UDAF quant_l_udaf_hfta3 fstring32 (string);
+        string UDAF quant_l_udaf_lfta3 fstring2904 (llong);
+// float
+        float EXTR quantile_of quant_f_udaf_hftaZ extr_quant_f_hftaZ_fcn (float, float);
+        float  FUN [COST LOW] extr_quant_f_hftaZ_fcn(string, float);
+        float EXTR median_of quant_f_udaf_hftaZ extr_med_f_hftaZ_fcn (float);
+        float  FUN [COST LOW] extr_med_f_hftaZ_fcn(string);
+
+        string UDAF [SUBAGGR quant_f_udaf_lfta3, SUPERAGGR quant_f_udaf_hfta3] quant_f_udaf_hftaZ fstring32 (float);
+        string UDAF quant_f_udaf_hfta3 fstring32 (string);
+        string UDAF quant_f_udaf_lfta3 fstring2904 (float);
 
 ///////////////////////////////////////////////////////////
 //               FIRST and LAST aggregation function
index 9c960b5..7bcf02b 100644 (file)
@@ -279,24 +279,104 @@ void running_array_aggr_hfta_HFTA_AGGR_DESTROY_(vstring* scratch);
 /* HFTA functions                                              */
 /****************************************************************/
 
-void quant_udaf_hfta3_HFTA_AGGR_INIT_(gs_sp_t);
-void quant_udaf_hfta3_HFTA_AGGR_UPDATE_(gs_sp_t, vstring *);
-void quant_udaf_hfta3_HFTA_AGGR_OUTPUT_(vstring *, gs_sp_t);
-void quant_udaf_hfta3_HFTA_AGGR_DESTROY_(gs_sp_t);
-gs_uint32_t extr_quant_hfta3_fcn(vstring *, gs_float_t);
-gs_uint32_t extr_med_hfta3_fcn(vstring *);
-gs_uint32_t extr_quant_hfta3_space(vstring *);
+// void quant_udaf_hfta3_HFTA_AGGR_INIT_(gs_sp_t);
+// void quant_udaf_hfta3_HFTA_AGGR_UPDATE_(gs_sp_t, vstring *);
+// void quant_udaf_hfta3_HFTA_AGGR_OUTPUT_(vstring *, gs_sp_t);
+// void quant_udaf_hfta3_HFTA_AGGR_DESTROY_(gs_sp_t);
+// gs_uint32_t extr_quant_hfta3_fcn(vstring *, gs_float_t);
+// gs_uint32_t extr_med_hfta3_fcn(vstring *);
+// gs_uint32_t extr_quant_hfta3_space(vstring *);
+
+void quant_f_udaf_hfta3_HFTA_AGGR_INIT_(gs_sp_t b);    // hfta3 (super-aggregate) quantile UDAF, gs_float_t values; b is the aggregate scratch
+void quant_f_udaf_hfta3_HFTA_AGGR_UPDATE_(gs_sp_t b, vstring *v);      // v->offset is read as the matching lfta3 sub-aggregate state
+void quant_f_udaf_hfta3_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b) ;     // r is filled in to reference b (SHALLOW_COPY)
+//gs_float_t extr_quant_f_hfta3_fcn(vstring *v, gs_float_t phi) ;
+//gs_float_t extr_f_med_hfta3_fcn(vstring *v);
+void quant_f_udaf_hfta3_HFTA_AGGR_DESTROY_(gs_sp_t b) ;        // frees the list hanging off the scratch
+
+void quant_ui_udaf_hfta3_HFTA_AGGR_INIT_(gs_sp_t b);   // same four entry points for gs_uint32_t values
+void quant_ui_udaf_hfta3_HFTA_AGGR_UPDATE_(gs_sp_t b, vstring *v);
+void quant_ui_udaf_hfta3_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b) ;
+//gs_uint32_t extr_quant_ui_hfta3_fcn(vstring *v, gs_float_t phi) ;
+//gs_uint32_t extr_ui_med_hfta3_fcn(vstring *v);
+void quant_ui_udaf_hfta3_HFTA_AGGR_DESTROY_(gs_sp_t b) ;
+
+void quant_i_udaf_hfta3_HFTA_AGGR_INIT_(gs_sp_t b);    // same four entry points for gs_int32_t values
+void quant_i_udaf_hfta3_HFTA_AGGR_UPDATE_(gs_sp_t b, vstring *v);
+void quant_i_udaf_hfta3_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b) ;
+//gs_int32_t extr_quant_i_hfta3_fcn(vstring *v, gs_float_t phi) ;
+//gs_int32_t extr_i_med_hfta3_fcn(vstring *v);
+void quant_i_udaf_hfta3_HFTA_AGGR_DESTROY_(gs_sp_t b) ;
+
+void quant_ul_udaf_hfta3_HFTA_AGGR_INIT_(gs_sp_t b);   // same four entry points for gs_uint64_t values
+void quant_ul_udaf_hfta3_HFTA_AGGR_UPDATE_(gs_sp_t b, vstring *v);
+void quant_ul_udaf_hfta3_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b) ;
+//gs_uint64_t extr_quant_ul_hfta3_fcn(vstring *v, gs_float_t phi) ;
+//gs_uint64_t extr_ul_med_hfta3_fcn(vstring *v);
+void quant_ul_udaf_hfta3_HFTA_AGGR_DESTROY_(gs_sp_t b) ;
+
+void quant_l_udaf_hfta3_HFTA_AGGR_INIT_(gs_sp_t b);    // same four entry points for gs_int64_t values
+void quant_l_udaf_hfta3_HFTA_AGGR_UPDATE_(gs_sp_t b, vstring *v);
+void quant_l_udaf_hfta3_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b) ;
+//gs_int64_t extr_quant_l_hfta3_fcn(vstring *v, gs_float_t phi) ;
+//gs_int64_t extr_l_med_hfta3_fcn(vstring *v);
+void quant_l_udaf_hfta3_HFTA_AGGR_DESTROY_(gs_sp_t b) ;
+
+
+
 
 /****************************************************************/
 /* HFTA-only functions                                         */
 /****************************************************************/
-void quant_udaf_hfta0_HFTA_AGGR_INIT_(gs_sp_t);
-void quant_udaf_hfta0_HFTA_AGGR_UPDATE_(gs_sp_t, gs_uint32_t);
-void quant_udaf_hfta0_HFTA_AGGR_OUTPUT_(vstring *, gs_sp_t);
-void quant_udaf_hfta0_HFTA_AGGR_DESTROY_(gs_sp_t);
-gs_uint32_t extr_quant_hfta0_fcn(vstring *, gs_float_t);
-gs_uint32_t extr_med_hfta0_fcn(vstring *);
-gs_uint32_t extr_quant_hfta0_space(vstring *);
+
+// void quant_udaf_hfta0_HFTA_AGGR_INIT_(gs_sp_t);
+// void quant_udaf_hfta0_HFTA_AGGR_UPDATE_(gs_sp_t, gs_uint32_t);
+// void quant_udaf_hfta0_HFTA_AGGR_OUTPUT_(vstring *, gs_sp_t);
+// void quant_udaf_hfta0_HFTA_AGGR_DESTROY_(gs_sp_t);
+// gs_uint32_t extr_quant_hfta0_fcn(vstring *, gs_float_t);
+// gs_uint32_t extr_med_hfta0_fcn(vstring *);
+// gs_uint32_t extr_quant_hfta0_space(vstring *);
+
+void quant_ui_udaf_hftaZ_HFTA_AGGR_INIT_(gs_sp_t b);   // hfta-only quantile UDAF over gs_uint32_t values; b is the aggregate scratch
+void quant_ui_udaf_hftaZ_HFTA_AGGR_UPDATE_(gs_sp_t b, gs_uint32_t v) ; // takes one raw value per call (contrast hfta3, which takes a vstring)
+void quant_ui_udaf_hftaZ_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b);
+void quant_ui_udaf_hftaZ_HFTA_AGGR_DESTROY_(gs_sp_t b);
+gs_uint32_t extr_quant_ui_hftaZ_fcn(vstring *v, gs_float_t phi) ;      // registered as the quantile_of extractor in external_fcns.def
+gs_uint32_t extr_med_ui_hftaZ_fcn(vstring *v) ;        // registered as the median_of extractor in external_fcns.def
+int quant_ui_udaf_hftaZ_nelem(gs_sp_t b) ;     // definition not in view; presumably an element count -- confirm
+
+void quant_ul_udaf_hftaZ_HFTA_AGGR_INIT_(gs_sp_t b);   // same interface for gs_uint64_t values
+void quant_ul_udaf_hftaZ_HFTA_AGGR_UPDATE_(gs_sp_t b, gs_uint64_t v) ;
+void quant_ul_udaf_hftaZ_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b);
+void quant_ul_udaf_hftaZ_HFTA_AGGR_DESTROY_(gs_sp_t b);
+gs_uint64_t extr_quant_ul_hftaZ_fcn(vstring *v, gs_float_t phi) ;
+gs_uint64_t extr_med_ul_hftaZ_fcn(vstring *v) ;
+int quant_ul_udaf_hftaZ_nelem(gs_sp_t b) ;
+
+void quant_i_udaf_hftaZ_HFTA_AGGR_INIT_(gs_sp_t b);    // same interface for gs_int32_t values
+void quant_i_udaf_hftaZ_HFTA_AGGR_UPDATE_(gs_sp_t b, gs_int32_t v) ;
+void quant_i_udaf_hftaZ_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b);
+void quant_i_udaf_hftaZ_HFTA_AGGR_DESTROY_(gs_sp_t b);
+gs_int32_t extr_quant_i_hftaZ_fcn(vstring *v, gs_float_t phi) ;
+gs_int32_t extr_med_i_hftaZ_fcn(vstring *v) ;
+int quant_i_udaf_hftaZ_nelem(gs_sp_t b) ;
+
+void quant_l_udaf_hftaZ_HFTA_AGGR_INIT_(gs_sp_t b);    // same interface for gs_int64_t values
+void quant_l_udaf_hftaZ_HFTA_AGGR_UPDATE_(gs_sp_t b, gs_int64_t v) ;
+void quant_l_udaf_hftaZ_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b);
+void quant_l_udaf_hftaZ_HFTA_AGGR_DESTROY_(gs_sp_t b);
+gs_int64_t extr_quant_l_hftaZ_fcn(vstring *v, gs_float_t phi) ;
+gs_int64_t extr_med_l_hftaZ_fcn(vstring *v) ;
+int quant_l_udaf_hftaZ_nelem(gs_sp_t b) ;
+
+void quant_f_udaf_hftaZ_HFTA_AGGR_INIT_(gs_sp_t b);    // same interface for gs_float_t values
+void quant_f_udaf_hftaZ_HFTA_AGGR_UPDATE_(gs_sp_t b, gs_float_t v) ;
+void quant_f_udaf_hftaZ_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b);
+void quant_f_udaf_hftaZ_HFTA_AGGR_DESTROY_(gs_sp_t b);
+gs_float_t extr_quant_f_hftaZ_fcn(vstring *v, gs_float_t phi) ;
+gs_float_t extr_med_f_hftaZ_fcn(vstring *v) ;
+int quant_f_udaf_hftaZ_nelem(gs_sp_t b) ;
+
 
 
 /****************************************************************/
index 978e2c4..ed3771d 100644 (file)
@@ -147,11 +147,35 @@ void running_array_aggr_lfta_LFTA_AGGR_DESTROY_(char* scratch);
 /* LFTA functions                                              */
 /****************************************************************/
 
-void quant_udaf_lfta3_LFTA_AGGR_INIT_(gs_sp_t);
-void quant_udaf_lfta3_LFTA_AGGR_UPDATE_(gs_sp_t, gs_uint32_t);
-gs_retval_t quant_udaf_lfta3_LFTA_AGGR_FLUSHME_(gs_sp_t);
-void quant_udaf_lfta3_LFTA_AGGR_OUTPUT_(struct gs_string *, gs_sp_t);
-void quant_udaf_lfta3_LFTA_AGGR_DESTROY_(gs_sp_t);
+void quant_ui_udaf_lfta3_LFTA_AGGR_INIT_(gs_sp_t);     // lfta3 (sub-aggregate) quantile UDAF over gs_uint32_t values
+void quant_ui_udaf_lfta3_LFTA_AGGR_UPDATE_(gs_sp_t, gs_uint32_t);      // takes one raw value per call
+gs_retval_t quant_ui_udaf_lfta3_LFTA_AGGR_FLUSHME_(gs_sp_t);   // definition not in view; presumably reports whether the scratch should be flushed -- confirm
+void quant_ui_udaf_lfta3_LFTA_AGGR_OUTPUT_(struct gs_string *, gs_sp_t);
+void quant_ui_udaf_lfta3_LFTA_AGGR_DESTROY_(gs_sp_t);
+
+void quant_i_udaf_lfta3_LFTA_AGGR_INIT_(gs_sp_t);      // same interface for gs_int32_t values
+void quant_i_udaf_lfta3_LFTA_AGGR_UPDATE_(gs_sp_t, gs_int32_t);
+gs_retval_t quant_i_udaf_lfta3_LFTA_AGGR_FLUSHME_(gs_sp_t);
+void quant_i_udaf_lfta3_LFTA_AGGR_OUTPUT_(struct gs_string *, gs_sp_t);
+void quant_i_udaf_lfta3_LFTA_AGGR_DESTROY_(gs_sp_t);
+
+void quant_ul_udaf_lfta3_LFTA_AGGR_INIT_(gs_sp_t);     // same interface for gs_uint64_t values
+void quant_ul_udaf_lfta3_LFTA_AGGR_UPDATE_(gs_sp_t, gs_uint64_t);
+gs_retval_t quant_ul_udaf_lfta3_LFTA_AGGR_FLUSHME_(gs_sp_t);
+void quant_ul_udaf_lfta3_LFTA_AGGR_OUTPUT_(struct gs_string *, gs_sp_t);
+void quant_ul_udaf_lfta3_LFTA_AGGR_DESTROY_(gs_sp_t);
+
+void quant_l_udaf_lfta3_LFTA_AGGR_INIT_(gs_sp_t);      // same interface for gs_int64_t values
+void quant_l_udaf_lfta3_LFTA_AGGR_UPDATE_(gs_sp_t, gs_int64_t);
+gs_retval_t quant_l_udaf_lfta3_LFTA_AGGR_FLUSHME_(gs_sp_t);
+void quant_l_udaf_lfta3_LFTA_AGGR_OUTPUT_(struct gs_string *, gs_sp_t);
+void quant_l_udaf_lfta3_LFTA_AGGR_DESTROY_(gs_sp_t);
+
+void quant_f_udaf_lfta3_LFTA_AGGR_INIT_(gs_sp_t);      // same interface for gs_float_t values
+void quant_f_udaf_lfta3_LFTA_AGGR_UPDATE_(gs_sp_t, gs_float_t);
+gs_retval_t quant_f_udaf_lfta3_LFTA_AGGR_FLUSHME_(gs_sp_t);
+void quant_f_udaf_lfta3_LFTA_AGGR_OUTPUT_(struct gs_string *, gs_sp_t);
+void quant_f_udaf_lfta3_LFTA_AGGR_DESTROY_(gs_sp_t);
 
 /////////////////////////////////////////////////////////
 // ==============================================================
diff --git a/include/udaf_common.h b/include/udaf_common.h
new file mode 100644 (file)
index 0000000..621be3e
--- /dev/null
@@ -0,0 +1,16 @@
+#ifndef _UDAF_COMMON_DEFINED_
+#define _UDAF_COMMON_DEFINED_
+
+//     This include file contains definitions for UDAFs
+//     which must be kept synchronized between the LFTA and HFTA layers.
+
+//     for quantiles (flip_udaf)
+#define QUANT_LFTA1_SIZE 729   // sized the samples[] array of the lfta1 struct that this change removes from the hfta flip_udaf.cc; other users not in view
+#define QUANT_LFTA2_SIZE 181   // sized the t[] array (SIZE+1 entries) of the lfta2 struct that this change removes; other users not in view
+#define QUANT_LFTA3_SIZE 50            // quant_udaf_lfta3_struct_t holds QUANT_LFTA3_SIZE+1 tuples
+#define QUANT_EPS 0.01 // compress/merge thresholds are computed as ceil(2 * QUANT_EPS * nelts)
+#define SKIPDIR_SIZE 100       // skipdir_t holds SKIPDIR_SIZE+1 skip nodes
+#define SKIPDIR_HEIGHT_MAX 7   // number of per-level head pointers in skipdir_t
+
+
+#endif
index 0252fde..2d05f16 100644 (file)
 #include <math.h>
 #include "hfta_udaf.h"
 
-#define QUANT_LFTA1_SIZE 729
-#define QUANT_LFTA2_SIZE 181
-#define QUANT_LFTA3_SIZE 100
+#include <algorithm>    // std::sort 
 
-#define QUANT_EPS 0.01
-#define SKIPDIR_SIZE 100
-#define SKIPDIR_HEIGHT_MAX 7
-#define max(a,b) ((a) > (b) ? (a) : (b))
+#include<iostream>
 
+#include "udaf_common.h"
+//#define QUANT_LFTA1_SIZE 729
+//#define QUANT_LFTA2_SIZE 181
+//#define QUANT_LFTA3_SIZE 50          
+//#define QUANT_EPS 0.01
+//#define SKIPDIR_SIZE 100
+//#define SKIPDIR_HEIGHT_MAX 7
+
+
+//#define max(a,b) ((a) > (b) ? (a) : (b))
+
+using namespace std;
+
+// current use (<t> is one of ui, i, ul, l, f -- see cfg/external_fcns.def)
+//     hfta_only: quant_<t>_udaf_hftaZ
+//     extraction: extr_quant_<t>_hftaZ_fcn extr_med_<t>_hftaZ_fcn
+//     lfta/hfta: quant_<t>_udaf_lfta3 quant_<t>_udaf_hfta3
+
+// TODO
+//             - Should the hfta part of the hfta/lfta split (hfta3)
+//               match the hfta-only implementation (hftaZ)?
+//             - On out-of-space conditions, try a compress before
+//               discarding the sample.  If that happens, can the rate
+//               of compresses be decreased?
+//             - Can the lfta part be made to work with actual compression?
+//               if not, change the implementation to gather a collection
+//               of samples, then send them up.  This should decrease
+//               lfta space use and reduce the cost of adding to the hfta.
 
 /****************************************************************/
 /* Data Structures                                             */
 /****************************************************************/
-typedef struct tuple_t {
-       gs_uint32_t val;
-       gs_uint32_t gap;
-       gs_uint32_t del;
-       gs_uint32_t next;
-} tuple_t;
 
-typedef struct supertuple_t {
-       gs_uint32_t val;
-       gs_uint32_t gap;
-       gs_uint32_t del;
-       struct supertuple_t *next;
-} supertuple_t;
 
 /****************************************************************/
-typedef gs_uint32_t val_type;
+template <class T> struct tuple_t {    // summary tuple stored in quant_udaf_lfta3_struct_t::t[]; T is the value type
+       T val;                          // value carried by this tuple
+       gs_uint32_t gap;                // added into the successor's gap when the hfta3 UPDATE merges adjacent tuples
+       gs_uint32_t del;                // enters the merge-threshold test together with the gaps; presumably rank slack -- TODO confirm
+       gs_uint32_t next;               // index of the next tuple in the owning t[] array; 0 ends the chain
+};
 
-typedef struct skipnode {
-       val_type val;
+template <class T> struct skipnode_t { // one node of skipdir_t::list; T is the value type
+       T val;                          // value stored in this node
        gs_uint32_t next;
        gs_uint32_t down;
-} skipnode_t;
+};     // next/down are presumably indices into skipdir_t::list -- confirm against the lfta code
 
-typedef struct skipdir {
+template <class T> struct skipdir_t {  // skip directory embedded in quant_udaf_lfta3_struct_t as member sd
        gs_uint32_t height;                             // height of tree
        gs_uint32_t freeptr;                            // cursor space stack
        gs_uint32_t headptr[SKIPDIR_HEIGHT_MAX];        // ptrs to levels
-       skipnode_t list[SKIPDIR_SIZE+1];
-} skipdir_t;
+       skipnode_t<T> list[SKIPDIR_SIZE+1];     // node storage; the extra (+1) entry is presumably a reserved slot 0 -- confirm
+};
 
 /****************************************************************/
 
-typedef struct quant_udaf_hfta0_struct_t {
-       gs_uint32_t nelts;              // 4 bytes
-       supertuple_t *t;                // 8 bytes
-} quant_udaf_hfta0_struct_t;
-
-typedef struct quant_udaf_lfta1_struct_t {
-       gs_uint32_t nelts;
-       gs_uint32_t samples[QUANT_LFTA1_SIZE];
-} quant_udaf_lfta1_struct_t;
-
-typedef struct quant_udaf_hfta1_struct_t {
-       gs_uint32_t nelts;              // 4 bytes
-       supertuple_t *t;                // 8 bytes
-} quant_udaf_hfta1_struct_t;
 
-typedef struct quant_udaf_lfta2_struct_t {
-       gs_uint32_t nelts;
-       gs_uint32_t freeptr;
-       gs_uint32_t usedptr;
-       tuple_t t[QUANT_LFTA2_SIZE+1];
-} quant_udaf_lfta2_struct_t;
 
-typedef struct quant_udaf_hfta2_struct_t {
-       gs_uint32_t nelts;              // 4 bytes
-       supertuple_t *t;                // 8 bytes
-} quant_udaf_hfta2_struct_t;
 
-typedef struct quant_udaf_lfta3_struct_t {
+template <class T> struct quant_udaf_lfta3_struct_t {
        gs_uint32_t nelts;
        gs_uint32_t freeptr;
        gs_uint32_t usedptr;
        gs_uint32_t circptr;
        gs_uint32_t size;
-       tuple_t t[QUANT_LFTA3_SIZE+1];
-       skipdir_t sd;
-} quant_udaf_lfta3_struct_t;
-
-typedef struct quant_udaf_hfta3_struct_t {
-       gs_uint32_t nelts;              // 4 bytes
-       supertuple_t *t;                // 8 bytes
-} quant_udaf_hfta3_struct_t;
-
-
-
-/*************************** Version 0 **************************/
-/* Version 0: HFTA-only                                                */
-/****************************************************************/
-void quant_udaf_hfta0_print(quant_udaf_hfta0_struct_t *s)
-{
-        supertuple_t *t;
-
-       gslog(LOG_DEBUG,"hfta nelts = %u\n",s->nelts);
-       gslog(LOG_DEBUG,"HFTA tuples:\n");
-        for (t=s->t; t != NULL; t=t->next) {
-                gslog(LOG_DEBUG,"(%u, %u, %u)\n",t->val,t->gap,t->del);
-        }
-}
-
-void quant_udaf_hfta0_compress(quant_udaf_hfta0_struct_t *s)
-{
-       supertuple_t *t=s->t, *d;
-       gs_uint32_t threshold;
-
-       threshold = (gs_uint32_t)ceil((2.0 * QUANT_EPS) * (float)(s->nelts));
-       if ((t == NULL) || (t->next == NULL)) return;
-       d = t->next;
-       while ((d != NULL) && (d->next != NULL)) {
-               if (d->gap+d->next->gap+d->next->del < threshold) {
-                       d->next->gap += d->gap;
-                       t->next = d->next;
-                       free(d);
-               }
-               t = t->next;
-               d = t->next;
-       }
-}
-
-/****************************************************************/
-/* HFTA0 functions                                             */
-/****************************************************************/
-void quant_udaf_hfta0_HFTA_AGGR_INIT_(gs_sp_t b)
-{
-       quant_udaf_hfta0_struct_t *s = (quant_udaf_hfta0_struct_t *)b;
-       s->nelts = 0;
-       s->t = NULL;
-}
-
-void quant_udaf_hfta0_HFTA_AGGR_UPDATE_(gs_sp_t b, gs_uint32_t v)
-{
-       quant_udaf_hfta0_struct_t *s = (quant_udaf_hfta0_struct_t *)b;
-       supertuple_t *t=s->t;
-       supertuple_t *newptr;
-       gs_uint32_t threshold;
-       gs_uint32_t val, gap;
-       gs_uint32_t obj;
-
-       s->nelts++;
-       // left boundary case
-       if ((!t) || (v <= t->val)) {
-               newptr = (supertuple_t *)malloc(sizeof(supertuple_t));
-               if (!newptr) {
-                       gslog(LOG_ALERT, "Out of space.\n");
-                       return;
-               }
-               newptr->val = v;
-               newptr->gap = 1;
-               newptr->del = 0;
-               newptr->next = s->t;
-               s->t = newptr;
-               return;
-       }
-
-       // locate position that sandwiches v
-       while ((t->next) && (t->next->val < v))
-               t = t->next;
-
-       // right boundary case
-       if (!t->next) {
-               // create newptr node
-               newptr = (supertuple_t *)malloc(sizeof(supertuple_t));
-               newptr->val = v;
-               newptr->gap = 1;
-               newptr->del = 0;
-               newptr->next = NULL;
-               t->next = newptr;
-       }
-       // non-boundary case
-       else {
-               obj = t->gap+t->next->gap+t->next->del;
-               threshold = (gs_uint32_t)ceil(2.0 * QUANT_EPS * (float)s->nelts);
-               if (obj <= threshold) {
-                       // insert into existing bucket
-                       t->next->gap++;
-               }
-               else {
-                       newptr = (supertuple_t *)malloc(sizeof(supertuple_t));
-                       newptr->val = v;
-                       newptr->gap = 1;
-                       newptr->del = t->next->gap+t->next->del-1;
-                       newptr->next = t->next;
-                       t->next = newptr;
-               }
-       }
-       quant_udaf_hfta0_compress(s);
-}
+       tuple_t<T> t[QUANT_LFTA3_SIZE+1];
+       skipdir_t<T> sd;
+};
 
-void quant_udaf_hfta0_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b)
-{
-       r->length = sizeof(quant_udaf_hfta0_struct_t);
-       r->offset = (gs_p_t )b;
-       r->reserved = SHALLOW_COPY;
-}
 
-void quant_udaf_hfta0_HFTA_AGGR_DESTROY_(gs_sp_t b)
-{
-        quant_udaf_hfta0_struct_t *s = (quant_udaf_hfta0_struct_t *)b;
-       supertuple_t *t=s->t, *n;
-       while(t){
-               n=t->next;
-               free(t);
-               t=n;
-       }
+template <class T> struct supertuple3_t{       // hfta/lfta: node of the malloc'd singly linked list kept in quant_udaf_hfta_struct_t::t
+       T val;                          // copied from tuple_t::val when the hfta3 UPDATE creates a node
+       gs_uint32_t gap;                // copied from tuple_t::gap
+       gs_uint32_t del;                // copied from tuple_t::del
+       struct supertuple3_t<T> *next;  // next node; NULL ends the list
+};
 
-       return;
-}
+template <class T> struct supertupleZ_t{       // presumably the tuple type of the hfta-only (hftaZ) code, reached via quant_udaf_hfta_struct_t::st -- confirm
+       T val;
+       gs_uint32_t gap;
+       gs_uint32_t del;
+       gs_int32_t next;                // signed index link (supertuple3_t uses a pointer instead); end-of-list value not visible here
+};
 
 
-/****************************************************************/
-/* HFTA0 Extraction functions                                  */
-/****************************************************************/
-gs_uint32_t extr_quant_hfta0_fcn(vstring *v, gs_float_t phi)
-{
-//printf("In extr_quant_hfta0_fcn offset=%llx length=%d\n",v->offset, v->length);
-       quant_udaf_hfta0_struct_t *vs = (quant_udaf_hfta0_struct_t *)(v->offset);
-//printf("nelts=%d t=%llx\n",vs->nelts, (unsigned long long int)(vs->t));
-       supertuple_t *t, *p;
-       gs_uint32_t nelts=0;
-       gs_int32_t rmin=0, rmax, rank, ropt=INT_MAX;
-       gs_uint32_t count=0;
+template <class T> struct quant_udaf_hfta_struct_t {   // HFTA aggregate scratch; the hfta3 functions cast their gs_sp_t argument to this
+       gs_uint32_t nelts;              // 4 bytes; zeroed by INIT, scales the compress threshold
+       short int used_head;            // set to -1 by the hfta3 INIT; no other use in the hfta3 code shown
+       short int free_head;            // set to -1 by the hfta3 INIT; no other use in the hfta3 code shown
+       supertupleZ_t<T> *st;           // NULL after the hfta3 INIT; presumably the hftaZ tuple storage -- confirm
+       gs_uint32_t *vals;              // NULL after the hfta3 INIT. NOTE(review): element type is gs_uint32_t for every T -- confirm intended
+       supertuple3_t<T> *t;            // 8 bytes; head of the list built, walked and freed by the hfta3 functions
+};     // NOTE(review): external_fcns.def registers the hfta aggregates as fstring32 -- confirm sizeof stays within 32 bytes
 
-       for (t=vs->t; t != NULL; t=t->next) {
-//printf("in loop.\n");
-//printf("gap is %d\n",t->gap);
-               nelts += t->gap;
-               count++;
-       }
-       rank = (gs_int32_t) (phi*(float)nelts);
 
-       for (t=vs->t; t != NULL; t=t->next) {
-               rmin += t->gap;
-               rmax = rmin+t->del;
-               if (max(abs(rmin-rank), abs(rmax-rank)) < ropt) {
-                       p = t;
-                       ropt = max(abs(rmin-rank), abs(rmax-rank));
-               }
-       }
-       return p->val;
-}
-
-gs_uint32_t extr_med_hfta0_fcn(vstring *v)
-{
-       return extr_quant_hfta0_fcn(v, 0.5);
-}
 
-gs_uint32_t extr_quant_hfta0_space(vstring *v)
-{
-       quant_udaf_hfta0_struct_t *vs = (quant_udaf_hfta0_struct_t *)(v->offset);
-       supertuple_t *t;
-       gs_uint32_t count=0;
 
-       for (t=vs->t; t != NULL; t=t->next)
-               count++;
-       return count;
-}
 
 
 /*************************** Version 3 **************************/
 /* Version 3: LFTA-medium                                      */
 /****************************************************************/
-void quant_hfta3_print(quant_udaf_hfta3_struct_t *s)
+template <class T> void quant_hfta3_print(quant_udaf_hfta_struct_t<T> *s)
 {
-        supertuple_t *t;
+        supertuple3_t<T> *t;
 
 //printf("In quant_hfta3_print, s=%llx, t=%llx\n",(unsigned long long int)s,(unsigned long long int)(s->t));
        gslog(LOG_DEBUG,"HFTA tuples:\n");
@@ -279,9 +122,9 @@ void quant_hfta3_print(quant_udaf_hfta3_struct_t *s)
         }
 }
 
-void quant_hfta3_compress(quant_udaf_hfta3_struct_t *s)
+template <class T> void quant_hfta3_compress(quant_udaf_hfta_struct_t<T> *s)
 {
-       supertuple_t *t=s->t, *d;
+       supertuple3_t<T> *t=s->t, *d;
        gs_uint32_t threshold;
 
        threshold = (gs_uint32_t)ceil((2.0 * QUANT_EPS) * (float)(s->nelts));
@@ -302,20 +145,43 @@ void quant_hfta3_compress(quant_udaf_hfta3_struct_t *s)
 /****************************************************************/
 /* HFTA3 functions                                             */
 /****************************************************************/
-void quant_udaf_hfta3_HFTA_AGGR_INIT_(gs_sp_t b)
-{
-       quant_udaf_hfta3_struct_t *s = (quant_udaf_hfta3_struct_t *)b;
+
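+//             NOTE(review): sentence fragment -- presumably explains why hfta3 is kept separate from hftaZ (see TODO above): hfta3 mallocs each list node instead of allocating in a fixed block of memory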
+template <class T> void quant_udaf_hfta3_HFTA_AGGR_INIT_(gs_sp_t b) {  // resets the scratch at b to an empty quant_udaf_hfta_struct_t<T>
+//printf("sizeof quant_udaf_hfta_struct_t<T> is %lu\n",sizeof(quant_udaf_hfta_struct_t<T>));
+//printf("sizeof quant_udaf_lfta3_struct_t<T> is %lu\n",sizeof(quant_udaf_lfta3_struct_t<T>));
+       quant_udaf_hfta_struct_t<T> *s = (quant_udaf_hfta_struct_t<T> *)b;      // b is treated as raw storage for the struct
+//printf("quant_udaf_hfta3_HFTA_AGGR_INIT_ size is %lu\n",sizeof(quant_udaf_hfta_struct_t<T>));
        s->nelts = 0;
        s->t = NULL;
+       s->vals = NULL;         // vals/st/used_head/free_head are only initialised here; the hfta3 code shown works on s->t
+       s->st = NULL;
+       s->used_head = -1;      // -1 in both heads: presumably "no entry" for the index-linked hftaZ lists -- confirm
+       s->free_head = -1;
 }
 
-void quant_udaf_hfta3_HFTA_AGGR_UPDATE_(gs_sp_t b, vstring *v)
-{
-       quant_udaf_hfta3_struct_t *s = (quant_udaf_hfta3_struct_t *)b;
-       quant_udaf_lfta3_struct_t *vs = (quant_udaf_lfta3_struct_t *)(v->offset);
-       supertuple_t *t=s->t, *tprev=NULL;
-       tuple_t *u=vs->t;
-       supertuple_t *newptr;
+void quant_ui_udaf_hfta3_HFTA_AGGR_INIT_(gs_sp_t b){   // non-template entry points, one per value type, each forwarding to the INIT template: gs_uint32_t
+       quant_udaf_hfta3_HFTA_AGGR_INIT_<gs_uint32_t>(b);
+}
+void quant_i_udaf_hfta3_HFTA_AGGR_INIT_(gs_sp_t b){    // gs_int32_t
+       quant_udaf_hfta3_HFTA_AGGR_INIT_<gs_int32_t>(b);
+}
+void quant_ul_udaf_hfta3_HFTA_AGGR_INIT_(gs_sp_t b){   // gs_uint64_t
+       quant_udaf_hfta3_HFTA_AGGR_INIT_<gs_uint64_t>(b);
+}
+void quant_l_udaf_hfta3_HFTA_AGGR_INIT_(gs_sp_t b){    // gs_int64_t
+       quant_udaf_hfta3_HFTA_AGGR_INIT_<gs_int64_t>(b);
+}
+void quant_f_udaf_hfta3_HFTA_AGGR_INIT_(gs_sp_t b){    // gs_float_t
+       quant_udaf_hfta3_HFTA_AGGR_INIT_<gs_float_t>(b);
+}
+
+template <class T> void quant_udaf_hfta3_HFTA_AGGR_UPDATE_(gs_sp_t b, vstring *v) {
+       quant_udaf_hfta_struct_t<T> *s = (quant_udaf_hfta_struct_t<T> *)b;
+       quant_udaf_lfta3_struct_t<T> *vs = (quant_udaf_lfta3_struct_t<T> *)(v->offset);
+       supertuple3_t<T> *t=s->t, *tprev=NULL;
+       tuple_t<T> *u=vs->t;
+       supertuple3_t<T> *newptr;
        gs_uint32_t uptr = vs->usedptr;
        gs_uint32_t threshold;
 
@@ -324,6 +190,7 @@ void quant_udaf_hfta3_HFTA_AGGR_UPDATE_(gs_sp_t b, vstring *v)
 
        threshold = (gs_uint32_t)ceil((2.0 * QUANT_EPS) * (float)(vs->nelts));
        while (uptr != 0) {
+//printf("uptr=%d\n",uptr);
                if ((u[uptr].next != 0) && (u[uptr].gap+u[u[uptr].next].gap+u[u[uptr].next].del < threshold)) {
                        u[u[uptr].next].gap += u[uptr].gap;
                }
@@ -344,7 +211,7 @@ void quant_udaf_hfta3_HFTA_AGGR_UPDATE_(gs_sp_t b, vstring *v)
                        }
                        if (!uptr) break;
                        // create newptr node
-                       newptr = (supertuple_t *)malloc(sizeof(supertuple_t));
+                       newptr = (supertuple3_t<T> *)malloc(sizeof(supertuple3_t<T>));
                        newptr->val = u[uptr].val;
                        newptr->gap = u[uptr].gap;
                        newptr->del = u[uptr].del;
@@ -361,33 +228,85 @@ void quant_udaf_hfta3_HFTA_AGGR_UPDATE_(gs_sp_t b, vstring *v)
                }
                uptr = u[uptr].next;
        }
-       quant_hfta3_compress(s);
+       quant_hfta3_compress<T>(s);
 //quant_hfta3_print(s);
 //printf("exiting quant_udaf_hfta3_HFTA_AGGR_UPDATE_, s=%llx, t=%llx\n",(unsigned long long int)s,(unsigned long long int)(s->t));
 }
 
-void quant_udaf_hfta3_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b)
-{
-       r->length = sizeof(quant_udaf_hfta3_struct_t);
+void quant_ui_udaf_hfta3_HFTA_AGGR_UPDATE_(gs_sp_t b, vstring *v){     // non-template entry points forwarding to the UPDATE template; v must hold the lfta3 state of the same value type: gs_uint32_t
+       quant_udaf_hfta3_HFTA_AGGR_UPDATE_<gs_uint32_t>(b, v);
+}
+void quant_i_udaf_hfta3_HFTA_AGGR_UPDATE_(gs_sp_t b, vstring *v){      // gs_int32_t
+       quant_udaf_hfta3_HFTA_AGGR_UPDATE_<gs_int32_t>(b, v);
+}
+void quant_ul_udaf_hfta3_HFTA_AGGR_UPDATE_(gs_sp_t b, vstring *v){     // gs_uint64_t
+       quant_udaf_hfta3_HFTA_AGGR_UPDATE_<gs_uint64_t>(b, v);
+}
+void quant_l_udaf_hfta3_HFTA_AGGR_UPDATE_(gs_sp_t b, vstring *v){      // gs_int64_t
+       quant_udaf_hfta3_HFTA_AGGR_UPDATE_<gs_int64_t>(b, v);
+}
+void quant_f_udaf_hfta3_HFTA_AGGR_UPDATE_(gs_sp_t b, vstring *v){      // gs_float_t
+       quant_udaf_hfta3_HFTA_AGGR_UPDATE_<gs_float_t>(b, v);
+}
+
+template <class T> void quant_udaf_hfta3_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b) {    // makes r reference the scratch b in place (no copy of the list is made)
+       r->length = sizeof(quant_udaf_hfta_struct_t<T>);        // length covers only the fixed-size header struct, not the linked nodes
        r->offset = (gs_p_t )b;
        r->reserved = SHALLOW_COPY;
 
-       quant_udaf_hfta3_struct_t *s = (quant_udaf_hfta3_struct_t *)b;
+       quant_udaf_hfta_struct_t<T> *s = (quant_udaf_hfta_struct_t<T> *)b;      // only referenced by the commented-out debug printf below
 //printf("In quant_udaf_hfta3_HFTA_AGGR_OUTPUT_, s=%llx, t=%llx\n\n",(unsigned long long int)s,(unsigned long long int)(s->t));
 }
 
-void quant_udaf_hfta3_HFTA_AGGR_DESTROY_(gs_sp_t b)
-{
+void quant_ui_udaf_hfta3_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b) {    // non-template entry points forwarding to the OUTPUT template: gs_uint32_t
+       quant_udaf_hfta3_HFTA_AGGR_OUTPUT_<gs_uint32_t>(r, b);
+}
+void quant_i_udaf_hfta3_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b) {     // gs_int32_t
+       quant_udaf_hfta3_HFTA_AGGR_OUTPUT_<gs_int32_t>(r, b);
+}
+void quant_ul_udaf_hfta3_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b) {    // gs_uint64_t
+       quant_udaf_hfta3_HFTA_AGGR_OUTPUT_<gs_uint64_t>(r, b);
+}
+void quant_l_udaf_hfta3_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b) {     // gs_int64_t
+       quant_udaf_hfta3_HFTA_AGGR_OUTPUT_<gs_int64_t>(r, b);
+}
+void quant_f_udaf_hfta3_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b) {     // gs_float_t
+       quant_udaf_hfta3_HFTA_AGGR_OUTPUT_<gs_float_t>(r, b);
+}
+
+template <class T> void quant_udaf_hfta3_HFTA_AGGR_DESTROY_(gs_sp_t b) {       // releases every node of the supertuple3_t list owned by the scratch at b
+    quant_udaf_hfta_struct_t<T> *s = (quant_udaf_hfta_struct_t<T> *)b;
+       supertuple3_t<T> *t=s->t, *n;   // only s->t is walked; s->st and s->vals are not touched here
+       while(t){
+               n=t->next;              // read the link before the node is freed
+               free(t);
+               t=n;
+       }       // s->t itself is left pointing at the freed head
        return;
 }
 
+void quant_ui_udaf_hfta3_HFTA_AGGR_DESTROY_(gs_sp_t b) {       // non-template entry points forwarding to the DESTROY template: gs_uint32_t
+       quant_udaf_hfta3_HFTA_AGGR_DESTROY_<gs_uint32_t>(b);
+}
+void quant_i_udaf_hfta3_HFTA_AGGR_DESTROY_(gs_sp_t b) {        // gs_int32_t
+       quant_udaf_hfta3_HFTA_AGGR_DESTROY_<gs_int32_t>(b);
+}
+void quant_ul_udaf_hfta3_HFTA_AGGR_DESTROY_(gs_sp_t b) {       // gs_uint64_t
+       quant_udaf_hfta3_HFTA_AGGR_DESTROY_<gs_uint64_t>(b);
+}
+void quant_l_udaf_hfta3_HFTA_AGGR_DESTROY_(gs_sp_t b) {        // gs_int64_t
+       quant_udaf_hfta3_HFTA_AGGR_DESTROY_<gs_int64_t>(b);
+}
+void quant_f_udaf_hfta3_HFTA_AGGR_DESTROY_(gs_sp_t b) {        // gs_float_t
+       quant_udaf_hfta3_HFTA_AGGR_DESTROY_<gs_float_t>(b);
+}
+
 /****************************************************************/
 /* HFTA3 Extraction functions                                  */
 /****************************************************************/
-gs_uint32_t extr_quant_hfta3_fcn(vstring *v, gs_float_t  phi)
-{
-       quant_udaf_hfta3_struct_t *vs = (quant_udaf_hfta3_struct_t *)(v->offset);
-       supertuple_t *t, *p;
+template <class T> T extr_quant_hfta3_fcn(vstring *v, gs_float_t  phi) {
+       quant_udaf_hfta_struct_t<T> *vs = (quant_udaf_hfta_struct_t<T> *)(v->offset);
+       supertuple3_t<T> *t, *p;
        gs_uint32_t nelts=0;
        gs_int32_t rmin=0, rmax, rank, ropt=INT_MAX;
        gs_uint32_t count=0;
@@ -409,18 +328,421 @@ gs_uint32_t extr_quant_hfta3_fcn(vstring *v, gs_float_t  phi)
        return p->val;
 }
 
-gs_uint32_t extr_med_hfta3_fcn(vstring *v)
+/*
+gs_uint32_t extr_quant_ui_hfta3_fcn(vstring *v, gs_float_t  phi){
+       return extr_quant_hfta3_fcn<gs_uint32_t>(v, phi);
+}
+gs_int32_t extr_quant_i_hfta3_fcn(vstring *v, gs_float_t  phi){
+       return extr_quant_hfta3_fcn<gs_int32_t>(v, phi);
+}
+gs_uint64_t extr_quant_ul_hfta3_fcn(vstring *v, gs_float_t  phi){
+       return extr_quant_hfta3_fcn<gs_uint64_t>(v, phi);
+}
+gs_int64_t extr_quant_l_hfta3_fcn(vstring *v, gs_float_t  phi){
+       return extr_quant_hfta3_fcn<gs_int64_t>(v, phi);
+}
+gs_float_t extr_quant_f_hfta3_fcn(vstring *v, gs_float_t  phi){
+       return extr_quant_hfta3_fcn<gs_float_t>(v, phi);
+}
+*/
+
+template <class T> T extr_med_hfta3_fcn(vstring *v)    // median extractor: the phi = 0.5 case of extr_quant_hfta3_fcn<T>
 {
-       return extr_quant_hfta3_fcn(v, 0.5);
+       return extr_quant_hfta3_fcn<T>(v, 0.5);        // v is handed through unchanged
+}
+
+// Per-type HFTA3 median extractors.  Each forwards to the templated
+// extr_med_hfta3_fcn instantiated for its value type.
+
+// unsigned 32-bit
+gs_uint32_t extr_ui_med_hfta3_fcn(vstring *v)
+{
+       return extr_med_hfta3_fcn<gs_uint32_t>(v);
+}
+
+// signed 32-bit
+gs_int32_t extr_i_med_hfta3_fcn(vstring *v)
+{
+       return extr_med_hfta3_fcn<gs_int32_t>(v);
+}
+
+// unsigned 64-bit
+gs_uint64_t extr_ul_med_hfta3_fcn(vstring *v)
+{
+       return extr_med_hfta3_fcn<gs_uint64_t>(v);
+}
+
+// signed 64-bit
+gs_int64_t extr_l_med_hfta3_fcn(vstring *v)
+{
+       return extr_med_hfta3_fcn<gs_int64_t>(v);
+}
+
+// gs_float_t
+gs_float_t extr_f_med_hfta3_fcn(vstring *v)
+{
+       return extr_med_hfta3_fcn<gs_float_t>(v);
 }
 
-gs_uint32_t extr_quant_hfta3_space(vstring *v)
+template <class T> gs_uint32_t extr_quant_hfta3_space(vstring *v)      // returns the number of nodes on the aggregate's supertuple3_t list
 {
-       quant_udaf_hfta3_struct_t *vs = (quant_udaf_hfta3_struct_t *)(v->offset);
-       supertuple_t *t;
+       quant_udaf_hfta_struct_t<T> *vs = (quant_udaf_hfta_struct_t<T> *)(v->offset);  // v->offset is reinterpreted as the aggregate state
+       supertuple3_t<T> *t;
        gs_uint32_t count=0;
 
        for (t=vs->t; t != NULL; t=t->next)     // walk the list until the NULL terminator
                count++;
        return count;
 }
+
+
+
+//////////////////////////////////////////////////////////////////////
+//              hfta-only code V3
+
+//     This approach stores values in a buffer until
+//     the buffer gets filled, and then puts the values into
+//     the approximate quantile udaf.
+//
+//     Further, the code is templatized
+
+#define MAX_QUANT_ELEMS 128
+#define MAX_VAL_ELEMS 50
+//     MAX_VAL_ELEMS must be less than MAX_QUANT_ELEMS,
+//     and probably somewhat less than 1/QUANT_EPS
+//     Another consideration is space use, as most groups are small,
+//     so you want MAX_VAL_ELEMS to be as small as possible
+//     and still capture most small groups.
+
+//     To really optimize for space, use a doubling realloc
+//     strategy until the doubled size would be 2K bytes,
+//     and then instead of doubling, insert into the approx
+//     structure.
+//     
+
+/*
+template <class T> struct supertupleZ_t{
+       T val;
+       gs_uint32_t gap;
+       gs_uint32_t del;
+       gs_int32_t next;
+};
+*/
+
+/*
+template <class T> struct quant_udaf_hftaZ_struct_t{
+       gs_uint32_t nelts;
+       short int used_head;
+       short int free_head;
+       supertupleZ_t<T> *st;
+       gs_uint32_t *vals;
+};
+*/
+
+
+template <class T> void quant_udaf_hftaZ_compress(quant_udaf_hfta_struct_t<T> *s)
+{
+       int t = s->used_head, d, d_next=-1;
+       gs_uint32_t threshold;
+       supertupleZ_t<T> *st = s->st;
+
+       threshold = (gs_uint32_t)ceil((2.0 * QUANT_EPS) * (float)(s->nelts));
+       if ((t == -1) || (st[t].next == -1)) return;
+       d = st[t].next;
+       while ((d != -1) && (st[d].next != -1)) {
+               d_next = st[d].next;
+               if (st[d].gap + st[d_next].gap + st[d_next].del < threshold) {
+                       st[d_next].gap += st[d].gap;
+                       st[t].next = st[d].next;
+                       st[d].next = s->free_head;
+                       s->free_head = d;
+               }
+               t = st[t].next;
+               d = st[t].next;
+       }
+}
+
+// Puts the aggregate in its initial "exact buffer" state: zero elements,
+// no summary list (st), no supertuple list (t), and a freshly allocated
+// buffer with room for MAX_VAL_ELEMS values of type T.
+// used_head / free_head are not touched here.
+// NOTE(review): the buffer is sized with sizeof(T) but the result is cast
+// to gs_uint32_t* -- confirm against the declared type of `vals`.
+template <class T>  void quant_udaf_hftaZ_HFTA_AGGR_INIT_(gs_sp_t b)
+{
+       quant_udaf_hfta_struct_t<T> *state = (quant_udaf_hfta_struct_t<T> *)b;
+
+       state->t = NULL;
+       state->st = NULL;
+       state->nelts = 0;
+       state->vals = (gs_uint32_t *)malloc(MAX_VAL_ELEMS*sizeof(T));
+}
+
+template <class T>  void quant_udaf_hftaZ_HFTA_AGGR_UPDATE_(gs_sp_t b, T v) {
+       quant_udaf_hfta_struct_t<T> *s = (quant_udaf_hfta_struct_t<T> *)b;
+       if(s->nelts<MAX_VAL_ELEMS){
+               s->vals[s->nelts] = v;
+               s->nelts++;
+               return;
+       }
+
+       if(s->nelts==MAX_VAL_ELEMS){
+//             qsort(s->vals, MAX_VAL_ELEMS, sizeof(gs_uint32_t), compare_gs_uint32);
+               sort(s->vals,s->vals+s->nelts);
+               s->st = (supertupleZ_t<T> *)malloc(MAX_QUANT_ELEMS*sizeof(quant_udaf_hfta_struct_t<T>));
+               for(int i=0;i<MAX_VAL_ELEMS;++i){
+                       s->st[i].val = s->vals[i];
+                       s->st[i].gap = 1;
+                       s->st[i].del = 0;
+                       s->st[i].next = i+1;
+               }
+               s->st[MAX_VAL_ELEMS-1].next = -1;
+               for(int i=MAX_VAL_ELEMS; i<MAX_QUANT_ELEMS; ++i){
+                       s->st[i].next = i+1;
+               }
+               s->st[MAX_QUANT_ELEMS-1].next = -1;
+               s->free_head = MAX_VAL_ELEMS;
+               s->used_head = 0;
+               free(s->vals);
+               s->vals = NULL;
+       }
+
+//             s->nelts > MAX_VAL_ELEMS
+       int t=s->used_head;
+       int newptr;
+       gs_uint32_t threshold;
+       gs_uint32_t val, gap;
+       gs_uint32_t obj;
+       supertupleZ_t<T> *st = s->st;
+
+       s->nelts++;
+       // left boundary case
+       if ((t==-1) || (v <= st[t].val)) {
+               newptr = s->free_head;
+               if (newptr==-1) {
+                       gslog(LOG_ALERT, "Out of space in quant_udaf_hftaZ_HFTA_AGGR_UPDATE_.\n");
+                       cout << v << endl;
+                       quant_udaf_hftaZ_compress<T>(s);
+                       return;
+               }
+               s->free_head = st[newptr].next;
+               st[newptr].val = v;
+               st[newptr].gap = 1;
+               st[newptr].del = 0;
+               st[newptr].next = s->used_head;
+               s->used_head = newptr;
+               return;
+       }
+
+       // locate position that sandwiches v
+       int ptr=t;
+       while ((st[ptr].next!=-1) && (st[st[ptr].next].val < v))
+               ptr = st[ptr].next;
+
+       // right boundary case
+       if (st[ptr].next==-1) {
+               // create newptr node
+               newptr = s->free_head;
+               if (newptr==-1) {
+                       gslog(LOG_ALERT, "Out of space in quant_udaf_hftaZ_HFTA_AGGR_UPDATE_.\n");
+                       quant_udaf_hftaZ_compress<T>(s);
+                       return;
+               }
+               s->free_head = st[newptr].next;
+               st[newptr].val = v;
+               st[newptr].gap = 1;
+               st[newptr].del = 0;
+               st[newptr].next =-1;
+               st[ptr].next = newptr;
+       }
+       // non-boundary case
+       else {
+               int nextptr = st[ptr].next;
+               obj = st[ptr].gap + st[nextptr].gap + st[nextptr].del;
+               threshold = (gs_uint32_t)ceil(2.0 * QUANT_EPS * (float)s->nelts);
+               if (obj <= threshold) {
+                       // insert into existing bucket
+                       st[nextptr].gap++;
+               }
+               else {
+                       newptr = s->free_head;
+                       if (newptr==-1) {
+                               gslog(LOG_ALERT, "Out of space in quant_udaf_hftaZ_HFTA_AGGR_UPDATE_.\n");
+                               quant_udaf_hftaZ_compress<T>(s);
+                               return;
+                       }
+                       s->free_head = st[newptr].next;
+                       st[newptr].val = v;
+                       st[newptr].gap = 1;
+                       st[newptr].del = st[nextptr].gap + st[nextptr].del-1;
+                       st[newptr].next = st[ptr].next;
+                       st[ptr].next = newptr;
+               }
+       }
+       if(s->nelts>100 && (s->nelts & 0x03)==0)
+               quant_udaf_hftaZ_compress<T>(s);
+}
+
+// Publishes the aggregate state as the output string without copying:
+// r points straight at b, is given the size of the state struct, and is
+// flagged SHALLOW_COPY.
+template <class T>  void quant_udaf_hftaZ_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b)
+{
+       r->reserved = SHALLOW_COPY;
+       r->offset = (gs_p_t )b;
+       r->length = sizeof(quant_udaf_hfta_struct_t<T>);
+}
+
+// Releases the two heap buffers owned by the aggregate: the exact-value
+// buffer (vals) and the summary node pool (st).  The `t` list is not
+// touched here.
+template <class T>  void quant_udaf_hftaZ_HFTA_AGGR_DESTROY_(gs_sp_t b)
+{
+       quant_udaf_hfta_struct_t<T> *state = (quant_udaf_hfta_struct_t<T> *)b;
+
+       // free() accepts a null pointer and does nothing, so no guards are needed
+       free(state->vals);
+       free(state->st);
+}
+
+
+template <class T> T extr_quant_hftaZ_fcn(vstring *v, gs_float_t phi) {
+       quant_udaf_hfta_struct_t<T> *s = (quant_udaf_hfta_struct_t<T> *)(v->offset);
+       int t, p;
+
+       if(s->t != NULL){       // separate path for hfta/lfta split
+               return extr_quant_hfta3_fcn<T>(v, phi);
+       }
+
+       if(s->vals){
+//             qsort(s->vals, s->nelts, sizeof(gs_uint32_t), compare_gs_uint32);
+               sort(s->vals,s->vals+s->nelts);
+               gs_int32_t rank = (gs_int32_t) (phi*(float)(s->nelts));
+               if(rank>=s->nelts)
+                       rank=s->nelts-1;
+               return s->vals[rank];
+       }
+
+
+       gs_int32_t rmin=0, rmax, rank, ropt=INT_MAX;
+       gs_uint32_t count=0;
+       supertupleZ_t<T> *st = s->st;
+
+       rank = (gs_int32_t) (phi*(float)(s->nelts));
+
+       for (t=s->used_head; t != -1; t=st[t].next) {
+               rmin += st[t].gap;
+               rmax = rmin+st[t].del;
+               if (max(abs(rmin-rank), abs(rmax-rank)) < ropt) {
+                       p = t;
+                       ropt = max(abs(rmin-rank), abs(rmax-rank));
+               } else break;
+       }
+       return st[p].val;
+}
+// Median extractor: the phi = 0.5 case of extr_quant_hftaZ_fcn<T>.
+template <class T> T extr_med_hftaZ_fcn(vstring *v)
+{
+       const gs_float_t half = 0.5;
+       return extr_quant_hftaZ_fcn<T>(v, half);
+}
+
+
+template <class T> int quant_udaf_hftaZ_nelem(gs_sp_t b) {
+       quant_udaf_hfta_struct_t<T> *s = (quant_udaf_hfta_struct_t<T> *)b;
+       supertupleZ_t<T> *st = s->st;
+
+       if(s->vals != NULL)
+               return s->nelts;
+
+       int ctr=0;
+       int t=s->used_head;
+       while(t>=0){
+               ctr++;
+               t=st[t].next;
+       }
+       return ctr;
+}
+
+//     Unsigned int
+void quant_ui_udaf_hftaZ_HFTA_AGGR_INIT_(gs_sp_t b){
+         quant_udaf_hftaZ_HFTA_AGGR_INIT_<gs_uint32_t>(b);
+}
+void quant_ui_udaf_hftaZ_HFTA_AGGR_UPDATE_(gs_sp_t b, gs_uint32_t v){
+       quant_udaf_hftaZ_HFTA_AGGR_UPDATE_<gs_uint32_t>(b,v);
+}
+void quant_ui_udaf_hftaZ_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b) {
+       quant_udaf_hftaZ_HFTA_AGGR_OUTPUT_<gs_uint32_t>(r,b);
+}
+void quant_ui_udaf_hftaZ_HFTA_AGGR_DESTROY_(gs_sp_t b){
+       quant_udaf_hftaZ_HFTA_AGGR_DESTROY_<gs_uint32_t>(b);
+}
+gs_uint32_t extr_quant_ui_hftaZ_fcn(vstring *v, gs_float_t phi) {
+       return extr_quant_hftaZ_fcn<gs_uint32_t>(v,phi);
+}
+gs_uint32_t extr_med_ui_hftaZ_fcn(vstring *v){
+       return extr_med_hftaZ_fcn<gs_uint32_t>(v);
+}
+int quant_ui_udaf_hftaZ_nelem(gs_sp_t b) {
+       return quant_udaf_hftaZ_nelem<gs_uint32_t>(b);
+}
+
+//     int
+void quant_i_udaf_hftaZ_HFTA_AGGR_INIT_(gs_sp_t b){
+         quant_udaf_hftaZ_HFTA_AGGR_INIT_<gs_int32_t>(b);
+}
+void quant_i_udaf_hftaZ_HFTA_AGGR_UPDATE_(gs_sp_t b, gs_int32_t v){
+       quant_udaf_hftaZ_HFTA_AGGR_UPDATE_<gs_int32_t>(b,v);
+}
+void quant_i_udaf_hftaZ_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b) {
+       quant_udaf_hftaZ_HFTA_AGGR_OUTPUT_<gs_int32_t>(r,b);
+}
+void quant_i_udaf_hftaZ_HFTA_AGGR_DESTROY_(gs_sp_t b){
+       quant_udaf_hftaZ_HFTA_AGGR_DESTROY_<gs_int32_t>(b);
+}
+gs_int32_t extr_quant_i_hftaZ_fcn(vstring *v, gs_float_t phi) {
+       return extr_quant_hftaZ_fcn<gs_int32_t>(v,phi);
+}
+gs_int32_t extr_med_i_hftaZ_fcn(vstring *v){
+       return extr_med_hftaZ_fcn<gs_int32_t>(v);
+}
+gs_int32_t quant_i_udaf_hftaZ_nelem(gs_sp_t b) {
+       return quant_udaf_hftaZ_nelem<gs_int32_t>(b);
+}
+
+//     Unsigned long long int
+void quant_ul_udaf_hftaZ_HFTA_AGGR_INIT_(gs_sp_t b){
+         quant_udaf_hftaZ_HFTA_AGGR_INIT_<gs_uint64_t>(b);
+}
+void quant_ul_udaf_hftaZ_HFTA_AGGR_UPDATE_(gs_sp_t b, gs_uint64_t v){
+       quant_udaf_hftaZ_HFTA_AGGR_UPDATE_<gs_uint64_t>(b,v);
+}
+void quant_ul_udaf_hftaZ_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b) {
+       quant_udaf_hftaZ_HFTA_AGGR_OUTPUT_<gs_uint64_t>(r,b);
+}
+void quant_ul_udaf_hftaZ_HFTA_AGGR_DESTROY_(gs_sp_t b){
+       quant_udaf_hftaZ_HFTA_AGGR_DESTROY_<gs_uint64_t>(b);
+}
+gs_uint64_t extr_quant_ul_hftaZ_fcn(vstring *v, gs_float_t phi) {
+       return extr_quant_hftaZ_fcn<gs_uint64_t>(v,phi);
+}
+gs_uint64_t extr_med_ul_hftaZ_fcn(vstring *v){
+       return extr_med_hftaZ_fcn<gs_uint64_t>(v);
+}
+int quant_ul_udaf_hftaZ_nelem(gs_sp_t b) {
+       return quant_udaf_hftaZ_nelem<gs_uint64_t>(b);
+}
+
+//     long long int
+void quant_l_udaf_hftaZ_HFTA_AGGR_INIT_(gs_sp_t b){
+         quant_udaf_hftaZ_HFTA_AGGR_INIT_<gs_int64_t>(b);
+}
+void quant_l_udaf_hftaZ_HFTA_AGGR_UPDATE_(gs_sp_t b, gs_int64_t v){
+       quant_udaf_hftaZ_HFTA_AGGR_UPDATE_<gs_int64_t>(b,v);
+}
+void quant_l_udaf_hftaZ_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b) {
+       quant_udaf_hftaZ_HFTA_AGGR_OUTPUT_<gs_int64_t>(r,b);
+}
+void quant_l_udaf_hftaZ_HFTA_AGGR_DESTROY_(gs_sp_t b){
+       quant_udaf_hftaZ_HFTA_AGGR_DESTROY_<gs_int64_t>(b);
+}
+gs_int64_t extr_quant_l_hftaZ_fcn(vstring *v, gs_float_t phi) {
+       return extr_quant_hftaZ_fcn<gs_int64_t>(v,phi);
+}
+gs_int64_t extr_med_l_hftaZ_fcn(vstring *v){
+       return extr_med_hftaZ_fcn<gs_int64_t>(v);
+}
+int quant_l_udaf_hftaZ_nelem(gs_sp_t b) {
+       return quant_udaf_hftaZ_nelem<gs_int64_t>(b);
+}
+
+
+//     double
+void quant_f_udaf_hftaZ_HFTA_AGGR_INIT_(gs_sp_t b){
+         quant_udaf_hftaZ_HFTA_AGGR_INIT_<gs_float_t>(b);
+}
+void quant_f_udaf_hftaZ_HFTA_AGGR_UPDATE_(gs_sp_t b, gs_float_t v){
+       quant_udaf_hftaZ_HFTA_AGGR_UPDATE_<gs_float_t>(b,v);
+}
+void quant_f_udaf_hftaZ_HFTA_AGGR_OUTPUT_(vstring *r, gs_sp_t b) {
+       quant_udaf_hftaZ_HFTA_AGGR_OUTPUT_<gs_float_t>(r,b);
+}
+void quant_f_udaf_hftaZ_HFTA_AGGR_DESTROY_(gs_sp_t b){
+       quant_udaf_hftaZ_HFTA_AGGR_DESTROY_<gs_float_t>(b);
+}
+gs_float_t extr_quant_f_hftaZ_fcn(vstring *v, gs_float_t phi) {
+       return extr_quant_hftaZ_fcn<gs_float_t>(v,phi);
+}
+gs_float_t extr_med_f_hftaZ_fcn(vstring *v){
+       return extr_med_hftaZ_fcn<gs_float_t>(v);
+}
+int quant_f_udaf_hftaZ_nelem(gs_sp_t b) {
+       return quant_udaf_hftaZ_nelem<gs_float_t>(b);
+}
+
+
index 91cdb9a..e1c5646 100644 (file)
 #   limitations under the License.
 # -------------------------------------------
 
+CPP=g++ -g -O3  -fexpensive-optimizations -I ../gscpaux/acl/ -I ../gscpaux/ -I ../../../include/lfta/ -I ../../../include/
 CC=gcc -g -O3  -fexpensive-optimizations -I ../gscpaux/acl/ -I ../gscpaux/ -I ../../../include/lfta/ -I ../../../include/
 
-SOURCE =  rts_string.c rts_byteswap.c rts_sample.c rts_udaf.c flip_udaf.c
+SOURCE =  rts_string.c rts_byteswap.c rts_sample.c rts_udaf.c flip_udaf.cc
 
-OBJECTS = $(SOURCE:.c=.o)
+OBJECTS = rts_string.o rts_byteswap.o rts_sample.o rts_udaf.o flip_udaf.o
 
 INCDIR=../../../include
 HFTA_DIR=$(INCDIR/hfta)
@@ -37,11 +38,14 @@ rts_sample.c : $(INCDIR/gsconfig.h) $(INCDIR/gstypes.h) $(LFTA_DIR/rts_external.
 
 rts_udaf.c : $(LFTA_DIR/rts_udaf.h) $(INCDIR/gsconfig.h) $(INCDIR/gstypes.h) 
 
-flip_udaf.c : $(LFTA_DIR/flip_udaf.h) $(INCDIR/gsconfig.h) $(INCDIR/gstypes.h) 
+flip_udaf.cc : $(LFTA_DIR/flip_udaf.h) $(INCDIR/gsconfig.h) $(INCDIR/gstypes.h) 
 
 lpm.c : $(LFTA_DIR/rts_external.h) $(INCDIR/fta.h) $(INCDIR/gsconfig.h) $(INCDIR/gstypes.h)
 
-flip_udaf.c : $(LFTA_DIR/rts_udaf.h) $(INCDIR/gsconfig.h) $(INCDIR/gstypes.h)
+flip_udaf.cc : $(LFTA_DIR/rts_udaf.h) $(INCDIR/gsconfig.h) $(INCDIR/gstypes.h)
+
+flip_udaf.o : flip_udaf.cc
+       $(CPP) -c -o flip_udaf.o flip_udaf.cc
 
 install: all
 
diff --git a/src/lib/gscplftaaux/flip_udaf.cc b/src/lib/gscplftaaux/flip_udaf.cc
new file mode 100644 (file)
index 0000000..1376a7d
--- /dev/null
@@ -0,0 +1,611 @@
+#include <stdio.h>
+#include <limits.h>
+#include <math.h>
+#include "rts_udaf.h"
+#include "gsconfig.h"
+#include "gstypes.h"
+
+
+#include "udaf_common.h"
+
+
+/*      Full size
+//     NOTE: does not seem to be stable or correct with this setting
+//             compress only activates with this one, so compress is broken?
+#define QUANT_LFTA1_SIZE 729
+#define QUANT_LFTA2_SIZE 181
+#define QUANT_LFTA3_SIZE 100
+*/
+
+/*      half size
+*/
+// #define QUANT_LFTA1_SIZE 378
+// #define QUANT_LFTA2_SIZE 93
+// #define QUANT_LFTA3_SIZE 50
+
+/*      quarter size
+#define QUANT_LFTA1_SIZE 202
+#define QUANT_LFTA2_SIZE 49
+#define QUANT_LFTA3_SIZE 25
+*/
+
+
+// #define QUANT_EPS 0.01
+// #define SKIPDIR_SIZE 100
+// #define SKIPDIR_HEIGHT_MAX 7
+//#define max(a,b) ((a) > (b) ? (a) : (b))
+
+#define COMPRESSED_XFER
+
+/****************************************************************/
+/* Data Structures                                             */
+/****************************************************************/
+template <class T> struct tuple_t {    // one entry of the LFTA summary; entries live in an array and are chained by index
+       T val;                          // sample value held by this entry
+       gs_uint32_t gap;                // incremented when a value is absorbed into this entry; presumably the Greenwald-Khanna "g" -- TODO confirm
+       gs_uint32_t del;                // set once at insertion from the successor's gap+del-1; presumably the Greenwald-Khanna "delta" -- TODO confirm
+       gs_uint32_t next;               // array index of the following entry; 0 serves as the null index
+};
+
+// For skip list
+
+template <class T> struct skipnode_t { // one directory node of the skip list, addressed by its index in skipdir_t::list
+       T val;                          // key stored in this node
+       gs_uint32_t next;               // index of the next node on the same level (also the free-list link); 0 = none
+       gs_uint32_t down;               // on the lowest level the leaf pointer passed to skipdir_insert, otherwise the node one level below
+};
+
+template <class T>  struct skipdir_t { // fixed-capacity skip-list directory; index 0 is reserved as the null index
+       gs_uint32_t height;                             // height of tree
+       gs_uint32_t freeptr;                            // cursor space stack
+       gs_uint32_t headptr[SKIPDIR_HEIGHT_MAX+1];      // ptrs to levels
+       skipnode_t<T> list[SKIPDIR_SIZE+1];             // node pool; usable slots are 1..SKIPDIR_SIZE
+} ;
+
+
+/****************************************************************/
+
+// fstring(5+(QUANT_LFTA3_SIZE+1)*4 +
+//         (2+lg(QUANT_LFTA3_SIZE)+(QUANT_LFTA3_SIZE+1)*3)*4)
+template <class T> struct quant_udaf_lfta3_struct_t {  // complete LFTA3 aggregate state: tuple pool plus skip-list directory
+       gs_uint32_t nelts;      // # stream elements
+       gs_uint32_t freeptr;    // ptr to cursor stack
+       gs_uint32_t usedptr;    // ptr to allocated memory
+       gs_uint32_t circptr;    // circulating ptr used for compression
+       gs_uint32_t size;       // adjusted by cursor_alloc (++) and cursor_free (--)
+       tuple_t<T> t[QUANT_LFTA3_SIZE+1];       // samples + auxiliary info
+       skipdir_t<T> sd;                // directory for searching tuples
+} ;
+
+/****************************************************************/
+/* Skip List Functions                                         */
+/****************************************************************/
+
+// Skip list cursor stack operations
+// Pops one node index off the directory's free list.
+// Returns the index, or 0 when the free list is empty.
+template <class T> gs_uint32_t skipdir_alloc(skipdir_t<T> *sd)
+{
+       const gs_uint32_t head = sd->freeptr;
+       if (head == 0)
+               return head;
+       sd->freeptr = sd->list[head].next;
+       return head;
+}
+
+// Clears node `ptr` and pushes it back onto the directory's free list.
+template <class T> void skipdir_free(skipdir_t<T> *sd, gs_uint32_t ptr)
+{
+       skipnode_t<T> &node = sd->list[ptr];
+
+       node.val = 0;
+       node.down = 0;
+       node.next = sd->freeptr;        // old free-list head follows this node
+       sd->freeptr = ptr;
+}
+
+
+template <class T> void skipdir_create(skipdir_t<T> *sd)
+{
+       gs_int32_t i;
+
+       sd->height = 0;
+       sd->freeptr = 1;
+       for (i=0; i < SKIPDIR_HEIGHT_MAX; i++)
+               sd->headptr[i] = 0;
+       for (i=1; i < SKIPDIR_SIZE; i++)
+               sd->list[i].next = i+1;
+       sd->list[SKIPDIR_SIZE].next = 0;
+}
+
+// Marks the directory as empty by zeroing its height; nothing else is
+// touched.
+template <class T> void skipdir_destroy(skipdir_t<T> *sd)
+{
+       sd->height = 0;
+       return;
+}
+
+
+template <class T> void skipdir_search(skipdir_t<T> *sd, T val, gs_uint32_t *ptrstack)
+{
+       gs_uint32_t ptr;
+       gs_int32_t l;
+
+       if (sd->height == 0) {
+               ptrstack[0] = ptrstack[1] = 0;
+               return;
+       }
+       // search nonleaf nodes
+       ptr = sd->headptr[sd->height-1];
+       for (l=sd->height-1; l >= 0; l--) {
+               if (ptr == 0) {
+                       ptrstack[l+1] = 0;
+                       ptr = (l > 0) ? sd->headptr[l-1] : 0;
+               }
+               else if (val <= sd->list[ptr].val) {
+                       ptrstack[l+1] = 0;
+                       ptr = (l > 0) ? sd->headptr[l-1] : 0;
+               }
+               else {
+                       while ((sd->list[ptr].next != 0) &&
+                       (sd->list[sd->list[ptr].next].val < val))
+                               ptr = sd->list[ptr].next;
+                       ptrstack[l+1] = ptr;
+                       ptr = sd->list[ptr].down;
+               }
+       }
+       ptrstack[0] = ptr;
+}
+
+
+template <class T> void skipdir_insert(skipdir_t<T> *sd, gs_uint32_t *ptrstack,        // adds directory nodes for val on a random number of levels; ptrstack is the path from skipdir_search
+                       gs_uint32_t leafptr, T val)     // leafptr is stored as the `down` link of the level-0 node
+{
+       gs_uint32_t newptr, oldptr;     // oldptr is only read for l > 0, after the first iteration has set it
+       gs_int32_t l;
+
+       // if path already existed then point to new duplicate
+       if ((ptrstack[1] == 0) && (sd->headptr[0] != 0)
+       && (sd->list[sd->headptr[0]].val == val)) {
+               sd->list[sd->headptr[0]].down = leafptr;
+               return;
+       }
+       if ((ptrstack[1] != 0) && (sd->list[ptrstack[1]].next != 0)
+       && (sd->list[sd->list[ptrstack[1]].next].val == val)) {
+               sd->list[sd->list[ptrstack[1]].next].down = leafptr;
+               return;
+       }
+
+       for (l=0; l < SKIPDIR_HEIGHT_MAX; l++) {
+               if (random() % 2) break;        // stop growing when random() is odd; this can happen at level 0, adding no node at all
+               newptr = skipdir_alloc<T>(sd);
+               if (!newptr) break;     // out of memory
+               sd->list[newptr].val = val;
+               //copy(&val, &list[newptr[l]].val);
+               // link new directory node to level below
+               if (l > 0)
+                       sd->list[newptr].down = oldptr;
+               else
+                       sd->list[newptr].down = leafptr;
+               // insert node into current level
+               if ((l >= sd->height) || (ptrstack[l+1] == 0)) {       // level above the current height, or no predecessor: becomes the level head
+                       sd->list[newptr].next = sd->headptr[l];
+                       sd->headptr[l] = newptr;
+               }
+               else {
+                       sd->list[newptr].next = sd->list[ptrstack[l+1]].next;
+                       sd->list[ptrstack[l+1]].next = newptr;
+               }
+               oldptr = newptr;
+       }
+       if (l > sd->height) sd->height = l;     // l is the number of levels that received a node
+       //fprintf(stderr,"new height = %u\n",sd->height);
+}
+
+
+template <class T> void  skipdir_delete(skipdir_t<T> *sd, gs_uint32_t *ptrstack, T val)
+{
+       gs_uint32_t delptr;
+       gs_int32_t l;
+
+       for (l=0; l < sd->height; l++) {
+               if (ptrstack[l+1] == 0) {
+                       delptr = sd->headptr[l];
+                       if (delptr == 0) break;
+                       if (sd->list[delptr].val == val) {
+                               sd->headptr[l] = sd->list[delptr].next;
+                               skipdir_free<T>(sd, delptr);
+                       }
+                       else
+                               break;
+               }
+               else {
+                       delptr = sd->list[ptrstack[l+1]].next;
+                       if (delptr == 0) break;
+                       if (sd->list[delptr].val == val) {
+                               sd->list[ptrstack[l+1]].next = sd->list[delptr].next;
+                               skipdir_free<T>(sd, delptr);
+                       }
+                       else
+                               break;
+               }
+       }
+}
+
+// For Debugging
+template <class T> void skipdir_print(skipdir_t<T> *sd)        // debugging aid: dumps the directory to stderr, top level first
+{
+       gs_uint32_t ptr;
+       gs_int32_t l;
+
+       for (l=sd->height-1; l >= 0; l--) {     // first section: the keys stored on each level
+               for (ptr=sd->headptr[l]; ptr != 0; ptr=sd->list[ptr].next)
+                       fprintf(stderr,"%u ", sd->list[ptr].val);       // NOTE(review): %u is applied to a value of type T; only matches when T is a 32-bit unsigned type
+               fprintf(stderr,"\n");
+       }
+       fprintf(stderr,"-------\n");
+       for (l=sd->height-1; l > 0; l--) {      // second section: the key of each node's `down` target, levels above 0 only
+               for (ptr=sd->headptr[l]; ptr != 0; ptr=sd->list[ptr].next)
+                       fprintf(stderr,"%u ", sd->list[sd->list[ptr].down].val);
+               fprintf(stderr,"\n");
+       }
+       fprintf(stderr,"-------\n");
+}
+
+
+
+
+/*************************** Version 3 **************************/
+/* Version 3: LFTA-medium                                      */
+/*                                                             */
+/* NIC performs O(log n) operations at each update.            */
+/****************************************************************/
+
+/****************************************************************/
+/* Helper functions                                            */
+/****************************************************************/
+// Takes one tuple slot from the free list of the LFTA3 state.
+//
+// Returns the slot index, or 0 when the pool is exhausted.  `size` is
+// increased only when a slot is actually handed out, so it stays in step
+// with quant_udaf_lfta3_cursor_free (previously a failed allocation also
+// bumped it).
+template <class T> gs_uint32_t quant_udaf_lfta3_cursor_alloc(quant_udaf_lfta3_struct_t<T> *s)
+{
+       gs_uint32_t ptr = s->freeptr;
+       if (ptr == 0)
+               return 0;       // pool exhausted; leave the bookkeeping untouched
+       s->freeptr = s->t[ptr].next;
+       s->size++;
+       return ptr;
+}
+
+// Returns tuple slot `ptr` to the free list and decrements `size`.
+template <class T> void quant_udaf_lfta3_cursor_free(quant_udaf_lfta3_struct_t<T> *s, gs_uint32_t ptr)
+{
+       s->size--;
+       s->t[ptr].next = s->freeptr;    // old free-list head follows this slot
+       s->freeptr = ptr;
+}
+
+template <class T> void quant_lfta3_print(quant_udaf_lfta3_struct_t<T> *s)     // debugging aid: writes the used tuple chain to stderr as (val, gap, del) triples
+{
+       tuple_t<T> *t=s->t;
+       gs_uint32_t ptr = s->usedptr;
+
+       if (ptr == 0) {         // usedptr == 0 means the chain has no entries
+               fprintf(stderr,"<empty>\n");
+               return;
+       }
+       //skipdir_print(&s->sd);
+       for (; ptr != 0; ptr=t[ptr].next) {
+               fprintf(stderr,"(%u, %u, %u) ",t[ptr].val,t[ptr].gap,t[ptr].del);      // NOTE(review): the first %u receives a value of type T; only matches when T is a 32-bit unsigned type
+       }
+       fprintf(stderr,"\n");
+}
+
+template <class T> void quant_lfta3_compress(quant_udaf_lfta3_struct_t<T> *s)  // one incremental compression step: examines the tuple after circptr and either deletes it or advances circptr
+{
+       tuple_t<T> *t = s->t;
+       gs_uint32_t delptr;
+       gs_uint32_t threshold;
+       gs_uint32_t ptrstack[SKIPDIR_HEIGHT_MAX+5];
+
+       threshold = (gs_uint32_t)ceil(2.0 * QUANT_EPS * (gs_float_t)s->nelts);
+//if(s->circptr < 0 || s->circptr >= QUANT_LFTA3_SIZE)
+// printf("1) s->circptr = %d\n",s->circptr);
+//if(t[s->circptr].next < 0 || t[s->circptr].next >= QUANT_LFTA3_SIZE)
+// printf("t[s->circptr].next = %d\n",t[s->circptr].next);
+       if ((s->circptr == 0) || (t[s->circptr].next == 0)
+       || (t[t[s->circptr].next].next == 0))
+               s->circptr = s->usedptr;        // wrap around when fewer than two tuples follow circptr
+       //if ((s->size % 10) != 0) return;
+       if (s->nelts > 2) {
+//if(s->circptr < 0 || s->circptr >= QUANT_LFTA3_SIZE)
+// printf("2) s->circptr = %d\n",s->circptr);
+               delptr = t[s->circptr].next;    // deletion candidate: the tuple right after circptr
+//if(delptr < 0 || delptr >= QUANT_LFTA3_SIZE)
+// printf("delptr = %d\n",delptr);
+//if(t[delptr].next < 0 || t[delptr].next >= QUANT_LFTA3_SIZE)
+// printf("t[delptr].next = %d\n",t[delptr].next);
+               if (t[delptr].gap+t[t[delptr].next].gap+t[t[delptr].next].del < threshold) {
+                       // delete from directory
+                       if (t[s->circptr].val != t[delptr].val) {
+                               // leftmost duplicate (if multiplicity)
+                               skipdir_search<T>(&(s->sd), t[delptr].val, ptrstack);
+                               if (t[delptr].val == t[t[delptr].next].val) {
+//if(s->sd.headptr[0] < 0 || s->sd.headptr[0] >= QUANT_LFTA3_SIZE)
+// printf("s->sd.headptr[0] = %d\n",s->sd.headptr[0]);
+                                       // duplicates case
+                                       if ((ptrstack[1] == 0)
+                                       && (s->sd.headptr[0] != 0)
+                                       && (s->sd.list[s->sd.headptr[0]].val == t[delptr].val))
+                                               s->sd.list[s->sd.headptr[0]].down = t[delptr].next;    // repoint the directory entry at the surviving duplicate
+                                       else if ((ptrstack[1] != 0)
+                                       && (s->sd.list[ptrstack[1]].next != 0)
+                                       && (s->sd.list[s->sd.list[ptrstack[1]].next].val == t[delptr].val))
+                                               s->sd.list[s->sd.list[ptrstack[1]].next].down = t[delptr].next;
+                               }
+                               else {
+                                       // non-duplicates case
+                                       skipdir_delete<T>(&(s->sd), ptrstack, t[delptr].val);
+                               }
+                       }
+                       // delete from list
+                       //fprintf(stderr,"DELETED %u\n", t[delptr].val);
+                       t[s->circptr].next = t[delptr].next;    // NOTE(review): unlike the hftaZ compress, the deleted tuple's gap is not added to its successor here -- confirm this is intended
+                       quant_udaf_lfta3_cursor_free<T>(s, delptr);
+               }
+               else {
+                       s->circptr = t[s->circptr].next;        // nothing deleted: move the circulating pointer one step on
+               }
+       }
+}
+
+
+/****************************************************************/
+/* LFTA3 functions                                             */
+/****************************************************************/
+template <class T> void quant_udaf_lfta3_LFTA_AGGR_INIT_(gs_sp_t b) {
+       gs_uint32_t i;
+//printf("LFTA, sizeof(quant_udaf_lfta3_struct_t) is %lu\n",sizeof(quant_udaf_lfta3_struct_t<T>));
+       quant_udaf_lfta3_struct_t<T> *s = (quant_udaf_lfta3_struct_t<T> *)b;
+       s->nelts = 0;
+       s->usedptr = 0;         // NULL ptr
+       s->circptr = 0;
+       // initialize cursor stack
+       s->freeptr = 1;
+       s->size = 0;
+       for (i=1; i < QUANT_LFTA3_SIZE; i++)
+               s->t[i].next = i+1;
+       s->t[QUANT_LFTA3_SIZE].next = 0;
+       skipdir_create<T>(&(s->sd));
+
+//printf("sizeof(quant_udaf_lfta3_struct_t)=%lu\n",sizeof(quant_udaf_lfta3_struct_t<T>));
+}
+
+// Adds one observation v to the quantile summary stored in the scratch
+// buffer b (interpreted as quant_udaf_lfta3_struct_t<T>).
+//
+// The summary is a singly linked list of tuples (val, gap, del, next) held
+// in s->t and threaded through index links; index 0 acts as the
+// "null" link and s->usedptr is the head of the list.  The function:
+//   1. bumps the element counter s->nelts;
+//   2. handles the left boundary (empty list, or v smaller than the head
+//      value) by allocating a tuple and linking it in as the new head;
+//   3. otherwise uses the skip directory s->sd plus a linear walk to find
+//      the last tuple whose successor is not smaller than v;
+//   4. handles the right boundary (no successor) by appending a tuple;
+//   5. in the interior case either inserts a new tuple (when the combined
+//      weight of the neighbours exceeds ceil(2 * QUANT_EPS * nelts)) or
+//      charges the observation to the successor's gap, then calls
+//      quant_lfta3_compress.
+//
+// Parameters:
+//   b - scratch buffer holding the aggregate state.
+//   v - the value being added.
+//
+// If quant_udaf_lfta3_cursor_alloc returns 0 the function logs an alert
+// and returns without storing v; note that s->nelts has already been
+// incremented at that point.
 template <class T> void quant_udaf_lfta3_LFTA_AGGR_UPDATE_(gs_sp_t b, T v)
 {
        quant_udaf_lfta3_struct_t<T> *s = (quant_udaf_lfta3_struct_t<T> *)b;
        tuple_t<T> *t = s->t;
        gs_uint32_t ptr = s->usedptr;
+       // NOTE(review): delptr and debugptr are declared but never used in this function.
        gs_uint32_t newptr, delptr;
        gs_uint32_t obj;        // objective function
        gs_uint32_t threshold;
+       // Filled in by skipdir_search and handed back to skipdir_insert.
        gs_uint32_t ptrstack[SKIPDIR_HEIGHT_MAX+5];
        gs_uint32_t debugptr;
 
 //printf("AGGR_UPDATE start\n");
        s->nelts++;
        //fprintf(stderr,"nelts = %u\n",s->nelts);
        // left boundary case
        if ((ptr == 0) || (v < t[ptr].val)) {
+               // NOTE(review): when ptr != 0 the enclosing test already requires
+               // v < t[ptr].val, so this equality can only hold when ptr == 0,
+               // i.e. it compares v against slot t[0].  In that case the value is
+               // counted in t[0].gap and no tuple is linked into the list --
+               // confirm this is intended.
                if (t[ptr].val == v) {
                        t[ptr].gap++;
 //printf("AGGR_UPDATE END 1\n");
                        return;
                }
 //printf("allocating (1) for %u   ",v);
                newptr = quant_udaf_lfta3_cursor_alloc<T>(s);
                if (newptr == 0) {
                        gslog(LOG_ALERT, "Out of space in quant_udaf_lfta3_LFTA_AGGR_UPDATE_ (1).\n");
                        return;
                }
+               // New head of the list; this path does not touch the skip directory
+               // and does not call quant_lfta3_compress.
                t[newptr].val = v;
                t[newptr].gap = 1;
                t[newptr].del = 0;
                t[newptr].next = s->usedptr;
                s->usedptr = newptr;
 //printf("AGGR_UPDATE END 2\n");
                return;
        }
 
        // locate $i$ such that (v_i-1 < v <= v_i)
        skipdir_search<T>(&(s->sd), v, ptrstack);
 
        //ptr = (ptrstack[0] == 0) ? s->usedptr : s->sd.list[ptrstack[0]].down;
+       // Start the linear walk from the head when ptrstack[0] is 0, otherwise
+       // from the tuple index that skipdir_search left in ptrstack[0].
        ptr = (ptrstack[0] == 0) ? s->usedptr : ptrstack[0];
+       // Advance while the successor exists and is strictly smaller than v.
        while ((t[ptr].next != 0) && (t[t[ptr].next].val < v))
                ptr = t[ptr].next;
 
 /*
        // duplicate value
        if ((t[ptr].next != 0) && (t[t[ptr].next].val == v)) {
                t[t[ptr].next].gap++;
 printf("AGGR_UPDATE END 3\n");
                return;
        }
 */
 
        // right boundary case
        if (t[ptr].next == 0) {
 //printf("allocating (2) for %u   ",v);
                newptr = quant_udaf_lfta3_cursor_alloc<T>(s);
                if (newptr == 0) {
                        gslog(LOG_ALERT, "Out of space in quant_udaf_lfta3_LFTA_AGGR_UPDATE_ (2).\n");
                        return;
                }
+               // Append as the new tail; as with the left boundary, neither the
+               // skip directory nor quant_lfta3_compress is involved on this path.
                t[newptr].val = v;
                t[newptr].gap = 1;
                t[newptr].del = 0;
                t[newptr].next = 0;
                t[ptr].next = newptr;
 //printf("AGGR_UPDATE END 4\n");
                return;
        }
 
        // non-boundary case
 //printf("1) t[ptr].next =%d, ptr=%d\n",t[ptr].next,ptr);
+       // Combined weight of the predecessor's gap and the successor's gap+del,
+       // compared against a bound that grows with the number of elements seen.
        obj = t[ptr].gap+t[t[ptr].next].gap+t[t[ptr].next].del;
        threshold = (gs_uint32_t)ceil(2.0 * QUANT_EPS * (gs_float_t)s->nelts);
        if (obj > threshold) {
 //printf("allocating (3) for %u   ",v);
                newptr = quant_udaf_lfta3_cursor_alloc<T>(s);
                if (newptr == 0) {
                        gslog(LOG_ALERT, "Out of space in quant_udaf_lfta3_LFTA_AGGR_UPDATE_ (3).\n");
                        return;
                }
 //printf("newptr=%d\n",newptr);
+               // Splice the new tuple between ptr and its successor; its del is
+               // derived from the successor's gap+del, and the tuple is also
+               // registered in the skip directory.
                t[newptr].val = v;
                t[newptr].gap = 1;
                t[newptr].del = t[t[ptr].next].gap+t[t[ptr].next].del - 1;
                t[newptr].next = t[ptr].next;
                t[ptr].next = newptr;
                skipdir_insert<T>(&(s->sd), ptrstack, newptr, v);
        }
        else {
                // insert into existing bucket
 //printf("t[ptr].next =%d\n",t[ptr].next);
                t[t[ptr].next].gap++;
        }
+       // Only the interior case reaches the compression step.
        quant_lfta3_compress<T>(s);
 //printf("AGGR_UPDATE END 5\n");
 }
+
+// Reports whether the aggregate held in b should be flushed.
+// Returns 1 when the state's freeptr is 0 and 0 otherwise.
+// (Presumably freeptr == 0 means no free tuple slot is left -- confirm
+// against quant_udaf_lfta3_cursor_alloc.)
+template <class T> gs_int32_t quant_udaf_lfta3_LFTA_AGGR_FLUSHME_(gs_sp_t b) {
+       const quant_udaf_lfta3_struct_t<T> *state = (const quant_udaf_lfta3_struct_t<T> *)b;
+
+       return (state->freeptr == 0) ? 1 : 0;
+}
+
+template <class T> void quant_udaf_lfta3_LFTA_AGGR_OUTPUT_(struct gs_string *r, gs_sp_t b)
+{
+#ifdef COMPRESSED_XFER
+       quant_udaf_lfta3_struct_t<T> *s = (quant_udaf_lfta3_struct_t<T> *)b;
+       tuple_t<T> tmp[QUANT_LFTA3_SIZE+1];
+       gs_uint32_t ptr=s->usedptr;
+       gs_int32_t i=0,j;
+
+       for (; ptr != 0; ptr=s->t[ptr].next) {
+               tmp[i].val = s->t[ptr].val;
+               tmp[i].gap = s->t[ptr].gap;
+               tmp[i].del = s->t[ptr].del;
+               i++;
+       }
+       for (j=1; j <= i; j++) {
+               s->t[j].val = tmp[j-1].val;
+               s->t[j].gap = tmp[j-1].gap;
+               s->t[j].del = tmp[j-1].del;
+               s->t[j].next = j+1;
+       }
+       s->t[i].next = 0;
+       s->usedptr = 1;
+
+//     r->length = (5 + 4*(i+1))*sizeof(gs_uint32_t);
+       r->length = 5*sizeof(gs_uint32_t) + (i+1)*sizeof(tuple_t<T>);
+#endif
+#ifndef COMPRESSED_XFER
+       r->length = sizeof(quant_udaf_lfta3_struct_t<T>);
+#endif
+//printf("OUTPUT, size is %d\n",r->length);
+       r->data = b;
+}
+
+// Tear-down hook for the aggregate in b.  Intentionally a no-op: this
+// function performs no work on the buffer.
+template <class T> void quant_udaf_lfta3_LFTA_AGGR_DESTROY_(gs_sp_t b)
+{
+       // nothing to do
+}
+
+
+// -----------------------------------------------------------------
+//             Instantiations
+
+//     unsigned int
+//     Non-template entry points for the quant_ui_udaf_lfta3 aggregate; each
+//     one forwards to the matching template with T = gs_uint32_t.
+
+// Initializes the aggregate state in b.
+void quant_ui_udaf_lfta3_LFTA_AGGR_INIT_(gs_sp_t b)
+{
+       quant_udaf_lfta3_LFTA_AGGR_INIT_<gs_uint32_t>(b);
+}
+
+// Adds the value v to the aggregate state in b.
+void quant_ui_udaf_lfta3_LFTA_AGGR_UPDATE_(gs_sp_t b, gs_uint32_t v)
+{
+       quant_udaf_lfta3_LFTA_AGGR_UPDATE_<gs_uint32_t>(b, v);
+}
+
+// Returns the template's flush indicator for the state in b.
+gs_int32_t quant_ui_udaf_lfta3_LFTA_AGGR_FLUSHME_(gs_sp_t b)
+{
+       return quant_udaf_lfta3_LFTA_AGGR_FLUSHME_<gs_uint32_t>(b);
+}
+
+// Fills r with the output representation of the state in b.
+void quant_ui_udaf_lfta3_LFTA_AGGR_OUTPUT_(struct gs_string *r, gs_sp_t b)
+{
+       quant_udaf_lfta3_LFTA_AGGR_OUTPUT_<gs_uint32_t>(r, b);
+}
+
+// Releases the aggregate state in b.
+void quant_ui_udaf_lfta3_LFTA_AGGR_DESTROY_(gs_sp_t b)
+{
+       quant_udaf_lfta3_LFTA_AGGR_DESTROY_<gs_uint32_t>(b);
+}
+
+//     int
+//     Non-template entry points for the quant_i_udaf_lfta3 aggregate; each
+//     one forwards to the matching template with T = gs_int32_t.
+
+// Initializes the aggregate state in b.
+void quant_i_udaf_lfta3_LFTA_AGGR_INIT_(gs_sp_t b)
+{
+       quant_udaf_lfta3_LFTA_AGGR_INIT_<gs_int32_t>(b);
+}
+
+// Adds the value v to the aggregate state in b.
+void quant_i_udaf_lfta3_LFTA_AGGR_UPDATE_(gs_sp_t b, gs_int32_t v)
+{
+       quant_udaf_lfta3_LFTA_AGGR_UPDATE_<gs_int32_t>(b, v);
+}
+
+// Returns the template's flush indicator for the state in b.
+gs_int32_t quant_i_udaf_lfta3_LFTA_AGGR_FLUSHME_(gs_sp_t b)
+{
+       return quant_udaf_lfta3_LFTA_AGGR_FLUSHME_<gs_int32_t>(b);
+}
+
+// Fills r with the output representation of the state in b.
+void quant_i_udaf_lfta3_LFTA_AGGR_OUTPUT_(struct gs_string *r, gs_sp_t b)
+{
+       quant_udaf_lfta3_LFTA_AGGR_OUTPUT_<gs_int32_t>(r, b);
+}
+
+// Releases the aggregate state in b.
+void quant_i_udaf_lfta3_LFTA_AGGR_DESTROY_(gs_sp_t b)
+{
+       quant_udaf_lfta3_LFTA_AGGR_DESTROY_<gs_int32_t>(b);
+}
+
+// unsigned long
+//     Non-template entry points for the quant_ul_udaf_lfta3 aggregate; each
+//     one forwards to the matching template with T = gs_uint64_t.
+
+// Initializes the aggregate state in b.
+void quant_ul_udaf_lfta3_LFTA_AGGR_INIT_(gs_sp_t b)
+{
+       quant_udaf_lfta3_LFTA_AGGR_INIT_<gs_uint64_t>(b);
+}
+
+// Adds the value v to the aggregate state in b.
+void quant_ul_udaf_lfta3_LFTA_AGGR_UPDATE_(gs_sp_t b, gs_uint64_t v)
+{
+       quant_udaf_lfta3_LFTA_AGGR_UPDATE_<gs_uint64_t>(b, v);
+}
+
+// Returns the template's flush indicator for the state in b.
+gs_int32_t quant_ul_udaf_lfta3_LFTA_AGGR_FLUSHME_(gs_sp_t b)
+{
+       return quant_udaf_lfta3_LFTA_AGGR_FLUSHME_<gs_uint64_t>(b);
+}
+
+// Fills r with the output representation of the state in b.
+void quant_ul_udaf_lfta3_LFTA_AGGR_OUTPUT_(struct gs_string *r, gs_sp_t b)
+{
+       quant_udaf_lfta3_LFTA_AGGR_OUTPUT_<gs_uint64_t>(r, b);
+}
+
+// Releases the aggregate state in b.
+void quant_ul_udaf_lfta3_LFTA_AGGR_DESTROY_(gs_sp_t b)
+{
+       quant_udaf_lfta3_LFTA_AGGR_DESTROY_<gs_uint64_t>(b);
+}
+
+
+// long
+//     Non-template entry points for the quant_l_udaf_lfta3 aggregate; each
+//     one forwards to the matching template with T = gs_int64_t.
+
+// Initializes the aggregate state in b.
+void quant_l_udaf_lfta3_LFTA_AGGR_INIT_(gs_sp_t b)
+{
+       quant_udaf_lfta3_LFTA_AGGR_INIT_<gs_int64_t>(b);
+}
+
+// Adds the value v to the aggregate state in b.
+void quant_l_udaf_lfta3_LFTA_AGGR_UPDATE_(gs_sp_t b, gs_int64_t v)
+{
+       quant_udaf_lfta3_LFTA_AGGR_UPDATE_<gs_int64_t>(b, v);
+}
+
+// Returns the template's flush indicator for the state in b.
+gs_int32_t quant_l_udaf_lfta3_LFTA_AGGR_FLUSHME_(gs_sp_t b)
+{
+       return quant_udaf_lfta3_LFTA_AGGR_FLUSHME_<gs_int64_t>(b);
+}
+
+// Fills r with the output representation of the state in b.
+void quant_l_udaf_lfta3_LFTA_AGGR_OUTPUT_(struct gs_string *r, gs_sp_t b)
+{
+       quant_udaf_lfta3_LFTA_AGGR_OUTPUT_<gs_int64_t>(r, b);
+}
+
+// Releases the aggregate state in b.
+void quant_l_udaf_lfta3_LFTA_AGGR_DESTROY_(gs_sp_t b)
+{
+       quant_udaf_lfta3_LFTA_AGGR_DESTROY_<gs_int64_t>(b);
+}
+
+
+// float
+//     Non-template entry points for the quant_f_udaf_lfta3 aggregate; each
+//     one forwards to the matching template with T = gs_float_t.
+
+// Initializes the aggregate state in b.
+void quant_f_udaf_lfta3_LFTA_AGGR_INIT_(gs_sp_t b)
+{
+       quant_udaf_lfta3_LFTA_AGGR_INIT_<gs_float_t>(b);
+}
+
+// Adds the value v to the aggregate state in b.
+void quant_f_udaf_lfta3_LFTA_AGGR_UPDATE_(gs_sp_t b, gs_float_t v)
+{
+       quant_udaf_lfta3_LFTA_AGGR_UPDATE_<gs_float_t>(b, v);
+}
+
+// Returns the template's flush indicator for the state in b.
+gs_int32_t quant_f_udaf_lfta3_LFTA_AGGR_FLUSHME_(gs_sp_t b)
+{
+       return quant_udaf_lfta3_LFTA_AGGR_FLUSHME_<gs_float_t>(b);
+}
+
+// Fills r with the output representation of the state in b.
+void quant_f_udaf_lfta3_LFTA_AGGR_OUTPUT_(struct gs_string *r, gs_sp_t b)
+{
+       quant_udaf_lfta3_LFTA_AGGR_OUTPUT_<gs_float_t>(r, b);
+}
+
+// Releases the aggregate state in b.
+void quant_f_udaf_lfta3_LFTA_AGGR_DESTROY_(gs_sp_t b)
+{
+       quant_udaf_lfta3_LFTA_AGGR_DESTROY_<gs_float_t>(b);
+}
+
+