- ///////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////
// Matching predicates
///////////////////////////////////////////////////////////
+// str_exists_substr[a, b] : predicate over two string arguments.
+// Judging by the name it tests whether one string occurs inside the other;
+// which argument is searched is not stated here -- TODO confirm.
PRED [LFTA_LEGAL, COST HIGH]str_exists_substr[string, string];
///////////////////////////////////////////////////////////
-// Truncating strings
+// Truncating strings and substrings
///////////////////////////////////////////////////////////
+// All three functions below are declared PARTIAL.
+// str_truncate(str, n) : return the first n chars of str, or as much of
+// str as is available.
string FUN [LFTA_LEGAL,PARTIAL,COST FREE] str_truncate (string, uint);
+// str_suffix(str, n) : return the last n chars of str, or as much of
+// str as is available.
+ string FUN [LFTA_LEGAL,PARTIAL,COST FREE] str_suffix (string, uint);
+// get_list_entry(list_str, sep_str, i) : split list_str by sep_str and
+// return the i-th value, if it exists. The index i is declared CONST.
+// NOTE(review): whether i counts from 0 or from 1 is not stated here -- confirm.
+ string FUN [LFTA_LEGAL,PARTIAL,COST FREE] get_list_entry (string, string, uint CONST);
///////////////////////////////////////////////////////////
+// LLONG(x) : declared for an llong argument and for a float argument;
+// both forms return llong.
llong FUN [LFTA_LEGAL,COST FREE]LLONG(llong);
llong FUN [LFTA_LEGAL,COST FREE]LLONG(float);
+// to_llong(str) : takes a string and returns an llong.
+// NOTE(review): presumably parses a numeric string; the result for
+// malformed input is not stated here -- confirm.
+ llong FUN [LFTA_LEGAL,COST LOW] to_llong(string);
+
///////////////////////////////////////////////////////////
// Numeric functions
///////////////////////////////////////////////////////////
+// strtoip_c(str) : takes a string and returns an IP value.
+// The string argument is declared HANDLE.
IP FUN [LFTA_LEGAL,COST LOW] strtoip_c(string HANDLE);
+/////////////////////////////////////////////////////////////
+// sum_uint_in_list(list_str, sep) : add up the uints in a delimited
+// ASCII list and return the total as an llong.
+// Example: list '34|45|56' with sep '|' returns 135.
+// The second (separator) argument is declared CONST.
+
+ llong FUN [LFTA_LEGAL,COST LOW] sum_uint_in_list(string, string CONST);
+
+
///////////////////////////////////////////////////////////
// Compute running sum
///////////////////////////////////////////////////////////
// Flips approximate quantile
///////////////////////////////////////////////////////////
- uint EXTR quantile_of quant_udaf_hfta0 extr_quant_hfta0_fcn (uint, float);
- uint FUN [COST LOW] extr_quant_hfta0_fcn(string, float);
- uint EXTR median_of quant_udaf_hfta0 extr_med_hfta0_fcn (uint);
- uint FUN [COST LOW] extr_med_hfta0_fcn(string);
- uint EXTR qspace_of quant_udaf_hfta0 extr_quant_hfta0_space (uint);
- uint FUN [COST LOW] extr_quant_hfta0_space(string);
- string UDAF [SUBAGGR quant_udaf_lfta3, SUPERAGGR quant_udaf_hfta3] quant_udaf_hfta0 fstring24 (uint);
- string UDAF quant_udaf_hfta3 fstring24 (string);
- string UDAF quant_udaf_lfta3 fstring6600 (uint);
+// uint
+// quantile_of(uint, float) and median_of(uint) are EXTR entries over the
+// quant_ui_udaf_hftaZ aggregate. Each names a FUN, declared on the next
+// line, whose first argument is a string, matching the aggregate's
+// string return type.
+// NOTE(review): the extra float of quantile_of is presumably the requested
+// quantile -- confirm.
+ uint EXTR quantile_of quant_ui_udaf_hftaZ extr_quant_ui_hftaZ_fcn (uint, float);
+ uint FUN [COST LOW] extr_quant_ui_hftaZ_fcn(string, float);
+ uint EXTR median_of quant_ui_udaf_hftaZ extr_med_ui_hftaZ_fcn (uint);
+ uint FUN [COST LOW] extr_med_ui_hftaZ_fcn(string);
+
+// quant_ui_udaf_hftaZ names quant_ui_udaf_lfta3 as its SUBAGGR and
+// quant_ui_udaf_hfta3 as its SUPERAGGR; both are declared below.
+// NOTE(review): the fstringNN token presumably sizes the aggregate's
+// state buffer -- confirm.
+ string UDAF [SUBAGGR quant_ui_udaf_lfta3, SUPERAGGR quant_ui_udaf_hfta3] quant_ui_udaf_hftaZ fstring32 (uint);
+ string UDAF quant_ui_udaf_hfta3 fstring32 (string);
+ string UDAF quant_ui_udaf_lfta3 fstring2088 (uint);
+// int
+// quantile_of(int, float) and median_of(int) are EXTR entries over the
+// quant_i_udaf_hftaZ aggregate. Each names a FUN, declared on the next
+// line, whose first argument is a string, matching the aggregate's
+// string return type.
+// NOTE(review): the extra float of quantile_of is presumably the requested
+// quantile -- confirm.
+ int EXTR quantile_of quant_i_udaf_hftaZ extr_quant_i_hftaZ_fcn (int, float);
+ int FUN [COST LOW] extr_quant_i_hftaZ_fcn(string, float);
+ int EXTR median_of quant_i_udaf_hftaZ extr_med_i_hftaZ_fcn (int);
+ int FUN [COST LOW] extr_med_i_hftaZ_fcn(string);
+
+// quant_i_udaf_hftaZ names quant_i_udaf_lfta3 as its SUBAGGR and
+// quant_i_udaf_hfta3 as its SUPERAGGR; both are declared below.
+// NOTE(review): the fstringNN token presumably sizes the aggregate's
+// state buffer -- confirm.
+ string UDAF [SUBAGGR quant_i_udaf_lfta3, SUPERAGGR quant_i_udaf_hfta3] quant_i_udaf_hftaZ fstring32 (int);
+ string UDAF quant_i_udaf_hfta3 fstring32 (string);
+ string UDAF quant_i_udaf_lfta3 fstring2088 (int);
+// ullong
+// quantile_of(ullong, float) and median_of(ullong) are EXTR entries over
+// the quant_ul_udaf_hftaZ aggregate. Each names a FUN, declared on the
+// next line, whose first argument is a string, matching the aggregate's
+// string return type.
+// NOTE(review): the extra float of quantile_of is presumably the requested
+// quantile -- confirm.
+ ullong EXTR quantile_of quant_ul_udaf_hftaZ extr_quant_ul_hftaZ_fcn (ullong, float);
+ ullong FUN [COST LOW] extr_quant_ul_hftaZ_fcn(string, float);
+ ullong EXTR median_of quant_ul_udaf_hftaZ extr_med_ul_hftaZ_fcn (ullong);
+ ullong FUN [COST LOW] extr_med_ul_hftaZ_fcn(string);
+
+// quant_ul_udaf_hftaZ names quant_ul_udaf_lfta3 as its SUBAGGR and
+// quant_ul_udaf_hfta3 as its SUPERAGGR; both are declared below.
+// NOTE(review): the fstringNN token presumably sizes the aggregate's
+// state buffer -- confirm.
+ string UDAF [SUBAGGR quant_ul_udaf_lfta3, SUPERAGGR quant_ul_udaf_hfta3] quant_ul_udaf_hftaZ fstring32 (ullong);
+ string UDAF quant_ul_udaf_hfta3 fstring32 (string);
+ string UDAF quant_ul_udaf_lfta3 fstring2904 (ullong);
+// llong
+// quantile_of(llong, float) and median_of(llong) are EXTR entries over
+// the quant_l_udaf_hftaZ aggregate. Each names a FUN, declared on the
+// next line, whose first argument is a string, matching the aggregate's
+// string return type.
+// NOTE(review): the extra float of quantile_of is presumably the requested
+// quantile -- confirm.
+ llong EXTR quantile_of quant_l_udaf_hftaZ extr_quant_l_hftaZ_fcn (llong, float);
+ llong FUN [COST LOW] extr_quant_l_hftaZ_fcn(string, float);
+ llong EXTR median_of quant_l_udaf_hftaZ extr_med_l_hftaZ_fcn (llong);
+ llong FUN [COST LOW] extr_med_l_hftaZ_fcn(string);
+
+// quant_l_udaf_hftaZ names quant_l_udaf_lfta3 as its SUBAGGR and
+// quant_l_udaf_hfta3 as its SUPERAGGR; both are declared below.
+// NOTE(review): the fstringNN token presumably sizes the aggregate's
+// state buffer -- confirm.
+ string UDAF [SUBAGGR quant_l_udaf_lfta3, SUPERAGGR quant_l_udaf_hfta3] quant_l_udaf_hftaZ fstring32 (llong);
+ string UDAF quant_l_udaf_hfta3 fstring32 (string);
+ string UDAF quant_l_udaf_lfta3 fstring2904 (llong);
+// float
+// quantile_of(float, float) and median_of(float) are EXTR entries over
+// the quant_f_udaf_hftaZ aggregate. Each names a FUN, declared on the
+// next line, whose first argument is a string, matching the aggregate's
+// string return type.
+// NOTE(review): the second float of quantile_of is presumably the
+// requested quantile -- confirm.
+ float EXTR quantile_of quant_f_udaf_hftaZ extr_quant_f_hftaZ_fcn (float, float);
+ float FUN [COST LOW] extr_quant_f_hftaZ_fcn(string, float);
+ float EXTR median_of quant_f_udaf_hftaZ extr_med_f_hftaZ_fcn (float);
+ float FUN [COST LOW] extr_med_f_hftaZ_fcn(string);
+
+// quant_f_udaf_hftaZ names quant_f_udaf_lfta3 as its SUBAGGR and
+// quant_f_udaf_hfta3 as its SUPERAGGR; both are declared below.
+// NOTE(review): the fstringNN token presumably sizes the aggregate's
+// state buffer -- confirm.
+ string UDAF [SUBAGGR quant_f_udaf_lfta3, SUPERAGGR quant_f_udaf_hfta3] quant_f_udaf_hftaZ fstring32 (float);
+ string UDAF quant_f_udaf_hfta3 fstring32 (string);
+ string UDAF quant_f_udaf_lfta3 fstring2904 (float);
///////////////////////////////////////////////////////////
// FIRST and LAST aggregation function
+// CAT_aggr(string, string) : HFTA_ONLY aggregate that returns a string.
+// NOTE(review): the role of each argument (e.g. value vs. separator) is
+// not stated here -- confirm.
string UDAF [HFTA_ONLY] CAT_aggr fstring8 (string, string);
+////////////////////////////////////////////////////////////////
+// CAT_aggr_diff(str) : string aggregation via concatenation that only
+// collects a string when it differs from the previous one.
+// Declared HFTA_ONLY and RUNNING.
+//////////////////////////////////////////////////////
+
+ string UDAF [HFTA_ONLY,RUNNING] CAT_aggr_diff fstring8 (string);
+
///////////////////////////////////////////////////////////
// integer array aggregation function
// We are going to store 4 values in LFTA in fixed size buffer
// to_hex_string(str) : allow printing of binary strings; takes a string
// and returns a string (presumably its hex encoding -- confirm).
// Declared HFTA_ONLY with COST EXPENSIVE.
string FUN [HFTA_ONLY, COST EXPENSIVE] to_hex_string(string);
+
+// -------------------------------------------------------------------
+// approx_count_distinct : approximate the count distinct by
+// taking minhashes. Accurate to 7%, tested up to 100M distinct
+// items.
+//
+// Two EXTR entries are declared: approx_count_distinct over
+// approx_count_distinct_udaf, and running_approx_count_distinct over
+// running_approx_count_distinct_udaf (declared RUNNING). Both name the
+// same function, extr_approx_count_distinct(string).
+//
+// NOTE(review): extr_approx_count_distinct is declared twice below with
+// an identical signature; the second declaration looks redundant --
+// confirm the definition parser's handling of repeats before removing it.
+//
+// TODO : allow running aggregates to be used in regular aggregation?
+
+ float EXTR approx_count_distinct approx_count_distinct_udaf extr_approx_count_distinct(string);
+ string UDAF approx_count_distinct_udaf fstring1000 (string);
+ float FUN extr_approx_count_distinct(string);
+
+ float EXTR running_approx_count_distinct running_approx_count_distinct_udaf extr_approx_count_distinct(string);
+ string UDAF [RUNNING] running_approx_count_distinct_udaf fstring1000 (string);
+ float FUN extr_approx_count_distinct(string);