From 92a6faeff2b67350b7177951c16367f4ed920b7f Mon Sep 17 00:00:00 2001 From: Kyle Brandt Date: Fri, 31 Oct 2025 10:45:56 -0400 Subject: [PATCH] SQL Expressions: Add Functions to Allow list (#113291) Add assorted math, windowing, json, date/time, regex, string, conditional, and aggregation functions to allow list --- pkg/expr/sql/parser_allow.go | 54 +++++++++++++++++++++++------- pkg/expr/sql/parser_allow_test.go | 55 +++++++++++++++++++++++++++++-- 2 files changed, 95 insertions(+), 14 deletions(-) diff --git a/pkg/expr/sql/parser_allow.go b/pkg/expr/sql/parser_allow.go index 6f8cfc1d063..4bac508b1a4 100644 --- a/pkg/expr/sql/parser_allow.go +++ b/pkg/expr/sql/parser_allow.go @@ -183,19 +183,25 @@ func allowedFunction(f *sqlparser.FuncExpr) (b bool) { // Conditional functions case "if", "coalesce", "ifnull", "nullif": return + case "least": + return // Aggregation functions case "sum", "avg", "count", "min", "max": return - case "stddev", "std", "stddev_pop": + case "stddev", "std", "stddev_pop", "stddev_sample": return - case "variance", "var_pop": + case "variance", "var_pop", "var_samp": return case "group_concat": return - case "row_number", "rank", "dense_rank", "lead", "lag": + + // Window Functions + case "row_number", "rank", "dense_rank", "percent_rank": return - case "first_value", "last_value": + case "first_value", "last_value", "ntile": + return + case "lead", "lag": return // Mathematical functions @@ -205,14 +211,16 @@ func allowedFunction(f *sqlparser.FuncExpr) (b bool) { return case "sqrt", "pow", "power": return - case "mod", "log", "log10", "exp": + case "mod", "log", "log2", "log10", "exp": return case "sign", "ln", "truncate": return - case "sin", "cos", "tan": + case "sin", "cos", "tan", "cot": return case "asin", "acos", "atan", "atan2": return + case "conv", "degrees", "radians": + return case "rand", "pi": return @@ -235,23 +243,29 @@ func allowedFunction(f *sqlparser.FuncExpr) (b bool) { return case "ascii", "ord", "char": return - case "regexp_substr": + case "elt", "quote": + return + case "from_base64", "format": + return + case "regexp_substr", "regexp_replace", "regexp_instr", "regexp_like": return // Date functions case "str_to_date": return - case "date_format": + case "date_format", "get_format": return - case "date_add", "date_sub": + case "date_add", "adddate", "date_sub", "subdate": return - case "year", "month", "day", "weekday": + case "year", "month", "day", "weekday", "last_day": + return + case "yearweek", "weekofyear": return case "datediff": return case "unix_timestamp", "from_unixtime": return - case "extract", "hour", "minute", "second": + case "extract", "hour", "minute", "second", "microsecond": return case "dayname", "monthname", "dayofweek", "dayofmonth", "dayofyear": return @@ -259,20 +273,36 @@ func allowedFunction(f *sqlparser.FuncExpr) (b bool) { return case "timestampdiff", "timestampadd": return + case "from_days", "to_days": + return + case "time_format", "time", "timediff": + return // Type conversion case "cast", "convert": return // JSON functions - case "json_extract", "json_object", "json_array", "json_merge_patch", "json_valid": + case "json_extract", "json_object", "json_array", "json_valid": + return + case "json_merge", "json_merge_patch", "json_merge_preserve": return case "json_contains", "json_length", "json_type", "json_keys": return + case "json_contains_path", "json_depth": + return case "json_search", "json_quote", "json_unquote": return case "json_set", "json_insert", "json_replace", "json_remove": return + case "json_array_append", "json_array_insert": + return + case "json_objectagg", "json_arrayagg": + return + case "json_overlaps": + return + case "json_pretty", "json_value": + return default: return false diff --git a/pkg/expr/sql/parser_allow_test.go b/pkg/expr/sql/parser_allow_test.go index 9a36b3ad8e8..ef843fdc8fe 100644 --- a/pkg/expr/sql/parser_allow_test.go +++ b/pkg/expr/sql/parser_allow_test.go @@ -92,6 +92,16 @@ func TestAllowQuery(t *testing.T) { q: "SELECT * FROM mockGitHubIssuesDSResponse, JSON_TABLE(labels, '$[*]' COLUMNS(val VARCHAR(255) PATH '$')) AS jt WHERE CAST(jt.val AS CHAR) LIKE 'type%'", err: nil, }, + { + name: "json aggregation", + q: `SELECT JSON_ARRAYAGG(JSON_OBJECT('color', color, 'value', value)) AS result + FROM ( + SELECT 'red' AS color, 10 AS value UNION ALL + SELECT 'blue', 20 UNION ALL + SELECT 'green', 30 + ) AS t;`, + err: nil, + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { @@ -215,9 +225,11 @@ SELECT STDDEV(value) AS agg_stddev, STD(value) AS agg_std, STDDEV_POP(value) AS agg_stddev_pop, + STDDEV_SAMPLE(value) AS agg_stddev_sample, VARIANCE(value) AS agg_variance, VAR_POP(value) AS agg_var_pop, - + VAR_SAMP(value) AS agg_var_samp, + -- Mathematical functions ABS(value) AS math_abs, ROUND(value, 2) AS math_round, @@ -229,6 +241,7 @@ SELECT POWER(value, 2) AS math_power, MOD(value, 10) AS math_mod, LOG(value) AS math_log, + LOG2(value) AS math_log2, LOG10(value) AS math_log10, EXP(value) AS math_exp, SIGN(value) AS math_sign, @@ -245,13 +258,25 @@ SELECT -- Date functions STR_TO_DATE('2023-01-01', '%Y-%m-%d') AS date_str_to_date, DATE_FORMAT('2025-01-01 00:00:00', '%Y-%m-%d') AS date_format, + DATE_FORMAT('2003-10-03',GET_FORMAT(DATE,'EUR')) AS date_format_eur, '2025-01-01 00:00:00' AS date_now, DATE_ADD(created_at, INTERVAL 1 DAY) AS date_add, DATE_SUB(created_at, INTERVAL 1 DAY) AS date_sub, + ADDDATE(created_at, INTERVAL 1 DAY) AS date_adddate, + SUBDATE(created_at, INTERVAL 1 DAY) AS date_subdate, YEAR(created_at) AS date_year, MONTH(created_at) AS date_month, DAY(created_at) AS date_day, + MICROSECOND(created_at) AS date_microsecond, + FROM_DAYS(738123) AS date_from_days, + TO_DAYS(created_at) AS date_to_days, + TIME(created_at) AS date_time, + TIME_FORMAT(created_at, '%H:%i:%s') AS date_time_format, + TIMEDIFF(created_at, '2025-01-01 00:00:00') AS date_timediff, WEEKDAY(created_at) AS date_weekday, + LAST_DAY(created_at) AS date_last_day, + YEARWEEK(created_at) AS date_yearweek, + WEEKOFYEAR(created_at) AS date_weekofyear, DATEDIFF('2025-01-01 00:00:00', created_at) AS date_datediff, UNIX_TIMESTAMP(created_at) AS date_unix_timestamp, FROM_UNIXTIME(1634567890) AS date_from_unixtime, @@ -266,6 +291,8 @@ LIMIT 10` var example_json_functions = `SELECT JSON_OBJECT('key1', 'value1', 'key2', 10) AS json_obj, JSON_ARRAY(1, 'abc', NULL, TRUE) AS json_arr, + JSON_ARRAY_APPEND('{"a": 1}', '$.b', 2) AS json_array_append, + JSON_ARRAY_INSERT('{"a": 1}', '$.b', 2) AS json_array_insert, JSON_EXTRACT('{"id": 123, "name": "test"}', '$.id') AS json_ext, JSON_UNQUOTE(JSON_EXTRACT('{"name": "test"}', '$.name')) AS json_unq, JSON_CONTAINS('{"a": 1, "b": 2}', '{"a": 1}') AS json_contains, @@ -273,6 +300,13 @@ var example_json_functions = `SELECT JSON_REMOVE('{"a": 1, "b": 2}', '$.b') AS json_remove, JSON_LENGTH('{"a": 1, "b": {"c": 3}}') AS json_len, JSON_SEARCH('{"a": "xyz", "b": "abc"}', 'one', 'abc') AS json_search, + JSON_MERGE('{"a": 1}', '{"b": 2}') AS json_merge, + JSON_MERGE_PRESERVE('{"a": 1}', '{"b": 2}') AS json_merge_preserve, + JSON_CONTAINS_PATH('{"a": 1, "b": 2}', 'one', '$.a') AS json_contains_path, + JSON_DEPTH('{"a": 1, "b": {"c": 2}}') AS json_depth, + JSON_OVERLAPS('{"a": 1, "b": 2}', '{"b": 2, "c": 3}') AS json_overlaps, + JSON_PRETTY('{"a": 1, "b": 2}') AS json_pretty, + JSON_VALUE('{"a": 1, "b": 2}', '$.a') AS json_value, JSON_TYPE('{"a": 1}') AS json_type` var example_many_more_allowed_functions = ` @@ -280,13 +314,17 @@ SELECT -- Math functions LN(10) as ln_val, TRUNCATE(12.345, 2) as truncate_val, + CONV('a',16,2) as conv_val, SIN(0.5) as sin_val, COS(0.5) as cos_val, + COT(0.5) as cot_val, TAN(0.5) as tan_val, ASIN(0.5) as asin_val, ACOS(0.5) as acos_val, ATAN(0.5) as atan_val, ATAN2(1, 2) as atan2_val, + DEGREES(0.5) as degrees_val, + RADIANS(0.5) as radians_val, RAND() as rand_val, PI() as pi_val, @@ -307,8 +345,19 @@ SELECT ASCII('A') as ascii_val, ORD('A') as ord_val, CHAR(65) as char_val, + ELT(2, 'one', 'two', 'three') as elt_val, + FROM_BASE64('SGVsbG8sIFdvcmxkIQ==') as from_base64_val, + FORMAT(12332.123456, 4) as format_val, + QUOTE('hello') as quote_val, + + -- Regex + 'a' REGEXP '^[a-d]' AS regexp_val, REGEXP_SUBSTR('hello world', 'world') as regexp_substr_val, - + REGEXP_REPLACE('hello world', 'world', 'gopher') as regexp_replace_val, + REGEXP_INSTR('dog cat dog', 'dog') as regexp_instr_val, + REGEXP_LIKE('Michael!', '.*') as regexp_like_val, + + -- Date functions EXTRACT(YEAR FROM '2023-01-01') as extract_val, HOUR('12:34:56') as hour_val, @@ -350,8 +399,10 @@ SELECT ROW_NUMBER() OVER (ORDER BY val) as row_num, RANK() OVER (ORDER BY val) as rank_val, DENSE_RANK() OVER (ORDER BY val) as dense_rank_val, + PERCENT_RANK() OVER (ORDER BY val) as percent_rank_val, LEAD(val) OVER (ORDER BY val) as lead_val, LAG(val) OVER (ORDER BY val) as lag_val, FIRST_VALUE(val) OVER (ORDER BY val) as first_val, + NTILE(2) OVER (ORDER BY val) as ntile_val, LAST_VALUE(val) OVER (ORDER BY val ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as last_val FROM dummy_data;`