sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t
import datetime

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType

DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("zone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql


def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    tz = expression.args.get("zone")
    datatype = exp.DataType.build(exp.DataType.Type.TIMESTAMP)
    ts = expression.this
    if tz:
        # build a datatype that encodes the timezone as a type parameter, eg DateTime('America/Los_Angeles')
        datatype = exp.DataType.build(
            exp.DataType.Type.TIMESTAMPTZ,  # Type.TIMESTAMPTZ maps to DateTime
            expressions=[exp.DataTypeParam(this=tz)],
        )

    if isinstance(ts, exp.Literal):
        # strip the timezone out of the literal, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14'
        # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts.name).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))


class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType):
                # Mark every type as non-nullable which is ClickHouse's default. This marker
                # helps us transpile types from other dialects to ClickHouse, so that we can
                # e.g. produce `CAST(x AS Nullable(String))` from `CAST(x AS TEXT)`. If there
                # is a `NULL` value in `x`, the former would fail in ClickHouse without the
                # `Nullable` type constructor
                dtype.set("nullable", False)

            return dtype

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                params = self._parse_func_params(func)

                kwargs = {
                    "this": func.this,
                    "expressions": func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        EXPLICIT_SET_OP = True
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        VALUES_AS_TABLE = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NULLABLE: "Nullable",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySize: rename_func("LENGTH"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: self.func(
                "position", e.this, e.args.get("substr"), e.args.get("position")
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "DATE_FORMAT", e.this, self.format_time(e), e.args.get("zone")
            ),
            exp.TimeStrToTime: _timestrtotime_sql,
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
        }

        # There's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.NULLABLE,
            exp.DataType.Type.STRUCT,
        }

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (eg. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return f"CAST({strtodate_sql} AS DATE)"

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            if (
                expression.args.get("nullable") is not False
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

            return super().createable_sql(expression, locations)

        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
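
A few usage sketches follow. First, the nullability handling: _parse_types marks types parsed from ClickHouse as non-nullable, and datatype_sql wraps everything else in Nullable(...). A minimal sketch, assuming only that sqlglot is installed; the expected strings are inferred from the logic above, not taken from the library's test suite:

import sqlglot

# TEXT is not a native ClickHouse type: STRING_TYPE_MAPPING sends it to String,
# and since it wasn't parsed by this dialect (so it isn't marked non-nullable),
# datatype_sql wraps it in Nullable(...).
print(sqlglot.transpile("SELECT CAST(x AS TEXT)", write="clickhouse")[0])
# expected: SELECT CAST(x AS Nullable(String))

# Types parsed *from* ClickHouse are marked non-nullable by _parse_types, so
# they round-trip without gaining a Nullable wrapper.
print(sqlglot.transpile("SELECT CAST(x AS String)", read="clickhouse", write="clickhouse")[0])
# expected: SELECT CAST(x AS String)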
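Query parameter placeholders ({name: Type}) are handled by _parse_query_parameter on the way in and placeholder_sql on the way out. A small round-trip sketch; the expected output is an assumption based on those two methods:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT {abc: UInt32}", read="clickhouse")
print(ast.find(exp.Placeholder).name)  # abc
print(ast.sql(dialect="clickhouse"))   # expected: SELECT {abc: UInt32}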
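AGG_FUNC_MAPPING drives _parse_function: a name like sumIf is recognized as the sum aggregate plus the If combinator suffix, and a parenthesized parameter list before the argument list yields the parameterized variants. A sketch with placeholder table and column names; the expected output is assumed from combinedaggfunc_sql / parameterizedagg_sql:

import sqlglot
from sqlglot import exp

# sum + the "If" combinator suffix parses into a CombinedAggFunc rather
# than a plain anonymous function.
ast = sqlglot.parse_one("SELECT sumIf(amount, status = 'ok') FROM orders", read="clickhouse")
print(ast.find(exp.CombinedAggFunc) is not None)  # True

# quantile-style calls with a leading parameter list produce the
# parameterized variants and should round-trip unchanged.
ast = sqlglot.parse_one("SELECT quantileGK(100, 0.95)(reading) FROM sensors", read="clickhouse")
print(ast.sql(dialect="clickhouse"))
# expected: SELECT quantileGK(100, 0.95)(reading) FROM sensors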
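ClickHouse also allows the inverted WITH form, WITH <expression> AS <identifier>. _parse_cte stores it as a "scalar" CTE and cte_sql prints it back in the same shape instead of the usual name AS (subquery). Sketch:

import sqlglot

sql = "WITH 10 AS threshold SELECT x FROM t WHERE x > threshold"
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])
# expected: the same statement back, i.e. a clean round-trip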
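Several pieces of ClickHouse-only query syntax can meet in one statement: FINAL after a table (_parse_table), GLOBAL IN (RANGE_PARSERS), and the trailing SETTINGS / FORMAT modifiers (QUERY_MODIFIER_PARSERS on the way in, after_limit_modifiers on the way out). A round-trip sketch with placeholder table names:

import sqlglot

sql = (
    "SELECT * FROM t FINAL "
    "WHERE a GLOBAL IN (SELECT b FROM u) "
    "SETTINGS max_threads = 8 FORMAT JSON"
)
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])
# expected: the same statement back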
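Finally, trycast_sql leans on Nullable in the other direction: TRY_CAST(x AS T) becomes a plain CAST to Nullable(T), since ClickHouse then yields NULL on a failed conversion instead of raising. Sketch; the expected string is assumed from trycast_sql plus TYPE_MAPPING:

import sqlglot

print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", write="clickhouse")[0])
# expected: SELECT CAST(x AS Nullable(Int32))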
This is because ClickHouse enforces the following 986 # constraint: "Type of Map key must be a type, that can be represented by integer or 987 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 988 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 989 parent = expression.parent 990 if ( 991 expression.args.get("nullable") is not False 992 and not ( 993 isinstance(parent, exp.DataType) 994 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 995 and expression.index in (None, 0) 996 ) 997 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 998 ): 999 dtype = f"Nullable({dtype})" 1000 1001 return dtype 1002 1003 def cte_sql(self, expression: exp.CTE) -> str: 1004 if expression.args.get("scalar"): 1005 this = self.sql(expression, "this") 1006 alias = self.sql(expression, "alias") 1007 return f"{this} AS {alias}" 1008 1009 return super().cte_sql(expression) 1010 1011 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1012 return super().after_limit_modifiers(expression) + [ 1013 ( 1014 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1015 if expression.args.get("settings") 1016 else "" 1017 ), 1018 ( 1019 self.seg("FORMAT ") + self.sql(expression, "format") 1020 if expression.args.get("format") 1021 else "" 1022 ), 1023 ] 1024 1025 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1026 params = self.expressions(expression, key="params", flat=True) 1027 return self.func(expression.name, *expression.expressions) + f"({params})" 1028 1029 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1030 return self.func(expression.name, *expression.expressions) 1031 1032 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1033 return self.anonymousaggfunc_sql(expression) 1034 1035 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1036 return self.parameterizedagg_sql(expression) 1037 1038 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1039 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1040 1041 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1042 return f"ON CLUSTER {self.sql(expression, 'this')}" 1043 1044 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1045 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1046 exp.Properties.Location.POST_NAME 1047 ): 1048 this_name = self.sql( 1049 expression.this if isinstance(expression.this, exp.Schema) else expression, 1050 "this", 1051 ) 1052 this_properties = " ".join( 1053 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1054 ) 1055 this_schema = self.schema_columns_sql(expression.this) 1056 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1057 1058 return super().createable_sql(expression, locations) 1059 1060 def create_sql(self, expression: exp.Create) -> str: 1061 # The comment property comes last in CTAS statements, i.e. 
after the query 1062 query = expression.expression 1063 if isinstance(query, exp.Query): 1064 comment_prop = expression.find(exp.SchemaCommentProperty) 1065 if comment_prop: 1066 comment_prop.pop() 1067 query.replace(exp.paren(query)) 1068 else: 1069 comment_prop = None 1070 1071 create_sql = super().create_sql(expression) 1072 1073 comment_sql = self.sql(comment_prop) 1074 comment_sql = f" {comment_sql}" if comment_sql else "" 1075 1076 return f"{create_sql}{comment_sql}" 1077 1078 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1079 this = self.indent(self.sql(expression, "this")) 1080 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1081 1082 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1083 this = self.sql(expression, "this") 1084 this = f" {this}" if this else "" 1085 expr = self.sql(expression, "expression") 1086 expr = f" {expr}" if expr else "" 1087 index_type = self.sql(expression, "index_type") 1088 index_type = f" TYPE {index_type}" if index_type else "" 1089 granularity = self.sql(expression, "granularity") 1090 granularity = f" GRANULARITY {granularity}" if granularity else "" 1091 1092 return f"INDEX{this}{expr}{index_type}{granularity}" 1093 1094 def partition_sql(self, expression: exp.Partition) -> str: 1095 return f"PARTITION {self.expressions(expression, flat=True)}" 1096 1097 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1098 return f"ID {self.sql(expression.this)}" 1099 1100 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1101 return ( 1102 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1103 ) 1104 1105 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1106 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
NORMALIZE_FUNCTIONS
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
NULL_ORDERING
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
LOG_BASE_FIRST
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG).
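Two of the settings above are easy to exercise: function-name casing is preserved because normalization is disabled, and a bare LOG(x) is read as the natural logarithm (the Parser class below sets LOG_DEFAULTS_TO_LN = True). A minimal sketch with illustrative table and column names; the printed SQL is the expected shape, not guaranteed verbatim across sqlglot versions:

import sqlglot

# Function-name normalization is disabled, so the camelCase name survives a round trip.
print(sqlglot.transpile("SELECT toStartOfDay(ts) FROM events", read="clickhouse", write="clickhouse")[0])
# e.g. SELECT toStartOfDay(ts) FROM events

# Single-argument LOG parses into exp.Ln under the ClickHouse dialect.
print(sqlglot.parse_one("SELECT LOG(x) FROM t", read="clickhouse").find(sqlglot.exp.Ln) is not None)
# expected: True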
FORCE_EARLY_ALIAS_REF_EXPANSION
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" (which is resolved in _qualify_columns()) across the whole query, except:
- BigQuery, which forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
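To see the ClickHouse behavior concretely, the example can be run through sqlglot's qualifier. A hedged sketch, since the exact qualified output depends on the sqlglot version:

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = (
    "WITH data AS (SELECT 1 AS id, 2 AS my_id) "
    "SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1"
)
# Under ClickHouse semantics the alias my_id is expanded to the underlying
# column id across WHERE, GROUP BY and HAVING before columns are qualified.
print(qualify(sqlglot.parse_one(sql, read="clickhouse"), dialect="clickhouse").sql("clickhouse"))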
NORMALIZATION_STRATEGY
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence ("\n") to its unescaped version (the literal newline character).
CREATABLE_KIND_MAPPING
Helper for dialects that use a different name for the same creatable kind. For example, the ClickHouse equivalent of CREATE SCHEMA is CREATE DATABASE.
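A short sketch of that mapping in action, grounded in the example the docstring itself gives (expected shape only):

import sqlglot

# The SCHEMA creatable kind is rewritten to ClickHouse's DATABASE.
print(sqlglot.transpile("CREATE SCHEMA s", write="clickhouse")[0])
# e.g. CREATE DATABASE s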
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
149 class Tokenizer(tokens.Tokenizer): 150 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 151 IDENTIFIERS = ['"', "`"] 152 STRING_ESCAPES = ["'", "\\"] 153 BIT_STRINGS = [("0b", "")] 154 HEX_STRINGS = [("0x", ""), ("0X", "")] 155 HEREDOC_STRINGS = ["$"] 156 157 KEYWORDS = { 158 **tokens.Tokenizer.KEYWORDS, 159 "ATTACH": TokenType.COMMAND, 160 "DATE32": TokenType.DATE32, 161 "DATETIME64": TokenType.DATETIME64, 162 "DICTIONARY": TokenType.DICTIONARY, 163 "ENUM8": TokenType.ENUM8, 164 "ENUM16": TokenType.ENUM16, 165 "FINAL": TokenType.FINAL, 166 "FIXEDSTRING": TokenType.FIXEDSTRING, 167 "FLOAT32": TokenType.FLOAT, 168 "FLOAT64": TokenType.DOUBLE, 169 "GLOBAL": TokenType.GLOBAL, 170 "INT256": TokenType.INT256, 171 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 172 "MAP": TokenType.MAP, 173 "NESTED": TokenType.NESTED, 174 "SAMPLE": TokenType.TABLE_SAMPLE, 175 "TUPLE": TokenType.STRUCT, 176 "UINT128": TokenType.UINT128, 177 "UINT16": TokenType.USMALLINT, 178 "UINT256": TokenType.UINT256, 179 "UINT32": TokenType.UINT, 180 "UINT64": TokenType.UBIGINT, 181 "UINT8": TokenType.UTINYINT, 182 "IPV4": TokenType.IPV4, 183 "IPV6": TokenType.IPV6, 184 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 185 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 186 "SYSTEM": TokenType.COMMAND, 187 "PREWHERE": TokenType.PREWHERE, 188 } 189 KEYWORDS.pop("/*+") 190 191 SINGLE_TOKENS = { 192 **tokens.Tokenizer.SINGLE_TOKENS, 193 "$": TokenType.HEREDOC_STRING, 194 }
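A couple of the tokenizer settings above can be exercised directly with sqlglot.tokenize. A minimal sketch (the table name is illustrative and the exact token kinds may differ slightly between versions):

import sqlglot

# '#' opens a comment and 0x... is tokenized as a hex string in ClickHouse.
tokens = sqlglot.tokenize("SELECT 0xFF FROM t # trailing comment", read="clickhouse")
print([token.token_type for token in tokens])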
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
196 class Parser(parser.Parser): 197 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 198 # * select x from t1 union all select x from t2 limit 1; 199 # * select x from t1 union all (select x from t2 limit 1); 200 MODIFIERS_ATTACHED_TO_SET_OP = False 201 INTERVAL_SPANS = False 202 203 FUNCTIONS = { 204 **parser.Parser.FUNCTIONS, 205 "ANY": exp.AnyValue.from_arg_list, 206 "ARRAYSUM": exp.ArraySum.from_arg_list, 207 "COUNTIF": _build_count_if, 208 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 209 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 210 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 211 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 212 "DATE_FORMAT": _build_date_format, 213 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 214 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 215 "FORMATDATETIME": _build_date_format, 216 "JSONEXTRACTSTRING": build_json_extract_path( 217 exp.JSONExtractScalar, zero_based_indexing=False 218 ), 219 "MAP": parser.build_var_map, 220 "MATCH": exp.RegexpLike.from_arg_list, 221 "RANDCANONICAL": exp.Rand.from_arg_list, 222 "STR_TO_DATE": _build_str_to_date, 223 "TUPLE": exp.Struct.from_arg_list, 224 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 225 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 226 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 227 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 228 "UNIQ": exp.ApproxDistinct.from_arg_list, 229 "XOR": lambda args: exp.Xor(expressions=args), 230 "MD5": exp.MD5Digest.from_arg_list, 231 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 232 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 233 } 234 235 AGG_FUNCTIONS = { 236 "count", 237 "min", 238 "max", 239 "sum", 240 "avg", 241 "any", 242 "stddevPop", 243 "stddevSamp", 244 "varPop", 245 "varSamp", 246 "corr", 247 "covarPop", 248 "covarSamp", 249 "entropy", 250 "exponentialMovingAverage", 251 "intervalLengthSum", 252 "kolmogorovSmirnovTest", 253 "mannWhitneyUTest", 254 "median", 255 "rankCorr", 256 "sumKahan", 257 "studentTTest", 258 "welchTTest", 259 "anyHeavy", 260 "anyLast", 261 "boundingRatio", 262 "first_value", 263 "last_value", 264 "argMin", 265 "argMax", 266 "avgWeighted", 267 "topK", 268 "topKWeighted", 269 "deltaSum", 270 "deltaSumTimestamp", 271 "groupArray", 272 "groupArrayLast", 273 "groupUniqArray", 274 "groupArrayInsertAt", 275 "groupArrayMovingAvg", 276 "groupArrayMovingSum", 277 "groupArraySample", 278 "groupBitAnd", 279 "groupBitOr", 280 "groupBitXor", 281 "groupBitmap", 282 "groupBitmapAnd", 283 "groupBitmapOr", 284 "groupBitmapXor", 285 "sumWithOverflow", 286 "sumMap", 287 "minMap", 288 "maxMap", 289 "skewSamp", 290 "skewPop", 291 "kurtSamp", 292 "kurtPop", 293 "uniq", 294 "uniqExact", 295 "uniqCombined", 296 "uniqCombined64", 297 "uniqHLL12", 298 "uniqTheta", 299 "quantile", 300 "quantiles", 301 "quantileExact", 302 "quantilesExact", 303 "quantileExactLow", 304 "quantilesExactLow", 305 "quantileExactHigh", 306 "quantilesExactHigh", 307 "quantileExactWeighted", 308 "quantilesExactWeighted", 309 "quantileTiming", 310 "quantilesTiming", 311 "quantileTimingWeighted", 312 "quantilesTimingWeighted", 313 "quantileDeterministic", 314 "quantilesDeterministic", 315 "quantileTDigest", 316 "quantilesTDigest", 317 "quantileTDigestWeighted", 318 
"quantilesTDigestWeighted", 319 "quantileBFloat16", 320 "quantilesBFloat16", 321 "quantileBFloat16Weighted", 322 "quantilesBFloat16Weighted", 323 "simpleLinearRegression", 324 "stochasticLinearRegression", 325 "stochasticLogisticRegression", 326 "categoricalInformationValue", 327 "contingency", 328 "cramersV", 329 "cramersVBiasCorrected", 330 "theilsU", 331 "maxIntersections", 332 "maxIntersectionsPosition", 333 "meanZTest", 334 "quantileInterpolatedWeighted", 335 "quantilesInterpolatedWeighted", 336 "quantileGK", 337 "quantilesGK", 338 "sparkBar", 339 "sumCount", 340 "largestTriangleThreeBuckets", 341 "histogram", 342 "sequenceMatch", 343 "sequenceCount", 344 "windowFunnel", 345 "retention", 346 "uniqUpTo", 347 "sequenceNextNode", 348 "exponentialTimeDecayedAvg", 349 } 350 351 AGG_FUNCTIONS_SUFFIXES = [ 352 "If", 353 "Array", 354 "ArrayIf", 355 "Map", 356 "SimpleState", 357 "State", 358 "Merge", 359 "MergeState", 360 "ForEach", 361 "Distinct", 362 "OrDefault", 363 "OrNull", 364 "Resample", 365 "ArgMin", 366 "ArgMax", 367 ] 368 369 FUNC_TOKENS = { 370 *parser.Parser.FUNC_TOKENS, 371 TokenType.SET, 372 } 373 374 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 375 376 ID_VAR_TOKENS = { 377 *parser.Parser.ID_VAR_TOKENS, 378 TokenType.LIKE, 379 } 380 381 AGG_FUNC_MAPPING = ( 382 lambda functions, suffixes: { 383 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 384 } 385 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 386 387 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 388 389 FUNCTION_PARSERS = { 390 **parser.Parser.FUNCTION_PARSERS, 391 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 392 "QUANTILE": lambda self: self._parse_quantile(), 393 } 394 395 FUNCTION_PARSERS.pop("MATCH") 396 397 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 398 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 399 400 RANGE_PARSERS = { 401 **parser.Parser.RANGE_PARSERS, 402 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 403 and self._parse_in(this, is_global=True), 404 } 405 406 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 407 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
408 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 409 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 410 411 JOIN_KINDS = { 412 *parser.Parser.JOIN_KINDS, 413 TokenType.ANY, 414 TokenType.ASOF, 415 TokenType.ARRAY, 416 } 417 418 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 419 TokenType.ANY, 420 TokenType.ARRAY, 421 TokenType.FINAL, 422 TokenType.FORMAT, 423 TokenType.SETTINGS, 424 } 425 426 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 427 TokenType.FORMAT, 428 } 429 430 LOG_DEFAULTS_TO_LN = True 431 432 QUERY_MODIFIER_PARSERS = { 433 **parser.Parser.QUERY_MODIFIER_PARSERS, 434 TokenType.SETTINGS: lambda self: ( 435 "settings", 436 self._advance() or self._parse_csv(self._parse_assignment), 437 ), 438 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 439 } 440 441 CONSTRAINT_PARSERS = { 442 **parser.Parser.CONSTRAINT_PARSERS, 443 "INDEX": lambda self: self._parse_index_constraint(), 444 "CODEC": lambda self: self._parse_compress(), 445 } 446 447 ALTER_PARSERS = { 448 **parser.Parser.ALTER_PARSERS, 449 "REPLACE": lambda self: self._parse_alter_table_replace(), 450 } 451 452 SCHEMA_UNNAMED_CONSTRAINTS = { 453 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 454 "INDEX", 455 } 456 457 PLACEHOLDER_PARSERS = { 458 **parser.Parser.PLACEHOLDER_PARSERS, 459 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 460 } 461 462 def _parse_types( 463 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 464 ) -> t.Optional[exp.Expression]: 465 dtype = super()._parse_types( 466 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 467 ) 468 if isinstance(dtype, exp.DataType): 469 # Mark every type as non-nullable which is ClickHouse's default. This marker 470 # helps us transpile types from other dialects to ClickHouse, so that we can 471 # e.g. produce `CAST(x AS Nullable(String))` from `CAST(x AS TEXT)`. If there 472 # is a `NULL` value in `x`, the former would fail in ClickHouse without the 473 # `Nullable` type constructor 474 dtype.set("nullable", False) 475 476 return dtype 477 478 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 479 index = self._index 480 this = self._parse_bitwise() 481 if self._match(TokenType.FROM): 482 self._retreat(index) 483 return super()._parse_extract() 484 485 # We return Anonymous here because extract and regexpExtract have different semantics, 486 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 487 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 488 # 489 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
490 self._match(TokenType.COMMA) 491 return self.expression( 492 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 493 ) 494 495 def _parse_assignment(self) -> t.Optional[exp.Expression]: 496 this = super()._parse_assignment() 497 498 if self._match(TokenType.PLACEHOLDER): 499 return self.expression( 500 exp.If, 501 this=this, 502 true=self._parse_assignment(), 503 false=self._match(TokenType.COLON) and self._parse_assignment(), 504 ) 505 506 return this 507 508 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 509 """ 510 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 511 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 512 """ 513 this = self._parse_id_var() 514 self._match(TokenType.COLON) 515 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 516 self._match_text_seq("IDENTIFIER") and "Identifier" 517 ) 518 519 if not kind: 520 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 521 elif not self._match(TokenType.R_BRACE): 522 self.raise_error("Expecting }") 523 524 return self.expression(exp.Placeholder, this=this, kind=kind) 525 526 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 527 this = super()._parse_in(this) 528 this.set("is_global", is_global) 529 return this 530 531 def _parse_table( 532 self, 533 schema: bool = False, 534 joins: bool = False, 535 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 536 parse_bracket: bool = False, 537 is_db_reference: bool = False, 538 parse_partition: bool = False, 539 ) -> t.Optional[exp.Expression]: 540 this = super()._parse_table( 541 schema=schema, 542 joins=joins, 543 alias_tokens=alias_tokens, 544 parse_bracket=parse_bracket, 545 is_db_reference=is_db_reference, 546 ) 547 548 if self._match(TokenType.FINAL): 549 this = self.expression(exp.Final, this=this) 550 551 return this 552 553 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 554 return super()._parse_position(haystack_first=True) 555 556 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 557 def _parse_cte(self) -> exp.CTE: 558 # WITH <identifier> AS <subquery expression> 559 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 560 561 if not cte: 562 # WITH <expression> AS <identifier> 563 cte = self.expression( 564 exp.CTE, 565 this=self._parse_assignment(), 566 alias=self._parse_table_alias(), 567 scalar=True, 568 ) 569 570 return cte 571 572 def _parse_join_parts( 573 self, 574 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 575 is_global = self._match(TokenType.GLOBAL) and self._prev 576 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 577 578 if kind_pre: 579 kind = self._match_set(self.JOIN_KINDS) and self._prev 580 side = self._match_set(self.JOIN_SIDES) and self._prev 581 return is_global, side, kind 582 583 return ( 584 is_global, 585 self._match_set(self.JOIN_SIDES) and self._prev, 586 self._match_set(self.JOIN_KINDS) and self._prev, 587 ) 588 589 def _parse_join( 590 self, skip_join_token: bool = False, parse_bracket: bool = False 591 ) -> t.Optional[exp.Join]: 592 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 593 if join: 594 join.set("global", join.args.pop("method", None)) 595 596 return join 597 598 def _parse_function( 599 self, 600 functions: t.Optional[t.Dict[str, t.Callable]] = None, 601 anonymous: bool = False, 602 
optional_parens: bool = True, 603 any_token: bool = False, 604 ) -> t.Optional[exp.Expression]: 605 expr = super()._parse_function( 606 functions=functions, 607 anonymous=anonymous, 608 optional_parens=optional_parens, 609 any_token=any_token, 610 ) 611 612 func = expr.this if isinstance(expr, exp.Window) else expr 613 614 # Aggregate functions can be split in 2 parts: <func_name><suffix> 615 parts = ( 616 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 617 ) 618 619 if parts: 620 params = self._parse_func_params(func) 621 622 kwargs = { 623 "this": func.this, 624 "expressions": func.expressions, 625 } 626 if parts[1]: 627 kwargs["parts"] = parts 628 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 629 else: 630 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 631 632 kwargs["exp_class"] = exp_class 633 if params: 634 kwargs["params"] = params 635 636 func = self.expression(**kwargs) 637 638 if isinstance(expr, exp.Window): 639 # The window's func was parsed as Anonymous in base parser, fix its 640 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 641 expr.set("this", func) 642 elif params: 643 # Params have blocked super()._parse_function() from parsing the following window 644 # (if that exists) as they're standing between the function call and the window spec 645 expr = self._parse_window(func) 646 else: 647 expr = func 648 649 return expr 650 651 def _parse_func_params( 652 self, this: t.Optional[exp.Func] = None 653 ) -> t.Optional[t.List[exp.Expression]]: 654 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 655 return self._parse_csv(self._parse_lambda) 656 657 if self._match(TokenType.L_PAREN): 658 params = self._parse_csv(self._parse_lambda) 659 self._match_r_paren(this) 660 return params 661 662 return None 663 664 def _parse_quantile(self) -> exp.Quantile: 665 this = self._parse_lambda() 666 params = self._parse_func_params() 667 if params: 668 return self.expression(exp.Quantile, this=params[0], quantile=this) 669 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 670 671 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 672 return super()._parse_wrapped_id_vars(optional=True) 673 674 def _parse_primary_key( 675 self, wrapped_optional: bool = False, in_props: bool = False 676 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 677 return super()._parse_primary_key( 678 wrapped_optional=wrapped_optional or in_props, in_props=in_props 679 ) 680 681 def _parse_on_property(self) -> t.Optional[exp.Expression]: 682 index = self._index 683 if self._match_text_seq("CLUSTER"): 684 this = self._parse_id_var() 685 if this: 686 return self.expression(exp.OnCluster, this=this) 687 else: 688 self._retreat(index) 689 return None 690 691 def _parse_index_constraint( 692 self, kind: t.Optional[str] = None 693 ) -> exp.IndexColumnConstraint: 694 # INDEX name1 expr TYPE type1(args) GRANULARITY value 695 this = self._parse_id_var() 696 expression = self._parse_assignment() 697 698 index_type = self._match_text_seq("TYPE") and ( 699 self._parse_function() or self._parse_var() 700 ) 701 702 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 703 704 return self.expression( 705 exp.IndexColumnConstraint, 706 this=this, 707 expression=expression, 708 index_type=index_type, 709 granularity=granularity, 710 ) 711 712 def _parse_partition(self) -> t.Optional[exp.Partition]: 713 # 
https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 714 if not self._match(TokenType.PARTITION): 715 return None 716 717 if self._match_text_seq("ID"): 718 # Corresponds to the PARTITION ID <string_value> syntax 719 expressions: t.List[exp.Expression] = [ 720 self.expression(exp.PartitionId, this=self._parse_string()) 721 ] 722 else: 723 expressions = self._parse_expressions() 724 725 return self.expression(exp.Partition, expressions=expressions) 726 727 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 728 partition = self._parse_partition() 729 730 if not partition or not self._match(TokenType.FROM): 731 return None 732 733 return self.expression( 734 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 735 ) 736 737 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 738 if not self._match_text_seq("PROJECTION"): 739 return None 740 741 return self.expression( 742 exp.ProjectionDef, 743 this=self._parse_id_var(), 744 expression=self._parse_wrapped(self._parse_statement), 745 ) 746 747 def _parse_constraint(self) -> t.Optional[exp.Expression]: 748 return super()._parse_constraint() or self._parse_projection_def()
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
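A few of the ClickHouse-specific constructs the parser above handles, as a hedged sketch (table and column names are illustrative, and the round-tripped SQL may differ slightly by version):

import sqlglot

for sql in (
    # Parameterized aggregate with an If combinator suffix.
    "SELECT quantileIf(0.5)(latency, status = 200) FROM logs",
    # Ternary operator, parsed into exp.If via the PLACEHOLDER token.
    "SELECT x > 1 ? 'big' : 'small' FROM t",
    # Query parameter placeholder of the form {name: Type}.
    "SELECT {limit: UInt32}",
    # FINAL table modifier and GLOBAL IN.
    "SELECT a FROM t FINAL WHERE a GLOBAL IN (SELECT a FROM u)",
):
    print(sqlglot.parse_one(sql, read="clickhouse").sql("clickhouse"))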
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
750 class Generator(generator.Generator): 751 QUERY_HINTS = False 752 STRUCT_DELIMITER = ("(", ")") 753 NVL2_SUPPORTED = False 754 TABLESAMPLE_REQUIRES_PARENS = False 755 TABLESAMPLE_SIZE_IS_ROWS = False 756 TABLESAMPLE_KEYWORDS = "SAMPLE" 757 LAST_DAY_SUPPORTS_DATE_PART = False 758 CAN_IMPLEMENT_ARRAY_ANY = True 759 SUPPORTS_TO_NUMBER = False 760 JOIN_HINTS = False 761 TABLE_HINTS = False 762 EXPLICIT_SET_OP = True 763 GROUPINGS_SEP = "" 764 SET_OP_MODIFIERS = False 765 SUPPORTS_TABLE_ALIAS_COLUMNS = False 766 VALUES_AS_TABLE = False 767 768 STRING_TYPE_MAPPING = { 769 exp.DataType.Type.CHAR: "String", 770 exp.DataType.Type.LONGBLOB: "String", 771 exp.DataType.Type.LONGTEXT: "String", 772 exp.DataType.Type.MEDIUMBLOB: "String", 773 exp.DataType.Type.MEDIUMTEXT: "String", 774 exp.DataType.Type.TINYBLOB: "String", 775 exp.DataType.Type.TINYTEXT: "String", 776 exp.DataType.Type.TEXT: "String", 777 exp.DataType.Type.VARBINARY: "String", 778 exp.DataType.Type.VARCHAR: "String", 779 } 780 781 SUPPORTED_JSON_PATH_PARTS = { 782 exp.JSONPathKey, 783 exp.JSONPathRoot, 784 exp.JSONPathSubscript, 785 } 786 787 TYPE_MAPPING = { 788 **generator.Generator.TYPE_MAPPING, 789 **STRING_TYPE_MAPPING, 790 exp.DataType.Type.ARRAY: "Array", 791 exp.DataType.Type.BIGINT: "Int64", 792 exp.DataType.Type.DATE32: "Date32", 793 exp.DataType.Type.DATETIME: "DateTime", 794 exp.DataType.Type.DATETIME64: "DateTime64", 795 exp.DataType.Type.TIMESTAMP: "DateTime", 796 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 797 exp.DataType.Type.DOUBLE: "Float64", 798 exp.DataType.Type.ENUM: "Enum", 799 exp.DataType.Type.ENUM8: "Enum8", 800 exp.DataType.Type.ENUM16: "Enum16", 801 exp.DataType.Type.FIXEDSTRING: "FixedString", 802 exp.DataType.Type.FLOAT: "Float32", 803 exp.DataType.Type.INT: "Int32", 804 exp.DataType.Type.MEDIUMINT: "Int32", 805 exp.DataType.Type.INT128: "Int128", 806 exp.DataType.Type.INT256: "Int256", 807 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 808 exp.DataType.Type.MAP: "Map", 809 exp.DataType.Type.NESTED: "Nested", 810 exp.DataType.Type.NULLABLE: "Nullable", 811 exp.DataType.Type.SMALLINT: "Int16", 812 exp.DataType.Type.STRUCT: "Tuple", 813 exp.DataType.Type.TINYINT: "Int8", 814 exp.DataType.Type.UBIGINT: "UInt64", 815 exp.DataType.Type.UINT: "UInt32", 816 exp.DataType.Type.UINT128: "UInt128", 817 exp.DataType.Type.UINT256: "UInt256", 818 exp.DataType.Type.USMALLINT: "UInt16", 819 exp.DataType.Type.UTINYINT: "UInt8", 820 exp.DataType.Type.IPV4: "IPv4", 821 exp.DataType.Type.IPV6: "IPv6", 822 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 823 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 824 } 825 826 TRANSFORMS = { 827 **generator.Generator.TRANSFORMS, 828 exp.AnyValue: rename_func("any"), 829 exp.ApproxDistinct: rename_func("uniq"), 830 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 831 exp.ArraySize: rename_func("LENGTH"), 832 exp.ArraySum: rename_func("arraySum"), 833 exp.ArgMax: arg_max_or_min_no_count("argMax"), 834 exp.ArgMin: arg_max_or_min_no_count("argMin"), 835 exp.Array: inline_array_sql, 836 exp.CastToStrType: rename_func("CAST"), 837 exp.CountIf: rename_func("countIf"), 838 exp.CompressColumnConstraint: lambda self, 839 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 840 exp.ComputedColumnConstraint: lambda self, 841 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 842 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 843 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 
844 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 845 exp.DateStrToDate: rename_func("toDate"), 846 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 847 exp.Explode: rename_func("arrayJoin"), 848 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 849 exp.IsNan: rename_func("isNaN"), 850 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 851 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 852 exp.JSONPathKey: json_path_key_only_name, 853 exp.JSONPathRoot: lambda *_: "", 854 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 855 exp.Nullif: rename_func("nullIf"), 856 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 857 exp.Pivot: no_pivot_sql, 858 exp.Quantile: _quantile_sql, 859 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 860 exp.Rand: rename_func("randCanonical"), 861 exp.StartsWith: rename_func("startsWith"), 862 exp.StrPosition: lambda self, e: self.func( 863 "position", e.this, e.args.get("substr"), e.args.get("position") 864 ), 865 exp.TimeToStr: lambda self, e: self.func( 866 "DATE_FORMAT", e.this, self.format_time(e), e.args.get("zone") 867 ), 868 exp.TimeStrToTime: _timestrtotime_sql, 869 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 870 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 871 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 872 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 873 exp.MD5Digest: rename_func("MD5"), 874 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 875 exp.SHA: rename_func("SHA1"), 876 exp.SHA2: sha256_sql, 877 exp.UnixToTime: _unix_to_time_sql, 878 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 879 exp.Variance: rename_func("varSamp"), 880 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 881 exp.Stddev: rename_func("stddevSamp"), 882 } 883 884 PROPERTIES_LOCATION = { 885 **generator.Generator.PROPERTIES_LOCATION, 886 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 887 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 888 exp.OnCluster: exp.Properties.Location.POST_NAME, 889 } 890 891 # There's no list in docs, but it can be found in Clickhouse code 892 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 893 ON_CLUSTER_TARGETS = { 894 "DATABASE", 895 "TABLE", 896 "VIEW", 897 "DICTIONARY", 898 "INDEX", 899 "FUNCTION", 900 "NAMED COLLECTION", 901 } 902 903 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 904 NON_NULLABLE_TYPES = { 905 exp.DataType.Type.ARRAY, 906 exp.DataType.Type.MAP, 907 exp.DataType.Type.NULLABLE, 908 exp.DataType.Type.STRUCT, 909 } 910 911 def strtodate_sql(self, expression: exp.StrToDate) -> str: 912 strtodate_sql = self.function_fallback_sql(expression) 913 914 if not isinstance(expression.parent, exp.Cast): 915 # StrToDate returns DATEs in other dialects (eg. 
postgres), so 916 # this branch aims to improve the transpilation to clickhouse 917 return f"CAST({strtodate_sql} AS DATE)" 918 919 return strtodate_sql 920 921 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 922 this = expression.this 923 924 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 925 return self.sql(this) 926 927 return super().cast_sql(expression, safe_prefix=safe_prefix) 928 929 def trycast_sql(self, expression: exp.TryCast) -> str: 930 dtype = expression.to 931 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 932 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 933 dtype.set("nullable", True) 934 935 return super().cast_sql(expression) 936 937 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 938 this = self.json_path_part(expression.this) 939 return str(int(this) + 1) if is_int(this) else this 940 941 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 942 return f"AS {self.sql(expression, 'this')}" 943 944 def _any_to_has( 945 self, 946 expression: exp.EQ | exp.NEQ, 947 default: t.Callable[[t.Any], str], 948 prefix: str = "", 949 ) -> str: 950 if isinstance(expression.left, exp.Any): 951 arr = expression.left 952 this = expression.right 953 elif isinstance(expression.right, exp.Any): 954 arr = expression.right 955 this = expression.left 956 else: 957 return default(expression) 958 959 return prefix + self.func("has", arr.this.unnest(), this) 960 961 def eq_sql(self, expression: exp.EQ) -> str: 962 return self._any_to_has(expression, super().eq_sql) 963 964 def neq_sql(self, expression: exp.NEQ) -> str: 965 return self._any_to_has(expression, super().neq_sql, "NOT ") 966 967 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 968 # Manually add a flag to make the search case-insensitive 969 regex = self.func("CONCAT", "'(?i)'", expression.expression) 970 return self.func("match", expression.this, regex) 971 972 def datatype_sql(self, expression: exp.DataType) -> str: 973 # String is the standard ClickHouse type, every other variant is just an alias. 974 # Additionally, any supplied length parameter will be ignored. 975 # 976 # https://clickhouse.com/docs/en/sql-reference/data-types/string 977 if expression.this in self.STRING_TYPE_MAPPING: 978 dtype = "String" 979 else: 980 dtype = super().datatype_sql(expression) 981 982 # This section changes the type to `Nullable(...)` if the following conditions hold: 983 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 984 # and change their semantics 985 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 986 # constraint: "Type of Map key must be a type, that can be represented by integer or 987 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 988 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 989 parent = expression.parent 990 if ( 991 expression.args.get("nullable") is not False 992 and not ( 993 isinstance(parent, exp.DataType) 994 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 995 and expression.index in (None, 0) 996 ) 997 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 998 ): 999 dtype = f"Nullable({dtype})" 1000 1001 return dtype 1002 1003 def cte_sql(self, expression: exp.CTE) -> str: 1004 if expression.args.get("scalar"): 1005 this = self.sql(expression, "this") 1006 alias = self.sql(expression, "alias") 1007 return f"{this} AS {alias}" 1008 1009 return super().cte_sql(expression) 1010 1011 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1012 return super().after_limit_modifiers(expression) + [ 1013 ( 1014 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1015 if expression.args.get("settings") 1016 else "" 1017 ), 1018 ( 1019 self.seg("FORMAT ") + self.sql(expression, "format") 1020 if expression.args.get("format") 1021 else "" 1022 ), 1023 ] 1024 1025 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1026 params = self.expressions(expression, key="params", flat=True) 1027 return self.func(expression.name, *expression.expressions) + f"({params})" 1028 1029 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1030 return self.func(expression.name, *expression.expressions) 1031 1032 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1033 return self.anonymousaggfunc_sql(expression) 1034 1035 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1036 return self.parameterizedagg_sql(expression) 1037 1038 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1039 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1040 1041 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1042 return f"ON CLUSTER {self.sql(expression, 'this')}" 1043 1044 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1045 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1046 exp.Properties.Location.POST_NAME 1047 ): 1048 this_name = self.sql( 1049 expression.this if isinstance(expression.this, exp.Schema) else expression, 1050 "this", 1051 ) 1052 this_properties = " ".join( 1053 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1054 ) 1055 this_schema = self.schema_columns_sql(expression.this) 1056 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1057 1058 return super().createable_sql(expression, locations) 1059 1060 def create_sql(self, expression: exp.Create) -> str: 1061 # The comment property comes last in CTAS statements, i.e. 
after the query 1062 query = expression.expression 1063 if isinstance(query, exp.Query): 1064 comment_prop = expression.find(exp.SchemaCommentProperty) 1065 if comment_prop: 1066 comment_prop.pop() 1067 query.replace(exp.paren(query)) 1068 else: 1069 comment_prop = None 1070 1071 create_sql = super().create_sql(expression) 1072 1073 comment_sql = self.sql(comment_prop) 1074 comment_sql = f" {comment_sql}" if comment_sql else "" 1075 1076 return f"{create_sql}{comment_sql}" 1077 1078 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1079 this = self.indent(self.sql(expression, "this")) 1080 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1081 1082 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1083 this = self.sql(expression, "this") 1084 this = f" {this}" if this else "" 1085 expr = self.sql(expression, "expression") 1086 expr = f" {expr}" if expr else "" 1087 index_type = self.sql(expression, "index_type") 1088 index_type = f" TYPE {index_type}" if index_type else "" 1089 granularity = self.sql(expression, "granularity") 1090 granularity = f" GRANULARITY {granularity}" if granularity else "" 1091 1092 return f"INDEX{this}{expr}{index_type}{granularity}" 1093 1094 def partition_sql(self, expression: exp.Partition) -> str: 1095 return f"PARTITION {self.expressions(expression, flat=True)}" 1096 1097 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1098 return f"ID {self.sql(expression.this)}" 1099 1100 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1101 return ( 1102 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1103 ) 1104 1105 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1106 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
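The nullable marking described in _parse_types and datatype_sql above is easiest to see by transpiling a cast. A sketch whose expected output comes straight from the source comment in _parse_types ("produce CAST(x AS Nullable(String)) from CAST(x AS TEXT)"), though not guaranteed verbatim across versions:

import sqlglot

# TEXT maps to String and, being implicitly nullable in the source dialect,
# is wrapped in Nullable(...) on the way to ClickHouse.
print(sqlglot.transpile("SELECT CAST(x AS TEXT)", write="clickhouse")[0])
# e.g. SELECT CAST(x AS Nullable(String))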
911 def strtodate_sql(self, expression: exp.StrToDate) -> str: 912 strtodate_sql = self.function_fallback_sql(expression) 913 914 if not isinstance(expression.parent, exp.Cast): 915 # StrToDate returns DATEs in other dialects (eg. postgres), so 916 # this branch aims to improve the transpilation to clickhouse 917 return f"CAST({strtodate_sql} AS DATE)" 918 919 return strtodate_sql
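For example, a STR_TO_DATE arriving from MySQL picks up the explicit DATE cast. A rough sketch; the emitted function and format text vary by sqlglot version:

import sqlglot

# No enclosing cast exists, so strtodate_sql wraps the fallback call in CAST(... AS DATE).
print(sqlglot.transpile(
    "SELECT STR_TO_DATE('2020-01-01', '%Y-%m-%d')", read="mysql", write="clickhouse"
)[0])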
921 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 922 this = expression.this 923 924 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 925 return self.sql(this) 926 927 return super().cast_sql(expression, safe_prefix=safe_prefix)
929 def trycast_sql(self, expression: exp.TryCast) -> str: 930 dtype = expression.to 931 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 932 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 933 dtype.set("nullable", True) 934 935 return super().cast_sql(expression)
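Concretely, TRY_CAST becomes a plain cast to a Nullable target, which yields NULL on failure, approximating TRY_CAST semantics per the comment above. A sketch (expected shape only):

import sqlglot

# TRY_CAST(x AS INT) is approximated by casting to Nullable(Int32).
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", write="clickhouse")[0])
# e.g. SELECT CAST(x AS Nullable(Int32))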
972 def datatype_sql(self, expression: exp.DataType) -> str: 973 # String is the standard ClickHouse type, every other variant is just an alias. 974 # Additionally, any supplied length parameter will be ignored. 975 # 976 # https://clickhouse.com/docs/en/sql-reference/data-types/string 977 if expression.this in self.STRING_TYPE_MAPPING: 978 dtype = "String" 979 else: 980 dtype = super().datatype_sql(expression) 981 982 # This section changes the type to `Nullable(...)` if the following conditions hold: 983 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 984 # and change their semantics 985 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 986 # constraint: "Type of Map key must be a type, that can be represented by integer or 987 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 988 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 989 parent = expression.parent 990 if ( 991 expression.args.get("nullable") is not False 992 and not ( 993 isinstance(parent, exp.DataType) 994 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 995 and expression.index in (None, 0) 996 ) 997 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 998 ): 999 dtype = f"Nullable({dtype})" 1000 1001 return dtype
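The Map-key exception is visible when transpiling a MAP type, e.g. from DuckDB. A sketch (expected shape only):

import sqlglot

# The key type stays non-nullable, while the value type may be wrapped.
print(sqlglot.transpile(
    "SELECT CAST(m AS MAP(VARCHAR, VARCHAR))", read="duckdb", write="clickhouse"
)[0])
# e.g. SELECT CAST(m AS Map(String, Nullable(String)))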
1011 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1012 return super().after_limit_modifiers(expression) + [ 1013 ( 1014 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1015 if expression.args.get("settings") 1016 else "" 1017 ), 1018 ( 1019 self.seg("FORMAT ") + self.sql(expression, "format") 1020 if expression.args.get("format") 1021 else "" 1022 ), 1023 ]
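SETTINGS and FORMAT round-trip as trailing query modifiers. A minimal sketch with an illustrative setting name:

import sqlglot

sql = "SELECT * FROM t LIMIT 10 SETTINGS max_threads = 8 FORMAT JSONEachRow"
# Both modifiers are stored on the query's args and re-emitted after LIMIT.
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])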
1044 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1045 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1046 exp.Properties.Location.POST_NAME 1047 ): 1048 this_name = self.sql( 1049 expression.this if isinstance(expression.this, exp.Schema) else expression, 1050 "this", 1051 ) 1052 this_properties = " ".join( 1053 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1054 ) 1055 this_schema = self.schema_columns_sql(expression.this) 1056 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1057 1058 return super().createable_sql(expression, locations)
1060 def create_sql(self, expression: exp.Create) -> str: 1061 # The comment property comes last in CTAS statements, i.e. after the query 1062 query = expression.expression 1063 if isinstance(query, exp.Query): 1064 comment_prop = expression.find(exp.SchemaCommentProperty) 1065 if comment_prop: 1066 comment_prop.pop() 1067 query.replace(exp.paren(query)) 1068 else: 1069 comment_prop = None 1070 1071 create_sql = super().create_sql(expression) 1072 1073 comment_sql = self.sql(comment_prop) 1074 comment_sql = f" {comment_sql}" if comment_sql else "" 1075 1076 return f"{create_sql}{comment_sql}"
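The comment shuffling is observable on a CTAS. A sketch, assuming the COMMENT property parses on the read side (the table name and comment are illustrative):

import sqlglot

sql = "CREATE TABLE t COMMENT 'daily rollup' AS SELECT 1 AS x"
# The SchemaCommentProperty is popped and re-attached after the parenthesized query.
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])
# e.g. CREATE TABLE t AS (SELECT 1 AS x) COMMENT 'daily rollup'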
1082 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1083 this = self.sql(expression, "this") 1084 this = f" {this}" if this else "" 1085 expr = self.sql(expression, "expression") 1086 expr = f" {expr}" if expr else "" 1087 index_type = self.sql(expression, "index_type") 1088 index_type = f" TYPE {index_type}" if index_type else "" 1089 granularity = self.sql(expression, "granularity") 1090 granularity = f" GRANULARITY {granularity}" if granularity else "" 1091 1092 return f"INDEX{this}{expr}{index_type}{granularity}"
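And the INDEX column constraint round-trips inside a CREATE TABLE schema. A sketch; the skipping-index type ngrambf_v1 and its arguments are just an illustrative choice:

import sqlglot

sql = "CREATE TABLE t (s String, INDEX idx s TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 1)"
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])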
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_NULLABLE_TYPES
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql