sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )
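
# A minimal usage sketch (illustrative, not part of the original source):
# parse_var_map pairs up a flat [key1, value1, key2, value2, ...] argument
# list, and parse_like swaps LIKE's reversed argument order back around:
#
#     >>> node = parse_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
#     >>> isinstance(node, exp.VarMap)
#     True
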

class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }
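
    # An extension sketch (hypothetical dialect, not part of the original
    # source): dialect parsers widen these class-level sets by subclassing,
    # e.g.
    #
    #     class MyDialectParser(Parser):
    #         TYPE_TOKENS = {*Parser.TYPE_TOKENS, TokenType.SOME_TYPE}  # hypothetical token
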
    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
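
    # A usage sketch (not part of the original source): the DCOLON entry
    # above is what makes Postgres-style casts round-trip through exp.Cast:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT x::INT").sql()
    #     'SELECT CAST(x AS INT)'
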
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }
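
    # A brief sketch (not part of the original source): prefix operators
    # dispatch through UNARY_PARSERS, so "-x" becomes exp.Neg and "NOT x"
    # becomes exp.Not, while a leading "+" is consumed as a no-op:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("SELECT -x").selects[0]).__name__
    #     'Neg'
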
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
"LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 659 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 660 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 661 "LIKE": lambda self: self._parse_create_like(), 662 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 663 "LOCK": lambda self: self._parse_locking(), 664 "LOCKING": lambda self: self._parse_locking(), 665 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 666 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 667 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 668 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 669 "NO": lambda self: self._parse_no_property(), 670 "ON": lambda self: self._parse_on_property(), 671 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 672 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 673 "PARTITION BY": lambda self: self._parse_partitioned_by(), 674 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 675 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 676 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 677 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 678 "REMOTE": lambda self: self._parse_remote_with_connection(), 679 "RETURNS": lambda self: self._parse_returns(), 680 "ROW": lambda self: self._parse_row(), 681 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 682 "SAMPLE": lambda self: self.expression( 683 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 684 ), 685 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 686 "SETTINGS": lambda self: self.expression( 687 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 688 ), 689 "SORTKEY": lambda self: self._parse_sortkey(), 690 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 691 "STABLE": lambda self: self.expression( 692 exp.StabilityProperty, this=exp.Literal.string("STABLE") 693 ), 694 "STORED": lambda self: self._parse_stored(), 695 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 696 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 697 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 698 "TO": lambda self: self._parse_to_table(), 699 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 700 "TRANSFORM": lambda self: self.expression( 701 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 702 ), 703 "TTL": lambda self: self._parse_ttl(), 704 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 705 "VOLATILE": lambda self: self._parse_volatile_property(), 706 "WITH": lambda self: self._parse_with_property(), 707 } 708 709 CONSTRAINT_PARSERS = { 710 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 711 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 712 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 713 "CHARACTER SET": lambda self: self.expression( 714 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 715 ), 716 "CHECK": lambda self: self.expression( 717 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 718 ), 719 "COLLATE": lambda self: self.expression( 720 exp.CollateColumnConstraint, this=self._parse_var() 721 
), 722 "COMMENT": lambda self: self.expression( 723 exp.CommentColumnConstraint, this=self._parse_string() 724 ), 725 "COMPRESS": lambda self: self._parse_compress(), 726 "CLUSTERED": lambda self: self.expression( 727 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 728 ), 729 "NONCLUSTERED": lambda self: self.expression( 730 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 731 ), 732 "DEFAULT": lambda self: self.expression( 733 exp.DefaultColumnConstraint, this=self._parse_bitwise() 734 ), 735 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 736 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 737 "FORMAT": lambda self: self.expression( 738 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 739 ), 740 "GENERATED": lambda self: self._parse_generated_as_identity(), 741 "IDENTITY": lambda self: self._parse_auto_increment(), 742 "INLINE": lambda self: self._parse_inline(), 743 "LIKE": lambda self: self._parse_create_like(), 744 "NOT": lambda self: self._parse_not_constraint(), 745 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 746 "ON": lambda self: ( 747 self._match(TokenType.UPDATE) 748 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 749 ) 750 or self.expression(exp.OnProperty, this=self._parse_id_var()), 751 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 752 "PRIMARY KEY": lambda self: self._parse_primary_key(), 753 "REFERENCES": lambda self: self._parse_references(match=False), 754 "TITLE": lambda self: self.expression( 755 exp.TitleColumnConstraint, this=self._parse_var_or_string() 756 ), 757 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 758 "UNIQUE": lambda self: self._parse_unique(), 759 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 760 "WITH": lambda self: self.expression( 761 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 762 ), 763 } 764 765 ALTER_PARSERS = { 766 "ADD": lambda self: self._parse_alter_table_add(), 767 "ALTER": lambda self: self._parse_alter_table_alter(), 768 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 769 "DROP": lambda self: self._parse_alter_table_drop(), 770 "RENAME": lambda self: self._parse_alter_table_rename(), 771 } 772 773 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 774 775 NO_PAREN_FUNCTION_PARSERS = { 776 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 777 "CASE": lambda self: self._parse_case(), 778 "IF": lambda self: self._parse_if(), 779 "NEXT": lambda self: self._parse_next_value_for(), 780 } 781 782 INVALID_FUNC_NAME_TOKENS = { 783 TokenType.IDENTIFIER, 784 TokenType.STRING, 785 } 786 787 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 788 789 FUNCTION_PARSERS = { 790 "ANY_VALUE": lambda self: self._parse_any_value(), 791 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 792 "CONCAT": lambda self: self._parse_concat(), 793 "CONCAT_WS": lambda self: self._parse_concat_ws(), 794 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 795 "DECODE": lambda self: self._parse_decode(), 796 "EXTRACT": lambda self: self._parse_extract(), 797 "JSON_OBJECT": lambda self: self._parse_json_object(), 798 "JSON_TABLE": lambda self: self._parse_json_table(), 799 "LOG": lambda self: self._parse_logarithm(), 800 "MATCH": 
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
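
    # A usage sketch (not part of the original source): tokens come from the
    # dialect's Tokenizer and are handed to parse, which returns one tree per
    # statement, e.g.
    #
    #     >>> from sqlglot.parser import Parser
    #     >>> from sqlglot.tokens import Tokenizer
    #     >>> sql = "SELECT 1"
    #     >>> Parser().parse(Tokenizer().tokenize(sql), sql)[0].sql()
    #     'SELECT 1'
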
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
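
    # A behavior sketch (not part of the original source): with the default
    # ErrorLevel.IMMEDIATE, raise_error throws at the first problem; with
    # ErrorLevel.RAISE, errors accumulate and check_errors raises one merged
    # ParseError, e.g.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot.errors import ParseError
    #     >>> try:
    #     ...     sqlglot.parse_one("SELECT * FROM")
    #     ... except ParseError as e:
    #     ...     print(type(e).__name__)
    #     ParseError
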
1109 """ 1110 instance = exp_class(**kwargs) 1111 instance.add_comments(comments) if comments else self._add_comments(instance) 1112 return self.validate_expression(instance) 1113 1114 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1115 if expression and self._prev_comments: 1116 expression.add_comments(self._prev_comments) 1117 self._prev_comments = None 1118 1119 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1120 """ 1121 Validates an Expression, making sure that all its mandatory arguments are set. 1122 1123 Args: 1124 expression: The expression to validate. 1125 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1126 1127 Returns: 1128 The validated expression. 1129 """ 1130 if self.error_level != ErrorLevel.IGNORE: 1131 for error_message in expression.error_messages(args): 1132 self.raise_error(error_message) 1133 1134 return expression 1135 1136 def _find_sql(self, start: Token, end: Token) -> str: 1137 return self.sql[start.start : end.end + 1] 1138 1139 def _advance(self, times: int = 1) -> None: 1140 self._index += times 1141 self._curr = seq_get(self._tokens, self._index) 1142 self._next = seq_get(self._tokens, self._index + 1) 1143 1144 if self._index > 0: 1145 self._prev = self._tokens[self._index - 1] 1146 self._prev_comments = self._prev.comments 1147 else: 1148 self._prev = None 1149 self._prev_comments = None 1150 1151 def _retreat(self, index: int) -> None: 1152 if index != self._index: 1153 self._advance(index - self._index) 1154 1155 def _parse_command(self) -> exp.Command: 1156 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1157 1158 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1159 start = self._prev 1160 exists = self._parse_exists() if allow_exists else None 1161 1162 self._match(TokenType.ON) 1163 1164 kind = self._match_set(self.CREATABLES) and self._prev 1165 if not kind: 1166 return self._parse_as_command(start) 1167 1168 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1169 this = self._parse_user_defined_function(kind=kind.token_type) 1170 elif kind.token_type == TokenType.TABLE: 1171 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1172 elif kind.token_type == TokenType.COLUMN: 1173 this = self._parse_column() 1174 else: 1175 this = self._parse_id_var() 1176 1177 self._match(TokenType.IS) 1178 1179 return self.expression( 1180 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1181 ) 1182 1183 def _parse_to_table( 1184 self, 1185 ) -> exp.ToTableProperty: 1186 table = self._parse_table_parts(schema=True) 1187 return self.expression(exp.ToTableProperty, this=table) 1188 1189 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1190 def _parse_ttl(self) -> exp.Expression: 1191 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1192 this = self._parse_bitwise() 1193 1194 if self._match_text_seq("DELETE"): 1195 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1196 if self._match_text_seq("RECOMPRESS"): 1197 return self.expression( 1198 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1199 ) 1200 if self._match_text_seq("TO", "DISK"): 1201 return self.expression( 1202 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1203 ) 1204 if self._match_text_seq("TO", "VOLUME"): 1205 return self.expression( 1206 
    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
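
    # A usage sketch (not part of the original source): _parse_exists is only
    # truthy when the whole "IF [NOT] EXISTS" sequence matched, e.g.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("DROP TABLE IF EXISTS t").args.get("exists")
    #     True
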
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
1426 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1427 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1428 "after": self._match_text_seq("AFTER"), 1429 "minimum": self._match_texts(("MIN", "MINIMUM")), 1430 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1431 } 1432 1433 if self._match_texts(self.PROPERTY_PARSERS): 1434 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1435 try: 1436 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1437 except TypeError: 1438 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1439 1440 return None 1441 1442 def _parse_property(self) -> t.Optional[exp.Expression]: 1443 if self._match_texts(self.PROPERTY_PARSERS): 1444 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1445 1446 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1447 return self._parse_character_set(default=True) 1448 1449 if self._match_text_seq("COMPOUND", "SORTKEY"): 1450 return self._parse_sortkey(compound=True) 1451 1452 if self._match_text_seq("SQL", "SECURITY"): 1453 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1454 1455 index = self._index 1456 key = self._parse_column() 1457 1458 if not self._match(TokenType.EQ): 1459 self._retreat(index) 1460 return None 1461 1462 return self.expression( 1463 exp.Property, 1464 this=key.to_dot() if isinstance(key, exp.Column) else key, 1465 value=self._parse_column() or self._parse_var(any_token=True), 1466 ) 1467 1468 def _parse_stored(self) -> exp.FileFormatProperty: 1469 self._match(TokenType.ALIAS) 1470 1471 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1472 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1473 1474 return self.expression( 1475 exp.FileFormatProperty, 1476 this=self.expression( 1477 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1478 ) 1479 if input_format or output_format 1480 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1481 ) 1482 1483 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1484 self._match(TokenType.EQ) 1485 self._match(TokenType.ALIAS) 1486 return self.expression(exp_class, this=self._parse_field()) 1487 1488 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1489 properties = [] 1490 while True: 1491 if before: 1492 prop = self._parse_property_before() 1493 else: 1494 prop = self._parse_property() 1495 1496 if not prop: 1497 break 1498 for p in ensure_list(prop): 1499 properties.append(p) 1500 1501 if properties: 1502 return self.expression(exp.Properties, expressions=properties) 1503 1504 return None 1505 1506 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1507 return self.expression( 1508 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1509 ) 1510 1511 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1512 if self._index >= 2: 1513 pre_volatile_token = self._tokens[self._index - 2] 1514 else: 1515 pre_volatile_token = None 1516 1517 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1518 return exp.VolatileProperty() 1519 1520 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1521 1522 def _parse_with_property( 1523 self, 1524 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1525 if self._match(TokenType.L_PAREN, advance=False): 1526 
    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
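
    # A usage sketch (not part of the original source): _parse_partition_by
    # consumes the PARTITION BY clause used in window specs, e.g.
    #
    #     >>> import sqlglot
    #     >>> sql = "SELECT ROW_NUMBER() OVER (PARTITION BY a ORDER BY b) FROM t"
    #     >>> sqlglot.parse_one(sql).sql() == sql
    #     True
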
t.Optional[exp.NoPrimaryIndexProperty]: 1760 if self._match_text_seq("PRIMARY", "INDEX"): 1761 return exp.NoPrimaryIndexProperty() 1762 return None 1763 1764 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1765 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1766 return exp.OnCommitProperty() 1767 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1768 return exp.OnCommitProperty(delete=True) 1769 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1770 1771 def _parse_distkey(self) -> exp.DistKeyProperty: 1772 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1773 1774 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1775 table = self._parse_table(schema=True) 1776 1777 options = [] 1778 while self._match_texts(("INCLUDING", "EXCLUDING")): 1779 this = self._prev.text.upper() 1780 1781 id_var = self._parse_id_var() 1782 if not id_var: 1783 return None 1784 1785 options.append( 1786 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1787 ) 1788 1789 return self.expression(exp.LikeProperty, this=table, expressions=options) 1790 1791 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1792 return self.expression( 1793 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1794 ) 1795 1796 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1797 self._match(TokenType.EQ) 1798 return self.expression( 1799 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1800 ) 1801 1802 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1803 self._match_text_seq("WITH", "CONNECTION") 1804 return self.expression( 1805 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1806 ) 1807 1808 def _parse_returns(self) -> exp.ReturnsProperty: 1809 value: t.Optional[exp.Expression] 1810 is_table = self._match(TokenType.TABLE) 1811 1812 if is_table: 1813 if self._match(TokenType.LT): 1814 value = self.expression( 1815 exp.Schema, 1816 this="TABLE", 1817 expressions=self._parse_csv(self._parse_struct_types), 1818 ) 1819 if not self._match(TokenType.GT): 1820 self.raise_error("Expecting >") 1821 else: 1822 value = self._parse_schema(exp.var("TABLE")) 1823 else: 1824 value = self._parse_types() 1825 1826 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1827 1828 def _parse_describe(self) -> exp.Describe: 1829 kind = self._match_set(self.CREATABLES) and self._prev.text 1830 this = self._parse_table(schema=True) 1831 properties = self._parse_properties() 1832 expressions = properties.expressions if properties else None 1833 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1834 1835 def _parse_insert(self) -> exp.Insert: 1836 comments = ensure_list(self._prev_comments) 1837 overwrite = self._match(TokenType.OVERWRITE) 1838 ignore = self._match(TokenType.IGNORE) 1839 local = self._match_text_seq("LOCAL") 1840 alternative = None 1841 1842 if self._match_text_seq("DIRECTORY"): 1843 this: t.Optional[exp.Expression] = self.expression( 1844 exp.Directory, 1845 this=self._parse_var_or_string(), 1846 local=local, 1847 row_format=self._parse_row_format(match_row=True), 1848 ) 1849 else: 1850 if self._match(TokenType.OR): 1851 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1852 1853 self._match(TokenType.INTO) 1854 comments += ensure_list(self._prev_comments) 1855 
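_parse_distkey and _parse_sortkey back the Redshift table options. A small sketch (names are made up):

    import sqlglot
    from sqlglot import exp

    ddl = "CREATE TABLE t (a INT) DISTKEY(a) SORTKEY(a)"
    ast = sqlglot.parse_one(ddl, read="redshift")
    print(ast.find(exp.DistKeyProperty))  # roughly: DISTKEY(a)
    print(ast.find(exp.SortKeyProperty))  # roughly: SORTKEY(a)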
self._match(TokenType.TABLE) 1856 this = self._parse_table(schema=True) 1857 1858 returning = self._parse_returning() 1859 1860 return self.expression( 1861 exp.Insert, 1862 comments=comments, 1863 this=this, 1864 by_name=self._match_text_seq("BY", "NAME"), 1865 exists=self._parse_exists(), 1866 partition=self._parse_partition(), 1867 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1868 and self._parse_conjunction(), 1869 expression=self._parse_ddl_select(), 1870 conflict=self._parse_on_conflict(), 1871 returning=returning or self._parse_returning(), 1872 overwrite=overwrite, 1873 alternative=alternative, 1874 ignore=ignore, 1875 ) 1876 1877 def _parse_kill(self) -> exp.Kill: 1878 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1879 1880 return self.expression( 1881 exp.Kill, 1882 this=self._parse_primary(), 1883 kind=kind, 1884 ) 1885 1886 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1887 conflict = self._match_text_seq("ON", "CONFLICT") 1888 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1889 1890 if not conflict and not duplicate: 1891 return None 1892 1893 nothing = None 1894 expressions = None 1895 key = None 1896 constraint = None 1897 1898 if conflict: 1899 if self._match_text_seq("ON", "CONSTRAINT"): 1900 constraint = self._parse_id_var() 1901 else: 1902 key = self._parse_csv(self._parse_value) 1903 1904 self._match_text_seq("DO") 1905 if self._match_text_seq("NOTHING"): 1906 nothing = True 1907 else: 1908 self._match(TokenType.UPDATE) 1909 self._match(TokenType.SET) 1910 expressions = self._parse_csv(self._parse_equality) 1911 1912 return self.expression( 1913 exp.OnConflict, 1914 duplicate=duplicate, 1915 expressions=expressions, 1916 nothing=nothing, 1917 key=key, 1918 constraint=constraint, 1919 ) 1920 1921 def _parse_returning(self) -> t.Optional[exp.Returning]: 1922 if not self._match(TokenType.RETURNING): 1923 return None 1924 return self.expression( 1925 exp.Returning, 1926 expressions=self._parse_csv(self._parse_expression), 1927 into=self._match(TokenType.INTO) and self._parse_table_part(), 1928 ) 1929 1930 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1931 if not self._match(TokenType.FORMAT): 1932 return None 1933 return self._parse_row_format() 1934 1935 def _parse_row_format( 1936 self, match_row: bool = False 1937 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1938 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1939 return None 1940 1941 if self._match_text_seq("SERDE"): 1942 this = self._parse_string() 1943 1944 serde_properties = None 1945 if self._match(TokenType.SERDE_PROPERTIES): 1946 serde_properties = self.expression( 1947 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1948 ) 1949 1950 return self.expression( 1951 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1952 ) 1953 1954 self._match_text_seq("DELIMITED") 1955 1956 kwargs = {} 1957 1958 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1959 kwargs["fields"] = self._parse_string() 1960 if self._match_text_seq("ESCAPED", "BY"): 1961 kwargs["escaped"] = self._parse_string() 1962 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1963 kwargs["collection_items"] = self._parse_string() 1964 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1965 kwargs["map_keys"] = self._parse_string() 1966 if self._match_text_seq("LINES", "TERMINATED", 
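_parse_on_conflict covers both the Postgres ON CONFLICT and MySQL ON DUPLICATE KEY forms. A minimal sketch for the Postgres side (the schema is illustrative):

    import sqlglot
    from sqlglot import exp

    sql = "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO UPDATE SET a = excluded.a"
    conflict = sqlglot.parse_one(sql, read="postgres").find(exp.OnConflict)
    # the SET assignments end up in the "expressions" arg
    print(conflict.args["expressions"])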
"BY"): 1967 kwargs["lines"] = self._parse_string() 1968 if self._match_text_seq("NULL", "DEFINED", "AS"): 1969 kwargs["null"] = self._parse_string() 1970 1971 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1972 1973 def _parse_load(self) -> exp.LoadData | exp.Command: 1974 if self._match_text_seq("DATA"): 1975 local = self._match_text_seq("LOCAL") 1976 self._match_text_seq("INPATH") 1977 inpath = self._parse_string() 1978 overwrite = self._match(TokenType.OVERWRITE) 1979 self._match_pair(TokenType.INTO, TokenType.TABLE) 1980 1981 return self.expression( 1982 exp.LoadData, 1983 this=self._parse_table(schema=True), 1984 local=local, 1985 overwrite=overwrite, 1986 inpath=inpath, 1987 partition=self._parse_partition(), 1988 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1989 serde=self._match_text_seq("SERDE") and self._parse_string(), 1990 ) 1991 return self._parse_as_command(self._prev) 1992 1993 def _parse_delete(self) -> exp.Delete: 1994 # This handles MySQL's "Multiple-Table Syntax" 1995 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1996 tables = None 1997 comments = self._prev_comments 1998 if not self._match(TokenType.FROM, advance=False): 1999 tables = self._parse_csv(self._parse_table) or None 2000 2001 returning = self._parse_returning() 2002 2003 return self.expression( 2004 exp.Delete, 2005 comments=comments, 2006 tables=tables, 2007 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2008 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2009 where=self._parse_where(), 2010 returning=returning or self._parse_returning(), 2011 limit=self._parse_limit(), 2012 ) 2013 2014 def _parse_update(self) -> exp.Update: 2015 comments = self._prev_comments 2016 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2017 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2018 returning = self._parse_returning() 2019 return self.expression( 2020 exp.Update, 2021 comments=comments, 2022 **{ # type: ignore 2023 "this": this, 2024 "expressions": expressions, 2025 "from": self._parse_from(joins=True), 2026 "where": self._parse_where(), 2027 "returning": returning or self._parse_returning(), 2028 "order": self._parse_order(), 2029 "limit": self._parse_limit(), 2030 }, 2031 ) 2032 2033 def _parse_uncache(self) -> exp.Uncache: 2034 if not self._match(TokenType.TABLE): 2035 self.raise_error("Expecting TABLE after UNCACHE") 2036 2037 return self.expression( 2038 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2039 ) 2040 2041 def _parse_cache(self) -> exp.Cache: 2042 lazy = self._match_text_seq("LAZY") 2043 self._match(TokenType.TABLE) 2044 table = self._parse_table(schema=True) 2045 2046 options = [] 2047 if self._match_text_seq("OPTIONS"): 2048 self._match_l_paren() 2049 k = self._parse_string() 2050 self._match(TokenType.EQ) 2051 v = self._parse_string() 2052 options = [k, v] 2053 self._match_r_paren() 2054 2055 self._match(TokenType.ALIAS) 2056 return self.expression( 2057 exp.Cache, 2058 this=table, 2059 lazy=lazy, 2060 options=options, 2061 expression=self._parse_select(nested=True), 2062 ) 2063 2064 def _parse_partition(self) -> t.Optional[exp.Partition]: 2065 if not self._match(TokenType.PARTITION): 2066 return None 2067 2068 return self.expression( 2069 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2070 ) 2071 2072 def _parse_value(self) -> exp.Tuple: 2073 if 
self._match(TokenType.L_PAREN): 2074 expressions = self._parse_csv(self._parse_conjunction) 2075 self._match_r_paren() 2076 return self.expression(exp.Tuple, expressions=expressions) 2077 2078 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2079 # https://prestodb.io/docs/current/sql/values.html 2080 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2081 2082 def _parse_projections(self) -> t.List[exp.Expression]: 2083 return self._parse_expressions() 2084 2085 def _parse_select( 2086 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2087 ) -> t.Optional[exp.Expression]: 2088 cte = self._parse_with() 2089 2090 if cte: 2091 this = self._parse_statement() 2092 2093 if not this: 2094 self.raise_error("Failed to parse any statement following CTE") 2095 return cte 2096 2097 if "with" in this.arg_types: 2098 this.set("with", cte) 2099 else: 2100 self.raise_error(f"{this.key} does not support CTE") 2101 this = cte 2102 2103 return this 2104 2105 # duckdb supports leading with FROM x 2106 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2107 2108 if self._match(TokenType.SELECT): 2109 comments = self._prev_comments 2110 2111 hint = self._parse_hint() 2112 all_ = self._match(TokenType.ALL) 2113 distinct = self._match_set(self.DISTINCT_TOKENS) 2114 2115 kind = ( 2116 self._match(TokenType.ALIAS) 2117 and self._match_texts(("STRUCT", "VALUE")) 2118 and self._prev.text 2119 ) 2120 2121 if distinct: 2122 distinct = self.expression( 2123 exp.Distinct, 2124 on=self._parse_value() if self._match(TokenType.ON) else None, 2125 ) 2126 2127 if all_ and distinct: 2128 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2129 2130 limit = self._parse_limit(top=True) 2131 projections = self._parse_projections() 2132 2133 this = self.expression( 2134 exp.Select, 2135 kind=kind, 2136 hint=hint, 2137 distinct=distinct, 2138 expressions=projections, 2139 limit=limit, 2140 ) 2141 this.comments = comments 2142 2143 into = self._parse_into() 2144 if into: 2145 this.set("into", into) 2146 2147 if not from_: 2148 from_ = self._parse_from() 2149 2150 if from_: 2151 this.set("from", from_) 2152 2153 this = self._parse_query_modifiers(this) 2154 elif (table or nested) and self._match(TokenType.L_PAREN): 2155 if self._match(TokenType.PIVOT): 2156 this = self._parse_simplified_pivot() 2157 elif self._match(TokenType.FROM): 2158 this = exp.select("*").from_( 2159 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2160 ) 2161 else: 2162 this = self._parse_table() if table else self._parse_select(nested=True) 2163 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2164 2165 self._match_r_paren() 2166 2167 # We return early here so that the UNION isn't attached to the subquery by the 2168 # following call to _parse_set_operations, but instead becomes the parent node 2169 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2170 elif self._match(TokenType.VALUES): 2171 this = self.expression( 2172 exp.Values, 2173 expressions=self._parse_csv(self._parse_value), 2174 alias=self._parse_table_alias(), 2175 ) 2176 elif from_: 2177 this = exp.select("*").from_(from_.this, copy=False) 2178 else: 2179 this = None 2180 2181 return self._parse_set_operations(this) 2182 2183 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2184 if not skip_with_token and not self._match(TokenType.WITH): 2185 return None 2186 2187 comments = self._prev_comments 
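The leading-FROM branch of _parse_select expands a bare FROM into a full SELECT. A one-liner sketch, assuming the DuckDB reader:

    import sqlglot

    print(sqlglot.parse_one("FROM tbl", read="duckdb").sql())
    # SELECT * FROM tbl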
2188 recursive = self._match(TokenType.RECURSIVE) 2189 2190 expressions = [] 2191 while True: 2192 expressions.append(self._parse_cte()) 2193 2194 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2195 break 2196 else: 2197 self._match(TokenType.WITH) 2198 2199 return self.expression( 2200 exp.With, comments=comments, expressions=expressions, recursive=recursive 2201 ) 2202 2203 def _parse_cte(self) -> exp.CTE: 2204 alias = self._parse_table_alias() 2205 if not alias or not alias.this: 2206 self.raise_error("Expected CTE to have alias") 2207 2208 self._match(TokenType.ALIAS) 2209 return self.expression( 2210 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2211 ) 2212 2213 def _parse_table_alias( 2214 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2215 ) -> t.Optional[exp.TableAlias]: 2216 any_token = self._match(TokenType.ALIAS) 2217 alias = ( 2218 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2219 or self._parse_string_as_identifier() 2220 ) 2221 2222 index = self._index 2223 if self._match(TokenType.L_PAREN): 2224 columns = self._parse_csv(self._parse_function_parameter) 2225 self._match_r_paren() if columns else self._retreat(index) 2226 else: 2227 columns = None 2228 2229 if not alias and not columns: 2230 return None 2231 2232 return self.expression(exp.TableAlias, this=alias, columns=columns) 2233 2234 def _parse_subquery( 2235 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2236 ) -> t.Optional[exp.Subquery]: 2237 if not this: 2238 return None 2239 2240 return self.expression( 2241 exp.Subquery, 2242 this=this, 2243 pivots=self._parse_pivots(), 2244 alias=self._parse_table_alias() if parse_alias else None, 2245 ) 2246 2247 def _parse_query_modifiers( 2248 self, this: t.Optional[exp.Expression] 2249 ) -> t.Optional[exp.Expression]: 2250 if isinstance(this, self.MODIFIABLES): 2251 for join in iter(self._parse_join, None): 2252 this.append("joins", join) 2253 for lateral in iter(self._parse_lateral, None): 2254 this.append("laterals", lateral) 2255 2256 while True: 2257 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2258 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2259 key, expression = parser(self) 2260 2261 if expression: 2262 this.set(key, expression) 2263 if key == "limit": 2264 offset = expression.args.pop("offset", None) 2265 if offset: 2266 this.set("offset", exp.Offset(expression=offset)) 2267 continue 2268 break 2269 return this 2270 2271 def _parse_hint(self) -> t.Optional[exp.Hint]: 2272 if self._match(TokenType.HINT): 2273 hints = [] 2274 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2275 hints.extend(hint) 2276 2277 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2278 self.raise_error("Expected */ after HINT") 2279 2280 return self.expression(exp.Hint, expressions=hints) 2281 2282 return None 2283 2284 def _parse_into(self) -> t.Optional[exp.Into]: 2285 if not self._match(TokenType.INTO): 2286 return None 2287 2288 temp = self._match(TokenType.TEMPORARY) 2289 unlogged = self._match_text_seq("UNLOGGED") 2290 self._match(TokenType.TABLE) 2291 2292 return self.expression( 2293 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2294 ) 2295 2296 def _parse_from( 2297 self, joins: bool = False, skip_from_token: bool = False 2298 ) -> t.Optional[exp.From]: 2299 if not skip_from_token and not self._match(TokenType.FROM): 2300 return None 2301 2302 return 
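_parse_with and _parse_cte attach the WITH clause to whatever statement follows it. A minimal sketch:

    import sqlglot
    from sqlglot import exp

    ast = sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
    cte = ast.find(exp.CTE)
    print(cte.alias)  # c
    print(ast.args["with"].args.get("recursive"))  # falsy: no RECURSIVE keyword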
self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

        while self._match(TokenType.DOT):
            this = exp.Dot(
                this=this,
                expression=self._parse_function() or self._parse_id_var(any_token=False),
            )

        if view:
            table
= self._parse_id_var(any_token=False) 2425 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2426 table_alias: t.Optional[exp.TableAlias] = self.expression( 2427 exp.TableAlias, this=table, columns=columns 2428 ) 2429 elif isinstance(this, exp.Subquery) and this.alias: 2430 # Ensures parity between the Subquery's and the Lateral's "alias" args 2431 table_alias = this.args["alias"].copy() 2432 else: 2433 table_alias = self._parse_table_alias() 2434 2435 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2436 2437 def _parse_join_parts( 2438 self, 2439 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2440 return ( 2441 self._match_set(self.JOIN_METHODS) and self._prev, 2442 self._match_set(self.JOIN_SIDES) and self._prev, 2443 self._match_set(self.JOIN_KINDS) and self._prev, 2444 ) 2445 2446 def _parse_join( 2447 self, skip_join_token: bool = False, parse_bracket: bool = False 2448 ) -> t.Optional[exp.Join]: 2449 if self._match(TokenType.COMMA): 2450 return self.expression(exp.Join, this=self._parse_table()) 2451 2452 index = self._index 2453 method, side, kind = self._parse_join_parts() 2454 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2455 join = self._match(TokenType.JOIN) 2456 2457 if not skip_join_token and not join: 2458 self._retreat(index) 2459 kind = None 2460 method = None 2461 side = None 2462 2463 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2464 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2465 2466 if not skip_join_token and not join and not outer_apply and not cross_apply: 2467 return None 2468 2469 if outer_apply: 2470 side = Token(TokenType.LEFT, "LEFT") 2471 2472 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2473 2474 if method: 2475 kwargs["method"] = method.text 2476 if side: 2477 kwargs["side"] = side.text 2478 if kind: 2479 kwargs["kind"] = kind.text 2480 if hint: 2481 kwargs["hint"] = hint 2482 2483 if self._match(TokenType.ON): 2484 kwargs["on"] = self._parse_conjunction() 2485 elif self._match(TokenType.USING): 2486 kwargs["using"] = self._parse_wrapped_id_vars() 2487 elif not (kind and kind.token_type == TokenType.CROSS): 2488 index = self._index 2489 join = self._parse_join() 2490 2491 if join and self._match(TokenType.ON): 2492 kwargs["on"] = self._parse_conjunction() 2493 elif join and self._match(TokenType.USING): 2494 kwargs["using"] = self._parse_wrapped_id_vars() 2495 else: 2496 join = None 2497 self._retreat(index) 2498 2499 kwargs["this"].set("joins", [join] if join else None) 2500 2501 comments = [c for token in (method, side, kind) if token for c in token.comments] 2502 return self.expression(exp.Join, comments=comments, **kwargs) 2503 2504 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2505 this = self._parse_conjunction() 2506 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2507 return this 2508 2509 opclass = self._parse_var(any_token=True) 2510 if opclass: 2511 return self.expression(exp.Opclass, this=this, expression=opclass) 2512 2513 return this 2514 2515 def _parse_index( 2516 self, 2517 index: t.Optional[exp.Expression] = None, 2518 ) -> t.Optional[exp.Index]: 2519 if index: 2520 unique = None 2521 primary = None 2522 amp = None 2523 2524 self._match(TokenType.ON) 2525 self._match(TokenType.TABLE) # hive 2526 table = self._parse_table_parts(schema=True) 2527 else: 2528 unique = 
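_parse_join_parts splits a join into method, side and kind tokens, which end up as plain strings on the exp.Join node. A small sketch:

    import sqlglot
    from sqlglot import exp

    sql = "SELECT * FROM a LEFT OUTER JOIN b ON a.id = b.id"
    join = sqlglot.parse_one(sql).find(exp.Join)
    print(join.side, join.kind)  # LEFT OUTER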
self._match(TokenType.UNIQUE) 2529 primary = self._match_text_seq("PRIMARY") 2530 amp = self._match_text_seq("AMP") 2531 2532 if not self._match(TokenType.INDEX): 2533 return None 2534 2535 index = self._parse_id_var() 2536 table = None 2537 2538 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2539 2540 if self._match(TokenType.L_PAREN, advance=False): 2541 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2542 else: 2543 columns = None 2544 2545 return self.expression( 2546 exp.Index, 2547 this=index, 2548 table=table, 2549 using=using, 2550 columns=columns, 2551 unique=unique, 2552 primary=primary, 2553 amp=amp, 2554 partition_by=self._parse_partition_by(), 2555 where=self._parse_where(), 2556 ) 2557 2558 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2559 hints: t.List[exp.Expression] = [] 2560 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2561 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2562 hints.append( 2563 self.expression( 2564 exp.WithTableHint, 2565 expressions=self._parse_csv( 2566 lambda: self._parse_function() or self._parse_var(any_token=True) 2567 ), 2568 ) 2569 ) 2570 self._match_r_paren() 2571 else: 2572 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2573 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2574 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2575 2576 self._match_texts({"INDEX", "KEY"}) 2577 if self._match(TokenType.FOR): 2578 hint.set("target", self._advance_any() and self._prev.text.upper()) 2579 2580 hint.set("expressions", self._parse_wrapped_id_vars()) 2581 hints.append(hint) 2582 2583 return hints or None 2584 2585 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2586 return ( 2587 (not schema and self._parse_function(optional_parens=False)) 2588 or self._parse_id_var(any_token=False) 2589 or self._parse_string_as_identifier() 2590 or self._parse_placeholder() 2591 ) 2592 2593 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2594 catalog = None 2595 db = None 2596 table = self._parse_table_part(schema=schema) 2597 2598 while self._match(TokenType.DOT): 2599 if catalog: 2600 # This allows nesting the table in arbitrarily many dot expressions if needed 2601 table = self.expression( 2602 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2603 ) 2604 else: 2605 catalog = db 2606 db = table 2607 table = self._parse_table_part(schema=schema) 2608 2609 if not table: 2610 self.raise_error(f"Expected table name but got {self._curr}") 2611 2612 return self.expression( 2613 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2614 ) 2615 2616 def _parse_table( 2617 self, 2618 schema: bool = False, 2619 joins: bool = False, 2620 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2621 parse_bracket: bool = False, 2622 ) -> t.Optional[exp.Expression]: 2623 lateral = self._parse_lateral() 2624 if lateral: 2625 return lateral 2626 2627 unnest = self._parse_unnest() 2628 if unnest: 2629 return unnest 2630 2631 values = self._parse_derived_table_values() 2632 if values: 2633 return values 2634 2635 subquery = self._parse_select(table=True) 2636 if subquery: 2637 if not subquery.args.get("pivots"): 2638 subquery.set("pivots", self._parse_pivots()) 2639 return subquery 2640 2641 bracket = parse_bracket and self._parse_bracket(None) 2642 bracket = self.expression(exp.Table, this=bracket) if bracket else 
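_parse_table_parts peels dotted names apart into catalog, db and table. A quick sketch (the three-part name is made up):

    import sqlglot
    from sqlglot import exp

    table = sqlglot.parse_one("SELECT * FROM prod.analytics.events").find(exp.Table)
    print(table.catalog, table.db, table.name)  # prod analytics events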
None 2643 this = t.cast( 2644 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2645 ) 2646 2647 if schema: 2648 return self._parse_schema(this=this) 2649 2650 version = self._parse_version() 2651 2652 if version: 2653 this.set("version", version) 2654 2655 if self.ALIAS_POST_TABLESAMPLE: 2656 table_sample = self._parse_table_sample() 2657 2658 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2659 if alias: 2660 this.set("alias", alias) 2661 2662 if self._match_text_seq("AT"): 2663 this.set("index", self._parse_id_var()) 2664 2665 this.set("hints", self._parse_table_hints()) 2666 2667 if not this.args.get("pivots"): 2668 this.set("pivots", self._parse_pivots()) 2669 2670 if not self.ALIAS_POST_TABLESAMPLE: 2671 table_sample = self._parse_table_sample() 2672 2673 if table_sample: 2674 table_sample.set("this", this) 2675 this = table_sample 2676 2677 if joins: 2678 for join in iter(self._parse_join, None): 2679 this.append("joins", join) 2680 2681 return this 2682 2683 def _parse_version(self) -> t.Optional[exp.Version]: 2684 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2685 this = "TIMESTAMP" 2686 elif self._match(TokenType.VERSION_SNAPSHOT): 2687 this = "VERSION" 2688 else: 2689 return None 2690 2691 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2692 kind = self._prev.text.upper() 2693 start = self._parse_bitwise() 2694 self._match_texts(("TO", "AND")) 2695 end = self._parse_bitwise() 2696 expression: t.Optional[exp.Expression] = self.expression( 2697 exp.Tuple, expressions=[start, end] 2698 ) 2699 elif self._match_text_seq("CONTAINED", "IN"): 2700 kind = "CONTAINED IN" 2701 expression = self.expression( 2702 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2703 ) 2704 elif self._match(TokenType.ALL): 2705 kind = "ALL" 2706 expression = None 2707 else: 2708 self._match_text_seq("AS", "OF") 2709 kind = "AS OF" 2710 expression = self._parse_type() 2711 2712 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2713 2714 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2715 if not self._match(TokenType.UNNEST): 2716 return None 2717 2718 expressions = self._parse_wrapped_csv(self._parse_type) 2719 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2720 2721 alias = self._parse_table_alias() if with_alias else None 2722 2723 if alias: 2724 if self.UNNEST_COLUMN_ONLY: 2725 if alias.args.get("columns"): 2726 self.raise_error("Unexpected extra column alias in unnest.") 2727 2728 alias.set("columns", [alias.this]) 2729 alias.set("this", None) 2730 2731 columns = alias.args.get("columns") or [] 2732 if offset and len(expressions) < len(columns): 2733 offset = columns.pop() 2734 2735 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2736 self._match(TokenType.ALIAS) 2737 offset = self._parse_id_var( 2738 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2739 ) or exp.to_identifier("offset") 2740 2741 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2742 2743 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2744 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2745 if not is_derived and not self._match(TokenType.VALUES): 2746 return None 2747 2748 expressions = self._parse_csv(self._parse_value) 2749 alias = self._parse_table_alias() 2750 2751 if is_derived: 2752 self._match_r_paren() 2753 2754 return self.expression( 2755 exp.Values, 
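A minimal sketch of _parse_unnest with a column alias, assuming the Presto reader:

    import sqlglot
    from sqlglot import exp

    sql = "SELECT * FROM UNNEST(ARRAY[1, 2]) AS t(x)"
    unnest = sqlglot.parse_one(sql, read="presto").find(exp.Unnest)
    print(unnest.alias)  # t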
expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        elif num:
            size = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not
self._match(TokenType.L_PAREN): 2870 self._retreat(index) 2871 return None 2872 2873 if unpivot: 2874 expressions = self._parse_csv(self._parse_column) 2875 else: 2876 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2877 2878 if not expressions: 2879 self.raise_error("Failed to parse PIVOT's aggregation list") 2880 2881 if not self._match(TokenType.FOR): 2882 self.raise_error("Expecting FOR") 2883 2884 value = self._parse_column() 2885 2886 if not self._match(TokenType.IN): 2887 self.raise_error("Expecting IN") 2888 2889 field = self._parse_in(value, alias=True) 2890 2891 self._match_r_paren() 2892 2893 pivot = self.expression( 2894 exp.Pivot, 2895 expressions=expressions, 2896 field=field, 2897 unpivot=unpivot, 2898 include_nulls=include_nulls, 2899 ) 2900 2901 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2902 pivot.set("alias", self._parse_table_alias()) 2903 2904 if not unpivot: 2905 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2906 2907 columns: t.List[exp.Expression] = [] 2908 for fld in pivot.args["field"].expressions: 2909 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2910 for name in names: 2911 if self.PREFIXED_PIVOT_COLUMNS: 2912 name = f"{name}_{field_name}" if name else field_name 2913 else: 2914 name = f"{field_name}_{name}" if name else field_name 2915 2916 columns.append(exp.to_identifier(name)) 2917 2918 pivot.set("columns", columns) 2919 2920 return pivot 2921 2922 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2923 return [agg.alias for agg in aggregations] 2924 2925 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2926 if not skip_where_token and not self._match(TokenType.WHERE): 2927 return None 2928 2929 return self.expression( 2930 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2931 ) 2932 2933 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2934 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2935 return None 2936 2937 elements = defaultdict(list) 2938 2939 if self._match(TokenType.ALL): 2940 return self.expression(exp.Group, all=True) 2941 2942 while True: 2943 expressions = self._parse_csv(self._parse_conjunction) 2944 if expressions: 2945 elements["expressions"].extend(expressions) 2946 2947 grouping_sets = self._parse_grouping_sets() 2948 if grouping_sets: 2949 elements["grouping_sets"].extend(grouping_sets) 2950 2951 rollup = None 2952 cube = None 2953 totals = None 2954 2955 index = self._index 2956 with_ = self._match(TokenType.WITH) 2957 if self._match(TokenType.ROLLUP): 2958 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2959 elements["rollup"].extend(ensure_list(rollup)) 2960 2961 if self._match(TokenType.CUBE): 2962 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2963 elements["cube"].extend(ensure_list(cube)) 2964 2965 if self._match_text_seq("TOTALS"): 2966 totals = True 2967 elements["totals"] = True # type: ignore 2968 2969 if not (grouping_sets or rollup or cube or totals): 2970 if with_: 2971 self._retreat(index) 2972 break 2973 2974 return self.expression(exp.Group, **elements) # type: ignore 2975 2976 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2977 if not self._match(TokenType.GROUPING_SETS): 2978 return None 2979 2980 return self._parse_wrapped_csv(self._parse_grouping_set) 2981 2982 def _parse_grouping_set(self) -> 
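_parse_group accumulates plain expressions, GROUPING SETS, ROLLUP and CUBE into a single exp.Group node. A small sketch (the schema is illustrative):

    import sqlglot
    from sqlglot import exp

    sql = "SELECT a, b, SUM(c) FROM t GROUP BY GROUPING SETS ((a, b), (a))"
    group = sqlglot.parse_one(sql).find(exp.Group)
    print(group.args["grouping_sets"])  # the two grouping-set tuples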
t.Optional[exp.Expression]: 2983 if self._match(TokenType.L_PAREN): 2984 grouping_set = self._parse_csv(self._parse_column) 2985 self._match_r_paren() 2986 return self.expression(exp.Tuple, expressions=grouping_set) 2987 2988 return self._parse_column() 2989 2990 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2991 if not skip_having_token and not self._match(TokenType.HAVING): 2992 return None 2993 return self.expression(exp.Having, this=self._parse_conjunction()) 2994 2995 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2996 if not self._match(TokenType.QUALIFY): 2997 return None 2998 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2999 3000 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3001 if skip_start_token: 3002 start = None 3003 elif self._match(TokenType.START_WITH): 3004 start = self._parse_conjunction() 3005 else: 3006 return None 3007 3008 self._match(TokenType.CONNECT_BY) 3009 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3010 exp.Prior, this=self._parse_bitwise() 3011 ) 3012 connect = self._parse_conjunction() 3013 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3014 3015 if not start and self._match(TokenType.START_WITH): 3016 start = self._parse_conjunction() 3017 3018 return self.expression(exp.Connect, start=start, connect=connect) 3019 3020 def _parse_order( 3021 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3022 ) -> t.Optional[exp.Expression]: 3023 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3024 return this 3025 3026 return self.expression( 3027 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3028 ) 3029 3030 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3031 if not self._match(token): 3032 return None 3033 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3034 3035 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3036 this = parse_method() if parse_method else self._parse_conjunction() 3037 3038 asc = self._match(TokenType.ASC) 3039 desc = self._match(TokenType.DESC) or (asc and False) 3040 3041 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3042 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3043 3044 nulls_first = is_nulls_first or False 3045 explicitly_null_ordered = is_nulls_first or is_nulls_last 3046 3047 if ( 3048 not explicitly_null_ordered 3049 and ( 3050 (not desc and self.NULL_ORDERING == "nulls_are_small") 3051 or (desc and self.NULL_ORDERING != "nulls_are_small") 3052 ) 3053 and self.NULL_ORDERING != "nulls_are_last" 3054 ): 3055 nulls_first = True 3056 3057 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3058 3059 def _parse_limit( 3060 self, this: t.Optional[exp.Expression] = None, top: bool = False 3061 ) -> t.Optional[exp.Expression]: 3062 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3063 comments = self._prev_comments 3064 if top: 3065 limit_paren = self._match(TokenType.L_PAREN) 3066 expression = self._parse_number() 3067 3068 if limit_paren: 3069 self._match_r_paren() 3070 else: 3071 expression = self._parse_term() 3072 3073 if self._match(TokenType.COMMA): 3074 offset = expression 3075 expression = self._parse_term() 3076 else: 3077 offset = None 3078 3079 limit_exp = self.expression( 3080 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3081 ) 3082 3083 
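_parse_ordered normalizes DESC and the NULLS FIRST/LAST flags, falling back to the dialect's NULL_ORDERING when they are left implicit. A quick sketch:

    import sqlglot
    from sqlglot import exp

    sql = "SELECT * FROM t ORDER BY x DESC NULLS LAST"
    ordered = sqlglot.parse_one(sql).find(exp.Ordered)
    print(ordered.args["desc"], ordered.args["nulls_first"])  # True False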
return limit_exp 3084 3085 if self._match(TokenType.FETCH): 3086 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3087 direction = self._prev.text if direction else "FIRST" 3088 3089 count = self._parse_field(tokens=self.FETCH_TOKENS) 3090 percent = self._match(TokenType.PERCENT) 3091 3092 self._match_set((TokenType.ROW, TokenType.ROWS)) 3093 3094 only = self._match_text_seq("ONLY") 3095 with_ties = self._match_text_seq("WITH", "TIES") 3096 3097 if only and with_ties: 3098 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3099 3100 return self.expression( 3101 exp.Fetch, 3102 direction=direction, 3103 count=count, 3104 percent=percent, 3105 with_ties=with_ties, 3106 ) 3107 3108 return this 3109 3110 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3111 if not self._match(TokenType.OFFSET): 3112 return this 3113 3114 count = self._parse_term() 3115 self._match_set((TokenType.ROW, TokenType.ROWS)) 3116 return self.expression(exp.Offset, this=this, expression=count) 3117 3118 def _parse_locks(self) -> t.List[exp.Lock]: 3119 locks = [] 3120 while True: 3121 if self._match_text_seq("FOR", "UPDATE"): 3122 update = True 3123 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3124 "LOCK", "IN", "SHARE", "MODE" 3125 ): 3126 update = False 3127 else: 3128 break 3129 3130 expressions = None 3131 if self._match_text_seq("OF"): 3132 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3133 3134 wait: t.Optional[bool | exp.Expression] = None 3135 if self._match_text_seq("NOWAIT"): 3136 wait = True 3137 elif self._match_text_seq("WAIT"): 3138 wait = self._parse_primary() 3139 elif self._match_text_seq("SKIP", "LOCKED"): 3140 wait = False 3141 3142 locks.append( 3143 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3144 ) 3145 3146 return locks 3147 3148 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3149 if not self._match_set(self.SET_OPERATIONS): 3150 return this 3151 3152 token_type = self._prev.token_type 3153 3154 if token_type == TokenType.UNION: 3155 expression = exp.Union 3156 elif token_type == TokenType.EXCEPT: 3157 expression = exp.Except 3158 else: 3159 expression = exp.Intersect 3160 3161 return self.expression( 3162 expression, 3163 this=this, 3164 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3165 by_name=self._match_text_seq("BY", "NAME"), 3166 expression=self._parse_set_operations(self._parse_select(nested=True)), 3167 ) 3168 3169 def _parse_expression(self) -> t.Optional[exp.Expression]: 3170 return self._parse_alias(self._parse_conjunction()) 3171 3172 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3173 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3174 3175 def _parse_equality(self) -> t.Optional[exp.Expression]: 3176 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3177 3178 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3179 return self._parse_tokens(self._parse_range, self.COMPARISON) 3180 3181 def _parse_range(self) -> t.Optional[exp.Expression]: 3182 this = self._parse_bitwise() 3183 negate = self._match(TokenType.NOT) 3184 3185 if self._match_set(self.RANGE_PARSERS): 3186 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3187 if not expression: 3188 return this 3189 3190 this = expression 3191 elif self._match(TokenType.ISNULL): 3192 this = self.expression(exp.Is, this=this, 
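Note in _parse_set_operations that a bare UNION counts as distinct unless ALL is given. A one-off sketch:

    import sqlglot

    union = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
    print(type(union).__name__, union.args["distinct"])  # Union True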
expression=exp.Null()) 3193 3194 # Postgres supports ISNULL and NOTNULL for conditions. 3195 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3196 if self._match(TokenType.NOTNULL): 3197 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3198 this = self.expression(exp.Not, this=this) 3199 3200 if negate: 3201 this = self.expression(exp.Not, this=this) 3202 3203 if self._match(TokenType.IS): 3204 this = self._parse_is(this) 3205 3206 return this 3207 3208 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3209 index = self._index - 1 3210 negate = self._match(TokenType.NOT) 3211 3212 if self._match_text_seq("DISTINCT", "FROM"): 3213 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3214 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3215 3216 expression = self._parse_null() or self._parse_boolean() 3217 if not expression: 3218 self._retreat(index) 3219 return None 3220 3221 this = self.expression(exp.Is, this=this, expression=expression) 3222 return self.expression(exp.Not, this=this) if negate else this 3223 3224 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3225 unnest = self._parse_unnest(with_alias=False) 3226 if unnest: 3227 this = self.expression(exp.In, this=this, unnest=unnest) 3228 elif self._match(TokenType.L_PAREN): 3229 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3230 3231 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3232 this = self.expression(exp.In, this=this, query=expressions[0]) 3233 else: 3234 this = self.expression(exp.In, this=this, expressions=expressions) 3235 3236 self._match_r_paren(this) 3237 else: 3238 this = self.expression(exp.In, this=this, field=self._parse_field()) 3239 3240 return this 3241 3242 def _parse_between(self, this: exp.Expression) -> exp.Between: 3243 low = self._parse_bitwise() 3244 self._match(TokenType.AND) 3245 high = self._parse_bitwise() 3246 return self.expression(exp.Between, this=this, low=low, high=high) 3247 3248 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3249 if not self._match(TokenType.ESCAPE): 3250 return this 3251 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3252 3253 def _parse_interval(self) -> t.Optional[exp.Interval]: 3254 index = self._index 3255 3256 if not self._match(TokenType.INTERVAL): 3257 return None 3258 3259 if self._match(TokenType.STRING, advance=False): 3260 this = self._parse_primary() 3261 else: 3262 this = self._parse_term() 3263 3264 if not this: 3265 self._retreat(index) 3266 return None 3267 3268 unit = self._parse_function() or self._parse_var(any_token=True) 3269 3270 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3271 # each INTERVAL expression into this canonical form so it's easy to transpile 3272 if this and this.is_number: 3273 this = exp.Literal.string(this.name) 3274 elif this and this.is_string: 3275 parts = this.name.split() 3276 3277 if len(parts) == 2: 3278 if unit: 3279 # This is not actually a unit, it's something else (e.g. 
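As the comment in _parse_interval explains, forms like INTERVAL '5 day' are rewritten into the canonical literal-plus-unit shape. A sketch (the exact rendering may vary by version):

    import sqlglot

    print(sqlglot.parse_one("SELECT INTERVAL '5 day'").sql())
    # roughly: SELECT INTERVAL '5' day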
a "window side") 3280 unit = None 3281 self._retreat(self._index - 1) 3282 3283 this = exp.Literal.string(parts[0]) 3284 unit = self.expression(exp.Var, this=parts[1]) 3285 3286 return self.expression(exp.Interval, this=this, unit=unit) 3287 3288 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3289 this = self._parse_term() 3290 3291 while True: 3292 if self._match_set(self.BITWISE): 3293 this = self.expression( 3294 self.BITWISE[self._prev.token_type], 3295 this=this, 3296 expression=self._parse_term(), 3297 ) 3298 elif self._match(TokenType.DQMARK): 3299 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3300 elif self._match_pair(TokenType.LT, TokenType.LT): 3301 this = self.expression( 3302 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3303 ) 3304 elif self._match_pair(TokenType.GT, TokenType.GT): 3305 this = self.expression( 3306 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3307 ) 3308 else: 3309 break 3310 3311 return this 3312 3313 def _parse_term(self) -> t.Optional[exp.Expression]: 3314 return self._parse_tokens(self._parse_factor, self.TERM) 3315 3316 def _parse_factor(self) -> t.Optional[exp.Expression]: 3317 return self._parse_tokens(self._parse_unary, self.FACTOR) 3318 3319 def _parse_unary(self) -> t.Optional[exp.Expression]: 3320 if self._match_set(self.UNARY_PARSERS): 3321 return self.UNARY_PARSERS[self._prev.token_type](self) 3322 return self._parse_at_time_zone(self._parse_type()) 3323 3324 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3325 interval = parse_interval and self._parse_interval() 3326 if interval: 3327 return interval 3328 3329 index = self._index 3330 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3331 this = self._parse_column() 3332 3333 if data_type: 3334 if isinstance(this, exp.Literal): 3335 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3336 if parser: 3337 return parser(self, this, data_type) 3338 return self.expression(exp.Cast, this=this, to=data_type) 3339 if not data_type.expressions: 3340 self._retreat(index) 3341 return self._parse_column() 3342 return self._parse_column_ops(data_type) 3343 3344 return this and self._parse_column_ops(this) 3345 3346 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3347 this = self._parse_type() 3348 if not this: 3349 return None 3350 3351 return self.expression( 3352 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3353 ) 3354 3355 def _parse_types( 3356 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3357 ) -> t.Optional[exp.Expression]: 3358 index = self._index 3359 3360 prefix = self._match_text_seq("SYSUDTLIB", ".") 3361 3362 if not self._match_set(self.TYPE_TOKENS): 3363 identifier = allow_identifiers and self._parse_id_var( 3364 any_token=False, tokens=(TokenType.VAR,) 3365 ) 3366 3367 if identifier: 3368 tokens = self._tokenizer.tokenize(identifier.name) 3369 3370 if len(tokens) != 1: 3371 self.raise_error("Unexpected identifier", self._prev) 3372 3373 if tokens[0].token_type in self.TYPE_TOKENS: 3374 self._prev = tokens[0] 3375 elif self.SUPPORTS_USER_DEFINED_TYPES: 3376 type_name = identifier.name 3377 3378 while self._match(TokenType.DOT): 3379 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3380 3381 return exp.DataType.build(type_name, udt=True) 3382 else: 3383 return None 3384 else: 3385 return None 3386 3387 type_token = self._prev.token_type 3388 3389 if type_token 
== TokenType.PSEUDO_TYPE: 3390 return self.expression(exp.PseudoType, this=self._prev.text) 3391 3392 if type_token == TokenType.OBJECT_IDENTIFIER: 3393 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3394 3395 nested = type_token in self.NESTED_TYPE_TOKENS 3396 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3397 expressions = None 3398 maybe_func = False 3399 3400 if self._match(TokenType.L_PAREN): 3401 if is_struct: 3402 expressions = self._parse_csv(self._parse_struct_types) 3403 elif nested: 3404 expressions = self._parse_csv( 3405 lambda: self._parse_types( 3406 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3407 ) 3408 ) 3409 elif type_token in self.ENUM_TYPE_TOKENS: 3410 expressions = self._parse_csv(self._parse_equality) 3411 else: 3412 expressions = self._parse_csv(self._parse_type_size) 3413 3414 if not expressions or not self._match(TokenType.R_PAREN): 3415 self._retreat(index) 3416 return None 3417 3418 maybe_func = True 3419 3420 this: t.Optional[exp.Expression] = None 3421 values: t.Optional[t.List[exp.Expression]] = None 3422 3423 if nested and self._match(TokenType.LT): 3424 if is_struct: 3425 expressions = self._parse_csv(self._parse_struct_types) 3426 else: 3427 expressions = self._parse_csv( 3428 lambda: self._parse_types( 3429 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3430 ) 3431 ) 3432 3433 if not self._match(TokenType.GT): 3434 self.raise_error("Expecting >") 3435 3436 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3437 values = self._parse_csv(self._parse_conjunction) 3438 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3439 3440 if type_token in self.TIMESTAMPS: 3441 if self._match_text_seq("WITH", "TIME", "ZONE"): 3442 maybe_func = False 3443 tz_type = ( 3444 exp.DataType.Type.TIMETZ 3445 if type_token in self.TIMES 3446 else exp.DataType.Type.TIMESTAMPTZ 3447 ) 3448 this = exp.DataType(this=tz_type, expressions=expressions) 3449 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3450 maybe_func = False 3451 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3452 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3453 maybe_func = False 3454 elif type_token == TokenType.INTERVAL: 3455 unit = self._parse_var() 3456 3457 if self._match_text_seq("TO"): 3458 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3459 else: 3460 span = None 3461 3462 if span or not unit: 3463 this = self.expression( 3464 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3465 ) 3466 else: 3467 this = self.expression(exp.Interval, unit=unit) 3468 3469 if maybe_func and check_func: 3470 index2 = self._index 3471 peek = self._parse_string() 3472 3473 if not peek: 3474 self._retreat(index) 3475 return None 3476 3477 self._retreat(index2) 3478 3479 if not this: 3480 if self._match_text_seq("UNSIGNED"): 3481 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3482 if not unsigned_type_token: 3483 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3484 3485 type_token = unsigned_type_token or type_token 3486 3487 this = exp.DataType( 3488 this=exp.DataType.Type[type_token.value], 3489 expressions=expressions, 3490 nested=nested, 3491 values=values, 3492 prefix=prefix, 3493 ) 3494 3495 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3496 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3497 3498 return this 3499 3500 def 
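_parse_types is also what gives CAST its target type, including parameterized types. A minimal sketch:

    import sqlglot
    from sqlglot import exp

    cast = sqlglot.parse_one("SELECT CAST(x AS DECIMAL(10, 2))").find(exp.Cast)
    print(cast.to.this)  # Type.DECIMAL
    print([param.sql() for param in cast.to.expressions])  # ['10', '2']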
_parse_struct_types(self) -> t.Optional[exp.Expression]: 3501 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3502 self._match(TokenType.COLON) 3503 return self._parse_column_def(this) 3504 3505 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3506 if not self._match_text_seq("AT", "TIME", "ZONE"): 3507 return this 3508 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3509 3510 def _parse_column(self) -> t.Optional[exp.Expression]: 3511 this = self._parse_field() 3512 if isinstance(this, exp.Identifier): 3513 this = self.expression(exp.Column, this=this) 3514 elif not this: 3515 return self._parse_bracket(this) 3516 return self._parse_column_ops(this) 3517 3518 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3519 this = self._parse_bracket(this) 3520 3521 while self._match_set(self.COLUMN_OPERATORS): 3522 op_token = self._prev.token_type 3523 op = self.COLUMN_OPERATORS.get(op_token) 3524 3525 if op_token == TokenType.DCOLON: 3526 field = self._parse_types() 3527 if not field: 3528 self.raise_error("Expected type") 3529 elif op and self._curr: 3530 self._advance() 3531 value = self._prev.text 3532 field = ( 3533 exp.Literal.number(value) 3534 if self._prev.token_type == TokenType.NUMBER 3535 else exp.Literal.string(value) 3536 ) 3537 else: 3538 field = self._parse_field(anonymous_func=True, any_token=True) 3539 3540 if isinstance(field, exp.Func): 3541 # bigquery allows function calls like x.y.count(...) 3542 # SAFE.SUBSTR(...) 3543 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3544 this = self._replace_columns_with_dots(this) 3545 3546 if op: 3547 this = op(self, this, field) 3548 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3549 this = self.expression( 3550 exp.Column, 3551 this=field, 3552 table=this.this, 3553 db=this.args.get("table"), 3554 catalog=this.args.get("db"), 3555 ) 3556 else: 3557 this = self.expression(exp.Dot, this=this, expression=field) 3558 this = self._parse_bracket(this) 3559 return this 3560 3561 def _parse_primary(self) -> t.Optional[exp.Expression]: 3562 if self._match_set(self.PRIMARY_PARSERS): 3563 token_type = self._prev.token_type 3564 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3565 3566 if token_type == TokenType.STRING: 3567 expressions = [primary] 3568 while self._match(TokenType.STRING): 3569 expressions.append(exp.Literal.string(self._prev.text)) 3570 3571 if len(expressions) > 1: 3572 return self.expression(exp.Concat, expressions=expressions) 3573 3574 return primary 3575 3576 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3577 return exp.Literal.number(f"0.{self._prev.text}") 3578 3579 if self._match(TokenType.L_PAREN): 3580 comments = self._prev_comments 3581 query = self._parse_select() 3582 3583 if query: 3584 expressions = [query] 3585 else: 3586 expressions = self._parse_expressions() 3587 3588 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3589 3590 if isinstance(this, exp.Subqueryable): 3591 this = self._parse_set_operations( 3592 self._parse_subquery(this=this, parse_alias=False) 3593 ) 3594 elif len(expressions) > 1: 3595 this = self.expression(exp.Tuple, expressions=expressions) 3596 else: 3597 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3598 3599 if this: 3600 this.add_comments(comments) 3601 3602 self._match_r_paren(expression=this) 3603 return this 3604 3605 
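The DCOLON branch of _parse_column_ops is what turns the Postgres :: operator into a regular cast. A quick sketch:

    import sqlglot
    from sqlglot import exp

    cast = sqlglot.parse_one("SELECT a::INT FROM t", read="postgres").find(exp.Cast)
    print(cast.sql())  # CAST(a AS INT)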
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)
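    # Illustrative sketch (public API; assumes MY_UDF is not a registered
    # function): when _parse_function above finds no parser or builder for a
    # name, it falls back to exp.Anonymous, so unknown functions still round-trip:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> node = sqlglot.parse_one("SELECT MY_UDF(1, 2)").selects[0]
    #     >>> isinstance(node, exp.Anonymous), node.sql()
    #     (True, 'MY_UDF(1, 2)')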
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()
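    # Illustrative sketch (public API; assumes a lambda-capable dialect such as
    # Trino): _parse_lambda above recognizes "x -> ..." heads and backtracks via
    # _retreat when a parenthesized list turns out not to be a lambda:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT TRANSFORM(l, x -> x + 1)", read="trino").sql(dialect="trino")
    #     'SELECT TRANSFORM(l, x -> x + 1)'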
    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)
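    # Illustrative sketch (public API): the column constraint machinery above is
    # exercised through ordinary DDL, which should round-trip unchanged:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CREATE TABLE t (id INT NOT NULL, name TEXT DEFAULT 'n/a')").sql()
    #     "CREATE TABLE t (id INT NOT NULL, name TEXT DEFAULT 'n/a')"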
    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
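    # Illustrative sketch (public API): _parse_foreign_key above stores each
    # referential action under its lowercased ON-clause kind:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ddl = "CREATE TABLE t (a INT, FOREIGN KEY (a) REFERENCES s (b) ON DELETE CASCADE)"
    #     >>> sqlglot.parse_one(ddl).find(exp.ForeignKey).args["delete"]
    #     'CASCADE'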
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
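    # Illustrative sketch (public API; assumes the usual presto/spark index
    # offsets): apply_index_offset in _parse_bracket above is what lets sqlglot
    # translate between 1-indexed and 0-indexed array dialects:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.transpile("SELECT x[1]", read="presto", write="spark")[0]
    #     'SELECT x[0]'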
    def _parse_any_value(self) -> exp.AnyValue:
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )
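    # Illustrative sketch (public API; assumes a dialect where
    # CONCAT_NULL_OUTPUTS_STRING is disabled): per the comment in _parse_concat
    # above, a single-argument CONCAT collapses into its argument at parse time:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> isinstance(sqlglot.parse_one("SELECT CONCAT(x)").selects[0], exp.Column)
    #     True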
    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)
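    # Illustrative sketch (public API): _parse_string_agg above normalizes
    # STRING_AGG into exp.GroupConcat, which is what makes transpilation to
    # MySQL's / SQLite's GROUP_CONCAT straightforward:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> node = sqlglot.parse_one("SELECT STRING_AGG(x, ',')", read="postgres").selects[0]
    #     >>> isinstance(node, exp.GroupConcat)
    #     True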
    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None
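    # Illustrative sketch (public API): as the docstring of _parse_decode above
    # explains, the search/result variant is lowered into a CASE expression:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other')", read="oracle").sql()
    #     "SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END"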
    def _parse_json_object(self) -> exp.JSONObject:
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
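    # Illustrative sketch (public API; assumes the MySQL dialect routes MATCH to
    # _parse_match_against): the full-text modifiers above are kept as plain strings:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> q = "SELECT MATCH(a, b) AGAINST('q' IN BOOLEAN MODE)"
    #     >>> sqlglot.parse_one(q, read="mysql").find(exp.MatchAgainst).args["modifier"]
    #     'IN BOOLEAN MODE'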
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )
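    # Illustrative sketch (public API): _parse_trim above swaps the operands of
    # the ANSI "TRIM(LEADING pattern FROM string)" form back into
    # (this, expression) order:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> trim = sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM y)").find(exp.Trim)
    #     >>> trim.this.sql(), trim.args["position"]
    #     ('y', 'LEADING')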
    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this
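    # Illustrative sketch (public API): _parse_respect_or_ignore_nulls above
    # wraps the preceding expression, so both "FIRST_VALUE(x IGNORE NULLS)" and
    # "FIRST_VALUE(x) IGNORE NULLS" (see _parse_window below) yield the same node:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y)")
    #     >>> ast.find(exp.IgnoreNulls) is not None
    #     True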
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER;
        # some dialects choose to implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity:
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # BigQuery allows selecting from a named window, e.g. window x AS (PARTITION BY ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
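    # Illustrative sketch (public API): _parse_window and _parse_window_spec
    # above cover explicit frames, which should round-trip unchanged:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one(
    #     ...     "SELECT SUM(x) OVER (ORDER BY y ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)"
    #     ... ).sql()
    #     'SELECT SUM(x) OVER (ORDER BY y ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)'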
    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items
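    # Illustrative sketch (public API): _parse_except and _parse_replace above
    # implement BigQuery-style star modifiers:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT * EXCEPT (a) FROM t", read="bigquery").sql(dialect="bigquery")
    #     'SELECT * EXCEPT (a) FROM t'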
    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
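    # Illustrative sketch (public API): _parse_add_column above feeds the
    # ALTER TABLE machinery defined further down:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT").sql()
    #     'ALTER TABLE t ADD COLUMN c INT'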
    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)
self._match_text_seq("ONLY") 4998 this = self._parse_table(schema=True) 4999 5000 if self._next: 5001 self._advance() 5002 5003 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5004 if parser: 5005 actions = ensure_list(parser(self)) 5006 5007 if not self._curr: 5008 return self.expression( 5009 exp.AlterTable, 5010 this=this, 5011 exists=exists, 5012 actions=actions, 5013 only=only, 5014 ) 5015 5016 return self._parse_as_command(start) 5017 5018 def _parse_merge(self) -> exp.Merge: 5019 self._match(TokenType.INTO) 5020 target = self._parse_table() 5021 5022 if target and self._match(TokenType.ALIAS, advance=False): 5023 target.set("alias", self._parse_table_alias()) 5024 5025 self._match(TokenType.USING) 5026 using = self._parse_table() 5027 5028 self._match(TokenType.ON) 5029 on = self._parse_conjunction() 5030 5031 return self.expression( 5032 exp.Merge, 5033 this=target, 5034 using=using, 5035 on=on, 5036 expressions=self._parse_when_matched(), 5037 ) 5038 5039 def _parse_when_matched(self) -> t.List[exp.When]: 5040 whens = [] 5041 5042 while self._match(TokenType.WHEN): 5043 matched = not self._match(TokenType.NOT) 5044 self._match_text_seq("MATCHED") 5045 source = ( 5046 False 5047 if self._match_text_seq("BY", "TARGET") 5048 else self._match_text_seq("BY", "SOURCE") 5049 ) 5050 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5051 5052 self._match(TokenType.THEN) 5053 5054 if self._match(TokenType.INSERT): 5055 _this = self._parse_star() 5056 if _this: 5057 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5058 else: 5059 then = self.expression( 5060 exp.Insert, 5061 this=self._parse_value(), 5062 expression=self._match(TokenType.VALUES) and self._parse_value(), 5063 ) 5064 elif self._match(TokenType.UPDATE): 5065 expressions = self._parse_star() 5066 if expressions: 5067 then = self.expression(exp.Update, expressions=expressions) 5068 else: 5069 then = self.expression( 5070 exp.Update, 5071 expressions=self._match(TokenType.SET) 5072 and self._parse_csv(self._parse_equality), 5073 ) 5074 elif self._match(TokenType.DELETE): 5075 then = self.expression(exp.Var, this=self._prev.text) 5076 else: 5077 then = None 5078 5079 whens.append( 5080 self.expression( 5081 exp.When, 5082 matched=matched, 5083 source=source, 5084 condition=condition, 5085 then=then, 5086 ) 5087 ) 5088 return whens 5089 5090 def _parse_show(self) -> t.Optional[exp.Expression]: 5091 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5092 if parser: 5093 return parser(self) 5094 return self._parse_as_command(self._prev) 5095 5096 def _parse_set_item_assignment( 5097 self, kind: t.Optional[str] = None 5098 ) -> t.Optional[exp.Expression]: 5099 index = self._index 5100 5101 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 5102 return self._parse_set_transaction(global_=kind == "GLOBAL") 5103 5104 left = self._parse_primary() or self._parse_id_var() 5105 assignment_delimiter = self._match_texts(("=", "TO")) 5106 5107 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5108 self._retreat(index) 5109 return None 5110 5111 right = self._parse_statement() or self._parse_id_var() 5112 this = self.expression(exp.EQ, this=left, expression=right) 5113 5114 return self.expression(exp.SetItem, this=this, kind=kind) 5115 5116 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5117 self._match_text_seq("TRANSACTION") 5118 characteristics = 
    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None
    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)

        return node

    def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]:
        return [
            exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
            for value in values
            if value
        ]
21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMP_S, 165 TokenType.TIMESTAMP_MS, 166 TokenType.TIMESTAMP_NS, 167 TokenType.TIMESTAMPTZ, 168 TokenType.TIMESTAMPLTZ, 169 TokenType.DATETIME, 170 TokenType.DATETIME64, 171 TokenType.DATE, 172 TokenType.INT4RANGE, 173 TokenType.INT4MULTIRANGE, 174 TokenType.INT8RANGE, 175 TokenType.INT8MULTIRANGE, 176 TokenType.NUMRANGE, 177 TokenType.NUMMULTIRANGE, 178 TokenType.TSRANGE, 179 TokenType.TSMULTIRANGE, 180 TokenType.TSTZRANGE, 181 TokenType.TSTZMULTIRANGE, 182 TokenType.DATERANGE, 183 TokenType.DATEMULTIRANGE, 184 TokenType.DECIMAL, 185 TokenType.UDECIMAL, 186 TokenType.BIGDECIMAL, 187 TokenType.UUID, 188 TokenType.GEOGRAPHY, 189 TokenType.GEOMETRY, 190 TokenType.HLLSKETCH, 191 TokenType.HSTORE, 192 
TokenType.PSEUDO_TYPE, 193 TokenType.SUPER, 194 TokenType.SERIAL, 195 TokenType.SMALLSERIAL, 196 TokenType.BIGSERIAL, 197 TokenType.XML, 198 TokenType.YEAR, 199 TokenType.UNIQUEIDENTIFIER, 200 TokenType.USERDEFINED, 201 TokenType.MONEY, 202 TokenType.SMALLMONEY, 203 TokenType.ROWVERSION, 204 TokenType.IMAGE, 205 TokenType.VARIANT, 206 TokenType.OBJECT, 207 TokenType.OBJECT_IDENTIFIER, 208 TokenType.INET, 209 TokenType.IPADDRESS, 210 TokenType.IPPREFIX, 211 TokenType.UNKNOWN, 212 TokenType.NULL, 213 *ENUM_TYPE_TOKENS, 214 *NESTED_TYPE_TOKENS, 215 } 216 217 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 218 TokenType.BIGINT: TokenType.UBIGINT, 219 TokenType.INT: TokenType.UINT, 220 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 221 TokenType.SMALLINT: TokenType.USMALLINT, 222 TokenType.TINYINT: TokenType.UTINYINT, 223 TokenType.DECIMAL: TokenType.UDECIMAL, 224 } 225 226 SUBQUERY_PREDICATES = { 227 TokenType.ANY: exp.Any, 228 TokenType.ALL: exp.All, 229 TokenType.EXISTS: exp.Exists, 230 TokenType.SOME: exp.Any, 231 } 232 233 RESERVED_KEYWORDS = { 234 *Tokenizer.SINGLE_TOKENS.values(), 235 TokenType.SELECT, 236 } 237 238 DB_CREATABLES = { 239 TokenType.DATABASE, 240 TokenType.SCHEMA, 241 TokenType.TABLE, 242 TokenType.VIEW, 243 TokenType.MODEL, 244 TokenType.DICTIONARY, 245 } 246 247 CREATABLES = { 248 TokenType.COLUMN, 249 TokenType.FUNCTION, 250 TokenType.INDEX, 251 TokenType.PROCEDURE, 252 *DB_CREATABLES, 253 } 254 255 # Tokens that can represent identifiers 256 ID_VAR_TOKENS = { 257 TokenType.VAR, 258 TokenType.ANTI, 259 TokenType.APPLY, 260 TokenType.ASC, 261 TokenType.AUTO_INCREMENT, 262 TokenType.BEGIN, 263 TokenType.CACHE, 264 TokenType.CASE, 265 TokenType.COLLATE, 266 TokenType.COMMAND, 267 TokenType.COMMENT, 268 TokenType.COMMIT, 269 TokenType.CONSTRAINT, 270 TokenType.DEFAULT, 271 TokenType.DELETE, 272 TokenType.DESC, 273 TokenType.DESCRIBE, 274 TokenType.DICTIONARY, 275 TokenType.DIV, 276 TokenType.END, 277 TokenType.EXECUTE, 278 TokenType.ESCAPE, 279 TokenType.FALSE, 280 TokenType.FIRST, 281 TokenType.FILTER, 282 TokenType.FORMAT, 283 TokenType.FULL, 284 TokenType.IS, 285 TokenType.ISNULL, 286 TokenType.INTERVAL, 287 TokenType.KEEP, 288 TokenType.KILL, 289 TokenType.LEFT, 290 TokenType.LOAD, 291 TokenType.MERGE, 292 TokenType.NATURAL, 293 TokenType.NEXT, 294 TokenType.OFFSET, 295 TokenType.ORDINALITY, 296 TokenType.OVERLAPS, 297 TokenType.OVERWRITE, 298 TokenType.PARTITION, 299 TokenType.PERCENT, 300 TokenType.PIVOT, 301 TokenType.PRAGMA, 302 TokenType.RANGE, 303 TokenType.REFERENCES, 304 TokenType.RIGHT, 305 TokenType.ROW, 306 TokenType.ROWS, 307 TokenType.SEMI, 308 TokenType.SET, 309 TokenType.SETTINGS, 310 TokenType.SHOW, 311 TokenType.TEMPORARY, 312 TokenType.TOP, 313 TokenType.TRUE, 314 TokenType.UNIQUE, 315 TokenType.UNPIVOT, 316 TokenType.UPDATE, 317 TokenType.VOLATILE, 318 TokenType.WINDOW, 319 *CREATABLES, 320 *SUBQUERY_PREDICATES, 321 *TYPE_TOKENS, 322 *NO_PAREN_FUNCTIONS, 323 } 324 325 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 326 327 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 328 TokenType.ANTI, 329 TokenType.APPLY, 330 TokenType.ASOF, 331 TokenType.FULL, 332 TokenType.LEFT, 333 TokenType.LOCK, 334 TokenType.NATURAL, 335 TokenType.OFFSET, 336 TokenType.RIGHT, 337 TokenType.SEMI, 338 TokenType.WINDOW, 339 } 340 341 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 342 343 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 344 345 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 346 347 FUNC_TOKENS = { 348 TokenType.COLLATE, 349 TokenType.COMMAND, 350 
TokenType.CURRENT_DATE, 351 TokenType.CURRENT_DATETIME, 352 TokenType.CURRENT_TIMESTAMP, 353 TokenType.CURRENT_TIME, 354 TokenType.CURRENT_USER, 355 TokenType.FILTER, 356 TokenType.FIRST, 357 TokenType.FORMAT, 358 TokenType.GLOB, 359 TokenType.IDENTIFIER, 360 TokenType.INDEX, 361 TokenType.ISNULL, 362 TokenType.ILIKE, 363 TokenType.INSERT, 364 TokenType.LIKE, 365 TokenType.MERGE, 366 TokenType.OFFSET, 367 TokenType.PRIMARY_KEY, 368 TokenType.RANGE, 369 TokenType.REPLACE, 370 TokenType.RLIKE, 371 TokenType.ROW, 372 TokenType.UNNEST, 373 TokenType.VAR, 374 TokenType.LEFT, 375 TokenType.RIGHT, 376 TokenType.DATE, 377 TokenType.DATETIME, 378 TokenType.TABLE, 379 TokenType.TIMESTAMP, 380 TokenType.TIMESTAMPTZ, 381 TokenType.WINDOW, 382 TokenType.XOR, 383 *TYPE_TOKENS, 384 *SUBQUERY_PREDICATES, 385 } 386 387 CONJUNCTION = { 388 TokenType.AND: exp.And, 389 TokenType.OR: exp.Or, 390 } 391 392 EQUALITY = { 393 TokenType.EQ: exp.EQ, 394 TokenType.NEQ: exp.NEQ, 395 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 396 } 397 398 COMPARISON = { 399 TokenType.GT: exp.GT, 400 TokenType.GTE: exp.GTE, 401 TokenType.LT: exp.LT, 402 TokenType.LTE: exp.LTE, 403 } 404 405 BITWISE = { 406 TokenType.AMP: exp.BitwiseAnd, 407 TokenType.CARET: exp.BitwiseXor, 408 TokenType.PIPE: exp.BitwiseOr, 409 TokenType.DPIPE: exp.DPipe, 410 } 411 412 TERM = { 413 TokenType.DASH: exp.Sub, 414 TokenType.PLUS: exp.Add, 415 TokenType.MOD: exp.Mod, 416 TokenType.COLLATE: exp.Collate, 417 } 418 419 FACTOR = { 420 TokenType.DIV: exp.IntDiv, 421 TokenType.LR_ARROW: exp.Distance, 422 TokenType.SLASH: exp.Div, 423 TokenType.STAR: exp.Mul, 424 } 425 426 TIMES = { 427 TokenType.TIME, 428 TokenType.TIMETZ, 429 } 430 431 TIMESTAMPS = { 432 TokenType.TIMESTAMP, 433 TokenType.TIMESTAMPTZ, 434 TokenType.TIMESTAMPLTZ, 435 *TIMES, 436 } 437 438 SET_OPERATIONS = { 439 TokenType.UNION, 440 TokenType.INTERSECT, 441 TokenType.EXCEPT, 442 } 443 444 JOIN_METHODS = { 445 TokenType.NATURAL, 446 TokenType.ASOF, 447 } 448 449 JOIN_SIDES = { 450 TokenType.LEFT, 451 TokenType.RIGHT, 452 TokenType.FULL, 453 } 454 455 JOIN_KINDS = { 456 TokenType.INNER, 457 TokenType.OUTER, 458 TokenType.CROSS, 459 TokenType.SEMI, 460 TokenType.ANTI, 461 } 462 463 JOIN_HINTS: t.Set[str] = set() 464 465 LAMBDAS = { 466 TokenType.ARROW: lambda self, expressions: self.expression( 467 exp.Lambda, 468 this=self._replace_lambda( 469 self._parse_conjunction(), 470 {node.name for node in expressions}, 471 ), 472 expressions=expressions, 473 ), 474 TokenType.FARROW: lambda self, expressions: self.expression( 475 exp.Kwarg, 476 this=exp.var(expressions[0].name), 477 expression=self._parse_conjunction(), 478 ), 479 } 480 481 COLUMN_OPERATORS = { 482 TokenType.DOT: None, 483 TokenType.DCOLON: lambda self, this, to: self.expression( 484 exp.Cast if self.STRICT_CAST else exp.TryCast, 485 this=this, 486 to=to, 487 ), 488 TokenType.ARROW: lambda self, this, path: self.expression( 489 exp.JSONExtract, 490 this=this, 491 expression=path, 492 ), 493 TokenType.DARROW: lambda self, this, path: self.expression( 494 exp.JSONExtractScalar, 495 this=this, 496 expression=path, 497 ), 498 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 499 exp.JSONBExtract, 500 this=this, 501 expression=path, 502 ), 503 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 504 exp.JSONBExtractScalar, 505 this=this, 506 expression=path, 507 ), 508 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 509 exp.JSONBContains, 510 this=this, 511 expression=key, 512 ), 513 } 514 515 
EXPRESSION_PARSERS = { 516 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 517 exp.Column: lambda self: self._parse_column(), 518 exp.Condition: lambda self: self._parse_conjunction(), 519 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 520 exp.Expression: lambda self: self._parse_statement(), 521 exp.From: lambda self: self._parse_from(), 522 exp.Group: lambda self: self._parse_group(), 523 exp.Having: lambda self: self._parse_having(), 524 exp.Identifier: lambda self: self._parse_id_var(), 525 exp.Join: lambda self: self._parse_join(), 526 exp.Lambda: lambda self: self._parse_lambda(), 527 exp.Lateral: lambda self: self._parse_lateral(), 528 exp.Limit: lambda self: self._parse_limit(), 529 exp.Offset: lambda self: self._parse_offset(), 530 exp.Order: lambda self: self._parse_order(), 531 exp.Ordered: lambda self: self._parse_ordered(), 532 exp.Properties: lambda self: self._parse_properties(), 533 exp.Qualify: lambda self: self._parse_qualify(), 534 exp.Returning: lambda self: self._parse_returning(), 535 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 536 exp.Table: lambda self: self._parse_table_parts(), 537 exp.TableAlias: lambda self: self._parse_table_alias(), 538 exp.Where: lambda self: self._parse_where(), 539 exp.Window: lambda self: self._parse_named_window(), 540 exp.With: lambda self: self._parse_with(), 541 "JOIN_TYPE": lambda self: self._parse_join_parts(), 542 } 543 544 STATEMENT_PARSERS = { 545 TokenType.ALTER: lambda self: self._parse_alter(), 546 TokenType.BEGIN: lambda self: self._parse_transaction(), 547 TokenType.CACHE: lambda self: self._parse_cache(), 548 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 549 TokenType.COMMENT: lambda self: self._parse_comment(), 550 TokenType.CREATE: lambda self: self._parse_create(), 551 TokenType.DELETE: lambda self: self._parse_delete(), 552 TokenType.DESC: lambda self: self._parse_describe(), 553 TokenType.DESCRIBE: lambda self: self._parse_describe(), 554 TokenType.DROP: lambda self: self._parse_drop(), 555 TokenType.INSERT: lambda self: self._parse_insert(), 556 TokenType.KILL: lambda self: self._parse_kill(), 557 TokenType.LOAD: lambda self: self._parse_load(), 558 TokenType.MERGE: lambda self: self._parse_merge(), 559 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 560 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 561 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 562 TokenType.SET: lambda self: self._parse_set(), 563 TokenType.UNCACHE: lambda self: self._parse_uncache(), 564 TokenType.UPDATE: lambda self: self._parse_update(), 565 TokenType.USE: lambda self: self.expression( 566 exp.Use, 567 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 568 and exp.var(self._prev.text), 569 this=self._parse_table(schema=False), 570 ), 571 } 572 573 UNARY_PARSERS = { 574 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 575 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 576 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 577 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 578 } 579 580 PRIMARY_PARSERS = { 581 TokenType.STRING: lambda self, token: self.expression( 582 exp.Literal, this=token.text, is_string=True 583 ), 584 TokenType.NUMBER: lambda self, token: self.expression( 585 exp.Literal, this=token.text, 
is_string=False 586 ), 587 TokenType.STAR: lambda self, _: self.expression( 588 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 589 ), 590 TokenType.NULL: lambda self, _: self.expression(exp.Null), 591 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 592 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 593 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 594 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 595 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 596 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 597 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 598 exp.National, this=token.text 599 ), 600 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 601 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 602 exp.RawString, this=token.text 603 ), 604 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 605 } 606 607 PLACEHOLDER_PARSERS = { 608 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 609 TokenType.PARAMETER: lambda self: self._parse_parameter(), 610 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 611 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 612 else None, 613 } 614 615 RANGE_PARSERS = { 616 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 617 TokenType.GLOB: binary_range_parser(exp.Glob), 618 TokenType.ILIKE: binary_range_parser(exp.ILike), 619 TokenType.IN: lambda self, this: self._parse_in(this), 620 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 621 TokenType.IS: lambda self, this: self._parse_is(this), 622 TokenType.LIKE: binary_range_parser(exp.Like), 623 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 624 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 625 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 626 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 627 } 628 629 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 630 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 631 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 632 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 633 "CHARACTER SET": lambda self: self._parse_character_set(), 634 "CHECKSUM": lambda self: self._parse_checksum(), 635 "CLUSTER BY": lambda self: self._parse_cluster(), 636 "CLUSTERED": lambda self: self._parse_clustered_by(), 637 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 638 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 639 "COPY": lambda self: self._parse_copy_property(), 640 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 641 "DEFINER": lambda self: self._parse_definer(), 642 "DETERMINISTIC": lambda self: self.expression( 643 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 644 ), 645 "DISTKEY": lambda self: self._parse_distkey(), 646 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 647 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 648 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 649 "EXTERNAL": lambda self: 
self.expression(exp.ExternalProperty), 650 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 651 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 652 "FREESPACE": lambda self: self._parse_freespace(), 653 "HEAP": lambda self: self.expression(exp.HeapProperty), 654 "IMMUTABLE": lambda self: self.expression( 655 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 656 ), 657 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 658 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 659 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 660 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 661 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 662 "LIKE": lambda self: self._parse_create_like(), 663 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 664 "LOCK": lambda self: self._parse_locking(), 665 "LOCKING": lambda self: self._parse_locking(), 666 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 667 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 668 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 669 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 670 "NO": lambda self: self._parse_no_property(), 671 "ON": lambda self: self._parse_on_property(), 672 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 673 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 674 "PARTITION BY": lambda self: self._parse_partitioned_by(), 675 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 676 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 677 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 678 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 679 "REMOTE": lambda self: self._parse_remote_with_connection(), 680 "RETURNS": lambda self: self._parse_returns(), 681 "ROW": lambda self: self._parse_row(), 682 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 683 "SAMPLE": lambda self: self.expression( 684 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 685 ), 686 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 687 "SETTINGS": lambda self: self.expression( 688 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 689 ), 690 "SORTKEY": lambda self: self._parse_sortkey(), 691 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 692 "STABLE": lambda self: self.expression( 693 exp.StabilityProperty, this=exp.Literal.string("STABLE") 694 ), 695 "STORED": lambda self: self._parse_stored(), 696 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 697 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 698 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 699 "TO": lambda self: self._parse_to_table(), 700 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 701 "TRANSFORM": lambda self: self.expression( 702 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 703 ), 704 "TTL": lambda self: self._parse_ttl(), 705 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 706 "VOLATILE": lambda self: self._parse_volatile_property(), 707 "WITH": lambda self: self._parse_with_property(), 708 } 709 710 CONSTRAINT_PARSERS 
= { 711 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 712 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 713 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 714 "CHARACTER SET": lambda self: self.expression( 715 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 716 ), 717 "CHECK": lambda self: self.expression( 718 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 719 ), 720 "COLLATE": lambda self: self.expression( 721 exp.CollateColumnConstraint, this=self._parse_var() 722 ), 723 "COMMENT": lambda self: self.expression( 724 exp.CommentColumnConstraint, this=self._parse_string() 725 ), 726 "COMPRESS": lambda self: self._parse_compress(), 727 "CLUSTERED": lambda self: self.expression( 728 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 729 ), 730 "NONCLUSTERED": lambda self: self.expression( 731 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 732 ), 733 "DEFAULT": lambda self: self.expression( 734 exp.DefaultColumnConstraint, this=self._parse_bitwise() 735 ), 736 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 737 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 738 "FORMAT": lambda self: self.expression( 739 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 740 ), 741 "GENERATED": lambda self: self._parse_generated_as_identity(), 742 "IDENTITY": lambda self: self._parse_auto_increment(), 743 "INLINE": lambda self: self._parse_inline(), 744 "LIKE": lambda self: self._parse_create_like(), 745 "NOT": lambda self: self._parse_not_constraint(), 746 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 747 "ON": lambda self: ( 748 self._match(TokenType.UPDATE) 749 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 750 ) 751 or self.expression(exp.OnProperty, this=self._parse_id_var()), 752 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 753 "PRIMARY KEY": lambda self: self._parse_primary_key(), 754 "REFERENCES": lambda self: self._parse_references(match=False), 755 "TITLE": lambda self: self.expression( 756 exp.TitleColumnConstraint, this=self._parse_var_or_string() 757 ), 758 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 759 "UNIQUE": lambda self: self._parse_unique(), 760 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 761 "WITH": lambda self: self.expression( 762 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 763 ), 764 } 765 766 ALTER_PARSERS = { 767 "ADD": lambda self: self._parse_alter_table_add(), 768 "ALTER": lambda self: self._parse_alter_table_alter(), 769 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 770 "DROP": lambda self: self._parse_alter_table_drop(), 771 "RENAME": lambda self: self._parse_alter_table_rename(), 772 } 773 774 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 775 776 NO_PAREN_FUNCTION_PARSERS = { 777 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 778 "CASE": lambda self: self._parse_case(), 779 "IF": lambda self: self._parse_if(), 780 "NEXT": lambda self: self._parse_next_value_for(), 781 } 782 783 INVALID_FUNC_NAME_TOKENS = { 784 TokenType.IDENTIFIER, 785 TokenType.STRING, 786 } 787 788 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 
789 790 FUNCTION_PARSERS = { 791 "ANY_VALUE": lambda self: self._parse_any_value(), 792 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 793 "CONCAT": lambda self: self._parse_concat(), 794 "CONCAT_WS": lambda self: self._parse_concat_ws(), 795 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 796 "DECODE": lambda self: self._parse_decode(), 797 "EXTRACT": lambda self: self._parse_extract(), 798 "JSON_OBJECT": lambda self: self._parse_json_object(), 799 "JSON_TABLE": lambda self: self._parse_json_table(), 800 "LOG": lambda self: self._parse_logarithm(), 801 "MATCH": lambda self: self._parse_match_against(), 802 "OPENJSON": lambda self: self._parse_open_json(), 803 "POSITION": lambda self: self._parse_position(), 804 "PREDICT": lambda self: self._parse_predict(), 805 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 806 "STRING_AGG": lambda self: self._parse_string_agg(), 807 "SUBSTRING": lambda self: self._parse_substring(), 808 "TRIM": lambda self: self._parse_trim(), 809 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 810 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 811 } 812 813 QUERY_MODIFIER_PARSERS = { 814 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 815 TokenType.WHERE: lambda self: ("where", self._parse_where()), 816 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 817 TokenType.HAVING: lambda self: ("having", self._parse_having()), 818 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 819 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 820 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 821 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 822 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 823 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 824 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 825 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 826 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 827 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 828 TokenType.CLUSTER_BY: lambda self: ( 829 "cluster", 830 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 831 ), 832 TokenType.DISTRIBUTE_BY: lambda self: ( 833 "distribute", 834 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 835 ), 836 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 837 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 838 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 839 } 840 841 SET_PARSERS = { 842 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 843 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 844 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 845 "TRANSACTION": lambda self: self._parse_set_transaction(), 846 } 847 848 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 849 850 TYPE_LITERAL_PARSERS = { 851 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 852 } 853 854 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 855 856 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 857 858 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 859 860 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 861 
TRANSACTION_CHARACTERISTICS = { 862 "ISOLATION LEVEL REPEATABLE READ", 863 "ISOLATION LEVEL READ COMMITTED", 864 "ISOLATION LEVEL READ UNCOMMITTED", 865 "ISOLATION LEVEL SERIALIZABLE", 866 "READ WRITE", 867 "READ ONLY", 868 } 869 870 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 871 872 CLONE_KEYWORDS = {"CLONE", "COPY"} 873 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 874 875 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 876 877 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 878 879 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 880 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 881 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 882 883 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 884 885 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 886 887 DISTINCT_TOKENS = {TokenType.DISTINCT} 888 889 NULL_TOKENS = {TokenType.NULL} 890 891 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 892 893 STRICT_CAST = True 894 895 # A NULL arg in CONCAT yields NULL by default 896 CONCAT_NULL_OUTPUTS_STRING = False 897 898 PREFIXED_PIVOT_COLUMNS = False 899 IDENTIFY_PIVOT_STRINGS = False 900 901 LOG_BASE_FIRST = True 902 LOG_DEFAULTS_TO_LN = False 903 904 # Whether or not ADD is present for each column added by ALTER TABLE 905 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 906 907 # Whether or not the table sample clause expects CSV syntax 908 TABLESAMPLE_CSV = False 909 910 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments 911 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 912 913 # Whether the TRIM function expects the characters to trim as its first argument 914 TRIM_PATTERN_FIRST = False 915 916 __slots__ = ( 917 "error_level", 918 "error_message_context", 919 "max_errors", 920 "sql", 921 "errors", 922 "_tokens", 923 "_index", 924 "_curr", 925 "_next", 926 "_prev", 927 "_prev_comments", 928 "_tokenizer", 929 ) 930 931 # Autofilled 932 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 933 INDEX_OFFSET: int = 0 934 UNNEST_COLUMN_ONLY: bool = False 935 ALIAS_POST_TABLESAMPLE: bool = False 936 STRICT_STRING_CONCAT = False 937 SUPPORTS_USER_DEFINED_TYPES = True 938 NORMALIZE_FUNCTIONS = "upper" 939 NULL_ORDERING: str = "nulls_are_small" 940 SHOW_TRIE: t.Dict = {} 941 SET_TRIE: t.Dict = {} 942 FORMAT_MAPPING: t.Dict[str, str] = {} 943 FORMAT_TRIE: t.Dict = {} 944 TIME_MAPPING: t.Dict[str, str] = {} 945 TIME_TRIE: t.Dict = {} 946 947 def __init__( 948 self, 949 error_level: t.Optional[ErrorLevel] = None, 950 error_message_context: int = 100, 951 max_errors: int = 3, 952 ): 953 self.error_level = error_level or ErrorLevel.IMMEDIATE 954 self.error_message_context = error_message_context 955 self.max_errors = max_errors 956 self._tokenizer = self.TOKENIZER_CLASS() 957 self.reset() 958 959 def reset(self): 960 self.sql = "" 961 self.errors = [] 962 self._tokens = [] 963 self._index = 0 964 self._curr = None 965 self._next = None 966 self._prev = None 967 self._prev_comments = None 968 969 def parse( 970 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 971 ) -> t.List[t.Optional[exp.Expression]]: 972 """ 973 Parses a list of tokens and returns a list of syntax trees, one tree 974 per parsed SQL statement. 975 976 Args: 977 raw_tokens: The list of tokens. 978 sql: The original SQL string, used to produce helpful debug messages. 979 980 Returns: 981 The list of the produced syntax trees. 
982 """ 983 return self._parse( 984 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 985 ) 986 987 def parse_into( 988 self, 989 expression_types: exp.IntoType, 990 raw_tokens: t.List[Token], 991 sql: t.Optional[str] = None, 992 ) -> t.List[t.Optional[exp.Expression]]: 993 """ 994 Parses a list of tokens into a given Expression type. If a collection of Expression 995 types is given instead, this method will try to parse the token list into each one 996 of them, stopping at the first for which the parsing succeeds. 997 998 Args: 999 expression_types: The expression type(s) to try and parse the token list into. 1000 raw_tokens: The list of tokens. 1001 sql: The original SQL string, used to produce helpful debug messages. 1002 1003 Returns: 1004 The target Expression. 1005 """ 1006 errors = [] 1007 for expression_type in ensure_list(expression_types): 1008 parser = self.EXPRESSION_PARSERS.get(expression_type) 1009 if not parser: 1010 raise TypeError(f"No parser registered for {expression_type}") 1011 1012 try: 1013 return self._parse(parser, raw_tokens, sql) 1014 except ParseError as e: 1015 e.errors[0]["into_expression"] = expression_type 1016 errors.append(e) 1017 1018 raise ParseError( 1019 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1020 errors=merge_errors(errors), 1021 ) from errors[-1] 1022 1023 def _parse( 1024 self, 1025 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1026 raw_tokens: t.List[Token], 1027 sql: t.Optional[str] = None, 1028 ) -> t.List[t.Optional[exp.Expression]]: 1029 self.reset() 1030 self.sql = sql or "" 1031 1032 total = len(raw_tokens) 1033 chunks: t.List[t.List[Token]] = [[]] 1034 1035 for i, token in enumerate(raw_tokens): 1036 if token.token_type == TokenType.SEMICOLON: 1037 if i < total - 1: 1038 chunks.append([]) 1039 else: 1040 chunks[-1].append(token) 1041 1042 expressions = [] 1043 1044 for tokens in chunks: 1045 self._index = -1 1046 self._tokens = tokens 1047 self._advance() 1048 1049 expressions.append(parse_method(self)) 1050 1051 if self._index < len(self._tokens): 1052 self.raise_error("Invalid expression / Unexpected token") 1053 1054 self.check_errors() 1055 1056 return expressions 1057 1058 def check_errors(self) -> None: 1059 """Logs or raises any found errors, depending on the chosen error level setting.""" 1060 if self.error_level == ErrorLevel.WARN: 1061 for error in self.errors: 1062 logger.error(str(error)) 1063 elif self.error_level == ErrorLevel.RAISE and self.errors: 1064 raise ParseError( 1065 concat_messages(self.errors, self.max_errors), 1066 errors=merge_errors(self.errors), 1067 ) 1068 1069 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1070 """ 1071 Appends an error in the list of recorded errors or raises it, depending on the chosen 1072 error level setting. 1073 """ 1074 token = token or self._curr or self._prev or Token.string("") 1075 start = token.start 1076 end = token.end + 1 1077 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1078 highlight = self.sql[start:end] 1079 end_context = self.sql[end : end + self.error_message_context] 1080 1081 error = ParseError.new( 1082 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1083 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1084 description=message, 1085 line=token.line, 1086 col=token.col, 1087 start_context=start_context, 1088 highlight=highlight, 1089 end_context=end_context, 1090 ) 1091 1092 if self.error_level == ErrorLevel.IMMEDIATE: 1093 raise error 1094 1095 self.errors.append(error) 1096 1097 def expression( 1098 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1099 ) -> E: 1100 """ 1101 Creates a new, validated Expression. 1102 1103 Args: 1104 exp_class: The expression class to instantiate. 1105 comments: An optional list of comments to attach to the expression. 1106 kwargs: The arguments to set for the expression along with their respective values. 1107 1108 Returns: 1109 The target expression. 1110 """ 1111 instance = exp_class(**kwargs) 1112 instance.add_comments(comments) if comments else self._add_comments(instance) 1113 return self.validate_expression(instance) 1114 1115 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1116 if expression and self._prev_comments: 1117 expression.add_comments(self._prev_comments) 1118 self._prev_comments = None 1119 1120 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1121 """ 1122 Validates an Expression, making sure that all its mandatory arguments are set. 1123 1124 Args: 1125 expression: The expression to validate. 1126 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1127 1128 Returns: 1129 The validated expression. 1130 """ 1131 if self.error_level != ErrorLevel.IGNORE: 1132 for error_message in expression.error_messages(args): 1133 self.raise_error(error_message) 1134 1135 return expression 1136 1137 def _find_sql(self, start: Token, end: Token) -> str: 1138 return self.sql[start.start : end.end + 1] 1139 1140 def _advance(self, times: int = 1) -> None: 1141 self._index += times 1142 self._curr = seq_get(self._tokens, self._index) 1143 self._next = seq_get(self._tokens, self._index + 1) 1144 1145 if self._index > 0: 1146 self._prev = self._tokens[self._index - 1] 1147 self._prev_comments = self._prev.comments 1148 else: 1149 self._prev = None 1150 self._prev_comments = None 1151 1152 def _retreat(self, index: int) -> None: 1153 if index != self._index: 1154 self._advance(index - self._index) 1155 1156 def _parse_command(self) -> exp.Command: 1157 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1158 1159 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1160 start = self._prev 1161 exists = self._parse_exists() if allow_exists else None 1162 1163 self._match(TokenType.ON) 1164 1165 kind = self._match_set(self.CREATABLES) and self._prev 1166 if not kind: 1167 return self._parse_as_command(start) 1168 1169 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1170 this = self._parse_user_defined_function(kind=kind.token_type) 1171 elif kind.token_type == TokenType.TABLE: 1172 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1173 elif kind.token_type == TokenType.COLUMN: 1174 this = self._parse_column() 1175 else: 1176 this = self._parse_id_var() 1177 1178 self._match(TokenType.IS) 1179 1180 return self.expression( 1181 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1182 ) 1183 1184 def _parse_to_table( 1185 self, 1186 ) -> exp.ToTableProperty: 1187 table = 
self._parse_table_parts(schema=True) 1188 return self.expression(exp.ToTableProperty, this=table) 1189 1190 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1191 def _parse_ttl(self) -> exp.Expression: 1192 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1193 this = self._parse_bitwise() 1194 1195 if self._match_text_seq("DELETE"): 1196 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1197 if self._match_text_seq("RECOMPRESS"): 1198 return self.expression( 1199 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1200 ) 1201 if self._match_text_seq("TO", "DISK"): 1202 return self.expression( 1203 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1204 ) 1205 if self._match_text_seq("TO", "VOLUME"): 1206 return self.expression( 1207 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1208 ) 1209 1210 return this 1211 1212 expressions = self._parse_csv(_parse_ttl_action) 1213 where = self._parse_where() 1214 group = self._parse_group() 1215 1216 aggregates = None 1217 if group and self._match(TokenType.SET): 1218 aggregates = self._parse_csv(self._parse_set_item) 1219 1220 return self.expression( 1221 exp.MergeTreeTTL, 1222 expressions=expressions, 1223 where=where, 1224 group=group, 1225 aggregates=aggregates, 1226 ) 1227 1228 def _parse_statement(self) -> t.Optional[exp.Expression]: 1229 if self._curr is None: 1230 return None 1231 1232 if self._match_set(self.STATEMENT_PARSERS): 1233 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1234 1235 if self._match_set(Tokenizer.COMMANDS): 1236 return self._parse_command() 1237 1238 expression = self._parse_expression() 1239 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1240 return self._parse_query_modifiers(expression) 1241 1242 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1243 start = self._prev 1244 temporary = self._match(TokenType.TEMPORARY) 1245 materialized = self._match_text_seq("MATERIALIZED") 1246 1247 kind = self._match_set(self.CREATABLES) and self._prev.text 1248 if not kind: 1249 return self._parse_as_command(start) 1250 1251 return self.expression( 1252 exp.Drop, 1253 comments=start.comments, 1254 exists=exists or self._parse_exists(), 1255 this=self._parse_table(schema=True), 1256 kind=kind, 1257 temporary=temporary, 1258 materialized=materialized, 1259 cascade=self._match_text_seq("CASCADE"), 1260 constraints=self._match_text_seq("CONSTRAINTS"), 1261 purge=self._match_text_seq("PURGE"), 1262 ) 1263 1264 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1265 return ( 1266 self._match_text_seq("IF") 1267 and (not not_ or self._match(TokenType.NOT)) 1268 and self._match(TokenType.EXISTS) 1269 ) 1270 1271 def _parse_create(self) -> exp.Create | exp.Command: 1272 # Note: this can't be None because we've matched a statement parser 1273 start = self._prev 1274 comments = self._prev_comments 1275 1276 replace = start.text.upper() == "REPLACE" or self._match_pair( 1277 TokenType.OR, TokenType.REPLACE 1278 ) 1279 unique = self._match(TokenType.UNIQUE) 1280 1281 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1282 self._advance() 1283 1284 properties = None 1285 create_token = self._match_set(self.CREATABLES) and self._prev 1286 1287 if not create_token: 1288 # exp.Properties.Location.POST_CREATE 1289 properties = self._parse_properties() 1290 create_token = self._match_set(self.CREATABLES) and 
self._prev 1291 1292 if not properties or not create_token: 1293 return self._parse_as_command(start) 1294 1295 exists = self._parse_exists(not_=True) 1296 this = None 1297 expression: t.Optional[exp.Expression] = None 1298 indexes = None 1299 no_schema_binding = None 1300 begin = None 1301 end = None 1302 clone = None 1303 1304 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1305 nonlocal properties 1306 if properties and temp_props: 1307 properties.expressions.extend(temp_props.expressions) 1308 elif temp_props: 1309 properties = temp_props 1310 1311 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1312 this = self._parse_user_defined_function(kind=create_token.token_type) 1313 1314 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1315 extend_props(self._parse_properties()) 1316 1317 self._match(TokenType.ALIAS) 1318 1319 if self._match(TokenType.COMMAND): 1320 expression = self._parse_as_command(self._prev) 1321 else: 1322 begin = self._match(TokenType.BEGIN) 1323 return_ = self._match_text_seq("RETURN") 1324 1325 if self._match(TokenType.STRING, advance=False): 1326 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1327 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1328 expression = self._parse_string() 1329 extend_props(self._parse_properties()) 1330 else: 1331 expression = self._parse_statement() 1332 1333 end = self._match_text_seq("END") 1334 1335 if return_: 1336 expression = self.expression(exp.Return, this=expression) 1337 elif create_token.token_type == TokenType.INDEX: 1338 this = self._parse_index(index=self._parse_id_var()) 1339 elif create_token.token_type in self.DB_CREATABLES: 1340 table_parts = self._parse_table_parts(schema=True) 1341 1342 # exp.Properties.Location.POST_NAME 1343 self._match(TokenType.COMMA) 1344 extend_props(self._parse_properties(before=True)) 1345 1346 this = self._parse_schema(this=table_parts) 1347 1348 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1349 extend_props(self._parse_properties()) 1350 1351 self._match(TokenType.ALIAS) 1352 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1353 # exp.Properties.Location.POST_ALIAS 1354 extend_props(self._parse_properties()) 1355 1356 expression = self._parse_ddl_select() 1357 1358 if create_token.token_type == TokenType.TABLE: 1359 # exp.Properties.Location.POST_EXPRESSION 1360 extend_props(self._parse_properties()) 1361 1362 indexes = [] 1363 while True: 1364 index = self._parse_index() 1365 1366 # exp.Properties.Location.POST_INDEX 1367 extend_props(self._parse_properties()) 1368 1369 if not index: 1370 break 1371 else: 1372 self._match(TokenType.COMMA) 1373 indexes.append(index) 1374 elif create_token.token_type == TokenType.VIEW: 1375 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1376 no_schema_binding = True 1377 1378 shallow = self._match_text_seq("SHALLOW") 1379 1380 if self._match_texts(self.CLONE_KEYWORDS): 1381 copy = self._prev.text.lower() == "copy" 1382 clone = self._parse_table(schema=True) 1383 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1384 clone_kind = ( 1385 self._match(TokenType.L_PAREN) 1386 and self._match_texts(self.CLONE_KINDS) 1387 and self._prev.text.upper() 1388 ) 1389 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1390 self._match(TokenType.R_PAREN) 1391 clone = self.expression( 1392 exp.Clone, 1393 this=clone, 
1394 when=when, 1395 kind=clone_kind, 1396 shallow=shallow, 1397 expression=clone_expression, 1398 copy=copy, 1399 ) 1400 1401 return self.expression( 1402 exp.Create, 1403 comments=comments, 1404 this=this, 1405 kind=create_token.text, 1406 replace=replace, 1407 unique=unique, 1408 expression=expression, 1409 exists=exists, 1410 properties=properties, 1411 indexes=indexes, 1412 no_schema_binding=no_schema_binding, 1413 begin=begin, 1414 end=end, 1415 clone=clone, 1416 ) 1417 1418 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1419 # only used for teradata currently 1420 self._match(TokenType.COMMA) 1421 1422 kwargs = { 1423 "no": self._match_text_seq("NO"), 1424 "dual": self._match_text_seq("DUAL"), 1425 "before": self._match_text_seq("BEFORE"), 1426 "default": self._match_text_seq("DEFAULT"), 1427 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1428 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1429 "after": self._match_text_seq("AFTER"), 1430 "minimum": self._match_texts(("MIN", "MINIMUM")), 1431 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1432 } 1433 1434 if self._match_texts(self.PROPERTY_PARSERS): 1435 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1436 try: 1437 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1438 except TypeError: 1439 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1440 1441 return None 1442 1443 def _parse_property(self) -> t.Optional[exp.Expression]: 1444 if self._match_texts(self.PROPERTY_PARSERS): 1445 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1446 1447 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1448 return self._parse_character_set(default=True) 1449 1450 if self._match_text_seq("COMPOUND", "SORTKEY"): 1451 return self._parse_sortkey(compound=True) 1452 1453 if self._match_text_seq("SQL", "SECURITY"): 1454 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1455 1456 index = self._index 1457 key = self._parse_column() 1458 1459 if not self._match(TokenType.EQ): 1460 self._retreat(index) 1461 return None 1462 1463 return self.expression( 1464 exp.Property, 1465 this=key.to_dot() if isinstance(key, exp.Column) else key, 1466 value=self._parse_column() or self._parse_var(any_token=True), 1467 ) 1468 1469 def _parse_stored(self) -> exp.FileFormatProperty: 1470 self._match(TokenType.ALIAS) 1471 1472 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1473 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1474 1475 return self.expression( 1476 exp.FileFormatProperty, 1477 this=self.expression( 1478 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1479 ) 1480 if input_format or output_format 1481 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1482 ) 1483 1484 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1485 self._match(TokenType.EQ) 1486 self._match(TokenType.ALIAS) 1487 return self.expression(exp_class, this=self._parse_field()) 1488 1489 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1490 properties = [] 1491 while True: 1492 if before: 1493 prop = self._parse_property_before() 1494 else: 1495 prop = self._parse_property() 1496 1497 if not prop: 1498 break 1499 for p in ensure_list(prop): 1500 properties.append(p) 1501 1502 if properties: 1503 return self.expression(exp.Properties, expressions=properties) 1504 1505 
return None 1506 1507 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1508 return self.expression( 1509 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1510 ) 1511 1512 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1513 if self._index >= 2: 1514 pre_volatile_token = self._tokens[self._index - 2] 1515 else: 1516 pre_volatile_token = None 1517 1518 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1519 return exp.VolatileProperty() 1520 1521 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1522 1523 def _parse_with_property( 1524 self, 1525 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1526 if self._match(TokenType.L_PAREN, advance=False): 1527 return self._parse_wrapped_csv(self._parse_property) 1528 1529 if self._match_text_seq("JOURNAL"): 1530 return self._parse_withjournaltable() 1531 1532 if self._match_text_seq("DATA"): 1533 return self._parse_withdata(no=False) 1534 elif self._match_text_seq("NO", "DATA"): 1535 return self._parse_withdata(no=True) 1536 1537 if not self._next: 1538 return None 1539 1540 return self._parse_withisolatedloading() 1541 1542 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1543 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1544 self._match(TokenType.EQ) 1545 1546 user = self._parse_id_var() 1547 self._match(TokenType.PARAMETER) 1548 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1549 1550 if not user or not host: 1551 return None 1552 1553 return exp.DefinerProperty(this=f"{user}@{host}") 1554 1555 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1556 self._match(TokenType.TABLE) 1557 self._match(TokenType.EQ) 1558 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1559 1560 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1561 return self.expression(exp.LogProperty, no=no) 1562 1563 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1564 return self.expression(exp.JournalProperty, **kwargs) 1565 1566 def _parse_checksum(self) -> exp.ChecksumProperty: 1567 self._match(TokenType.EQ) 1568 1569 on = None 1570 if self._match(TokenType.ON): 1571 on = True 1572 elif self._match_text_seq("OFF"): 1573 on = False 1574 1575 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1576 1577 def _parse_cluster(self) -> exp.Cluster: 1578 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1579 1580 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1581 self._match_text_seq("BY") 1582 1583 self._match_l_paren() 1584 expressions = self._parse_csv(self._parse_column) 1585 self._match_r_paren() 1586 1587 if self._match_text_seq("SORTED", "BY"): 1588 self._match_l_paren() 1589 sorted_by = self._parse_csv(self._parse_ordered) 1590 self._match_r_paren() 1591 else: 1592 sorted_by = None 1593 1594 self._match(TokenType.INTO) 1595 buckets = self._parse_number() 1596 self._match_text_seq("BUCKETS") 1597 1598 return self.expression( 1599 exp.ClusteredByProperty, 1600 expressions=expressions, 1601 sorted_by=sorted_by, 1602 buckets=buckets, 1603 ) 1604 1605 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1606 if not self._match_text_seq("GRANTS"): 1607 self._retreat(self._index - 1) 1608 return None 1609 1610 return self.expression(exp.CopyGrantsProperty) 1611 1612 def _parse_freespace(self) -> 
exp.FreespaceProperty: 1613 self._match(TokenType.EQ) 1614 return self.expression( 1615 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1616 ) 1617 1618 def _parse_mergeblockratio( 1619 self, no: bool = False, default: bool = False 1620 ) -> exp.MergeBlockRatioProperty: 1621 if self._match(TokenType.EQ): 1622 return self.expression( 1623 exp.MergeBlockRatioProperty, 1624 this=self._parse_number(), 1625 percent=self._match(TokenType.PERCENT), 1626 ) 1627 1628 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1629 1630 def _parse_datablocksize( 1631 self, 1632 default: t.Optional[bool] = None, 1633 minimum: t.Optional[bool] = None, 1634 maximum: t.Optional[bool] = None, 1635 ) -> exp.DataBlocksizeProperty: 1636 self._match(TokenType.EQ) 1637 size = self._parse_number() 1638 1639 units = None 1640 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1641 units = self._prev.text 1642 1643 return self.expression( 1644 exp.DataBlocksizeProperty, 1645 size=size, 1646 units=units, 1647 default=default, 1648 minimum=minimum, 1649 maximum=maximum, 1650 ) 1651 1652 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1653 self._match(TokenType.EQ) 1654 always = self._match_text_seq("ALWAYS") 1655 manual = self._match_text_seq("MANUAL") 1656 never = self._match_text_seq("NEVER") 1657 default = self._match_text_seq("DEFAULT") 1658 1659 autotemp = None 1660 if self._match_text_seq("AUTOTEMP"): 1661 autotemp = self._parse_schema() 1662 1663 return self.expression( 1664 exp.BlockCompressionProperty, 1665 always=always, 1666 manual=manual, 1667 never=never, 1668 default=default, 1669 autotemp=autotemp, 1670 ) 1671 1672 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1673 no = self._match_text_seq("NO") 1674 concurrent = self._match_text_seq("CONCURRENT") 1675 self._match_text_seq("ISOLATED", "LOADING") 1676 for_all = self._match_text_seq("FOR", "ALL") 1677 for_insert = self._match_text_seq("FOR", "INSERT") 1678 for_none = self._match_text_seq("FOR", "NONE") 1679 return self.expression( 1680 exp.IsolatedLoadingProperty, 1681 no=no, 1682 concurrent=concurrent, 1683 for_all=for_all, 1684 for_insert=for_insert, 1685 for_none=for_none, 1686 ) 1687 1688 def _parse_locking(self) -> exp.LockingProperty: 1689 if self._match(TokenType.TABLE): 1690 kind = "TABLE" 1691 elif self._match(TokenType.VIEW): 1692 kind = "VIEW" 1693 elif self._match(TokenType.ROW): 1694 kind = "ROW" 1695 elif self._match_text_seq("DATABASE"): 1696 kind = "DATABASE" 1697 else: 1698 kind = None 1699 1700 if kind in ("DATABASE", "TABLE", "VIEW"): 1701 this = self._parse_table_parts() 1702 else: 1703 this = None 1704 1705 if self._match(TokenType.FOR): 1706 for_or_in = "FOR" 1707 elif self._match(TokenType.IN): 1708 for_or_in = "IN" 1709 else: 1710 for_or_in = None 1711 1712 if self._match_text_seq("ACCESS"): 1713 lock_type = "ACCESS" 1714 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1715 lock_type = "EXCLUSIVE" 1716 elif self._match_text_seq("SHARE"): 1717 lock_type = "SHARE" 1718 elif self._match_text_seq("READ"): 1719 lock_type = "READ" 1720 elif self._match_text_seq("WRITE"): 1721 lock_type = "WRITE" 1722 elif self._match_text_seq("CHECKSUM"): 1723 lock_type = "CHECKSUM" 1724 else: 1725 lock_type = None 1726 1727 override = self._match_text_seq("OVERRIDE") 1728 1729 return self.expression( 1730 exp.LockingProperty, 1731 this=this, 1732 kind=kind, 1733 for_or_in=for_or_in, 1734 lock_type=lock_type, 1735 override=override, 1736 ) 1737 
1738 def _parse_partition_by(self) -> t.List[exp.Expression]: 1739 if self._match(TokenType.PARTITION_BY): 1740 return self._parse_csv(self._parse_conjunction) 1741 return [] 1742 1743 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1744 self._match(TokenType.EQ) 1745 return self.expression( 1746 exp.PartitionedByProperty, 1747 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1748 ) 1749 1750 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1751 if self._match_text_seq("AND", "STATISTICS"): 1752 statistics = True 1753 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1754 statistics = False 1755 else: 1756 statistics = None 1757 1758 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1759 1760 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1761 if self._match_text_seq("PRIMARY", "INDEX"): 1762 return exp.NoPrimaryIndexProperty() 1763 return None 1764 1765 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1766 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1767 return exp.OnCommitProperty() 1768 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1769 return exp.OnCommitProperty(delete=True) 1770 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1771 1772 def _parse_distkey(self) -> exp.DistKeyProperty: 1773 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1774 1775 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1776 table = self._parse_table(schema=True) 1777 1778 options = [] 1779 while self._match_texts(("INCLUDING", "EXCLUDING")): 1780 this = self._prev.text.upper() 1781 1782 id_var = self._parse_id_var() 1783 if not id_var: 1784 return None 1785 1786 options.append( 1787 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1788 ) 1789 1790 return self.expression(exp.LikeProperty, this=table, expressions=options) 1791 1792 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1793 return self.expression( 1794 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1795 ) 1796 1797 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1798 self._match(TokenType.EQ) 1799 return self.expression( 1800 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1801 ) 1802 1803 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1804 self._match_text_seq("WITH", "CONNECTION") 1805 return self.expression( 1806 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1807 ) 1808 1809 def _parse_returns(self) -> exp.ReturnsProperty: 1810 value: t.Optional[exp.Expression] 1811 is_table = self._match(TokenType.TABLE) 1812 1813 if is_table: 1814 if self._match(TokenType.LT): 1815 value = self.expression( 1816 exp.Schema, 1817 this="TABLE", 1818 expressions=self._parse_csv(self._parse_struct_types), 1819 ) 1820 if not self._match(TokenType.GT): 1821 self.raise_error("Expecting >") 1822 else: 1823 value = self._parse_schema(exp.var("TABLE")) 1824 else: 1825 value = self._parse_types() 1826 1827 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1828 1829 def _parse_describe(self) -> exp.Describe: 1830 kind = self._match_set(self.CREATABLES) and self._prev.text 1831 this = self._parse_table(schema=True) 1832 properties = self._parse_properties() 1833 expressions = properties.expressions if properties else None 

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )
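
    # Usage sketch (illustrative only, via the public API): _parse_on_conflict
    # accepts both the Postgres and the MySQL spelling, e.g.
    #
    #     import sqlglot
    #     ast = sqlglot.parse_one(
    #         "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING", read="postgres"
    #     )
    #
    # should produce an exp.Insert whose "conflict" arg is an exp.OnConflict
    # with nothing=True and the conflict target in "key", while a MySQL
    # "ON DUPLICATE KEY UPDATE ..." sets duplicate=True instead.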

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )
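
    # Usage sketch (illustrative only): for MySQL's multiple-table syntax the
    # table list before FROM lands in the "tables" arg, e.g.
    #
    #     import sqlglot
    #     ast = sqlglot.parse_one(
    #         "DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id", read="mysql"
    #     )
    #
    # should give an exp.Delete whose "tables" holds t1 and whose "this" is
    # the joined FROM target, since _parse_delete only collects the leading
    # table list when the next token is not FROM.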

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this
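
    # Usage sketch (illustrative only): _parse_query_modifiers normalizes a
    # comma-style LIMIT into separate nodes. Assuming the MySQL reading of
    # "SELECT * FROM t LIMIT 5, 10", _parse_limit returns an exp.Limit with
    # expression=10 and offset=5; the loop above then pops "offset" from the
    # limit's args and attaches it to the select as exp.Offset(expression=5).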

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
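
    # Implementation note (illustrative only): the PATTERN body is not parsed
    # into an AST. The loop above just balances parentheses over the raw token
    # stream, so for "PATTERN (A B+ C)" the tokens between the outer parens
    # are recovered verbatim with self._find_sql(start, end) and stored as an
    # opaque exp.var, leaving the regex-like syntax to the target dialect.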

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
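
    # Usage sketch (illustrative only): a comma in the FROM clause is treated
    # as a join with no explicit kind, so in "SELECT * FROM a, b" the table b
    # arrives as exp.Join(this=b) via the TokenType.COMMA branch above, and a
    # generator may later render it back as either a comma or a CROSS JOIN.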

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        opclass = self._parse_var(any_token=True)
        if opclass:
            return self.expression(exp.Opclass, this=this, expression=opclass)

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
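
    # Usage sketch (illustrative only): _parse_table_parts shifts names left
    # as dots are consumed, so "cat.db.tbl" resolves to
    # exp.Table(this=tbl, db=db, catalog=cat), and any further dotted parts
    # beyond three are nested as exp.Dot expressions on top of the table name.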

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        elif num:
            size = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )
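
    # Usage sketch (illustrative only): in BigQuery-style SQL,
    # "SELECT * FROM UNNEST([1, 2]) AS x WITH OFFSET AS pos" reaches
    # _parse_unnest above, which parses the table alias first and then the
    # trailing WITH OFFSET alias, so the exp.Unnest ends up with offset=pos,
    # falling back to the identifier "offset" when no alias is given.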

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]
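
    # Implementation note (illustrative only): for a non-UNPIVOT pivot the
    # output column names are synthesized from the aggregation aliases and the
    # IN-list values above. With an aggregation aliased "total" and a field
    # value "2023", PREFIXED_PIVOT_COLUMNS dialects produce "total_2023",
    # while the default layout produces "2023_total".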

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
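
    # Implementation note (illustrative only): _parse_ordered also bakes in
    # the dialect's implicit NULL placement. If no NULLS FIRST/LAST was
    # written and NULL_ORDERING is "nulls_are_small", an ascending key is
    # marked nulls_first=True, which lets transpilation to a dialect with
    # different defaults re-emit an explicit NULLS clause.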
"nulls_are_last" 3055 ): 3056 nulls_first = True 3057 3058 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3059 3060 def _parse_limit( 3061 self, this: t.Optional[exp.Expression] = None, top: bool = False 3062 ) -> t.Optional[exp.Expression]: 3063 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3064 comments = self._prev_comments 3065 if top: 3066 limit_paren = self._match(TokenType.L_PAREN) 3067 expression = self._parse_number() 3068 3069 if limit_paren: 3070 self._match_r_paren() 3071 else: 3072 expression = self._parse_term() 3073 3074 if self._match(TokenType.COMMA): 3075 offset = expression 3076 expression = self._parse_term() 3077 else: 3078 offset = None 3079 3080 limit_exp = self.expression( 3081 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3082 ) 3083 3084 return limit_exp 3085 3086 if self._match(TokenType.FETCH): 3087 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3088 direction = self._prev.text if direction else "FIRST" 3089 3090 count = self._parse_field(tokens=self.FETCH_TOKENS) 3091 percent = self._match(TokenType.PERCENT) 3092 3093 self._match_set((TokenType.ROW, TokenType.ROWS)) 3094 3095 only = self._match_text_seq("ONLY") 3096 with_ties = self._match_text_seq("WITH", "TIES") 3097 3098 if only and with_ties: 3099 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3100 3101 return self.expression( 3102 exp.Fetch, 3103 direction=direction, 3104 count=count, 3105 percent=percent, 3106 with_ties=with_ties, 3107 ) 3108 3109 return this 3110 3111 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3112 if not self._match(TokenType.OFFSET): 3113 return this 3114 3115 count = self._parse_term() 3116 self._match_set((TokenType.ROW, TokenType.ROWS)) 3117 return self.expression(exp.Offset, this=this, expression=count) 3118 3119 def _parse_locks(self) -> t.List[exp.Lock]: 3120 locks = [] 3121 while True: 3122 if self._match_text_seq("FOR", "UPDATE"): 3123 update = True 3124 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3125 "LOCK", "IN", "SHARE", "MODE" 3126 ): 3127 update = False 3128 else: 3129 break 3130 3131 expressions = None 3132 if self._match_text_seq("OF"): 3133 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3134 3135 wait: t.Optional[bool | exp.Expression] = None 3136 if self._match_text_seq("NOWAIT"): 3137 wait = True 3138 elif self._match_text_seq("WAIT"): 3139 wait = self._parse_primary() 3140 elif self._match_text_seq("SKIP", "LOCKED"): 3141 wait = False 3142 3143 locks.append( 3144 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3145 ) 3146 3147 return locks 3148 3149 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3150 if not self._match_set(self.SET_OPERATIONS): 3151 return this 3152 3153 token_type = self._prev.token_type 3154 3155 if token_type == TokenType.UNION: 3156 expression = exp.Union 3157 elif token_type == TokenType.EXCEPT: 3158 expression = exp.Except 3159 else: 3160 expression = exp.Intersect 3161 3162 return self.expression( 3163 expression, 3164 this=this, 3165 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3166 by_name=self._match_text_seq("BY", "NAME"), 3167 expression=self._parse_set_operations(self._parse_select(nested=True)), 3168 ) 3169 3170 def _parse_expression(self) -> t.Optional[exp.Expression]: 3171 return 

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
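
    # Usage sketch (illustrative only): in _parse_range a leading NOT is
    # captured before the range operator, so "x NOT BETWEEN 1 AND 2" parses
    # the exp.Between via RANGE_PARSERS and then wraps it as
    # exp.Not(this=exp.Between(...)); "x NOT IN (1, 2)" flows through
    # _parse_in the same way.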

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
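
    # Usage sketch (illustrative only): in _parse_type a data type followed by
    # a literal becomes a cast, so "DATE '2020-01-01'" should yield
    # exp.Cast(this='2020-01-01', to=DATE) unless the dialect registers a
    # dedicated TYPE_LITERAL_PARSERS entry for that type.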

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
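
    # Usage sketch (illustrative only): _parse_types handles both spellings of
    # nested types. "ARRAY<INT>" goes through the LT/GT branch, while a
    # trailing "[]" suffix is folded by the final loop, so "INT[][]" becomes
    # an ARRAY data type wrapping an ARRAY data type wrapping INT.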

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...), SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )
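
    # Implementation note (illustrative only): _parse_primary folds runs of
    # adjacent string literals, so in dialects that allow implicit
    # concatenation, "SELECT 'foo' 'bar'" should come back as exp.Concat over
    # the two literals rather than as two separate expressions.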
self.expression(subquery_predicate, this=self._parse_select()) 3658 self._match_r_paren() 3659 return this 3660 3661 if functions is None: 3662 functions = self.FUNCTIONS 3663 3664 function = functions.get(upper) 3665 3666 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3667 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3668 3669 if function and not anonymous: 3670 func = self.validate_expression(function(args), args) 3671 if not self.NORMALIZE_FUNCTIONS: 3672 func.meta["name"] = this 3673 this = func 3674 else: 3675 this = self.expression(exp.Anonymous, this=this, expressions=args) 3676 3677 self._match_r_paren(this) 3678 return self._parse_window(this) 3679 3680 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3681 return self._parse_column_def(self._parse_id_var()) 3682 3683 def _parse_user_defined_function( 3684 self, kind: t.Optional[TokenType] = None 3685 ) -> t.Optional[exp.Expression]: 3686 this = self._parse_id_var() 3687 3688 while self._match(TokenType.DOT): 3689 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3690 3691 if not self._match(TokenType.L_PAREN): 3692 return this 3693 3694 expressions = self._parse_csv(self._parse_function_parameter) 3695 self._match_r_paren() 3696 return self.expression( 3697 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3698 ) 3699 3700 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3701 literal = self._parse_primary() 3702 if literal: 3703 return self.expression(exp.Introducer, this=token.text, expression=literal) 3704 3705 return self.expression(exp.Identifier, this=token.text) 3706 3707 def _parse_session_parameter(self) -> exp.SessionParameter: 3708 kind = None 3709 this = self._parse_id_var() or self._parse_primary() 3710 3711 if this and self._match(TokenType.DOT): 3712 kind = this.name 3713 this = self._parse_var() or self._parse_primary() 3714 3715 return self.expression(exp.SessionParameter, this=this, kind=kind) 3716 3717 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3718 index = self._index 3719 3720 if self._match(TokenType.L_PAREN): 3721 expressions = t.cast( 3722 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3723 ) 3724 3725 if not self._match(TokenType.R_PAREN): 3726 self._retreat(index) 3727 else: 3728 expressions = [self._parse_id_var()] 3729 3730 if self._match_set(self.LAMBDAS): 3731 return self.LAMBDAS[self._prev.token_type](self, expressions) 3732 3733 self._retreat(index) 3734 3735 this: t.Optional[exp.Expression] 3736 3737 if self._match(TokenType.DISTINCT): 3738 this = self.expression( 3739 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3740 ) 3741 else: 3742 this = self._parse_select_or_expression(alias=alias) 3743 3744 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3745 3746 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3747 index = self._index 3748 3749 if not self.errors: 3750 try: 3751 if self._parse_select(nested=True): 3752 return this 3753 except ParseError: 3754 pass 3755 finally: 3756 self.errors.clear() 3757 self._retreat(index) 3758 3759 if not self._match(TokenType.L_PAREN): 3760 return this 3761 3762 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3763 3764 self._match_r_paren() 3765 return self.expression(exp.Schema, this=this, expressions=args) 3766 3767 def _parse_field_def(self) -> 
t.Optional[exp.Expression]: 3768 return self._parse_column_def(self._parse_field(any_token=True)) 3769 3770 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3771 # column defs are not really columns, they're identifiers 3772 if isinstance(this, exp.Column): 3773 this = this.this 3774 3775 kind = self._parse_types(schema=True) 3776 3777 if self._match_text_seq("FOR", "ORDINALITY"): 3778 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3779 3780 constraints: t.List[exp.Expression] = [] 3781 3782 if not kind and self._match(TokenType.ALIAS): 3783 constraints.append( 3784 self.expression( 3785 exp.ComputedColumnConstraint, 3786 this=self._parse_conjunction(), 3787 persisted=self._match_text_seq("PERSISTED"), 3788 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3789 ) 3790 ) 3791 3792 while True: 3793 constraint = self._parse_column_constraint() 3794 if not constraint: 3795 break 3796 constraints.append(constraint) 3797 3798 if not kind and not constraints: 3799 return this 3800 3801 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3802 3803 def _parse_auto_increment( 3804 self, 3805 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3806 start = None 3807 increment = None 3808 3809 if self._match(TokenType.L_PAREN, advance=False): 3810 args = self._parse_wrapped_csv(self._parse_bitwise) 3811 start = seq_get(args, 0) 3812 increment = seq_get(args, 1) 3813 elif self._match_text_seq("START"): 3814 start = self._parse_bitwise() 3815 self._match_text_seq("INCREMENT") 3816 increment = self._parse_bitwise() 3817 3818 if start and increment: 3819 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3820 3821 return exp.AutoIncrementColumnConstraint() 3822 3823 def _parse_compress(self) -> exp.CompressColumnConstraint: 3824 if self._match(TokenType.L_PAREN, advance=False): 3825 return self.expression( 3826 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3827 ) 3828 3829 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3830 3831 def _parse_generated_as_identity( 3832 self, 3833 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint: 3834 if self._match_text_seq("BY", "DEFAULT"): 3835 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3836 this = self.expression( 3837 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3838 ) 3839 else: 3840 self._match_text_seq("ALWAYS") 3841 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3842 3843 self._match(TokenType.ALIAS) 3844 identity = self._match_text_seq("IDENTITY") 3845 3846 if self._match(TokenType.L_PAREN): 3847 if self._match(TokenType.START_WITH): 3848 this.set("start", self._parse_bitwise()) 3849 if self._match_text_seq("INCREMENT", "BY"): 3850 this.set("increment", self._parse_bitwise()) 3851 if self._match_text_seq("MINVALUE"): 3852 this.set("minvalue", self._parse_bitwise()) 3853 if self._match_text_seq("MAXVALUE"): 3854 this.set("maxvalue", self._parse_bitwise()) 3855 3856 if self._match_text_seq("CYCLE"): 3857 this.set("cycle", True) 3858 elif self._match_text_seq("NO", "CYCLE"): 3859 this.set("cycle", False) 3860 3861 if not identity: 3862 this.set("expression", self._parse_bitwise()) 3863 3864 self._match_r_paren() 3865 3866 return this 3867 3868 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3869 self._match_text_seq("LENGTH") 3870 
return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3871 3872 def _parse_not_constraint( 3873 self, 3874 ) -> t.Optional[exp.Expression]: 3875 if self._match_text_seq("NULL"): 3876 return self.expression(exp.NotNullColumnConstraint) 3877 if self._match_text_seq("CASESPECIFIC"): 3878 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3879 if self._match_text_seq("FOR", "REPLICATION"): 3880 return self.expression(exp.NotForReplicationColumnConstraint) 3881 return None 3882 3883 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3884 if self._match(TokenType.CONSTRAINT): 3885 this = self._parse_id_var() 3886 else: 3887 this = None 3888 3889 if self._match_texts(self.CONSTRAINT_PARSERS): 3890 return self.expression( 3891 exp.ColumnConstraint, 3892 this=this, 3893 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3894 ) 3895 3896 return this 3897 3898 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3899 if not self._match(TokenType.CONSTRAINT): 3900 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3901 3902 this = self._parse_id_var() 3903 expressions = [] 3904 3905 while True: 3906 constraint = self._parse_unnamed_constraint() or self._parse_function() 3907 if not constraint: 3908 break 3909 expressions.append(constraint) 3910 3911 return self.expression(exp.Constraint, this=this, expressions=expressions) 3912 3913 def _parse_unnamed_constraint( 3914 self, constraints: t.Optional[t.Collection[str]] = None 3915 ) -> t.Optional[exp.Expression]: 3916 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 3917 constraints or self.CONSTRAINT_PARSERS 3918 ): 3919 return None 3920 3921 constraint = self._prev.text.upper() 3922 if constraint not in self.CONSTRAINT_PARSERS: 3923 self.raise_error(f"No parser found for schema constraint {constraint}.") 3924 3925 return self.CONSTRAINT_PARSERS[constraint](self) 3926 3927 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3928 self._match_text_seq("KEY") 3929 return self.expression( 3930 exp.UniqueColumnConstraint, 3931 this=self._parse_schema(self._parse_id_var(any_token=False)), 3932 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3933 ) 3934 3935 def _parse_key_constraint_options(self) -> t.List[str]: 3936 options = [] 3937 while True: 3938 if not self._curr: 3939 break 3940 3941 if self._match(TokenType.ON): 3942 action = None 3943 on = self._advance_any() and self._prev.text 3944 3945 if self._match_text_seq("NO", "ACTION"): 3946 action = "NO ACTION" 3947 elif self._match_text_seq("CASCADE"): 3948 action = "CASCADE" 3949 elif self._match_text_seq("RESTRICT"): 3950 action = "RESTRICT" 3951 elif self._match_pair(TokenType.SET, TokenType.NULL): 3952 action = "SET NULL" 3953 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3954 action = "SET DEFAULT" 3955 else: 3956 self.raise_error("Invalid key constraint") 3957 3958 options.append(f"ON {on} {action}") 3959 elif self._match_text_seq("NOT", "ENFORCED"): 3960 options.append("NOT ENFORCED") 3961 elif self._match_text_seq("DEFERRABLE"): 3962 options.append("DEFERRABLE") 3963 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3964 options.append("INITIALLY DEFERRED") 3965 elif self._match_text_seq("NORELY"): 3966 options.append("NORELY") 3967 elif self._match_text_seq("MATCH", "FULL"): 3968 options.append("MATCH FULL") 3969 else: 3970 break 3971 3972 return options 3973 3974 def _parse_references(self, match: bool = 
True) -> t.Optional[exp.Reference]: 3975 if match and not self._match(TokenType.REFERENCES): 3976 return None 3977 3978 expressions = None 3979 this = self._parse_table(schema=True) 3980 options = self._parse_key_constraint_options() 3981 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3982 3983 def _parse_foreign_key(self) -> exp.ForeignKey: 3984 expressions = self._parse_wrapped_id_vars() 3985 reference = self._parse_references() 3986 options = {} 3987 3988 while self._match(TokenType.ON): 3989 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3990 self.raise_error("Expected DELETE or UPDATE") 3991 3992 kind = self._prev.text.lower() 3993 3994 if self._match_text_seq("NO", "ACTION"): 3995 action = "NO ACTION" 3996 elif self._match(TokenType.SET): 3997 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3998 action = "SET " + self._prev.text.upper() 3999 else: 4000 self._advance() 4001 action = self._prev.text.upper() 4002 4003 options[kind] = action 4004 4005 return self.expression( 4006 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4007 ) 4008 4009 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4010 return self._parse_field() 4011 4012 def _parse_primary_key( 4013 self, wrapped_optional: bool = False, in_props: bool = False 4014 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4015 desc = ( 4016 self._match_set((TokenType.ASC, TokenType.DESC)) 4017 and self._prev.token_type == TokenType.DESC 4018 ) 4019 4020 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4021 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4022 4023 expressions = self._parse_wrapped_csv( 4024 self._parse_primary_key_part, optional=wrapped_optional 4025 ) 4026 options = self._parse_key_constraint_options() 4027 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4028 4029 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4030 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4031 return this 4032 4033 bracket_kind = self._prev.token_type 4034 4035 if self._match(TokenType.COLON): 4036 expressions: t.List[exp.Expression] = [ 4037 self.expression(exp.Slice, expression=self._parse_conjunction()) 4038 ] 4039 else: 4040 expressions = self._parse_csv( 4041 lambda: self._parse_slice( 4042 self._parse_alias(self._parse_conjunction(), explicit=True) 4043 ) 4044 ) 4045 4046 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4047 if bracket_kind == TokenType.L_BRACE: 4048 this = self.expression(exp.Struct, expressions=expressions) 4049 elif not this or this.name.upper() == "ARRAY": 4050 this = self.expression(exp.Array, expressions=expressions) 4051 else: 4052 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 4053 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4054 4055 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4056 self.raise_error("Expected ]") 4057 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4058 self.raise_error("Expected }") 4059 4060 self._add_comments(this) 4061 return self._parse_bracket(this) 4062 4063 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4064 if self._match(TokenType.COLON): 4065 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4066 return this 4067 4068 def 
_parse_case(self) -> t.Optional[exp.Expression]: 4069 ifs = [] 4070 default = None 4071 4072 comments = self._prev_comments 4073 expression = self._parse_conjunction() 4074 4075 while self._match(TokenType.WHEN): 4076 this = self._parse_conjunction() 4077 self._match(TokenType.THEN) 4078 then = self._parse_conjunction() 4079 ifs.append(self.expression(exp.If, this=this, true=then)) 4080 4081 if self._match(TokenType.ELSE): 4082 default = self._parse_conjunction() 4083 4084 if not self._match(TokenType.END): 4085 self.raise_error("Expected END after CASE", self._prev) 4086 4087 return self._parse_window( 4088 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4089 ) 4090 4091 def _parse_if(self) -> t.Optional[exp.Expression]: 4092 if self._match(TokenType.L_PAREN): 4093 args = self._parse_csv(self._parse_conjunction) 4094 this = self.validate_expression(exp.If.from_arg_list(args), args) 4095 self._match_r_paren() 4096 else: 4097 index = self._index - 1 4098 condition = self._parse_conjunction() 4099 4100 if not condition: 4101 self._retreat(index) 4102 return None 4103 4104 self._match(TokenType.THEN) 4105 true = self._parse_conjunction() 4106 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4107 self._match(TokenType.END) 4108 this = self.expression(exp.If, this=condition, true=true, false=false) 4109 4110 return self._parse_window(this) 4111 4112 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4113 if not self._match_text_seq("VALUE", "FOR"): 4114 self._retreat(self._index - 1) 4115 return None 4116 4117 return self.expression( 4118 exp.NextValueFor, 4119 this=self._parse_column(), 4120 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4121 ) 4122 4123 def _parse_extract(self) -> exp.Extract: 4124 this = self._parse_function() or self._parse_var() or self._parse_type() 4125 4126 if self._match(TokenType.FROM): 4127 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4128 4129 if not self._match(TokenType.COMMA): 4130 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4131 4132 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4133 4134 def _parse_any_value(self) -> exp.AnyValue: 4135 this = self._parse_lambda() 4136 is_max = None 4137 having = None 4138 4139 if self._match(TokenType.HAVING): 4140 self._match_texts(("MAX", "MIN")) 4141 is_max = self._prev.text == "MAX" 4142 having = self._parse_column() 4143 4144 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4145 4146 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4147 this = self._parse_conjunction() 4148 4149 if not self._match(TokenType.ALIAS): 4150 if self._match(TokenType.COMMA): 4151 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4152 4153 self.raise_error("Expected AS after CAST") 4154 4155 fmt = None 4156 to = self._parse_types() 4157 4158 if not to: 4159 self.raise_error("Expected TYPE after CAST") 4160 elif isinstance(to, exp.Identifier): 4161 to = exp.DataType.build(to.name, udt=True) 4162 elif to.this == exp.DataType.Type.CHAR: 4163 if self._match(TokenType.CHARACTER_SET): 4164 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4165 elif self._match(TokenType.FORMAT): 4166 fmt_string = self._parse_string() 4167 fmt = self._parse_at_time_zone(fmt_string) 4168 4169 if to.this in exp.DataType.TEMPORAL_TYPES: 4170 this = 
self.expression( 4171 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4172 this=this, 4173 format=exp.Literal.string( 4174 format_time( 4175 fmt_string.this if fmt_string else "", 4176 self.FORMAT_MAPPING or self.TIME_MAPPING, 4177 self.FORMAT_TRIE or self.TIME_TRIE, 4178 ) 4179 ), 4180 ) 4181 4182 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4183 this.set("zone", fmt.args["zone"]) 4184 4185 return this 4186 4187 return self.expression( 4188 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4189 ) 4190 4191 def _parse_concat(self) -> t.Optional[exp.Expression]: 4192 args = self._parse_csv(self._parse_conjunction) 4193 if self.CONCAT_NULL_OUTPUTS_STRING: 4194 args = self._ensure_string_if_null(args) 4195 4196 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4197 # we find such a call we replace it with its argument. 4198 if len(args) == 1: 4199 return args[0] 4200 4201 return self.expression( 4202 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4203 ) 4204 4205 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4206 args = self._parse_csv(self._parse_conjunction) 4207 if len(args) < 2: 4208 return self.expression(exp.ConcatWs, expressions=args) 4209 delim, *values = args 4210 if self.CONCAT_NULL_OUTPUTS_STRING: 4211 values = self._ensure_string_if_null(values) 4212 4213 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4214 4215 def _parse_string_agg(self) -> exp.Expression: 4216 if self._match(TokenType.DISTINCT): 4217 args: t.List[t.Optional[exp.Expression]] = [ 4218 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4219 ] 4220 if self._match(TokenType.COMMA): 4221 args.extend(self._parse_csv(self._parse_conjunction)) 4222 else: 4223 args = self._parse_csv(self._parse_conjunction) # type: ignore 4224 4225 index = self._index 4226 if not self._match(TokenType.R_PAREN) and args: 4227 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4228 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4229 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4230 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4231 4232 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4233 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4234 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
4235 if not self._match_text_seq("WITHIN", "GROUP"): 4236 self._retreat(index) 4237 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4238 4239 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4240 order = self._parse_order(this=seq_get(args, 0)) 4241 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4242 4243 def _parse_convert( 4244 self, strict: bool, safe: t.Optional[bool] = None 4245 ) -> t.Optional[exp.Expression]: 4246 this = self._parse_bitwise() 4247 4248 if self._match(TokenType.USING): 4249 to: t.Optional[exp.Expression] = self.expression( 4250 exp.CharacterSet, this=self._parse_var() 4251 ) 4252 elif self._match(TokenType.COMMA): 4253 to = self._parse_types() 4254 else: 4255 to = None 4256 4257 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4258 4259 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4260 """ 4261 There are generally two variants of the DECODE function: 4262 4263 - DECODE(bin, charset) 4264 - DECODE(expression, search, result [, search, result] ... [, default]) 4265 4266 The second variant will always be parsed into a CASE expression. Note that NULL 4267 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4268 instead of relying on pattern matching. 4269 """ 4270 args = self._parse_csv(self._parse_conjunction) 4271 4272 if len(args) < 3: 4273 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4274 4275 expression, *expressions = args 4276 if not expression: 4277 return None 4278 4279 ifs = [] 4280 for search, result in zip(expressions[::2], expressions[1::2]): 4281 if not search or not result: 4282 return None 4283 4284 if isinstance(search, exp.Literal): 4285 ifs.append( 4286 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4287 ) 4288 elif isinstance(search, exp.Null): 4289 ifs.append( 4290 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4291 ) 4292 else: 4293 cond = exp.or_( 4294 exp.EQ(this=expression.copy(), expression=search), 4295 exp.and_( 4296 exp.Is(this=expression.copy(), expression=exp.Null()), 4297 exp.Is(this=search.copy(), expression=exp.Null()), 4298 copy=False, 4299 ), 4300 copy=False, 4301 ) 4302 ifs.append(exp.If(this=cond, true=result)) 4303 4304 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4305 4306 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4307 self._match_text_seq("KEY") 4308 key = self._parse_column() 4309 self._match_set((TokenType.COLON, TokenType.COMMA)) 4310 self._match_text_seq("VALUE") 4311 value = self._parse_bitwise() 4312 4313 if not key and not value: 4314 return None 4315 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4316 4317 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4318 if not this or not self._match_text_seq("FORMAT", "JSON"): 4319 return this 4320 4321 return self.expression(exp.FormatJson, this=this) 4322 4323 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4324 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4325 for value in values: 4326 if self._match_text_seq(value, "ON", on): 4327 return f"{value} ON {on}" 4328 4329 return None 4330 4331 def _parse_json_object(self) -> exp.JSONObject: 4332 star = self._parse_star() 4333 expressions = ( 4334 [star] 4335 if star 4336 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4337 ) 4338 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4339 4340 unique_keys = None 4341 if self._match_text_seq("WITH", "UNIQUE"): 4342 unique_keys = True 4343 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4344 unique_keys = False 4345 4346 self._match_text_seq("KEYS") 4347 4348 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4349 self._parse_type() 4350 ) 4351 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4352 4353 return self.expression( 4354 exp.JSONObject, 4355 expressions=expressions, 4356 null_handling=null_handling, 4357 unique_keys=unique_keys, 4358 return_type=return_type, 4359 encoding=encoding, 4360 ) 4361 4362 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4363 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4364 if not self._match_text_seq("NESTED"): 4365 this = self._parse_id_var() 4366 kind = self._parse_types(allow_identifiers=False) 4367 nested = None 4368 else: 4369 this = None 4370 kind = None 4371 nested = True 4372 4373 path = self._match_text_seq("PATH") and self._parse_string() 4374 nested_schema = nested and self._parse_json_schema() 4375 4376 return self.expression( 4377 exp.JSONColumnDef, 4378 this=this, 4379 kind=kind, 4380 path=path, 4381 nested_schema=nested_schema, 4382 ) 4383 4384 def _parse_json_schema(self) -> exp.JSONSchema: 4385 self._match_text_seq("COLUMNS") 4386 return self.expression( 4387 exp.JSONSchema, 4388 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4389 ) 4390 4391 def _parse_json_table(self) -> exp.JSONTable: 4392 this = self._parse_format_json(self._parse_bitwise()) 4393 path = self._match(TokenType.COMMA) and self._parse_string() 4394 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4395 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4396 schema = self._parse_json_schema() 4397 4398 return exp.JSONTable( 4399 this=this, 4400 schema=schema, 4401 path=path, 4402 error_handling=error_handling, 4403 empty_handling=empty_handling, 4404 ) 4405 4406 def _parse_logarithm(self) -> exp.Func: 4407 # Default argument order is base, expression 4408 args = self._parse_csv(self._parse_range) 4409 4410 if len(args) > 1: 4411 if not self.LOG_BASE_FIRST: 4412 args.reverse() 4413 return exp.Log.from_arg_list(args) 4414 4415 return self.expression( 4416 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4417 ) 4418 4419 def _parse_match_against(self) -> exp.MatchAgainst: 4420 expressions = self._parse_csv(self._parse_column) 4421 4422 self._match_text_seq(")", "AGAINST", "(") 4423 4424 this = self._parse_string() 4425 4426 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4427 modifier = "IN NATURAL LANGUAGE MODE" 4428 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4429 modifier = f"{modifier} WITH QUERY EXPANSION" 4430 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4431 modifier = "IN BOOLEAN MODE" 4432 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4433 modifier = "WITH QUERY EXPANSION" 4434 else: 4435 modifier = None 4436 4437 return 
self.expression( 4438 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4439 ) 4440 4441 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4442 def _parse_open_json(self) -> exp.OpenJSON: 4443 this = self._parse_bitwise() 4444 path = self._match(TokenType.COMMA) and self._parse_string() 4445 4446 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4447 this = self._parse_field(any_token=True) 4448 kind = self._parse_types() 4449 path = self._parse_string() 4450 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4451 4452 return self.expression( 4453 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4454 ) 4455 4456 expressions = None 4457 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4458 self._match_l_paren() 4459 expressions = self._parse_csv(_parse_open_json_column_def) 4460 4461 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4462 4463 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4464 args = self._parse_csv(self._parse_bitwise) 4465 4466 if self._match(TokenType.IN): 4467 return self.expression( 4468 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4469 ) 4470 4471 if haystack_first: 4472 haystack = seq_get(args, 0) 4473 needle = seq_get(args, 1) 4474 else: 4475 needle = seq_get(args, 0) 4476 haystack = seq_get(args, 1) 4477 4478 return self.expression( 4479 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4480 ) 4481 4482 def _parse_predict(self) -> exp.Predict: 4483 self._match_text_seq("MODEL") 4484 this = self._parse_table() 4485 4486 self._match(TokenType.COMMA) 4487 self._match_text_seq("TABLE") 4488 4489 return self.expression( 4490 exp.Predict, 4491 this=this, 4492 expression=self._parse_table(), 4493 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4494 ) 4495 4496 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4497 args = self._parse_csv(self._parse_table) 4498 return exp.JoinHint(this=func_name.upper(), expressions=args) 4499 4500 def _parse_substring(self) -> exp.Substring: 4501 # Postgres supports the form: substring(string [from int] [for int]) 4502 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4503 4504 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4505 4506 if self._match(TokenType.FROM): 4507 args.append(self._parse_bitwise()) 4508 if self._match(TokenType.FOR): 4509 args.append(self._parse_bitwise()) 4510 4511 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4512 4513 def _parse_trim(self) -> exp.Trim: 4514 # https://www.w3resource.com/sql/character-functions/trim.php 4515 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4516 4517 position = None 4518 collation = None 4519 expression = None 4520 4521 if self._match_texts(self.TRIM_TYPES): 4522 position = self._prev.text.upper() 4523 4524 this = self._parse_bitwise() 4525 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4526 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4527 expression = self._parse_bitwise() 4528 4529 if invert_order: 4530 this, expression = expression, this 4531 4532 if self._match(TokenType.COLLATE): 4533 collation = self._parse_bitwise() 4534 4535 return self.expression( 4536 exp.Trim, this=this, position=position, expression=expression, collation=collation 4537 ) 4538 4539 def 
_parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4540 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4541 4542 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4543 return self._parse_window(self._parse_id_var(), alias=True) 4544 4545 def _parse_respect_or_ignore_nulls( 4546 self, this: t.Optional[exp.Expression] 4547 ) -> t.Optional[exp.Expression]: 4548 if self._match_text_seq("IGNORE", "NULLS"): 4549 return self.expression(exp.IgnoreNulls, this=this) 4550 if self._match_text_seq("RESPECT", "NULLS"): 4551 return self.expression(exp.RespectNulls, this=this) 4552 return this 4553 4554 def _parse_window( 4555 self, this: t.Optional[exp.Expression], alias: bool = False 4556 ) -> t.Optional[exp.Expression]: 4557 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4558 self._match(TokenType.WHERE) 4559 this = self.expression( 4560 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4561 ) 4562 self._match_r_paren() 4563 4564 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4565 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4566 if self._match_text_seq("WITHIN", "GROUP"): 4567 order = self._parse_wrapped(self._parse_order) 4568 this = self.expression(exp.WithinGroup, this=this, expression=order) 4569 4570 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4571 # Some dialects choose to implement and some do not. 4572 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4573 4574 # There is some code above in _parse_lambda that handles 4575 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4576 4577 # The below changes handle 4578 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4579 4580 # Oracle allows both formats 4581 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4582 # and Snowflake chose to do the same for familiarity 4583 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4584 this = self._parse_respect_or_ignore_nulls(this) 4585 4586 # bigquery select from window x AS (partition by ...) 
4587 if alias: 4588 over = None 4589 self._match(TokenType.ALIAS) 4590 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4591 return this 4592 else: 4593 over = self._prev.text.upper() 4594 4595 if not self._match(TokenType.L_PAREN): 4596 return self.expression( 4597 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4598 ) 4599 4600 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4601 4602 first = self._match(TokenType.FIRST) 4603 if self._match_text_seq("LAST"): 4604 first = False 4605 4606 partition, order = self._parse_partition_and_order() 4607 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4608 4609 if kind: 4610 self._match(TokenType.BETWEEN) 4611 start = self._parse_window_spec() 4612 self._match(TokenType.AND) 4613 end = self._parse_window_spec() 4614 4615 spec = self.expression( 4616 exp.WindowSpec, 4617 kind=kind, 4618 start=start["value"], 4619 start_side=start["side"], 4620 end=end["value"], 4621 end_side=end["side"], 4622 ) 4623 else: 4624 spec = None 4625 4626 self._match_r_paren() 4627 4628 window = self.expression( 4629 exp.Window, 4630 this=this, 4631 partition_by=partition, 4632 order=order, 4633 spec=spec, 4634 alias=window_alias, 4635 over=over, 4636 first=first, 4637 ) 4638 4639 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 4640 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4641 return self._parse_window(window, alias=alias) 4642 4643 return window 4644 4645 def _parse_partition_and_order( 4646 self, 4647 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4648 return self._parse_partition_by(), self._parse_order() 4649 4650 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4651 self._match(TokenType.BETWEEN) 4652 4653 return { 4654 "value": ( 4655 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4656 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4657 or self._parse_bitwise() 4658 ), 4659 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4660 } 4661 4662 def _parse_alias( 4663 self, this: t.Optional[exp.Expression], explicit: bool = False 4664 ) -> t.Optional[exp.Expression]: 4665 any_token = self._match(TokenType.ALIAS) 4666 4667 if explicit and not any_token: 4668 return this 4669 4670 if self._match(TokenType.L_PAREN): 4671 aliases = self.expression( 4672 exp.Aliases, 4673 this=this, 4674 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4675 ) 4676 self._match_r_paren(aliases) 4677 return aliases 4678 4679 alias = self._parse_id_var(any_token) 4680 4681 if alias: 4682 return self.expression(exp.Alias, this=this, alias=alias) 4683 4684 return this 4685 4686 def _parse_id_var( 4687 self, 4688 any_token: bool = True, 4689 tokens: t.Optional[t.Collection[TokenType]] = None, 4690 ) -> t.Optional[exp.Expression]: 4691 identifier = self._parse_identifier() 4692 4693 if identifier: 4694 return identifier 4695 4696 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4697 quoted = self._prev.token_type == TokenType.STRING 4698 return exp.Identifier(this=self._prev.text, quoted=quoted) 4699 4700 return None 4701 4702 def _parse_string(self) -> t.Optional[exp.Expression]: 4703 if self._match(TokenType.STRING): 4704 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4705 return self._parse_placeholder() 4706 4707 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4708 return 
exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4709 4710 def _parse_number(self) -> t.Optional[exp.Expression]: 4711 if self._match(TokenType.NUMBER): 4712 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4713 return self._parse_placeholder() 4714 4715 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4716 if self._match(TokenType.IDENTIFIER): 4717 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4718 return self._parse_placeholder() 4719 4720 def _parse_var( 4721 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4722 ) -> t.Optional[exp.Expression]: 4723 if ( 4724 (any_token and self._advance_any()) 4725 or self._match(TokenType.VAR) 4726 or (self._match_set(tokens) if tokens else False) 4727 ): 4728 return self.expression(exp.Var, this=self._prev.text) 4729 return self._parse_placeholder() 4730 4731 def _advance_any(self) -> t.Optional[Token]: 4732 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4733 self._advance() 4734 return self._prev 4735 return None 4736 4737 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4738 return self._parse_var() or self._parse_string() 4739 4740 def _parse_null(self) -> t.Optional[exp.Expression]: 4741 if self._match_set(self.NULL_TOKENS): 4742 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4743 return self._parse_placeholder() 4744 4745 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4746 if self._match(TokenType.TRUE): 4747 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4748 if self._match(TokenType.FALSE): 4749 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4750 return self._parse_placeholder() 4751 4752 def _parse_star(self) -> t.Optional[exp.Expression]: 4753 if self._match(TokenType.STAR): 4754 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4755 return self._parse_placeholder() 4756 4757 def _parse_parameter(self) -> exp.Parameter: 4758 wrapped = self._match(TokenType.L_BRACE) 4759 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4760 self._match(TokenType.R_BRACE) 4761 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4762 4763 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4764 if self._match_set(self.PLACEHOLDER_PARSERS): 4765 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4766 if placeholder: 4767 return placeholder 4768 self._advance(-1) 4769 return None 4770 4771 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4772 if not self._match(TokenType.EXCEPT): 4773 return None 4774 if self._match(TokenType.L_PAREN, advance=False): 4775 return self._parse_wrapped_csv(self._parse_column) 4776 4777 except_column = self._parse_column() 4778 return [except_column] if except_column else None 4779 4780 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4781 if not self._match(TokenType.REPLACE): 4782 return None 4783 if self._match(TokenType.L_PAREN, advance=False): 4784 return self._parse_wrapped_csv(self._parse_expression) 4785 4786 replace_expression = self._parse_expression() 4787 return [replace_expression] if replace_expression else None 4788 4789 def _parse_csv( 4790 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4791 ) -> t.List[exp.Expression]: 4792 parse_result = parse_method() 4793 items = [parse_result] if parse_result is not None else [] 4794 4795 while self._match(sep): 4796 self._add_comments(parse_result) 
4797 parse_result = parse_method() 4798 if parse_result is not None: 4799 items.append(parse_result) 4800 4801 return items 4802 4803 def _parse_tokens( 4804 self, parse_method: t.Callable, expressions: t.Dict 4805 ) -> t.Optional[exp.Expression]: 4806 this = parse_method() 4807 4808 while self._match_set(expressions): 4809 this = self.expression( 4810 expressions[self._prev.token_type], 4811 this=this, 4812 comments=self._prev_comments, 4813 expression=parse_method(), 4814 ) 4815 4816 return this 4817 4818 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4819 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4820 4821 def _parse_wrapped_csv( 4822 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4823 ) -> t.List[exp.Expression]: 4824 return self._parse_wrapped( 4825 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4826 ) 4827 4828 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4829 wrapped = self._match(TokenType.L_PAREN) 4830 if not wrapped and not optional: 4831 self.raise_error("Expecting (") 4832 parse_result = parse_method() 4833 if wrapped: 4834 self._match_r_paren() 4835 return parse_result 4836 4837 def _parse_expressions(self) -> t.List[exp.Expression]: 4838 return self._parse_csv(self._parse_expression) 4839 4840 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4841 return self._parse_select() or self._parse_set_operations( 4842 self._parse_expression() if alias else self._parse_conjunction() 4843 ) 4844 4845 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4846 return self._parse_query_modifiers( 4847 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4848 ) 4849 4850 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4851 this = None 4852 if self._match_texts(self.TRANSACTION_KIND): 4853 this = self._prev.text 4854 4855 self._match_texts({"TRANSACTION", "WORK"}) 4856 4857 modes = [] 4858 while True: 4859 mode = [] 4860 while self._match(TokenType.VAR): 4861 mode.append(self._prev.text) 4862 4863 if mode: 4864 modes.append(" ".join(mode)) 4865 if not self._match(TokenType.COMMA): 4866 break 4867 4868 return self.expression(exp.Transaction, this=this, modes=modes) 4869 4870 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4871 chain = None 4872 savepoint = None 4873 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4874 4875 self._match_texts({"TRANSACTION", "WORK"}) 4876 4877 if self._match_text_seq("TO"): 4878 self._match_text_seq("SAVEPOINT") 4879 savepoint = self._parse_id_var() 4880 4881 if self._match(TokenType.AND): 4882 chain = not self._match_text_seq("NO") 4883 self._match_text_seq("CHAIN") 4884 4885 if is_rollback: 4886 return self.expression(exp.Rollback, savepoint=savepoint) 4887 4888 return self.expression(exp.Commit, chain=chain) 4889 4890 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4891 if not self._match_text_seq("ADD"): 4892 return None 4893 4894 self._match(TokenType.COLUMN) 4895 exists_column = self._parse_exists(not_=True) 4896 expression = self._parse_field_def() 4897 4898 if expression: 4899 expression.set("exists", exists_column) 4900 4901 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4902 if self._match_texts(("FIRST", "AFTER")): 4903 position = self._prev.text 4904 column_position = self.expression( 4905 
exp.ColumnPosition, this=self._parse_column(), position=position 4906 ) 4907 expression.set("position", column_position) 4908 4909 return expression 4910 4911 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4912 drop = self._match(TokenType.DROP) and self._parse_drop() 4913 if drop and not isinstance(drop, exp.Command): 4914 drop.set("kind", drop.args.get("kind", "COLUMN")) 4915 return drop 4916 4917 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4918 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4919 return self.expression( 4920 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4921 ) 4922 4923 def _parse_add_constraint(self) -> exp.AddConstraint: 4924 this = None 4925 kind = self._prev.token_type 4926 4927 if kind == TokenType.CONSTRAINT: 4928 this = self._parse_id_var() 4929 4930 if self._match_text_seq("CHECK"): 4931 expression = self._parse_wrapped(self._parse_conjunction) 4932 enforced = self._match_text_seq("ENFORCED") 4933 4934 return self.expression( 4935 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4936 ) 4937 4938 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4939 expression = self._parse_foreign_key() 4940 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4941 expression = self._parse_primary_key() 4942 else: 4943 expression = None 4944 4945 return self.expression(exp.AddConstraint, this=this, expression=expression) 4946 4947 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4948 index = self._index - 1 4949 4950 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4951 return self._parse_csv(self._parse_add_constraint) 4952 4953 self._retreat(index) 4954 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4955 return self._parse_csv(self._parse_field_def) 4956 4957 return self._parse_csv(self._parse_add_column) 4958 4959 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4960 self._match(TokenType.COLUMN) 4961 column = self._parse_field(any_token=True) 4962 4963 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4964 return self.expression(exp.AlterColumn, this=column, drop=True) 4965 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4966 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4967 4968 self._match_text_seq("SET", "DATA") 4969 return self.expression( 4970 exp.AlterColumn, 4971 this=column, 4972 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4973 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4974 using=self._match(TokenType.USING) and self._parse_conjunction(), 4975 ) 4976 4977 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4978 index = self._index - 1 4979 4980 partition_exists = self._parse_exists() 4981 if self._match(TokenType.PARTITION, advance=False): 4982 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4983 4984 self._retreat(index) 4985 return self._parse_csv(self._parse_drop_column) 4986 4987 def _parse_alter_table_rename(self) -> exp.RenameTable: 4988 self._match_text_seq("TO") 4989 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4990 4991 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4992 start = self._prev 4993 4994 if not self._match(TokenType.TABLE): 4995 return self._parse_as_command(start) 4996 4997 exists = self._parse_exists() 4998 only = 
self._match_text_seq("ONLY") 4999 this = self._parse_table(schema=True) 5000 5001 if self._next: 5002 self._advance() 5003 5004 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5005 if parser: 5006 actions = ensure_list(parser(self)) 5007 5008 if not self._curr: 5009 return self.expression( 5010 exp.AlterTable, 5011 this=this, 5012 exists=exists, 5013 actions=actions, 5014 only=only, 5015 ) 5016 5017 return self._parse_as_command(start) 5018 5019 def _parse_merge(self) -> exp.Merge: 5020 self._match(TokenType.INTO) 5021 target = self._parse_table() 5022 5023 if target and self._match(TokenType.ALIAS, advance=False): 5024 target.set("alias", self._parse_table_alias()) 5025 5026 self._match(TokenType.USING) 5027 using = self._parse_table() 5028 5029 self._match(TokenType.ON) 5030 on = self._parse_conjunction() 5031 5032 return self.expression( 5033 exp.Merge, 5034 this=target, 5035 using=using, 5036 on=on, 5037 expressions=self._parse_when_matched(), 5038 ) 5039 5040 def _parse_when_matched(self) -> t.List[exp.When]: 5041 whens = [] 5042 5043 while self._match(TokenType.WHEN): 5044 matched = not self._match(TokenType.NOT) 5045 self._match_text_seq("MATCHED") 5046 source = ( 5047 False 5048 if self._match_text_seq("BY", "TARGET") 5049 else self._match_text_seq("BY", "SOURCE") 5050 ) 5051 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5052 5053 self._match(TokenType.THEN) 5054 5055 if self._match(TokenType.INSERT): 5056 _this = self._parse_star() 5057 if _this: 5058 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5059 else: 5060 then = self.expression( 5061 exp.Insert, 5062 this=self._parse_value(), 5063 expression=self._match(TokenType.VALUES) and self._parse_value(), 5064 ) 5065 elif self._match(TokenType.UPDATE): 5066 expressions = self._parse_star() 5067 if expressions: 5068 then = self.expression(exp.Update, expressions=expressions) 5069 else: 5070 then = self.expression( 5071 exp.Update, 5072 expressions=self._match(TokenType.SET) 5073 and self._parse_csv(self._parse_equality), 5074 ) 5075 elif self._match(TokenType.DELETE): 5076 then = self.expression(exp.Var, this=self._prev.text) 5077 else: 5078 then = None 5079 5080 whens.append( 5081 self.expression( 5082 exp.When, 5083 matched=matched, 5084 source=source, 5085 condition=condition, 5086 then=then, 5087 ) 5088 ) 5089 return whens 5090 5091 def _parse_show(self) -> t.Optional[exp.Expression]: 5092 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5093 if parser: 5094 return parser(self) 5095 return self._parse_as_command(self._prev) 5096 5097 def _parse_set_item_assignment( 5098 self, kind: t.Optional[str] = None 5099 ) -> t.Optional[exp.Expression]: 5100 index = self._index 5101 5102 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 5103 return self._parse_set_transaction(global_=kind == "GLOBAL") 5104 5105 left = self._parse_primary() or self._parse_id_var() 5106 assignment_delimiter = self._match_texts(("=", "TO")) 5107 5108 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5109 self._retreat(index) 5110 return None 5111 5112 right = self._parse_statement() or self._parse_id_var() 5113 this = self.expression(exp.EQ, this=left, expression=right) 5114 5115 return self.expression(exp.SetItem, this=this, kind=kind) 5116 5117 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5118 self._match_text_seq("TRANSACTION") 5119 characteristics = 
self._parse_csv( 5120 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5121 ) 5122 return self.expression( 5123 exp.SetItem, 5124 expressions=characteristics, 5125 kind="TRANSACTION", 5126 **{"global": global_}, # type: ignore 5127 ) 5128 5129 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5130 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5131 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5132 5133 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5134 index = self._index 5135 set_ = self.expression( 5136 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5137 ) 5138 5139 if self._curr: 5140 self._retreat(index) 5141 return self._parse_as_command(self._prev) 5142 5143 return set_ 5144 5145 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5146 for option in options: 5147 if self._match_text_seq(*option.split(" ")): 5148 return exp.var(option) 5149 return None 5150 5151 def _parse_as_command(self, start: Token) -> exp.Command: 5152 while self._curr: 5153 self._advance() 5154 text = self._find_sql(start, self._prev) 5155 size = len(start.text) 5156 return exp.Command(this=text[:size], expression=text[size:]) 5157 5158 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5159 settings = [] 5160 5161 self._match_l_paren() 5162 kind = self._parse_id_var() 5163 5164 if self._match(TokenType.L_PAREN): 5165 while True: 5166 key = self._parse_id_var() 5167 value = self._parse_primary() 5168 5169 if not key and value is None: 5170 break 5171 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5172 self._match(TokenType.R_PAREN) 5173 5174 self._match_r_paren() 5175 5176 return self.expression( 5177 exp.DictProperty, 5178 this=this, 5179 kind=kind.this if kind else None, 5180 settings=settings, 5181 ) 5182 5183 def _parse_dict_range(self, this: str) -> exp.DictRange: 5184 self._match_l_paren() 5185 has_min = self._match_text_seq("MIN") 5186 if has_min: 5187 min = self._parse_var() or self._parse_primary() 5188 self._match_text_seq("MAX") 5189 max = self._parse_var() or self._parse_primary() 5190 else: 5191 max = self._parse_var() or self._parse_primary() 5192 min = exp.Literal.number(0) 5193 self._match_r_paren() 5194 return self.expression(exp.DictRange, this=this, min=min, max=max) 5195 5196 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5197 index = self._index 5198 expression = self._parse_column() 5199 if not self._match(TokenType.IN): 5200 self._retreat(index - 1) 5201 return None 5202 iterator = self._parse_column() 5203 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5204 return self.expression( 5205 exp.Comprehension, 5206 this=this, 5207 expression=expression, 5208 iterator=iterator, 5209 condition=condition, 5210 ) 5211 5212 def _find_parser( 5213 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5214 ) -> t.Optional[t.Callable]: 5215 if not self._curr: 5216 return None 5217 5218 index = self._index 5219 this = [] 5220 while True: 5221 # The current token might be multiple words 5222 curr = self._curr.text.upper() 5223 key = curr.split(" ") 5224 this.append(curr) 5225 5226 self._advance() 5227 result, trie = in_trie(trie, key) 5228 if result == TrieResult.FAILED: 5229 break 5230 5231 if result == TrieResult.EXISTS: 5232 subparser = parsers[" ".join(this)] 5233 return subparser 5234 5235 
self._retreat(index) 5236 return None 5237 5238 def _match(self, token_type, advance=True, expression=None): 5239 if not self._curr: 5240 return None 5241 5242 if self._curr.token_type == token_type: 5243 if advance: 5244 self._advance() 5245 self._add_comments(expression) 5246 return True 5247 5248 return None 5249 5250 def _match_set(self, types, advance=True): 5251 if not self._curr: 5252 return None 5253 5254 if self._curr.token_type in types: 5255 if advance: 5256 self._advance() 5257 return True 5258 5259 return None 5260 5261 def _match_pair(self, token_type_a, token_type_b, advance=True): 5262 if not self._curr or not self._next: 5263 return None 5264 5265 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5266 if advance: 5267 self._advance(2) 5268 return True 5269 5270 return None 5271 5272 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5273 if not self._match(TokenType.L_PAREN, expression=expression): 5274 self.raise_error("Expecting (") 5275 5276 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5277 if not self._match(TokenType.R_PAREN, expression=expression): 5278 self.raise_error("Expecting )") 5279 5280 def _match_texts(self, texts, advance=True): 5281 if self._curr and self._curr.text.upper() in texts: 5282 if advance: 5283 self._advance() 5284 return True 5285 return False 5286 5287 def _match_text_seq(self, *texts, advance=True): 5288 index = self._index 5289 for text in texts: 5290 if self._curr and self._curr.text.upper() == text: 5291 self._advance() 5292 else: 5293 self._retreat(index) 5294 return False 5295 5296 if not advance: 5297 self._retreat(index) 5298 5299 return True 5300 5301 @t.overload 5302 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5303 ... 5304 5305 @t.overload 5306 def _replace_columns_with_dots( 5307 self, this: t.Optional[exp.Expression] 5308 ) -> t.Optional[exp.Expression]: 5309 ... 5310 5311 def _replace_columns_with_dots(self, this): 5312 if isinstance(this, exp.Dot): 5313 exp.replace_children(this, self._replace_columns_with_dots) 5314 elif isinstance(this, exp.Column): 5315 exp.replace_children(this, self._replace_columns_with_dots) 5316 table = this.args.get("table") 5317 this = ( 5318 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5319 ) 5320 5321 return this 5322 5323 def _replace_lambda( 5324 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5325 ) -> t.Optional[exp.Expression]: 5326 if not node: 5327 return node 5328 5329 for column in node.find_all(exp.Column): 5330 if column.parts[0].name in lambda_variables: 5331 dot_or_id = column.to_dot() if column.table else column.this 5332 parent = column.parent 5333 5334 while isinstance(parent, exp.Dot): 5335 if not isinstance(parent.parent, exp.Dot): 5336 parent.replace(dot_or_id) 5337 break 5338 parent = parent.parent 5339 else: 5340 if column is node: 5341 node = dot_or_id 5342 else: 5343 column.replace(dot_or_id) 5344 return node 5345 5346 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5347 return [ 5348 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5349 for value in values 5350 if value 5351 ]
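The listing above also shows a few behaviors worth calling out: _parse_column_ops turns a Postgres-style x::int cast into an exp.Cast node, and _parse_concat collapses a single-argument CONCAT call into its argument (some dialects, e.g. Trino, reject single-argument CONCAT). A quick sketch of the observable behavior through the public API, assuming the default dialect:

    import sqlglot
    from sqlglot import exp

    # The :: column operator becomes a CAST (see _parse_column_ops above).
    tree = sqlglot.parse_one("SELECT a::int FROM t")
    print(tree.find(exp.Cast).sql())  # CAST(a AS INT)

    # Single-argument CONCAT is replaced by its argument (see _parse_concat),
    # so this should round-trip to simply "SELECT a".
    print(sqlglot.parse_one("SELECT CONCAT(a)").sql())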
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()
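As a usage sketch (not part of the module), the error level chosen at construction time determines how the methods below report problems:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Default (IMMEDIATE): raise on the first error encountered
    strict = Parser()

    # RAISE: collect errors while parsing, then raise up to max_errors of them at once
    batched = Parser(error_level=ErrorLevel.RAISE, max_errors=5)

    # IGNORE: expression validation is skipped and nothing is raised or logged
    lenient = Parser(error_level=ErrorLevel.IGNORE)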
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
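A short sketch of the tokenize-then-parse round trip; the SQL string is arbitrary:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; SELECT b FROM u"
    tokens = Tokenizer().tokenize(sql)

    trees = Parser().parse(tokens, sql=sql)  # one tree per statement
    assert len(trees) == 2
    print(trees[0].sql())  # SELECT a FROM t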
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
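A hedged sketch of targeting a specific expression type; this assumes exp.Select is registered in EXPRESSION_PARSERS, and a list such as [exp.From, exp.Join] would be attempted in order:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    tokens = Tokenizer().tokenize(sql)

    # Parse the tokens specifically as a SELECT statement
    select = Parser().parse_into(exp.Select, tokens, sql=sql)[0]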
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
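check_errors runs at the end of each internal _parse call, so its effect is easiest to see through the error level. A small sketch with WARN, where recorded errors are logged instead of raised (the malformed SQL is arbitrary):

    import logging
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    logging.basicConfig()

    sql = "SELECT * FROM (SELECT 1"  # missing closing parenthesis
    # With WARN, each recorded error is emitted via the "sqlglot" logger
    Parser(error_level=ErrorLevel.WARN).parse(Tokenizer().tokenize(sql), sql=sql)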
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
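The structured fields that raise_error passes to ParseError.new can be inspected on a caught exception; a self-contained sketch under the RAISE error level:

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT * FROM (SELECT 1"
    try:
        Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(sql), sql=sql)
    except ParseError as e:
        first = e.errors[0]
        # The kwargs above become per-error metadata on the exception
        print(first["description"], first["line"], first["col"], first["highlight"])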
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
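A small sketch of the validation this method performs; the node choices are illustrative:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()

    # Valid: exp.Not requires only "this", which we provide
    node = parser.expression(exp.Not, this=exp.column("x"))

    # Invalid: exp.Cast also requires a "to" data type, so at the default
    # IMMEDIATE error level this raises a ParseError from validate_expression
    parser.expression(exp.Cast, this=exp.column("x"))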
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
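Conversely, with ErrorLevel.IGNORE validation is bypassed entirely; a brief sketch:

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # The mandatory "to" argument of exp.Cast is missing, but with IGNORE
    # the node is returned untouched instead of triggering raise_error
    lenient = Parser(error_level=ErrorLevel.IGNORE)
    node = lenient.validate_expression(exp.Cast(this=exp.column("x")))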