Files
email-tracker/external/duckdb/third_party/libpg_query/grammar/grammar.y
2025-10-24 19:21:19 -05:00

224 lines
7.3 KiB
Plaintext

%{
{{{ GRAMMAR_HEADER }}}
%}
#line 5 "third_party/libpg_query/grammar/grammar.y"
/* Generate a reentrant (pure) parser instead of one using global state. */
%pure-parser
/* The grammar is expected to produce no shift/reduce conflicts. */
%expect 0
/* Prefix the generated parser's external symbols with base_yy instead of yy. */
%name-prefix="base_yy"
/* Enable location tracking so that actions can refer to symbol locations. */
%locations
/* The scanner handle is an extra argument of the parser and of each lexer call. */
%parse-param {core_yyscan_t yyscanner}
%lex-param {core_yyscan_t yyscanner}
/*
 * Semantic value type of the parser.  Each member name below is the <tag>
 * used by the %token and %type declarations (for example <str>, <ival>,
 * <node>, <list>) to select which member holds a symbol's value.
 */
%union
{
/* the core scanner's value type; being a union member, it overlays the fields below */
core_YYSTYPE core_yystype;
/* these fields must match core_YYSTYPE: */
int ival;
char *str;
const char *keyword;
const char *conststr;
char chr;
bool boolean;
PGJoinType jtype;
PGDropBehavior dbehavior;
PGOnCommitAction oncommit;
PGOnCreateConflict oncreateconflict;
PGList *list;
PGNode *node;
PGValue *value;
PGObjectType objtype;
PGTypeName *typnam;
PGObjectWithArgs *objwithargs;
PGDefElem *defelt;
PGSortBy *sortby;
PGWindowDef *windef;
PGJoinExpr *jexpr;
PGIndexElem *ielem;
PGAlias *alias;
PGRangeVar *range;
PGIntoClause *into;
PGCTEMaterialize ctematerialize;
PGWithClause *with;
PGInferClause *infer;
PGOnConflictClause *onconflict;
PGOnConflictActionAlias onconflictshorthand;
PGAIndices *aind;
PGResTarget *target;
PGInsertStmt *istmt;
PGVariableSetStmt *vsetstmt;
PGOverridingKind override;
PGSortByDir sortorder;
PGSortByNulls nullorder;
PGIgnoreNulls ignorenulls;
PGConstrType constr;
PGLockClauseStrength lockstrength;
PGLockWaitPolicy lockwaitpolicy;
PGSubLinkType subquerytype;
PGViewCheckOption viewcheckoption;
PGInsertColumnOrder bynameorposition;
PGLoadInstallType loadinstalltype;
PGTransactionStmtType transactiontype;
PGMergeAction mergeaction;
}
/*
 * Value types of the top-level statement nonterminals: stmt carries the
 * union's node member (PGNode *), while stmtblock and stmtmulti carry the
 * list member (PGList *).
 */
%type <node> stmt
%type <list> stmtblock
%type <list> stmtmulti
{{{ TYPES }}}
/*
* Non-keyword token types. These are hard-wired into the "flex" lexer.
* They must be listed first so that their numeric codes do not depend on
* the set of keywords. PL/pgSQL depends on this so that it can share the
* same lexer. If you add/change tokens here, fix PL/pgSQL to match!
*
* DOT_DOT is unused in the core SQL grammar, and so will always provoke
* parse errors. It is needed by PL/pgSQL.
*/
/* Tokens whose semantic value is the union's str member (char *). */
%token <str> IDENT FCONST SCONST BCONST XCONST Op
/* Tokens whose semantic value is the union's ival member (int). */
%token <ival> ICONST PARAM
/* Tokens declared without a semantic value type. */
%token TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER INTEGER_DIVISION POWER_OF SINGLE_ARROW DOUBLE_ARROW SINGLE_COLON
%token LESS_EQUALS GREATER_EQUALS NOT_EQUALS
/*
* If you want to make any keyword changes, update the keyword table in
* src/include/parser/kwlist.h and add new keywords to the appropriate one
* of the reserved-or-not-so-reserved keyword lists, below; search
* this file for "Keyword category lists".
*/
/* ordinary key words in alphabetical order */
{{{ KEYWORDS }}}
/*
* The grammar thinks these are keywords, but they are not in the kwlist.h
* list and so can never be entered directly. The filter in parser.c
* creates these tokens when required (based on looking one token ahead).
*
* NOT_LA exists so that productions such as NOT LIKE can be given the same
* precedence as LIKE; otherwise they'd effectively have the same precedence
* as NOT, at least with respect to their left-hand subexpression.
* NULLS_LA and WITH_LA are needed to make the grammar LALR(1).
*/
/* Lookahead pseudo-tokens (see the comment above); declared without a value type. */
%token NOT_LA NULLS_LA WITH_LA
/*
 * Precedence: lowest to highest.  Each declaration below binds more tightly
 * than the ones before it; symbols on the same line share one precedence
 * level and the associativity named by the directive.
 */
%left SINGLE_COLON
%nonassoc SET /* see relation_expr_opt_alias */
%left UNION EXCEPT
%left INTERSECT
%left SINGLE_ARROW DOUBLE_ARROW
%left OR
%left AND
%right NOT
%nonassoc IS ISNULL NOTNULL /* IS sets precedence for IS NULL, etc */
%nonassoc '<' '>' '=' LESS_EQUALS GREATER_EQUALS NOT_EQUALS
%nonassoc BETWEEN IN_P GLOB LIKE ILIKE SIMILAR NOT_LA
%nonassoc ESCAPE /* ESCAPE must be just above LIKE/ILIKE/SIMILAR */
%left POSTFIXOP /* dummy for postfix Op rules */
/*
* To support target_el without AS, we must give IDENT an explicit priority
* between POSTFIXOP and Op. We can safely assign the same priority to
* various unreserved keywords as needed to resolve ambiguities (this can't
* have any bad effects since obviously the keywords will still behave the
* same as if they weren't keywords). We need to do this for PARTITION,
* RANGE, ROWS to support opt_existing_window_name; and for RANGE, ROWS
* so that they can follow a_expr without creating postfix-operator problems;
* for GENERATED so that it can follow b_expr;
* and for NULL so that it can follow b_expr in ColQualList without creating
* postfix-operator problems.
*
* To support CUBE and ROLLUP in GROUP BY without reserving them, we give them
* an explicit priority lower than '(', so that a rule with CUBE '(' will shift
* rather than reducing a conflicting rule that takes CUBE as a function name.
* Using the same precedence as IDENT seems right for the reasons given above.
*
* The frame_bound productions UNBOUNDED PRECEDING and UNBOUNDED FOLLOWING
* are even messier: since UNBOUNDED is an unreserved keyword (per spec!),
* there is no principled way to distinguish these from the productions
* a_expr PRECEDING/FOLLOWING. We hack this up by giving UNBOUNDED slightly
* lower precedence than PRECEDING and FOLLOWING. At present this doesn't
* appear to cause UNBOUNDED to be treated differently from other unreserved
* keywords anywhere else in the grammar, but it's definitely risky. We can
* blame any funny behavior of UNBOUNDED on the SQL standard, though.
*/
%nonassoc UNBOUNDED /* ideally should have same precedence as IDENT */
%nonassoc IDENT GENERATED NULL_P PARTITION RANGE ROWS GROUPS PRECEDING FOLLOWING CUBE ROLLUP ENUM_P
%left Op OPERATOR /* multi-character ops and user-defined operators */
%left '+' '-'
%left '*' '/' '%' INTEGER_DIVISION
%left '^' POWER_OF
/* Unary Operators */
%left AT /* sets precedence for AT TIME ZONE */
%left COLLATE
%right UMINUS
%left '[' ']'
%left '(' ')'
%left TYPECAST
%left '.'
/*
* These might seem to be low-precedence, but actually they are not part
* of the arithmetic hierarchy at all in their use as JOIN operators.
* We make them high-precedence to support their use as function names.
* They wouldn't be given a precedence at all, were it not that we need
* left-associativity among the JOIN rules themselves.
*/
%left JOIN CROSS LEFT FULL RIGHT INNER_P NATURAL POSITIONAL PIVOT UNPIVOT ANTI SEMI ASOF
/* kluge to keep from causing shift/reduce conflicts */
%right PRESERVE STRIP_P IGNORE_P RESPECT_P
%%
/*
* The target production for the whole parse.
*
* Its only alternative is the statement list built by stmtmulti.  The action
* does not assign a result value of its own; it stores that list in the
* parsetree field of the object returned by pg_yyget_extra(yyscanner).
*/
stmtblock: stmtmulti
{
/* record the finished statement list in the scanner's extra data */
pg_yyget_extra(yyscanner)->parsetree = $1;
}
;
/*
* At top level, we wrap each stmt with a PGRawStmt node carrying start location
* and length of the stmt's text. Notice that the start loc/len are driven
* entirely from semicolon locations (@2). It would seem natural to use
* @1 or @3 to get the true start location of a stmt, but that doesn't work
* for statements that can start with empty nonterminals (opt_with_clause is
* the main offender here); as noted in the comments for YYLLOC_DEFAULT,
* we'd get -1 for the location in such cases.
* We also take care to discard empty statements entirely.
*/
stmtmulti: stmtmulti ';' stmt
	{
		/*
		 * The semicolon closes whatever statement precedes it, so its
		 * location is passed on to set the end of the last PGRawStmt
		 * already in the list (nothing to do when the list is empty).
		 */
		if ($1 != NIL)
			updateRawStmtEnd(llast_node(PGRawStmt, $1), @2);

		/*
		 * An empty statement is discarded and the list is passed through
		 * unchanged; otherwise the statement is wrapped, starting one
		 * position past the semicolon, and appended.
		 */
		$$ = ($3 == NULL) ? $1 : lappend($1, makeRawStmt($3, @2 + 1));
	}
	| stmt
	{
		/*
		 * First statement of the input: it starts at position 0.  An
		 * empty statement produces an empty list.
		 */
		$$ = ($1 == NULL) ? NIL : list_make1(makeRawStmt($1, 0));
	}
	;
{{{ STATEMENTS }}}
{{{ GRAMMAR RULES }}}
{{{ KEYWORD_DEFINITIONS }}}
%%
{{{ GRAMMAR_SOURCE }}}