should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,12 @@
# Compile the PEG transformer sources into the OBJECT library target
# duckdb_peg_transformer. add_library_unity is a project-provided helper that is
# not defined in this file.
add_library_unity(
duckdb_peg_transformer
OBJECT
peg_transformer.cpp
peg_transformer_factory.cpp
transform_common.cpp
transform_expression.cpp
transform_set.cpp
transform_use.cpp)
# Append this target's object files to AUTOCOMPLETE_EXTENSION_FILES and publish
# the extended list to the parent directory scope.
set(AUTOCOMPLETE_EXTENSION_FILES
${AUTOCOMPLETE_EXTENSION_FILES} $<TARGET_OBJECTS:duckdb_peg_transformer>
PARENT_SCOPE)

View File

@@ -0,0 +1,47 @@
#include "transformer/peg_transformer.hpp"
#include "duckdb/parser/statement/set_statement.hpp"
#include "duckdb/common/string_util.hpp"
namespace duckdb {
// Verifies that a newly encountered prepared-statement parameter is of the same
// family (named vs. anything else) as the previously seen one.
// A previous type of INVALID means there is nothing to compare against yet.
// Throws NotImplementedException when named and non-named parameters are mixed.
void PEGTransformer::ParamTypeCheck(PreparedParamType last_type, PreparedParamType new_type) {
	if (last_type == PreparedParamType::INVALID) {
		// no earlier parameter recorded: every type is acceptable
		return;
	}
	const bool previous_is_named = last_type == PreparedParamType::NAMED;
	const bool current_is_named = new_type == PreparedParamType::NAMED;
	if (previous_is_named != current_is_named) {
		// Mixing positional/auto-increment and named parameters is not supported
		throw NotImplementedException("Mixing named and positional parameters is not supported yet");
	}
}
// Looks up the index registered for `identifier`.
// The parameter type is validated against the last seen type first (may throw).
// Returns true and writes `index` when the identifier is known; returns false and
// leaves `index` untouched otherwise.
bool PEGTransformer::GetParam(const string &identifier, idx_t &index, PreparedParamType type) {
	ParamTypeCheck(last_param_type, type);
	auto lookup = named_parameter_map.find(identifier);
	const bool found = lookup != named_parameter_map.end();
	if (found) {
		index = lookup->second;
	}
	return found;
}
// Registers `identifier` -> `index` and remembers `type` as the last seen
// parameter type. The type is validated against the previous one first (may throw).
void PEGTransformer::SetParam(const string &identifier, idx_t index, PreparedParamType type) {
	ParamTypeCheck(last_param_type, type);
	last_param_type = type;
	// the identifier is expected not to be registered yet
	D_ASSERT(named_parameter_map.count(identifier) == 0);
	named_parameter_map[identifier] = index;
}
// Forgets all registered parameters and restarts the parameter index at zero.
// NOTE(review): last_param_type is not reset here - confirm that this is intended.
void PEGTransformer::ClearParameters() {
	named_parameter_map.clear();
	prepared_statement_parameter_index = 0;
}
} // namespace duckdb

View File

@@ -0,0 +1,116 @@
#include "transformer/peg_transformer.hpp"
#include "matcher.hpp"
#include "duckdb/common/to_string.hpp"
#include "duckdb/parser/sql_statement.hpp"
#include "duckdb/parser/tableref/showref.hpp"
namespace duckdb {
// Transforms a Statement node: the first child of the list is a choice whose
// selected alternative is handed back to the transformer.
unique_ptr<SQLStatement> PEGTransformerFactory::TransformStatement(PEGTransformer &transformer,
                                                                   optional_ptr<ParseResult> parse_result) {
	auto &statement_choice = parse_result->Cast<ListParseResult>().Child<ChoiceParseResult>(0);
	return transformer.Transform<unique_ptr<SQLStatement>>(statement_choice.result);
}
// Matches `tokens` against the root matcher and transforms the resulting parse
// tree into a SQL statement.
//
// tokens:    the token stream to parse
// root_rule: name assigned to the top-level parse result before transforming
//
// Throws ParserException when matching fails or does not consume every token.
unique_ptr<SQLStatement> PEGTransformerFactory::Transform(vector<MatcherToken> &tokens, const char *root_rule) {
	vector<MatcherSuggestion> suggestions;
	ParseResultAllocator parse_result_allocator;
	MatchState state(tokens, suggestions, parse_result_allocator);
	MatcherAllocator allocator;
	auto &matcher = Matcher::RootMatcher(allocator);
	auto match_result = matcher.MatchParseResult(state);
	if (match_result == nullptr || state.token_index < state.tokens.size()) {
		// TODO(dtenwolde) add error handling
		string token_list;
		for (idx_t i = 0; i < tokens.size(); i++) {
			if (!token_list.empty()) {
				token_list += "\n";
			}
			if (i < 10) {
				// pad single-digit indexes so the listing lines up
				token_list += " ";
			}
			token_list += to_string(i) + ":" + tokens[i].text;
		}
		// A failed match can leave token_index at (or past) the end of the token list,
		// e.g. for an empty token list - never index out of range while reporting.
		const string failed_token =
		    state.token_index < tokens.size() ? tokens[state.token_index].text : string("<end of input>");
		throw ParserException("Failed to parse query - did not consume all tokens (got to token %d - %s)\nTokens:\n%s",
		                      state.token_index, failed_token, token_list);
	}
	match_result->name = root_rule;
	ArenaAllocator transformer_allocator(Allocator::DefaultAllocator());
	PEGTransformerState transformer_state(tokens);
	auto &factory = GetInstance();
	PEGTransformer transformer(transformer_allocator, transformer_state, factory.sql_transform_functions,
	                           factory.parser.rules, factory.enum_mappings);
	// transform exactly once: running the transformation a second time repeats all
	// work and any state changes made by the transform functions
	return transformer.Transform<unique_ptr<SQLStatement>>(match_result);
}
// Registers FUNCTION under its own name with the first 9 characters removed
// (the length of the "Transform" prefix), e.g. TransformStatement -> "Statement".
#define REGISTER_TRANSFORM(FUNCTION) Register(string(#FUNCTION).substr(9), &FUNCTION)
// Returns the single shared factory; the function-local static is constructed the
// first time this function is called.
PEGTransformerFactory &PEGTransformerFactory::GetInstance() {
static PEGTransformerFactory instance;
return instance;
}
// Registers every transform function and enum mapping known to the factory.
// Transform functions are registered through REGISTER_TRANSFORM, i.e. under their
// name without the "Transform" prefix; the section comments name the grammar file
// each group of rules belongs to.
PEGTransformerFactory::PEGTransformerFactory() {
REGISTER_TRANSFORM(TransformStatement);
// common.gram
REGISTER_TRANSFORM(TransformNumberLiteral);
REGISTER_TRANSFORM(TransformStringLiteral);
// expression.gram
REGISTER_TRANSFORM(TransformBaseExpression);
REGISTER_TRANSFORM(TransformExpression);
REGISTER_TRANSFORM(TransformLiteralExpression);
REGISTER_TRANSFORM(TransformSingleExpression);
REGISTER_TRANSFORM(TransformConstantLiteral);
// use.gram
REGISTER_TRANSFORM(TransformUseStatement);
REGISTER_TRANSFORM(TransformUseTarget);
// set.gram
REGISTER_TRANSFORM(TransformResetStatement);
REGISTER_TRANSFORM(TransformSetAssignment);
REGISTER_TRANSFORM(TransformSetSetting);
REGISTER_TRANSFORM(TransformSetStatement);
REGISTER_TRANSFORM(TransformSetTimeZone);
REGISTER_TRANSFORM(TransformSetVariable);
REGISTER_TRANSFORM(TransformStandardAssignment);
REGISTER_TRANSFORM(TransformVariableList);
// enum mappings: name -> SetScope value
RegisterEnum<SetScope>("LocalScope", SetScope::LOCAL);
RegisterEnum<SetScope>("GlobalScope", SetScope::GLOBAL);
RegisterEnum<SetScope>("SessionScope", SetScope::SESSION);
RegisterEnum<SetScope>("VariableScope", SetScope::VARIABLE);
// enum mappings: name -> constant Value (boolean false/true and a default-constructed Value)
RegisterEnum<Value>("FalseLiteral", Value(false));
RegisterEnum<Value>("TrueLiteral", Value(true));
RegisterEnum<Value>("NullLiteral", Value());
}
// Flattens a List(D) parse result into the parse results of its elements.
// The first element is child 0 of the list; every further element is child 1 of
// an entry of the optional repeat stored at child 1 (child 0 of such an entry is
// the separator).
vector<optional_ptr<ParseResult>>
PEGTransformerFactory::ExtractParseResultsFromList(optional_ptr<ParseResult> parse_result) {
	// List(D) <- D (',' D)* ','?
	vector<optional_ptr<ParseResult>> result;
	auto &list_pr = parse_result->Cast<ListParseResult>();
	result.push_back(list_pr.GetChild(0));
	// bind by reference: the parse nodes are only inspected, never copied
	auto &opt_child = list_pr.Child<OptionalParseResult>(1);
	if (opt_child.HasResult()) {
		auto &repeat_result = opt_child.optional_result->Cast<RepeatParseResult>();
		for (auto &child : repeat_result.children) {
			auto &list_child = child->Cast<ListParseResult>();
			result.push_back(list_child.GetChild(1));
		}
	}
	return result;
}
} // namespace duckdb

View File

@@ -0,0 +1,82 @@
#include "duckdb/common/operator/cast_operators.hpp"
#include "duckdb/common/types/decimal.hpp"
#include "transformer/peg_transformer.hpp"
namespace duckdb {
// NumberLiteral <- < [+-]?[0-9]*([.][0-9]*)? >
// Turns a number literal into a constant expression.
// Candidate types are tried in this order:
//   1. BIGINT, HUGEINT, UHUGEINT - only when the text has no '.' and no exponent
//   2. DECIMAL - only when the text has a '.', no exponent, and fits the maximum decimal width
//   3. DOUBLE - everything else (Cast::Operation is used here, unlike the TryCast calls above)
unique_ptr<ParsedExpression> PEGTransformerFactory::TransformNumberLiteral(PEGTransformer &transformer,
                                                                           optional_ptr<ParseResult> parse_result) {
	// bind by reference: str_val refers to the parse node's own string, no copy needed
	auto &literal_pr = parse_result->Cast<NumberParseResult>();
	string_t str_val(literal_pr.number);
	bool try_cast_as_integer = true;
	bool try_cast_as_decimal = true;
	optional_idx decimal_position = optional_idx::Invalid();
	idx_t num_underscores = 0;
	idx_t num_integer_underscores = 0;
	for (idx_t i = 0; i < str_val.GetSize(); i++) {
		if (literal_pr.number[i] == '.') {
			// decimal point: cast as either decimal or double
			try_cast_as_integer = false;
			decimal_position = i;
		}
		if (literal_pr.number[i] == 'e' || literal_pr.number[i] == 'E') {
			// found exponent, cast as double
			try_cast_as_integer = false;
			try_cast_as_decimal = false;
		}
		if (literal_pr.number[i] == '_') {
			// underscores do not count towards width/scale; remember how many
			// of them precede the decimal point
			num_underscores++;
			if (!decimal_position.IsValid()) {
				num_integer_underscores++;
			}
		}
	}
	if (try_cast_as_integer) {
		int64_t bigint_value;
		// try to cast as bigint first
		if (TryCast::Operation<string_t, int64_t>(str_val, bigint_value)) {
			// successfully cast to bigint: bigint value
			return make_uniq<ConstantExpression>(Value::BIGINT(bigint_value));
		}
		hugeint_t hugeint_value;
		// if that is not successful; try to cast as hugeint
		if (TryCast::Operation<string_t, hugeint_t>(str_val, hugeint_value)) {
			// successfully cast to hugeint: hugeint value
			return make_uniq<ConstantExpression>(Value::HUGEINT(hugeint_value));
		}
		uhugeint_t uhugeint_value;
		// if that is not successful; try to cast as uhugeint
		if (TryCast::Operation<string_t, uhugeint_t>(str_val, uhugeint_value)) {
			// successfully cast to uhugeint: uhugeint value
			return make_uniq<ConstantExpression>(Value::UHUGEINT(uhugeint_value));
		}
	}
	// characters that are not digits: the decimal point, plus the sign if negative
	// (the comparison below is strict, hence 2 and 3 rather than 1 and 2)
	// NOTE(review): a leading '+' is not accounted for here - confirm whether the tokenizer can produce one
	idx_t decimal_offset = literal_pr.number[0] == '-' ? 3 : 2;
	if (try_cast_as_decimal && decimal_position.IsValid() &&
	    str_val.GetSize() - num_underscores < Decimal::MAX_WIDTH_DECIMAL + decimal_offset) {
		// figure out the width/scale based on the decimal position
		auto width = NumericCast<uint8_t>(str_val.GetSize() - 1 - num_underscores);
		auto scale = NumericCast<uint8_t>(width - decimal_position.GetIndex() + num_integer_underscores);
		if (literal_pr.number[0] == '-') {
			width--;
		}
		if (width <= Decimal::MAX_WIDTH_DECIMAL) {
			// we can cast the value as a decimal
			Value val = Value(str_val);
			val = val.DefaultCastAs(LogicalType::DECIMAL(width, scale));
			return make_uniq<ConstantExpression>(std::move(val));
		}
	}
	// if there is a decimal or the value is too big to cast as either hugeint or bigint
	double dbl_value = Cast::Operation<string_t, double>(str_val);
	return make_uniq<ConstantExpression>(Value::DOUBLE(dbl_value));
}
// StringLiteral <- '\'' [^\']* '\''
// Returns a copy of the text stored in the string-literal parse result.
string PEGTransformerFactory::TransformStringLiteral(PEGTransformer &transformer,
                                                     optional_ptr<ParseResult> parse_result) {
	return parse_result->Cast<StringLiteralParseResult>().result;
}
} // namespace duckdb

View File

@@ -0,0 +1,118 @@
#include "transformer/peg_transformer.hpp"
#include "duckdb/parser/expression/comparison_expression.hpp"
#include "duckdb/parser/expression/between_expression.hpp"
#include "duckdb/parser/expression/operator_expression.hpp"
#include "duckdb/parser/expression/cast_expression.hpp"
namespace duckdb {
// BaseExpression <- SingleExpression Indirection*
// Transforms the single expression at child 0 and then wraps it, left to right,
// in every indirection found in the optional repeat at child 1:
//   CAST     - the expression becomes the cast's child
//   OPERATOR - the expression is inserted as the first operand
//   FUNCTION - the expression is appended as the last argument
// Throws NotImplementedException for any other indirection class; such an
// indirection used to be dropped silently, yielding an incomplete expression.
unique_ptr<ParsedExpression> PEGTransformerFactory::TransformBaseExpression(PEGTransformer &transformer,
                                                                            optional_ptr<ParseResult> parse_result) {
	auto &list_pr = parse_result->Cast<ListParseResult>();
	auto expr = transformer.Transform<unique_ptr<ParsedExpression>>(list_pr.Child<ListParseResult>(0));
	// bind by reference: the parse nodes are only inspected, never copied
	auto &indirection_opt = list_pr.Child<OptionalParseResult>(1);
	if (!indirection_opt.HasResult()) {
		return expr;
	}
	auto &indirection_repeat = indirection_opt.optional_result->Cast<RepeatParseResult>();
	for (auto &child : indirection_repeat.children) {
		auto indirection_expr = transformer.Transform<unique_ptr<ParsedExpression>>(child);
		const auto indirection_class = indirection_expr->GetExpressionClass();
		if (indirection_class == ExpressionClass::CAST) {
			auto cast_expr = unique_ptr_cast<ParsedExpression, CastExpression>(std::move(indirection_expr));
			cast_expr->child = std::move(expr);
			expr = std::move(cast_expr);
		} else if (indirection_class == ExpressionClass::OPERATOR) {
			auto operator_expr = unique_ptr_cast<ParsedExpression, OperatorExpression>(std::move(indirection_expr));
			operator_expr->children.insert(operator_expr->children.begin(), std::move(expr));
			expr = std::move(operator_expr);
		} else if (indirection_class == ExpressionClass::FUNCTION) {
			auto function_expr = unique_ptr_cast<ParsedExpression, FunctionExpression>(std::move(indirection_expr));
			function_expr->children.push_back(std::move(expr));
			expr = std::move(function_expr);
		} else {
			throw NotImplementedException("Indirection of this expression class is not supported in BaseExpression yet");
		}
	}
	return expr;
}
// Expression <- BaseExpression RecursiveExpression*
// Transforms the base expression at child 0 and then folds every recursive
// expression from the optional repeat at child 1 onto it, left to right:
//   COMPARISON - the running expression becomes the left side
//   FUNCTION   - the running expression is inserted as the first argument
//   LAMBDA     - the running expression becomes the lhs
//   BETWEEN    - the running expression becomes the input
//   otherwise  - an OperatorExpression of the recursive expression's type is built
//                with the running expression and the recursive expression as operands
unique_ptr<ParsedExpression> PEGTransformerFactory::TransformExpression(PEGTransformer &transformer,
                                                                        optional_ptr<ParseResult> parse_result) {
	auto &list_pr = parse_result->Cast<ListParseResult>();
	auto &base_expr_pr = list_pr.Child<ListParseResult>(0);
	unique_ptr<ParsedExpression> base_expr = transformer.Transform<unique_ptr<ParsedExpression>>(base_expr_pr);
	auto &recursive_opt = list_pr.Child<OptionalParseResult>(1);
	if (!recursive_opt.HasResult()) {
		return base_expr;
	}
	// bind by reference: the repeat node is only iterated, never copied
	auto &repeat_expression_pr = recursive_opt.optional_result->Cast<RepeatParseResult>();
	for (auto &child : repeat_expression_pr.children) {
		auto expr = transformer.Transform<unique_ptr<ParsedExpression>>(child);
		const auto expr_class = expr->GetExpressionClass();
		if (expr_class == ExpressionClass::COMPARISON) {
			auto compare_expr = unique_ptr_cast<ParsedExpression, ComparisonExpression>(std::move(expr));
			compare_expr->left = std::move(base_expr);
			base_expr = std::move(compare_expr);
		} else if (expr_class == ExpressionClass::FUNCTION) {
			auto func_expr = unique_ptr_cast<ParsedExpression, FunctionExpression>(std::move(expr));
			func_expr->children.insert(func_expr->children.begin(), std::move(base_expr));
			base_expr = std::move(func_expr);
		} else if (expr_class == ExpressionClass::LAMBDA) {
			auto lambda_expr = unique_ptr_cast<ParsedExpression, LambdaExpression>(std::move(expr));
			lambda_expr->lhs = std::move(base_expr);
			base_expr = std::move(lambda_expr);
		} else if (expr_class == ExpressionClass::BETWEEN) {
			auto between_expr = unique_ptr_cast<ParsedExpression, BetweenExpression>(std::move(expr));
			between_expr->input = std::move(base_expr);
			base_expr = std::move(between_expr);
		} else {
			// read the type before handing ownership of expr to the new operator
			const auto operator_type = expr->type;
			base_expr = make_uniq<OperatorExpression>(operator_type, std::move(base_expr), std::move(expr));
		}
	}
	return base_expr;
}
// LiteralExpression <- StringLiteral / NumberLiteral / 'NULL' / 'TRUE' / 'FALSE'
// Transforms the selected literal alternative. A StringLiteral is transformed to
// its text and wrapped in a constant; every other alternative is delegated to the
// transformer and must itself produce an expression.
unique_ptr<ParsedExpression> PEGTransformerFactory::TransformLiteralExpression(PEGTransformer &transformer,
                                                                               optional_ptr<ParseResult> parse_result) {
	auto &literal_choice = parse_result->Cast<ListParseResult>().Child<ChoiceParseResult>(0);
	if (literal_choice.name != "StringLiteral") {
		return transformer.Transform<unique_ptr<ParsedExpression>>(literal_choice.result);
	}
	auto literal_text = transformer.Transform<string>(literal_choice.result);
	return make_uniq<ConstantExpression>(Value(std::move(literal_text)));
}
// Maps the selected alternative to its registered Value through the enum
// mappings and wraps that value in a constant expression.
unique_ptr<ParsedExpression> PEGTransformerFactory::TransformConstantLiteral(PEGTransformer &transformer,
                                                                             optional_ptr<ParseResult> parse_result) {
	auto &literal_choice = parse_result->Cast<ListParseResult>().Child<ChoiceParseResult>(0);
	auto constant_value = transformer.TransformEnum<Value>(literal_choice.result);
	return make_uniq<ConstantExpression>(std::move(constant_value));
}
// SingleExpression <- LiteralExpression /
// Parameter /
// SubqueryExpression /
// SpecialFunctionExpression /
// ParenthesisExpression /
// IntervalLiteral /
// TypeLiteral /
// CaseExpression /
// StarExpression /
// CastExpression /
// GroupingExpression /
// MapExpression /
// FunctionExpression /
// ColumnReference /
// PrefixExpression /
// ListComprehensionExpression /
// ListExpression /
// StructExpression /
// PositionalExpression /
// DefaultExpression
// Delegates to the transform of whichever alternative the choice at child 0 selected.
unique_ptr<ParsedExpression> PEGTransformerFactory::TransformSingleExpression(PEGTransformer &transformer,
                                                                              optional_ptr<ParseResult> parse_result) {
	auto &selected_alternative = parse_result->Cast<ListParseResult>().Child<ChoiceParseResult>(0);
	return transformer.Transform<unique_ptr<ParsedExpression>>(selected_alternative.result);
}
} // namespace duckdb

View File

@@ -0,0 +1,93 @@
#include "transformer/peg_transformer.hpp"
#include "duckdb/common/exception/parser_exception.hpp"
namespace duckdb {
// ResetStatement <- 'RESET' (SetVariable / SetSetting)
// Builds a ResetVariableStatement from the setting/variable that follows RESET
// (child 1 of the list; its first child is the choice between the two forms).
unique_ptr<SQLStatement> PEGTransformerFactory::TransformResetStatement(PEGTransformer &transformer,
                                                                        optional_ptr<ParseResult> parse_result) {
	auto &reset_target_pr = parse_result->Cast<ListParseResult>().Child<ListParseResult>(1);
	auto &target_choice = reset_target_pr.Child<ChoiceParseResult>(0);
	SettingInfo target = transformer.Transform<SettingInfo>(target_choice.result);
	return make_uniq<ResetVariableStatement>(target.name, target.scope);
}
// SetAssignment <- VariableAssign VariableList
// Returns the transformed expressions of child 1 (the variable list); child 0,
// the assignment token, carries no information.
vector<unique_ptr<ParsedExpression>>
PEGTransformerFactory::TransformSetAssignment(PEGTransformer &transformer, optional_ptr<ParseResult> parse_result) {
	auto &assignment_pr = parse_result->Cast<ListParseResult>();
	using expression_list_t = vector<unique_ptr<ParsedExpression>>;
	return transformer.Transform<expression_list_t>(assignment_pr, 1);
}
// SetSetting <- SettingScope? SettingName
// Reads the setting name (child 1) and, when present, the optional scope (child 0).
// Without a scope in the input, result.scope keeps whatever SettingInfo's default is.
SettingInfo PEGTransformerFactory::TransformSetSetting(PEGTransformer &transformer,
                                                       optional_ptr<ParseResult> parse_result) {
	auto &list_pr = parse_result->Cast<ListParseResult>();
	auto &optional_scope_pr = list_pr.Child<OptionalParseResult>(0);
	SettingInfo result;
	result.name = list_pr.Child<IdentifierParseResult>(1).identifier;
	if (optional_scope_pr.optional_result) {
		// bind by reference: the parse nodes are only inspected, never copied
		auto &setting_scope = optional_scope_pr.optional_result->Cast<ListParseResult>();
		auto &scope_value = setting_scope.Child<ChoiceParseResult>(0);
		result.scope = transformer.TransformEnum<SetScope>(scope_value);
	}
	return result;
}
// SetStatement <- 'SET' (StandardAssignment / SetTimeZone)
// Transforms what follows SET: the choice node itself (not its selected result)
// is passed on and transformed as a SetVariableStatement.
unique_ptr<SQLStatement> PEGTransformerFactory::TransformSetStatement(PEGTransformer &transformer,
                                                                      optional_ptr<ParseResult> parse_result) {
	auto &set_body_pr = parse_result->Cast<ListParseResult>().Child<ListParseResult>(1);
	auto &body_choice = set_body_pr.Child<ChoiceParseResult>(0);
	return transformer.Transform<unique_ptr<SetVariableStatement>>(body_choice);
}
// SetTimeZone <- 'TIME' 'ZONE' Expression
// Placeholder for the SetTimeZone rule: always throws NotImplementedException;
// neither argument is used.
unique_ptr<SQLStatement> PEGTransformerFactory::TransformSetTimeZone(PEGTransformer &transformer,
optional_ptr<ParseResult> parse_result) {
throw NotImplementedException("Rule 'SetTimeZone' has not been implemented yet");
}
// SetVariable <- VariableScope Identifier
// Reads the scope (child 0, mapped through the enum mappings) and the variable
// name (child 1) into a SettingInfo.
SettingInfo PEGTransformerFactory::TransformSetVariable(PEGTransformer &transformer,
                                                        optional_ptr<ParseResult> parse_result) {
	auto &variable_pr = parse_result->Cast<ListParseResult>();
	SettingInfo info;
	auto &scope_pr = variable_pr.Child<ListParseResult>(0);
	info.scope = transformer.TransformEnum<SetScope>(scope_pr);
	auto &name_pr = variable_pr.Child<IdentifierParseResult>(1);
	info.name = name_pr.identifier;
	return info;
}
// StandardAssignment <- (SetVariable / SetSetting) SetAssignment
// Builds a SetVariableStatement from "(SetVariable / SetSetting) SetAssignment".
// The incoming parse result is the choice node selected by the SET statement; its
// result is the list holding the setting/variable (child 0) and the assignment
// (child 1).
// Throws ParserException unless the assignment carries exactly one value
// (e.g. "set threads=1,2" is rejected instead of silently using the first value).
unique_ptr<SetVariableStatement>
PEGTransformerFactory::TransformStandardAssignment(PEGTransformer &transformer,
                                                   optional_ptr<ParseResult> parse_result) {
	auto &choice_pr = parse_result->Cast<ChoiceParseResult>();
	auto &list_pr = choice_pr.result->Cast<ListParseResult>();
	auto &first_sub_rule = list_pr.Child<ListParseResult>(0);
	auto &setting_or_var_pr = first_sub_rule.Child<ChoiceParseResult>(0);
	SettingInfo setting_info = transformer.Transform<SettingInfo>(setting_or_var_pr.result);
	auto &set_assignment_pr = list_pr.Child<ListParseResult>(1);
	auto value = transformer.Transform<vector<unique_ptr<ParsedExpression>>>(set_assignment_pr);
	if (value.size() != 1) {
		// also guards value[0] below against an empty list
		throw ParserException("SET needs a single scalar value parameter");
	}
	return make_uniq<SetVariableStatement>(setting_info.name, std::move(value[0]), setting_info.scope);
}
// VariableList <- List(Expression)
// Transforms every element of the List(Expression) at child 0 into an
// expression, preserving the order of the elements.
vector<unique_ptr<ParsedExpression>>
PEGTransformerFactory::TransformVariableList(PEGTransformer &transformer, optional_ptr<ParseResult> parse_result) {
	auto &variable_list_pr = parse_result->Cast<ListParseResult>();
	auto element_results = ExtractParseResultsFromList(variable_list_pr.Child<ListParseResult>(0));
	vector<unique_ptr<ParsedExpression>> transformed;
	for (auto &element : element_results) {
		auto expression = transformer.Transform<unique_ptr<ParsedExpression>>(element);
		transformed.push_back(std::move(expression));
	}
	return transformed;
}
} // namespace duckdb

View File

@@ -0,0 +1,51 @@
#include "transformer/peg_transformer.hpp"
#include "duckdb/parser/sql_statement.hpp"
namespace duckdb {
// UseStatement <- 'USE' UseTarget
// Rewrites USE <target> into a SET of "schema" with automatic scope. The value is
// "<schema>.<name>" when the target carries a schema and just "<name>" otherwise.
unique_ptr<SQLStatement> PEGTransformerFactory::TransformUseStatement(PEGTransformer &transformer,
                                                                      optional_ptr<ParseResult> parse_result) {
	auto &use_pr = parse_result->Cast<ListParseResult>();
	auto target = transformer.Transform<QualifiedName>(use_pr, 1);
	string target_text = IsInvalidSchema(target.schema) ? target.name : target.schema + "." + target.name;
	auto target_constant = make_uniq<ConstantExpression>(Value(target_text));
	return make_uniq<SetVariableStatement>("schema", std::move(target_constant), SetScope::AUTOMATIC);
}
// UseTarget <- (CatalogName '.' ReservedSchemaName) / SchemaName / CatalogName
// Extracts the target of a USE statement from the selected alternative:
//   LIST       - must contain exactly two identifiers; they are stored as schema and
//                name (in that order) and the catalog is set to INVALID_CATALOG
//   IDENTIFIER - stored as the name; catalog and schema keep their defaults
// Throws InternalException for any other shape.
QualifiedName PEGTransformerFactory::TransformUseTarget(PEGTransformer &transformer,
                                                        optional_ptr<ParseResult> parse_result) {
	auto &list_pr = parse_result->Cast<ListParseResult>();
	auto &choice_pr = list_pr.Child<ChoiceParseResult>(0);
	QualifiedName result;
	if (choice_pr.result->type == ParseResultType::LIST) {
		vector<string> entries;
		// bind by reference: the list node is only iterated, never copied
		auto &use_target_children = choice_pr.result->Cast<ListParseResult>();
		for (auto &child : use_target_children.GetChildren()) {
			// skip everything that is not an identifier (e.g. the '.' separator)
			if (child->type == ParseResultType::IDENTIFIER) {
				entries.push_back(child->Cast<IdentifierParseResult>().identifier);
			}
		}
		if (entries.size() != 2) {
			throw InternalException("Invalid amount of entries for use statement");
		}
		result.catalog = INVALID_CATALOG;
		result.schema = entries[0];
		result.name = entries[1];
	} else if (choice_pr.result->type == ParseResultType::IDENTIFIER) {
		result.name = choice_pr.result->Cast<IdentifierParseResult>().identifier;
	} else {
		throw InternalException("Unexpected parse result type encountered in UseTarget");
	}
	return result;
}
} // namespace duckdb