should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,325 @@
#pragma once
#include "duckdb/common/arena_linked_list.hpp"
#include "duckdb/common/exception.hpp"
#include "duckdb/common/optional_ptr.hpp"
#include "duckdb/common/string.hpp"
#include "duckdb/common/types/string_type.hpp"
#include "duckdb/parser/parsed_expression.hpp"
namespace duckdb {
class PEGTransformer; // Forward declaration
//! Tag identifying the concrete kind of a ParseResult node.
enum class ParseResultType : uint8_t {
	LIST,       //!< Node with an ordered vector of children (see ListParseResult)
	OPTIONAL,   //!< Node that may or may not wrap a result (see OptionalParseResult)
	REPEAT,     //!< Node with a vector of children (see RepeatParseResult)
	CHOICE,     //!< Node wrapping one result plus the index that was selected (see ChoiceParseResult)
	EXPRESSION, //!< No dedicated result class is defined in this header
	IDENTIFIER, //!< Leaf storing an identifier string (see IdentifierParseResult)
	KEYWORD,    //!< Leaf storing a keyword string (see KeywordParseResult)
	OPERATOR,   //!< Leaf storing an operator token string (see OperatorParseResult)
	STATEMENT,  //!< No dedicated result class is defined in this header
	EXTENSION,  //!< No dedicated result class is defined in this header
	NUMBER,     //!< Leaf storing a number as a string (see NumberParseResult)
	STRING,     //!< Leaf storing a string literal (see StringLiteralParseResult)
	INVALID     //!< Sentinel; ParseResult::Cast skips its type check when the target's TYPE is INVALID
};

//! Returns the enumerator name of `type` as a string literal.
//! Any value that is not a named enumerator yields "INVALID".
inline const char *ParseResultToString(ParseResultType type) {
	// No `default` label on purpose: the compiler can then warn when a new enumerator is not handled.
	switch (type) {
	case ParseResultType::LIST: return "LIST";
	case ParseResultType::OPTIONAL: return "OPTIONAL";
	case ParseResultType::REPEAT: return "REPEAT";
	case ParseResultType::CHOICE: return "CHOICE";
	case ParseResultType::EXPRESSION: return "EXPRESSION";
	case ParseResultType::IDENTIFIER: return "IDENTIFIER";
	case ParseResultType::KEYWORD: return "KEYWORD";
	case ParseResultType::OPERATOR: return "OPERATOR";
	case ParseResultType::STATEMENT: return "STATEMENT";
	case ParseResultType::EXTENSION: return "EXTENSION";
	case ParseResultType::NUMBER: return "NUMBER";
	case ParseResultType::STRING: return "STRING";
	case ParseResultType::INVALID: break;
	}
	return "INVALID";
}
//! Polymorphic base class of all parse result nodes.
//! Every node carries a ParseResultType tag and a name (empty unless a subclass or caller sets it).
class ParseResult {
public:
	explicit ParseResult(ParseResultType type) : type(type) {
	}
	virtual ~ParseResult() = default;

	//! Downcasts this node to TARGET.
	//! TARGET must expose a static `TYPE` member; when that member is not INVALID it has to match this node's
	//! `type`, otherwise an InternalException is thrown. A TARGET whose TYPE is INVALID is not checked at all.
	template <class TARGET>
	TARGET &Cast() {
		if (TARGET::TYPE != ParseResultType::INVALID && type != TARGET::TYPE) {
			// Argument order follows the message: the node's actual type first, the requested type second.
			throw InternalException("Failed to cast parse result of type %s to type %s for rule %s",
			                        ParseResultToString(type), ParseResultToString(TARGET::TYPE), name);
		}
		// static_cast performs a proper base-to-derived conversion and rejects unrelated TARGETs at compile time.
		return static_cast<TARGET &>(*this);
	}

	ParseResultType type;
	string name;

	//! Writes this node's header to `ss`: the indent, a branch glyph ("└─" when `is_last`, "├─" otherwise),
	//! the type name and, when non-empty, the node name in parentheses.
	//! The base implementation does not use `visited` and does not write a trailing newline; the overrides
	//! in this header append their payload and the newline themselves.
	virtual void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                              const std::string &indent, bool is_last) const {
		ss << indent << (is_last ? "└─" : "├─") << " " << ParseResultToString(type);
		if (!name.empty()) {
			ss << " (" << name << ")";
		}
	}

	//! Public entry point: renders the tree rooted at this node into a string.
	std::string ToString() const {
		std::stringstream ss;
		std::unordered_set<const ParseResult *> visited;
		// The root is always the "last" element at its level
		ToStringInternal(ss, visited, "", true);
		return ss.str();
	}
};
//! Parse result of type IDENTIFIER; stores the identifier text it was constructed with.
struct IdentifierParseResult : ParseResult {
	static constexpr ParseResultType TYPE = ParseResultType::IDENTIFIER;

	explicit IdentifierParseResult(string identifier_p) : ParseResult(TYPE), identifier(std::move(identifier_p)) {
	}

	//! Prints the common node header followed by `: "<identifier>"` and a newline.
	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		ParseResult::ToStringInternal(ss, visited, indent, is_last);
		ss << ": " << '"' << identifier << '"' << '\n';
	}

	string identifier;
};
//! Parse result of type KEYWORD; stores the keyword text it was constructed with.
struct KeywordParseResult : ParseResult {
	static constexpr ParseResultType TYPE = ParseResultType::KEYWORD;

	explicit KeywordParseResult(string keyword_p) : ParseResult(TYPE), keyword(std::move(keyword_p)) {
	}

	//! Prints the common node header followed by `: "<keyword>"` and a newline.
	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		ParseResult::ToStringInternal(ss, visited, indent, is_last);
		ss << ": " << '"' << keyword << '"' << '\n';
	}

	string keyword;
};
//! Parse result of type LIST: an ordered vector of (possibly null) child pointers plus a name.
struct ListParseResult : ParseResult {
	static constexpr ParseResultType TYPE = ParseResultType::LIST;

public:
	//! Both arguments are taken by value and moved into the node, so no extra string/vector copy is made.
	explicit ListParseResult(vector<optional_ptr<ParseResult>> results_p, string name_p)
	    : ParseResult(TYPE), children(std::move(results_p)) {
		name = std::move(name_p);
	}

	//! Returns a copy of the child pointer vector (the pointed-to nodes themselves are not copied).
	vector<optional_ptr<ParseResult>> GetChildren() const {
		return children;
	}

	//! Returns the child pointer at `index` (which may be null).
	//! Throws InternalException when `index` is not a valid position.
	optional_ptr<ParseResult> GetChild(idx_t index) const {
		if (index >= children.size()) {
			throw InternalException("Child index out of bounds");
		}
		return children[index];
	}

	//! Fetches the child at `index` via GetChild and casts it to T via ParseResult::Cast.
	template <class T>
	T &Child(idx_t index) {
		auto child_ptr = GetChild(index);
		return child_ptr->Cast<T>();
	}

	//! Prints this list and, recursively, its children.
	//! A list that is already in `visited` is printed as a one-line marker and not descended into again.
	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		ss << indent << (is_last ? "└─" : "├─");

		if (visited.count(this)) {
			ss << " List (" << name << ") [... already printed ...]\n";
			return;
		}
		visited.insert(this);

		ss << " " << ParseResultToString(type);
		if (!name.empty()) {
			ss << " (" << name << ")";
		}
		ss << " [" << children.size() << " children]\n";

		std::string child_indent = indent + (is_last ? " " : "");
		for (size_t i = 0; i < children.size(); ++i) {
			if (children[i]) {
				children[i]->ToStringInternal(ss, visited, child_indent, i == children.size() - 1);
			} else {
				// Null children are still shown so that the printed positions match the child indexes.
				ss << child_indent << (i == children.size() - 1 ? "└─" : "├─") << " [nullptr]\n";
			}
		}
	}

private:
	vector<optional_ptr<ParseResult>> children;
};
//! Parse result of type REPEAT: a publicly accessible vector of (possibly null) child pointers.
struct RepeatParseResult : ParseResult {
	static constexpr ParseResultType TYPE = ParseResultType::REPEAT;

	vector<optional_ptr<ParseResult>> children;

	explicit RepeatParseResult(vector<optional_ptr<ParseResult>> results_p)
	    : ParseResult(TYPE), children(std::move(results_p)) {
	}

	//! Casts the child at `index` to T via ParseResult::Cast.
	//! Throws InternalException when `index` is not a valid position.
	template <class T>
	T &Child(idx_t index) {
		if (index < children.size()) {
			return children[index]->Cast<T>();
		}
		throw InternalException("Child index out of bounds");
	}

	//! Prints this node and, recursively, its children.
	//! A node that is already in `visited` is printed as a one-line marker and not descended into again.
	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		const char *own_branch = is_last ? "└─" : "├─";
		ss << indent << own_branch;

		if (visited.count(this) != 0) {
			ss << " Repeat (" << name << ") [... already printed ...]\n";
			return;
		}
		visited.insert(this);

		// Header line: type, optional name, number of children
		ss << " " << ParseResultToString(type);
		if (!name.empty()) {
			ss << " (" << name << ")";
		}
		const size_t child_count = children.size();
		ss << " [" << child_count << " children]\n";

		std::string child_indent = indent;
		if (is_last) {
			child_indent += " ";
		}

		for (size_t pos = 0; pos < child_count; ++pos) {
			const bool final_child = (pos + 1 == child_count);
			const auto &child = children[pos];
			if (!child) {
				// Null entries are printed as a placeholder line
				ss << child_indent << (final_child ? "└─" : "├─") << " [nullptr]\n";
				continue;
			}
			child->ToStringInternal(ss, visited, child_indent, final_child);
		}
	}
};
//! Parse result of type OPTIONAL: wraps at most one result. An unset `optional_result` means "empty".
struct OptionalParseResult : ParseResult {
	static constexpr ParseResultType TYPE = ParseResultType::OPTIONAL;

	optional_ptr<ParseResult> optional_result;

	//! Creates an empty optional.
	explicit OptionalParseResult() : ParseResult(TYPE), optional_result(nullptr) {
	}

	//! Wraps `result_p` and adopts its name.
	//! An unset `result_p` produces an empty optional with an empty name instead of dereferencing it.
	explicit OptionalParseResult(optional_ptr<ParseResult> result_p) : ParseResult(TYPE), optional_result(result_p) {
		if (result_p) {
			name = result_p->name;
		}
	}

	//! True when a result is wrapped.
	bool HasResult() const {
		return optional_result != nullptr;
	}

	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		if (HasResult()) {
			// The optional node has a value, so we "collapse" it by just printing its child.
			// We pass the same indentation and is_last status, so it takes the place of the Optional node.
			optional_result->ToStringInternal(ss, visited, indent, is_last);
		} else {
			// The optional node is empty, which is useful information, so we print it.
			ss << indent << (is_last ? "└─" : "├─") << " " << ParseResultToString(type) << " [empty]\n";
		}
	}
};
//! Parse result of type CHOICE: wraps one result together with the index that was selected.
class ChoiceParseResult : public ParseResult {
public:
	static constexpr ParseResultType TYPE = ParseResultType::CHOICE;

	//! Wraps `parse_result_p`, adopts its name and records `selected_idx_p`.
	//! An unset `parse_result_p` leaves the name empty instead of dereferencing it; ToStringInternal
	//! prints such a node as "[no result]".
	explicit ChoiceParseResult(optional_ptr<ParseResult> parse_result_p, idx_t selected_idx_p)
	    : ParseResult(TYPE), result(parse_result_p), selected_idx(selected_idx_p) {
		if (parse_result_p) {
			name = parse_result_p->name;
		}
	}

	optional_ptr<ParseResult> result;
	idx_t selected_idx;

	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		if (result) {
			// The choice was resolved. We print a marker and then print the child below it.
			ss << indent << (is_last ? "└─" : "├─") << " [" << ParseResultToString(type) << " (idx: " << selected_idx
			   << ")] ->\n";

			// The child is now on a new indentation level and is the only child of our marker.
			std::string child_indent = indent + (is_last ? " " : "");
			result->ToStringInternal(ss, visited, child_indent, true);
		} else {
			// The choice had no result.
			ss << indent << (is_last ? "└─" : "├─") << " " << ParseResultToString(type) << " [no result]\n";
		}
	}
};
//! Parse result of type NUMBER; keeps the number in the string form it was constructed with.
class NumberParseResult : public ParseResult {
public:
	static constexpr ParseResultType TYPE = ParseResultType::NUMBER;

	explicit NumberParseResult(string number_p) : ParseResult(TYPE), number(std::move(number_p)) {
	}

	//! Prints the common node header followed by `: <number>` (unquoted) and a newline.
	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		ParseResult::ToStringInternal(ss, visited, indent, is_last);
		ss << ": " << number << '\n';
	}

	string number;
};
//! Parse result of type STRING; stores the string it was constructed with in `result`.
class StringLiteralParseResult : public ParseResult {
public:
	static constexpr ParseResultType TYPE = ParseResultType::STRING;

	explicit StringLiteralParseResult(string string_p) : ParseResult(TYPE), result(std::move(string_p)) {
	}

	//! Prints the common node header followed by `: "<result>"` and a newline.
	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		ParseResult::ToStringInternal(ss, visited, indent, is_last);
		ss << ": " << '"' << result << '"' << '\n';
	}

	string result;
};
//! Parse result of type OPERATOR; stores the operator token text it was constructed with.
class OperatorParseResult : public ParseResult {
public:
	static constexpr ParseResultType TYPE = ParseResultType::OPERATOR;

	explicit OperatorParseResult(string operator_p) : ParseResult(TYPE), operator_token(std::move(operator_p)) {
	}

	//! Prints the common node header followed by `: <operator_token>` (unquoted) and a newline.
	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		ParseResult::ToStringInternal(ss, visited, indent, is_last);
		ss << ": " << operator_token << '\n';
	}

	string operator_token;
};
} // namespace duckdb

View File

@@ -0,0 +1,208 @@
#pragma once
#include "tokenizer.hpp"
#include "parse_result.hpp"
#include "transform_enum_result.hpp"
#include "transform_result.hpp"
#include "ast/setting_info.hpp"
#include "duckdb/function/macro_function.hpp"
#include "duckdb/parser/expression/case_expression.hpp"
#include "duckdb/parser/expression/function_expression.hpp"
#include "duckdb/parser/expression/parameter_expression.hpp"
#include "duckdb/parser/expression/window_expression.hpp"
#include "duckdb/parser/parsed_data/create_type_info.hpp"
#include "duckdb/parser/parsed_data/transaction_info.hpp"
#include "duckdb/parser/statement/copy_database_statement.hpp"
#include "duckdb/parser/statement/set_statement.hpp"
#include "duckdb/parser/statement/create_statement.hpp"
#include "duckdb/parser/tableref/basetableref.hpp"
#include "parser/peg_parser.hpp"
#include "duckdb/storage/arena_allocator.hpp"
#include "duckdb/parser/query_node/select_node.hpp"
#include "duckdb/parser/statement/drop_statement.hpp"
#include "duckdb/parser/statement/insert_statement.hpp"
namespace duckdb {
// Forward declare
struct QualifiedName;
struct MatcherToken;
//! State handed to a PEGTransformer: a reference to a token vector plus an index that starts at 0.
//! Only a reference is stored, so the token vector has to outlive this object.
struct PEGTransformerState {
	explicit PEGTransformerState(const vector<MatcherToken> &tokens_p) : tokens(tokens_p) {
	}

	//! The tokens this state refers to (not owned)
	const vector<MatcherToken> &tokens;
	//! Presumably the current position within `tokens` -- TODO confirm against the users of this struct
	idx_t token_index = 0;
};
class PEGTransformer {
public:
using AnyTransformFunction =
std::function<unique_ptr<TransformResultValue>(PEGTransformer &, optional_ptr<ParseResult>)>;
PEGTransformer(ArenaAllocator &allocator, PEGTransformerState &state,
const case_insensitive_map_t<AnyTransformFunction> &transform_functions,
const case_insensitive_map_t<PEGRule> &grammar_rules,
const case_insensitive_map_t<unique_ptr<TransformEnumValue>> &enum_mappings)
: allocator(allocator), state(state), grammar_rules(grammar_rules), transform_functions(transform_functions),
enum_mappings(enum_mappings) {
}
public:
template <typename T>
T Transform(optional_ptr<ParseResult> parse_result) {
auto it = transform_functions.find(parse_result->name);
if (it == transform_functions.end()) {
throw NotImplementedException("No transformer function found for rule '%s'", parse_result->name);
}
auto &func = it->second;
unique_ptr<TransformResultValue> base_result = func(*this, parse_result);
if (!base_result) {
throw InternalException("Transformer for rule '%s' returned a nullptr.", parse_result->name);
}
auto *typed_result_ptr = dynamic_cast<TypedTransformResult<T> *>(base_result.get());
if (!typed_result_ptr) {
throw InternalException("Transformer for rule '" + parse_result->name + "' returned an unexpected type.");
}
return std::move(typed_result_ptr->value);
}
template <typename T>
T Transform(ListParseResult &parse_result, idx_t child_index) {
auto child_parse_result = parse_result.GetChild(child_index);
return Transform<T>(child_parse_result);
}
template <typename T>
T TransformEnum(optional_ptr<ParseResult> parse_result) {
auto enum_rule_name = parse_result->name;
auto rule_value = enum_mappings.find(enum_rule_name);
if (rule_value == enum_mappings.end()) {
throw ParserException("Enum transform failed: could not find mapping for '%s'", enum_rule_name);
}
auto *typed_enum_ptr = dynamic_cast<TypedTransformEnumResult<T> *>(rule_value->second.get());
if (!typed_enum_ptr) {
throw InternalException("Enum mapping for rule '%s' has an unexpected type.", enum_rule_name);
}
return typed_enum_ptr->value;
}
template <typename T>
void TransformOptional(ListParseResult &list_pr, idx_t child_idx, T &target) {
auto &opt = list_pr.Child<OptionalParseResult>(child_idx);
if (opt.HasResult()) {
target = Transform<T>(opt.optional_result);
}
}
// Make overloads return raw pointers, as ownership is handled by the ArenaAllocator.
template <class T, typename... Args>
T *Make(Args &&...args) {
return allocator.Make<T>(std::forward<Args>(args)...);
}
void ClearParameters();
static void ParamTypeCheck(PreparedParamType last_type, PreparedParamType new_type);
void SetParam(const string &name, idx_t index, PreparedParamType type);
bool GetParam(const string &name, idx_t &index, PreparedParamType type);
public:
ArenaAllocator &allocator;
PEGTransformerState &state;
const case_insensitive_map_t<PEGRule> &grammar_rules;
const case_insensitive_map_t<AnyTransformFunction> &transform_functions;
const case_insensitive_map_t<unique_ptr<TransformEnumValue>> &enum_mappings;
case_insensitive_map_t<idx_t> named_parameter_map;
idx_t prepared_statement_parameter_index = 0;
PreparedParamType last_param_type = PreparedParamType::INVALID;
};
//! Holds the PEG parser and the tables of registered transform functions and enum mappings.
//! The factory is non-copyable; GetInstance() returns a reference to an instance (defined outside this header).
class PEGTransformerFactory {
public:
	static PEGTransformerFactory &GetInstance();
	explicit PEGTransformerFactory();
	//! Transforms `tokens` into a SQLStatement, starting at `root_rule` (defined outside this header).
	static unique_ptr<SQLStatement> Transform(vector<MatcherToken> &tokens, const char *root_rule = "Statement");

private:
	//! Registers `value` as the enum value for `rule_name`.
	//! Throws InternalException when the rule name is already registered.
	template <typename T>
	void RegisterEnum(const string &rule_name, T value) {
		auto existing_rule = enum_mappings.find(rule_name);
		if (existing_rule != enum_mappings.end()) {
			throw InternalException("EnumRule %s already exists", rule_name);
		}
		// `value` is a by-value parameter, so it can be moved into the stored result.
		enum_mappings[rule_name] = make_uniq<TypedTransformEnumResult<T>>(std::move(value));
	}

	//! Registers `function` as the transform function for `rule_name`.
	//! The function is wrapped so that its return value is boxed into a TypedTransformResult of the
	//! function's own return type. Throws InternalException when the rule name is already registered.
	template <class FUNC>
	void Register(const string &rule_name, FUNC function) {
		auto existing_rule = sql_transform_functions.find(rule_name);
		if (existing_rule != sql_transform_functions.end()) {
			throw InternalException("Rule %s already exists", rule_name);
		}
		sql_transform_functions[rule_name] =
		    [function](PEGTransformer &transformer,
		               optional_ptr<ParseResult> parse_result) -> unique_ptr<TransformResultValue> {
			auto result_value = function(transformer, parse_result);
			return make_uniq<TypedTransformResult<decltype(result_value)>>(std::move(result_value));
		};
	}

	// Non-copyable: both copy operations are deleted, not just the constructor.
	PEGTransformerFactory(const PEGTransformerFactory &) = delete;
	PEGTransformerFactory &operator=(const PEGTransformerFactory &) = delete;

	static unique_ptr<SQLStatement> TransformStatement(PEGTransformer &, optional_ptr<ParseResult> list);

	// common.gram
	static unique_ptr<ParsedExpression> TransformNumberLiteral(PEGTransformer &transformer,
	                                                           optional_ptr<ParseResult> parse_result);
	static string TransformStringLiteral(PEGTransformer &transformer, optional_ptr<ParseResult> parse_result);

	// expression.gram
	static unique_ptr<ParsedExpression> TransformBaseExpression(PEGTransformer &transformer,
	                                                            optional_ptr<ParseResult> parse_result);
	static unique_ptr<ParsedExpression> TransformExpression(PEGTransformer &transformer,
	                                                        optional_ptr<ParseResult> parse_result);
	static unique_ptr<ParsedExpression> TransformConstantLiteral(PEGTransformer &transformer,
	                                                             optional_ptr<ParseResult> parse_result);
	static unique_ptr<ParsedExpression> TransformLiteralExpression(PEGTransformer &transformer,
	                                                               optional_ptr<ParseResult> parse_result);
	static unique_ptr<ParsedExpression> TransformSingleExpression(PEGTransformer &transformer,
	                                                              optional_ptr<ParseResult> parse_result);

	// use.gram
	static unique_ptr<SQLStatement> TransformUseStatement(PEGTransformer &transformer,
	                                                      optional_ptr<ParseResult> parse_result);
	static QualifiedName TransformUseTarget(PEGTransformer &transformer, optional_ptr<ParseResult> parse_result);

	// set.gram
	static unique_ptr<SQLStatement> TransformResetStatement(PEGTransformer &transformer,
	                                                        optional_ptr<ParseResult> parse_result);
	static vector<unique_ptr<ParsedExpression>> TransformSetAssignment(PEGTransformer &transformer,
	                                                                   optional_ptr<ParseResult> parse_result);
	static SettingInfo TransformSetSetting(PEGTransformer &transformer, optional_ptr<ParseResult> parse_result);
	static unique_ptr<SQLStatement> TransformSetStatement(PEGTransformer &transformer,
	                                                      optional_ptr<ParseResult> parse_result);
	static unique_ptr<SQLStatement> TransformSetTimeZone(PEGTransformer &transformer,
	                                                     optional_ptr<ParseResult> parse_result);
	static SettingInfo TransformSetVariable(PEGTransformer &transformer, optional_ptr<ParseResult> parse_result);
	static unique_ptr<SetVariableStatement> TransformStandardAssignment(PEGTransformer &transformer,
	                                                                    optional_ptr<ParseResult> parse_result);
	static vector<unique_ptr<ParsedExpression>> TransformVariableList(PEGTransformer &transformer,
	                                                                  optional_ptr<ParseResult> parse_result);

	//! Helper functions
	static vector<optional_ptr<ParseResult>> ExtractParseResultsFromList(optional_ptr<ParseResult> parse_result);

private:
	PEGParser parser;
	//! Transform functions registered through Register
	case_insensitive_map_t<PEGTransformer::AnyTransformFunction> sql_transform_functions;
	//! Enum values registered through RegisterEnum
	case_insensitive_map_t<unique_ptr<TransformEnumValue>> enum_mappings;
};
} // namespace duckdb

View File

@@ -0,0 +1,15 @@
#pragma once
namespace duckdb {
//! Polymorphic, type-erased base for stored enum values; carries no data of its own.
struct TransformEnumValue {
	virtual ~TransformEnumValue() = default;
};

//! Concrete holder of a value of type T behind the TransformEnumValue base.
template <typename T>
struct TypedTransformEnumResult : TransformEnumValue {
	//! The stored value
	T value;

	//! Takes the value by value and moves it into the holder.
	explicit TypedTransformEnumResult(T value_p) : value(std::move(value_p)) {
	}
};
} // namespace duckdb

View File

@@ -0,0 +1,16 @@
#pragma once
namespace duckdb {
//! Polymorphic, type-erased base for transform results; carries no data of its own.
struct TransformResultValue {
	virtual ~TransformResultValue() = default;
};

//! Concrete holder of a transform result of type T behind the TransformResultValue base.
template <typename T>
struct TypedTransformResult : TransformResultValue {
	//! The stored result
	T value;

	//! Takes the result by value and moves it into the holder.
	explicit TypedTransformResult(T value_p) : value(std::move(value_p)) {
	}
};
} // namespace duckdb