should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,23 @@
# Build configuration for the DuckDB autocomplete extension.
cmake_minimum_required(VERSION 2.8.12...3.29)
project(AutoCompleteExtension)
include_directories(include)
# Core sources compiled into the extension target.
set(AUTOCOMPLETE_EXTENSION_FILES
    autocomplete_extension.cpp matcher.cpp tokenizer.cpp keyword_helper.cpp
    keyword_map.cpp)
# Transformer/parser subdirectories - presumably contribute additional sources
# or targets; their CMakeLists are not visible here (TODO confirm).
add_subdirectory(transformer)
add_subdirectory(parser)
# Built both statically (linked into DuckDB) and as a loadable module.
build_static_extension(autocomplete ${AUTOCOMPLETE_EXTENSION_FILES})
set(PARAMETERS "-warnings")
build_loadable_extension(autocomplete ${PARAMETERS}
                         ${AUTOCOMPLETE_EXTENSION_FILES})
install(
  TARGETS autocomplete_extension
  EXPORT "${DUCKDB_EXPORT_SET}"
  LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
  ARCHIVE DESTINATION "${INSTALL_LIB_DIR}")

View File

@@ -0,0 +1,756 @@
#include "autocomplete_extension.hpp"
#include "duckdb/catalog/catalog.hpp"
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
#include "duckdb/catalog/catalog_entry/view_catalog_entry.hpp"
#include "duckdb/common/case_insensitive_map.hpp"
#include "duckdb/common/exception.hpp"
#include "duckdb/common/file_opener.hpp"
#include "duckdb/function/table_function.hpp"
#include "duckdb/main/client_context.hpp"
#include "duckdb/main/client_data.hpp"
#include "duckdb/main/extension/extension_loader.hpp"
#include "transformer/peg_transformer.hpp"
#include "duckdb/parser/keyword_helper.hpp"
#include "matcher.hpp"
#include "duckdb/catalog/default/builtin_types/types.hpp"
#include "duckdb/main/attached_database.hpp"
#include "tokenizer.hpp"
#include "duckdb/catalog/catalog_entry/pragma_function_catalog_entry.hpp"
#include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp"
#include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp"
namespace duckdb {
// Bind data for sql_auto_complete: the precomputed, ranked list of suggestions.
struct SQLAutoCompleteFunctionData : public TableFunctionData {
	explicit SQLAutoCompleteFunctionData(vector<AutoCompleteSuggestion> suggestions_p)
	    : suggestions(std::move(suggestions_p)) {
	}
	vector<AutoCompleteSuggestion> suggestions;
};
// Global execution state for sql_auto_complete: tracks output progress.
struct SQLAutoCompleteData : public GlobalTableFunctionState {
	SQLAutoCompleteData() : offset(0) {
	}
	// index of the next suggestion to emit
	idx_t offset;
};
// Score every candidate against the typed prefix and return the best matches.
// A candidate's score_bonus is subtracted from the base score and a penalty is
// added when the prefix is not a case-insensitive substring of the candidate,
// i.e. lower scores appear to rank better via StringUtil::TopNStrings - TODO
// confirm TopNStrings ordering against its implementation.
static vector<AutoCompleteSuggestion> ComputeSuggestions(vector<AutoCompleteCandidate> available_suggestions,
                                                         const string &prefix) {
	vector<pair<string, idx_t>> scores;
	scores.reserve(available_suggestions.size());
	// maps candidate text (including any extra_char) to its index; also acts
	// as a duplicate filter
	case_insensitive_map_t<idx_t> matches;
	bool prefix_is_lower = StringUtil::IsLower(prefix);
	bool prefix_is_upper = StringUtil::IsUpper(prefix);
	auto lower_prefix = StringUtil::Lower(prefix);
	for (idx_t i = 0; i < available_suggestions.size(); i++) {
		auto &suggestion = available_suggestions[i];
		const int32_t BASE_SCORE = 10;
		const int32_t SUBSTRING_PENALTY = 10;
		auto str = suggestion.candidate;
		if (suggestion.extra_char != '\0') {
			str += suggestion.extra_char;
		}
		auto bonus = suggestion.score_bonus;
		if (matches.find(str) != matches.end()) {
			// entry already exists
			continue;
		}
		matches[str] = i;
		D_ASSERT(BASE_SCORE - bonus >= 0);
		auto score = idx_t(BASE_SCORE - bonus);
		if (prefix.empty()) {
			// nothing typed yet - rank on the bonus alone
		} else if (prefix.size() < str.size()) {
			// compare only the leading part of the candidate
			score += StringUtil::SimilarityScore(str.substr(0, prefix.size()), prefix);
		} else {
			score += StringUtil::SimilarityScore(str, prefix);
		}
		if (!StringUtil::Contains(StringUtil::Lower(str), lower_prefix)) {
			score += SUBSTRING_PENALTY;
		}
		scores.emplace_back(str, score);
	}
	vector<AutoCompleteSuggestion> results;
	auto top_strings = StringUtil::TopNStrings(scores, 20, 999);
	for (auto &result : top_strings) {
		auto entry = matches.find(result);
		if (entry == matches.end()) {
			throw InternalException("Auto-complete match not found");
		}
		auto &suggestion = available_suggestions[entry->second];
		// temporarily strip the extra_char so casing/quoting is applied to the
		// candidate text only
		if (suggestion.extra_char != '\0') {
			result.pop_back();
		}
		if (suggestion.candidate_type == CandidateType::KEYWORD) {
			// keywords follow the casing of what the user typed so far
			if (prefix_is_lower) {
				result = StringUtil::Lower(result);
			} else if (prefix_is_upper) {
				result = StringUtil::Upper(result);
			}
		} else if (suggestion.candidate_type == CandidateType::IDENTIFIER) {
			// identifiers are quoted when necessary
			result = KeywordHelper::WriteOptionallyQuoted(result, '"');
		}
		if (suggestion.extra_char != '\0') {
			result += suggestion.extra_char;
		}
		results.emplace_back(std::move(result), suggestion.suggestion_pos);
	}
	return results;
}
// Collect every database currently attached to this connection.
static vector<shared_ptr<AttachedDatabase>> GetAllCatalogs(ClientContext &context) {
	auto &database_manager = DatabaseManager::Get(context);
	auto databases = database_manager.GetDatabases(context);
	vector<shared_ptr<AttachedDatabase>> result;
	result.insert(result.end(), databases.begin(), databases.end());
	return result;
}
// Thin wrapper so schema listing matches the signature style of the other helpers.
static vector<reference<SchemaCatalogEntry>> GetAllSchemas(ClientContext &context) {
	return Catalog::GetAllSchemas(context);
}
// Scan all schemas and collect the catalog entries relevant for completion.
// For column names (for_table_names == false) internal entries are skipped,
// because they pollute the auto-complete too much; for table names internal
// entries are generally fine. Table-name completion additionally includes
// table functions, column-name completion includes scalar functions.
static vector<reference<CatalogEntry>> GetAllTables(ClientContext &context, bool for_table_names) {
	vector<reference<CatalogEntry>> result;
	auto schemas = Catalog::GetAllSchemas(context);
	for (auto &schema_ref : schemas) {
		auto &schema = schema_ref.get();
		schema.Scan(context, CatalogType::TABLE_ENTRY, [&](CatalogEntry &entry) {
			if (!entry.internal || for_table_names) {
				result.push_back(entry);
			}
		});
	};
	if (for_table_names) {
		// table functions can appear anywhere a table name can
		for (auto &schema_ref : schemas) {
			auto &schema = schema_ref.get();
			schema.Scan(context, CatalogType::TABLE_FUNCTION_ENTRY,
			            [&](CatalogEntry &entry) { result.push_back(entry); });
		};
	} else {
		// scalar functions can appear anywhere a column name can
		for (auto &schema_ref : schemas) {
			auto &schema = schema_ref.get();
			schema.Scan(context, CatalogType::SCALAR_FUNCTION_ENTRY,
			            [&](CatalogEntry &entry) { result.push_back(entry); });
		};
	}
	return result;
}
// Suggest every attached database name, each followed by '.' so the user can
// keep qualifying the name.
static vector<AutoCompleteCandidate> SuggestCatalogName(ClientContext &context) {
	vector<AutoCompleteCandidate> result;
	for (auto &database : GetAllCatalogs(context)) {
		result.emplace_back(database->name, 0);
		result.back().extra_char = '.';
	}
	return result;
}
// Suggest every visible schema name, each followed by '.' so the user can
// keep qualifying the name.
static vector<AutoCompleteCandidate> SuggestSchemaName(ClientContext &context) {
	vector<AutoCompleteCandidate> result;
	for (auto &schema_ref : GetAllSchemas(context)) {
		result.emplace_back(schema_ref.get().name, 0);
		result.back().extra_char = '.';
	}
	return result;
}
// Suggest table-like names (tables, views, table functions); user-defined
// tables and views receive a score bonus of 1 over internal entries and
// table functions.
static vector<AutoCompleteCandidate> SuggestTableName(ClientContext &context) {
	vector<AutoCompleteCandidate> result;
	for (auto &entry_ref : GetAllTables(context, true)) {
		auto &entry = entry_ref.get();
		bool user_defined = !entry.internal && entry.type != CatalogType::TABLE_FUNCTION_ENTRY;
		result.emplace_back(entry.name, user_defined ? 1 : 0);
	}
	return result;
}
// Suggest all built-in type names; they are marked as keywords so they follow
// the casing the user typed.
static vector<AutoCompleteCandidate> SuggestType(ClientContext &) {
	vector<AutoCompleteCandidate> result;
	for (auto &builtin : BUILTIN_TYPES) {
		result.emplace_back(builtin.name, 0, CandidateType::KEYWORD);
	}
	return result;
}
// Candidates for a column-name position: columns of all tables, aliases of all
// views, plus scalar function names. User-defined entries get a larger bonus
// so they outrank internal ones.
static vector<AutoCompleteCandidate> SuggestColumnName(ClientContext &context) {
	vector<AutoCompleteCandidate> suggestions;
	auto all_entries = GetAllTables(context, false);
	for (auto &entry_ref : all_entries) {
		auto &entry = entry_ref.get();
		if (entry.type == CatalogType::TABLE_ENTRY) {
			auto &table = entry.Cast<TableCatalogEntry>();
			int32_t bonus = entry.internal ? 0 : 3;
			for (auto &col : table.GetColumns().Logical()) {
				suggestions.emplace_back(col.GetName(), bonus);
			}
		} else if (entry.type == CatalogType::VIEW_ENTRY) {
			auto &view = entry.Cast<ViewCatalogEntry>();
			int32_t bonus = entry.internal ? 0 : 3;
			for (auto &col : view.aliases) {
				suggestions.emplace_back(col, bonus);
			}
		} else {
			// scalar functions: skip names that start with an operator character
			if (StringUtil::CharacterIsOperator(entry.name[0])) {
				continue;
			}
			int32_t bonus = entry.internal ? 0 : 2;
			suggestions.emplace_back(entry.name, bonus);
		};
	}
	return suggestions;
}
static bool KnownExtension(const string &fname) {
vector<string> known_extensions {".parquet", ".csv", ".tsv", ".csv.gz", ".tsv.gz", ".tbl"};
for (auto &ext : known_extensions) {
if (StringUtil::EndsWith(fname, ext)) {
return true;
}
}
return false;
}
// Suggest the names of all registered pragma functions.
static vector<AutoCompleteCandidate> SuggestPragmaName(ClientContext &context) {
	vector<AutoCompleteCandidate> result;
	for (const auto &pragma : Catalog::GetAllEntries(context, CatalogType::PRAGMA_FUNCTION_ENTRY)) {
		result.emplace_back(pragma.get().name, 0);
	}
	return result;
}
// Suggest every configuration option: built-in options, their aliases, and any
// parameters registered by extensions.
static vector<AutoCompleteCandidate> SuggestSettingName(ClientContext &context) {
	auto &db_config = DBConfig::GetConfig(context);
	vector<AutoCompleteCandidate> result;
	for (const auto &option : db_config.GetOptions()) {
		result.emplace_back(option.name, 0);
	}
	for (const auto &option_alias : db_config.GetAliases()) {
		result.emplace_back(option_alias.alias, 0);
	}
	for (auto &entry : db_config.extension_parameters) {
		result.emplace_back(entry.first, 0);
	}
	return result;
}
// Suggest the names of all registered scalar functions.
static vector<AutoCompleteCandidate> SuggestScalarFunctionName(ClientContext &context) {
	vector<AutoCompleteCandidate> result;
	for (const auto &fun : Catalog::GetAllEntries(context, CatalogType::SCALAR_FUNCTION_ENTRY)) {
		result.emplace_back(fun.get().name, 0);
	}
	return result;
}
// Suggest the names of all registered table functions.
static vector<AutoCompleteCandidate> SuggestTableFunctionName(ClientContext &context) {
	vector<AutoCompleteCandidate> result;
	for (const auto &fun : Catalog::GetAllEntries(context, CatalogType::TABLE_FUNCTION_ENTRY)) {
		result.emplace_back(fun.get().name, 0);
	}
	return result;
}
// Suggest file and directory names for the path fragment in `prefix`.
// `prefix` is trimmed to the text after the last path separator and `last_pos`
// is advanced accordingly, so the completion replaces only the final path
// segment.
static vector<AutoCompleteCandidate> SuggestFileName(ClientContext &context, string &prefix, idx_t &last_pos) {
	vector<AutoCompleteCandidate> result;
	auto &config = DBConfig::GetConfig(context);
	if (!config.options.enable_external_access) {
		// if enable_external_access is disabled we don't search the file system
		return result;
	}
	auto &fs = FileSystem::GetFileSystem(context);
	string search_dir;
	auto is_path_absolute = fs.IsPathAbsolute(prefix);
	// walk backwards to the last path separator; everything before it is the
	// directory to list, everything after it the fragment being completed
	last_pos += prefix.size();
	for (idx_t i = prefix.size(); i > 0; i--, last_pos--) {
		if (prefix[i - 1] == '/' || prefix[i - 1] == '\\') {
			search_dir = prefix.substr(0, i - 1);
			prefix = prefix.substr(i);
			break;
		}
	}
	if (search_dir.empty()) {
		// no separator found - list the root or the working directory
		search_dir = is_path_absolute ? "/" : ".";
	} else {
		search_dir = fs.ExpandPath(search_dir);
	}
	fs.ListFiles(search_dir, [&](const string &fname, bool is_dir) {
		string suggestion;
		if (is_dir) {
			// append a separator so completion continues into the directory
			suggestion = fname + fs.PathSeparator(fname);
		} else {
			// close the surrounding string literal for plain files
			suggestion = fname + "'";
		}
		// rank non-hidden directories first, then files with known extensions
		int score = 0;
		if (is_dir && fname[0] != '.') {
			score = 2;
		}
		if (KnownExtension(fname)) {
			score = 1;
		}
		result.emplace_back(std::move(suggestion), score);
		result.back().candidate_type = CandidateType::LITERAL;
	});
	return result;
}
// Tokenizer used for auto-completion: records the trailing (possibly partial)
// word and its position, and emits a file-name suggestion when the cursor
// ends up inside an unterminated string literal.
class AutoCompleteTokenizer : public BaseTokenizer {
public:
	AutoCompleteTokenizer(const string &sql, MatchState &state)
	    : BaseTokenizer(sql, state.tokens), suggestions(state.suggestions) {
		last_pos = 0;
	}
	// Invoked with the final partial token when tokenization reaches the end
	// of the input.
	void OnLastToken(TokenizeState state, string last_word_p, idx_t last_pos_p) override {
		if (state == TokenizeState::STRING_LITERAL) {
			// an open quoted string is completed as a file name
			suggestions.emplace_back(SuggestionState::SUGGEST_FILE_NAME);
		}
		last_word = std::move(last_word_p);
		last_pos = last_pos_p;
	}
	vector<MatcherSuggestion> &suggestions;
	// the partial word being completed and its position in the input
	string last_word;
	idx_t last_pos;
};
// Position and encoded width of a unicode whitespace character in a query.
struct UnicodeSpace {
	UnicodeSpace(idx_t pos, idx_t bytes) : pos(pos), bytes(bytes) {
	}
	idx_t pos;   // byte offset within the query
	idx_t bytes; // UTF-8 length (2 for U+00A0, 3 for the other handled spaces)
};
bool ReplaceUnicodeSpaces(const string &query, string &new_query, const vector<UnicodeSpace> &unicode_spaces) {
if (unicode_spaces.empty()) {
// no unicode spaces found
return false;
}
idx_t prev = 0;
for (auto &usp : unicode_spaces) {
new_query += query.substr(prev, usp.pos - prev);
new_query += " ";
prev = usp.pos + usp.bytes;
}
new_query += query.substr(prev, query.size() - prev);
return true;
}
// First character of a dollar-quote tag: ASCII letters or any byte \200-\377.
bool IsValidDollarQuotedStringTagFirstChar(const unsigned char &c) {
	if (c >= 0x80) {
		// any non-ASCII byte is allowed
		return true;
	}
	if (c >= 'a' && c <= 'z') {
		return true;
	}
	return c >= 'A' && c <= 'Z';
}
// Subsequent characters of a dollar-quote tag additionally allow digits.
bool IsValidDollarQuotedStringTagSubsequentChar(const unsigned char &c) {
	if (c >= '0' && c <= '9') {
		return true;
	}
	return IsValidDollarQuotedStringTagFirstChar(c);
}
// This function strips unicode space characters from the query and replaces them with regular spaces
// It returns true if any unicode space characters were found and stripped
// See here for a list of unicode space characters - https://jkorpela.fi/chars/spaces.html
// The scan is quote-aware: bytes inside '...'/"..." strings, $tag$...$tag$
// dollar-quoted strings and -- line comments are never recorded as spaces.
bool StripUnicodeSpaces(const string &query_str, string &new_query) {
	const idx_t NBSP_LEN = 2; // UTF-8 length of U+00A0
	const idx_t USP_LEN = 3;  // UTF-8 length of the three-byte spaces below
	idx_t pos = 0;
	unsigned char quote; // the active quote character while in "in_quotes"
	string_t dollar_quote_tag;
	vector<UnicodeSpace> unicode_spaces;
	auto query = const_uchar_ptr_cast(query_str.c_str());
	auto qsize = query_str.size();
regular:
	// default state: look for unicode spaces and for the start of quoted
	// regions / comments; the loop reads up to 3 bytes ahead
	for (; pos + 2 < qsize; pos++) {
		if (query[pos] == 0xC2) {
			if (query[pos + 1] == 0xA0) {
				// U+00A0 - C2A0
				unicode_spaces.emplace_back(pos, NBSP_LEN);
			}
		}
		if (query[pos] == 0xE2) {
			if (query[pos + 1] == 0x80) {
				if (query[pos + 2] >= 0x80 && query[pos + 2] <= 0x8B) {
					// U+2000 to U+200B
					// E28080 - E2808B
					unicode_spaces.emplace_back(pos, USP_LEN);
				} else if (query[pos + 2] == 0xAF) {
					// U+202F - E280AF
					unicode_spaces.emplace_back(pos, USP_LEN);
				}
			} else if (query[pos + 1] == 0x81) {
				if (query[pos + 2] == 0x9F) {
					// U+205F - E2819F
					unicode_spaces.emplace_back(pos, USP_LEN);
				} else if (query[pos + 2] == 0xA0) {
					// U+2060 - E281A0
					unicode_spaces.emplace_back(pos, USP_LEN);
				}
			}
		} else if (query[pos] == 0xE3) {
			if (query[pos + 1] == 0x80 && query[pos + 2] == 0x80) {
				// U+3000 - E38080
				unicode_spaces.emplace_back(pos, USP_LEN);
			}
		} else if (query[pos] == 0xEF) {
			if (query[pos + 1] == 0xBB && query[pos + 2] == 0xBF) {
				// U+FEFF - EFBBBF
				unicode_spaces.emplace_back(pos, USP_LEN);
			}
		} else if (query[pos] == '"' || query[pos] == '\'') {
			quote = query[pos];
			pos++;
			goto in_quotes;
		} else if (query[pos] == '$' &&
		           (query[pos + 1] == '$' || IsValidDollarQuotedStringTagFirstChar(query[pos + 1]))) {
			// (optionally tagged) dollar-quoted string
			auto start = &query[++pos];
			for (; pos + 2 < qsize; pos++) {
				if (query[pos] == '$') {
					// end of tag
					dollar_quote_tag =
					    string_t(const_char_ptr_cast(start), NumericCast<uint32_t, int64_t>(&query[pos] - start));
					goto in_dollar_quotes;
				}
				if (!IsValidDollarQuotedStringTagSubsequentChar(query[pos])) {
					// invalid char in dollar-quoted string, continue as normal
					goto regular;
				}
			}
			goto end;
		} else if (query[pos] == '-' && query[pos + 1] == '-') {
			goto in_comment;
		}
	}
	goto end;
in_quotes:
	// inside a single- or double-quoted region; doubled quotes are escapes
	for (; pos + 1 < qsize; pos++) {
		if (query[pos] == quote) {
			if (query[pos + 1] == quote) {
				// escaped quote
				pos++;
				continue;
			}
			pos++;
			goto regular;
		}
	}
	goto end;
in_dollar_quotes:
	// inside $tag$ ... $tag$; scan for the matching closing tag
	for (; pos + 2 < qsize; pos++) {
		if (query[pos] == '$' &&
		    qsize - (pos + 1) >= dollar_quote_tag.GetSize() + 1 && // found '$' and enough space left
		    query[pos + dollar_quote_tag.GetSize() + 1] == '$' && // ending '$' at the right spot
		    memcmp(&query[pos + 1], dollar_quote_tag.GetData(), dollar_quote_tag.GetSize()) == 0) { // tags match
			pos += dollar_quote_tag.GetSize() + 1;
			goto regular;
		}
	}
	goto end;
in_comment:
	// inside a -- comment; runs until end of line
	for (; pos < qsize; pos++) {
		if (query[pos] == '\n' || query[pos] == '\r') {
			goto regular;
		}
	}
	goto end;
end:
	return ReplaceUnicodeSpaces(query_str, new_query, unicode_spaces);
}
// Core of sql_auto_complete: tokenize `sql`, run the PEG matcher to determine
// which grammar elements may follow the cursor, expand those suggestion states
// into concrete candidates and rank them against the last (partial) word.
// Returns an empty suggestion list when completion is not allowed or nothing
// can be suggested.
static duckdb::unique_ptr<SQLAutoCompleteFunctionData> GenerateSuggestions(ClientContext &context, const string &sql) {
	// tokenize the input
	vector<MatcherToken> tokens;
	vector<MatcherSuggestion> suggestions;
	ParseResultAllocator parse_allocator;
	MatchState state(tokens, suggestions, parse_allocator);
	// normalize unicode spaces first so token positions refer to the cleaned text
	string clean_sql;
	const string &sql_ref = StripUnicodeSpaces(sql, clean_sql) ? clean_sql : sql;
	AutoCompleteTokenizer tokenizer(sql_ref, state);
	auto allow_complete = tokenizer.TokenizeInput();
	if (!allow_complete) {
		return make_uniq<SQLAutoCompleteFunctionData>(vector<AutoCompleteSuggestion>());
	}
	if (state.suggestions.empty()) {
		// no suggestions found during tokenizing
		// run the root matcher
		MatcherAllocator allocator;
		auto &matcher = Matcher::RootMatcher(allocator);
		matcher.Match(state);
	}
	if (state.suggestions.empty()) {
		// still no suggestions - return
		return make_uniq<SQLAutoCompleteFunctionData>(vector<AutoCompleteSuggestion>());
	}
	vector<AutoCompleteCandidate> available_suggestions;
	for (auto &suggestion : suggestions) {
		idx_t suggestion_pos = tokenizer.last_pos;
		// expand each suggestion state into concrete candidate strings
		vector<AutoCompleteCandidate> new_suggestions;
		switch (suggestion.type) {
		case SuggestionState::SUGGEST_VARIABLE:
			// variables have no suggestions available
			break;
		case SuggestionState::SUGGEST_KEYWORD:
			new_suggestions.emplace_back(suggestion.keyword);
			break;
		case SuggestionState::SUGGEST_CATALOG_NAME:
			new_suggestions = SuggestCatalogName(context);
			break;
		case SuggestionState::SUGGEST_SCHEMA_NAME:
			new_suggestions = SuggestSchemaName(context);
			break;
		case SuggestionState::SUGGEST_TABLE_NAME:
			new_suggestions = SuggestTableName(context);
			break;
		case SuggestionState::SUGGEST_COLUMN_NAME:
			new_suggestions = SuggestColumnName(context);
			break;
		case SuggestionState::SUGGEST_TYPE_NAME:
			new_suggestions = SuggestType(context);
			break;
		case SuggestionState::SUGGEST_FILE_NAME:
			// may rewrite tokenizer.last_word / suggestion_pos to cover only
			// the final path segment
			new_suggestions = SuggestFileName(context, tokenizer.last_word, suggestion_pos);
			break;
		case SuggestionState::SUGGEST_SCALAR_FUNCTION_NAME:
			new_suggestions = SuggestScalarFunctionName(context);
			break;
		case SuggestionState::SUGGEST_TABLE_FUNCTION_NAME:
			new_suggestions = SuggestTableFunctionName(context);
			break;
		case SuggestionState::SUGGEST_PRAGMA_NAME:
			new_suggestions = SuggestPragmaName(context);
			break;
		case SuggestionState::SUGGEST_SETTING_NAME:
			new_suggestions = SuggestSettingName(context);
			break;
		default:
			throw InternalException("Unrecognized suggestion state");
		}
		for (auto &new_suggestion : new_suggestions) {
			if (new_suggestion.extra_char == '\0') {
				// inherit the separator the grammar expects after this token
				new_suggestion.extra_char = suggestion.extra_char;
			}
			new_suggestion.suggestion_pos = suggestion_pos;
			available_suggestions.push_back(std::move(new_suggestion));
		}
	}
	auto result_suggestions = ComputeSuggestions(available_suggestions, tokenizer.last_word);
	return make_uniq<SQLAutoCompleteFunctionData>(std::move(result_suggestions));
}
// Bind for sql_auto_complete(query): defines the (suggestion, suggestion_start)
// result schema and computes all suggestions up front.
static duckdb::unique_ptr<FunctionData> SQLAutoCompleteBind(ClientContext &context, TableFunctionBindInput &input,
                                                            vector<LogicalType> &return_types, vector<string> &names) {
	if (input.inputs[0].IsNull()) {
		throw BinderException("sql_auto_complete first parameter cannot be NULL");
	}
	names.emplace_back("suggestion");
	return_types.emplace_back(LogicalType::VARCHAR);
	names.emplace_back("suggestion_start");
	return_types.emplace_back(LogicalType::INTEGER);
	return GenerateSuggestions(context, StringValue::Get(input.inputs[0]));
}
// Init for sql_auto_complete: fresh output state starting at the first suggestion.
unique_ptr<GlobalTableFunctionState> SQLAutoCompleteInit(ClientContext &context, TableFunctionInitInput &input) {
	return make_uniq<SQLAutoCompleteData>();
}
// Execution for sql_auto_complete: streams the precomputed suggestions out in
// chunks of up to STANDARD_VECTOR_SIZE rows.
void SQLAutoCompleteFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
	auto &bind_data = data_p.bind_data->Cast<SQLAutoCompleteFunctionData>();
	auto &data = data_p.global_state->Cast<SQLAutoCompleteData>();
	if (data.offset >= bind_data.suggestions.size()) {
		// finished returning values
		return;
	}
	// start returning values
	// either fill up the chunk or return all the remaining rows
	idx_t count = 0;
	while (data.offset < bind_data.suggestions.size() && count < STANDARD_VECTOR_SIZE) {
		auto &entry = bind_data.suggestions[data.offset++];
		// suggestion, VARCHAR
		output.SetValue(0, count, Value(entry.text));
		// suggestion_start, INTEGER
		output.SetValue(1, count, Value::INTEGER(NumericCast<int32_t>(entry.pos)));
		count++;
	}
	output.SetCardinality(count);
}
// Tokenizer used by the PEG parser path: collects one token list per statement.
class ParserTokenizer : public BaseTokenizer {
public:
	ParserTokenizer(const string &sql, vector<MatcherToken> &tokens) : BaseTokenizer(sql, tokens) {
	}
	// a statement terminator was reached - archive the current token list and
	// start collecting the next statement
	void OnStatementEnd(idx_t pos) override {
		statements.push_back(std::move(tokens));
		tokens.clear();
	}
	// flush the trailing word (if any) of the final statement
	void OnLastToken(TokenizeState state, string last_word, idx_t last_pos) override {
		if (last_word.empty()) {
			return;
		}
		tokens.emplace_back(std::move(last_word), last_pos);
	}
	// completed statements; the final (possibly unterminated) token list is
	// appended by the caller after TokenizeInput returns
	vector<vector<MatcherToken>> statements;
};
// Bind for check_peg_parser(query): runs the PEG matcher over every statement
// in the input and throws a BinderException describing the first statement
// that fails to parse (or leaves tokens unconsumed). All work happens here;
// execution produces no rows.
static duckdb::unique_ptr<FunctionData> CheckPEGParserBind(ClientContext &context, TableFunctionBindInput &input,
                                                           vector<LogicalType> &return_types, vector<string> &names) {
	if (input.inputs[0].IsNull()) {
		// fixed: previously reported "sql_auto_complete" here (copy-paste)
		throw BinderException("check_peg_parser first parameter cannot be NULL");
	}
	names.emplace_back("success");
	return_types.emplace_back(LogicalType::BOOLEAN);
	const auto sql = StringValue::Get(input.inputs[0]);
	vector<MatcherToken> root_tokens;
	string clean_sql;
	const string &sql_ref = StripUnicodeSpaces(sql, clean_sql) ? clean_sql : sql;
	ParserTokenizer tokenizer(sql_ref, root_tokens);
	auto allow_complete = tokenizer.TokenizeInput();
	if (!allow_complete) {
		return nullptr;
	}
	tokenizer.statements.push_back(std::move(root_tokens));
	for (auto &tokens : tokenizer.statements) {
		if (tokens.empty()) {
			continue;
		}
		vector<MatcherSuggestion> suggestions;
		ParseResultAllocator parse_allocator;
		MatchState state(tokens, suggestions, parse_allocator);
		MatcherAllocator allocator;
		auto &matcher = Matcher::RootMatcher(allocator);
		auto match_result = matcher.Match(state);
		if (match_result != MatchResultType::SUCCESS || state.token_index < tokens.size()) {
			string token_list;
			for (idx_t i = 0; i < tokens.size(); i++) {
				if (!token_list.empty()) {
					token_list += "\n";
				}
				if (i < 10) {
					token_list += " ";
				}
				token_list += to_string(i) + ":" + tokens[i].text;
			}
			// the matcher can fail after consuming every token, in which case
			// token_index == tokens.size(); guard the lookup to avoid reading
			// past the end of the token vector
			const string failed_token =
			    state.token_index < tokens.size() ? tokens[state.token_index].text : "<end of input>";
			throw BinderException(
			    "Failed to parse query \"%s\" - did not consume all tokens (got to token %d - %s)\nTokens:\n%s", sql,
			    state.token_index, failed_token, token_list);
		}
	}
	return nullptr;
}
// check_peg_parser does all of its work (and error reporting) at bind time,
// so execution intentionally produces no rows.
void CheckPEGParserFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
}
// Parser extension that overrides DuckDB's default parser with the PEG-based
// transformer pipeline.
class PEGParserExtension : public ParserExtension {
public:
	PEGParserExtension() {
		parser_override = PEGParser;
	}
	// Parser override: tokenize `query` into statements, transform each one
	// through the PEG transformer, and return the resulting SQL statements,
	// or wrap any thrown error in the result.
	static ParserOverrideResult PEGParser(ParserExtensionInfo *info, const string &query) {
		vector<MatcherToken> root_tokens;
		ParserTokenizer tokenizer(query, root_tokens);
		tokenizer.TokenizeInput();
		// the final (unterminated) statement is still held in root_tokens
		tokenizer.statements.push_back(std::move(root_tokens));
		vector<unique_ptr<SQLStatement>> result;
		try {
			for (auto &tokenized_statement : tokenizer.statements) {
				if (tokenized_statement.empty()) {
					continue;
				}
				auto &transformer = PEGTransformerFactory::GetInstance();
				auto statement = transformer.Transform(tokenized_statement, "Statement");
				if (statement) {
					statement->stmt_location = NumericCast<idx_t>(tokenized_statement[0].offset);
					statement->stmt_length =
					    NumericCast<idx_t>(tokenized_statement[tokenized_statement.size() - 1].offset +
					                       tokenized_statement[tokenized_statement.size() - 1].length);
					// only dereference when the transformer produced a statement;
					// previously this assignment ran unconditionally and could
					// dereference a null pointer
					statement->query = query;
				}
				result.push_back(std::move(statement));
			}
			return ParserOverrideResult(std::move(result));
		} catch (std::exception &e) {
			return ParserOverrideResult(e);
		}
	}
};
// Register the sql_auto_complete and check_peg_parser table functions and
// install the PEG parser override.
static void LoadInternal(ExtensionLoader &loader) {
	TableFunction auto_complete_fun("sql_auto_complete", {LogicalType::VARCHAR}, SQLAutoCompleteFunction,
	                                SQLAutoCompleteBind, SQLAutoCompleteInit);
	loader.RegisterFunction(auto_complete_fun);
	// check_peg_parser performs its checks at bind time and needs no init
	TableFunction check_peg_parser_fun("check_peg_parser", {LogicalType::VARCHAR}, CheckPEGParserFunction,
	                                   CheckPEGParserBind, nullptr);
	loader.RegisterFunction(check_peg_parser_fun);
	auto &config = DBConfig::GetConfig(loader.GetDatabaseInstance());
	config.parser_extensions.push_back(PEGParserExtension());
}
// Extension entry point used when the extension is linked statically.
void AutocompleteExtension::Load(ExtensionLoader &loader) {
	LoadInternal(loader);
}
// Extension name as registered with DuckDB.
std::string AutocompleteExtension::Name() {
	return "autocomplete";
}
std::string AutocompleteExtension::Version() const {
	return DefaultVersion();
}
} // namespace duckdb
extern "C" {
// C entry point used when the extension is loaded dynamically.
DUCKDB_CPP_EXTENSION_ENTRY(autocomplete, loader) {
	LoadInternal(loader);
}
}

View File

@@ -0,0 +1,54 @@
BETWEEN
BIGINT
BIT
BOOLEAN
CHAR
CHARACTER
COALESCE
COLUMNS
DEC
DECIMAL
EXISTS
EXTRACT
FLOAT
GENERATED
GROUPING
GROUPING_ID
INOUT
INT
INTEGER
INTERVAL
MAP
NATIONAL
NCHAR
NONE
NULLIF
NUMERIC
OUT
OVERLAY
POSITION
PRECISION
REAL
ROW
SETOF
SMALLINT
SUBSTRING
STRUCT
TIME
TIMESTAMP
TREAT
TRIM
TRY_CAST
VALUES
VARCHAR
XMLATTRIBUTES
XMLCONCAT
XMLELEMENT
XMLEXISTS
XMLFOREST
XMLNAMESPACES
XMLPARSE
XMLPI
XMLROOT
XMLSERIALIZE
XMLTABLE

View File

@@ -0,0 +1,29 @@
ASOF
AT
AUTHORIZATION
BINARY
COLLATION
CONCURRENTLY
CROSS
FREEZE
FULL
GENERATED
GLOB
ILIKE
INNER
IS
ISNULL
JOIN
LEFT
LIKE
MAP
NATURAL
NOTNULL
OUTER
OVERLAPS
POSITIONAL
RIGHT
SIMILAR
STRUCT
TABLESAMPLE
VERBOSE

View File

@@ -0,0 +1,75 @@
ALL
ANALYSE
ANALYZE
AND
ANY
ARRAY
AS
ASC
ASYMMETRIC
BOTH
CASE
CAST
CHECK
COLLATE
COLUMN
CONSTRAINT
CREATE
DEFAULT
DEFERRABLE
DESC
DESCRIBE
DISTINCT
DO
ELSE
END
EXCEPT
FALSE
FETCH
FOR
FOREIGN
FROM
GROUP
HAVING
QUALIFY
IN
INITIALLY
INTERSECT
INTO
LAMBDA
LATERAL
LEADING
LIMIT
NOT
NULL
OFFSET
ON
ONLY
OR
ORDER
PIVOT
PIVOT_WIDER
PIVOT_LONGER
PLACING
PRIMARY
REFERENCES
RETURNING
SELECT
SHOW
SOME
SUMMARIZE
SYMMETRIC
TABLE
THEN
TO
TRAILING
TRUE
UNION
UNIQUE
UNPIVOT
USING
VARIADIC
WHEN
WHERE
WINDOW
WITH

View File

@@ -0,0 +1,32 @@
ASOF
AT
AUTHORIZATION
BINARY
BY
COLLATION
COLUMNS
CONCURRENTLY
CROSS
FREEZE
FULL
GLOB
ILIKE
INNER
IS
ISNULL
JOIN
LEFT
LIKE
NATURAL
NOTNULL
OUTER
OVERLAPS
POSITIONAL
RIGHT
UNPACK
SIMILAR
TABLESAMPLE
TRY_CAST
VERBOSE
SEMI
ANTI

View File

@@ -0,0 +1,330 @@
ABORT
ABSOLUTE
ACCESS
ACTION
ADD
ADMIN
AFTER
AGGREGATE
ALSO
ALTER
ALWAYS
ASSERTION
ASSIGNMENT
ATTACH
ATTRIBUTE
BACKWARD
BEFORE
BEGIN
CACHE
CALL
CALLED
CASCADE
CASCADED
CATALOG
CENTURY
CENTURIES
CHAIN
CHARACTERISTICS
CHECKPOINT
CLASS
CLOSE
CLUSTER
COMMENT
COMMENTS
COMMIT
COMMITTED
COMPRESSION
CONFIGURATION
CONFLICT
CONNECTION
CONSTRAINTS
CONTENT
CONTINUE
CONVERSION
COPY
COST
CSV
CUBE
CURRENT
CURSOR
CYCLE
DATA
DATABASE
DAY
DAYS
DEALLOCATE
DECADE
DECADES
DECLARE
DEFAULTS
DEFERRED
DEFINER
DELETE
DELIMITER
DELIMITERS
DEPENDS
DETACH
DICTIONARY
DISABLE
DISCARD
DOCUMENT
DOMAIN
DOUBLE
DROP
EACH
ENABLE
ENCODING
ENCRYPTED
ENUM
ERROR
ESCAPE
EVENT
EXCLUDE
EXCLUDING
EXCLUSIVE
EXECUTE
EXPLAIN
EXPORT
EXPORT_STATE
EXTENSION
EXTENSIONS
EXTERNAL
FAMILY
FILTER
FIRST
FOLLOWING
FORCE
FORWARD
FUNCTION
FUNCTIONS
GLOBAL
GRANT
GRANTED
GROUPS
HANDLER
HEADER
HOLD
HOUR
HOURS
IDENTITY
IF
IGNORE
IMMEDIATE
IMMUTABLE
IMPLICIT
IMPORT
INCLUDE
INCLUDING
INCREMENT
INDEX
INDEXES
INHERIT
INHERITS
INLINE
INPUT
INSENSITIVE
INSERT
INSTALL
INSTEAD
INVOKER
JSON
ISOLATION
KEY
LABEL
LANGUAGE
LARGE
LAST
LEAKPROOF
LEVEL
LISTEN
LOAD
LOCAL
LOCATION
LOCK
LOCKED
LOGGED
MACRO
MAPPING
MATCH
MATCHED
MATERIALIZED
MAXVALUE
MERGE
METHOD
MICROSECOND
MICROSECONDS
MILLENNIUM
MILLENNIA
MILLISECOND
MILLISECONDS
MINUTE
MINUTES
MINVALUE
MODE
MONTH
MONTHS
MOVE
NAME
NAMES
NEW
NEXT
NO
NOTHING
NOTIFY
NOWAIT
NULLS
OBJECT
OF
OFF
OIDS
OLD
OPERATOR
OPTION
OPTIONS
ORDINALITY
OTHERS
OVER
OVERRIDING
OWNED
OWNER
PARALLEL
PARSER
PARTIAL
PARTITION
PARTITIONED
PASSING
PASSWORD
PERCENT
PERSISTENT
PLANS
POLICY
PRAGMA
PRECEDING
PREPARE
PREPARED
PRESERVE
PRIOR
PRIVILEGES
PROCEDURAL
PROCEDURE
PROGRAM
PUBLICATION
QUARTER
QUARTERS
QUOTE
RANGE
READ
REASSIGN
RECHECK
RECURSIVE
REF
REFERENCING
REFRESH
REINDEX
RELATIVE
RELEASE
RENAME
REPEATABLE
REPLACE
REPLICA
RESET
RESPECT
RESTART
RESTRICT
RETURNS
REVOKE
ROLE
ROLLBACK
ROLLUP
ROWS
RULE
SAMPLE
SAVEPOINT
SCHEMA
SCHEMAS
SCOPE
SCROLL
SEARCH
SECRET
SECOND
SECONDS
SECURITY
SEQUENCE
SEQUENCES
SERIALIZABLE
SERVER
SESSION
SET
SETS
SHARE
SIMPLE
SKIP
SNAPSHOT
SORTED
SOURCE
SQL
STABLE
STANDALONE
START
STATEMENT
STATISTICS
STDIN
STDOUT
STORAGE
STORED
STRICT
STRIP
SUBSCRIPTION
SYSID
SYSTEM
TABLES
TABLESPACE
TARGET
TEMP
TEMPLATE
TEMPORARY
TEXT
TIES
TRANSACTION
TRANSFORM
TRIGGER
TRUNCATE
TRUSTED
TYPE
TYPES
UNBOUNDED
UNCOMMITTED
UNENCRYPTED
UNKNOWN
UNLISTEN
UNLOGGED
UNTIL
UPDATE
USE
USER
VACUUM
VALID
VALIDATE
VALIDATOR
VALUE
VARIABLE
VARYING
VERSION
VIEW
VIEWS
VIRTUAL
VOLATILE
WEEK
WEEKS
WHITESPACE
WITHIN
WITHOUT
WORK
WRAPPER
WRITE
XML
YEAR
YEARS
YES
ZONE

View File

@@ -0,0 +1,46 @@
AlterStatement <- 'ALTER' AlterOptions
AlterOptions <- AlterTableStmt / AlterViewStmt / AlterSequenceStmt / AlterDatabaseStmt / AlterSchemaStmt
AlterTableStmt <- 'TABLE' IfExists? BaseTableName AlterTableOptions
AlterSchemaStmt <- 'SCHEMA' IfExists? QualifiedName RenameAlter
AlterTableOptions <- AddColumn / DropColumn / AlterColumn / AddConstraint / ChangeNullability / RenameColumn / RenameAlter / SetPartitionedBy / ResetPartitionedBy / SetSortedBy / ResetSortedBy
AddConstraint <- 'ADD' TopLevelConstraint
AddColumn <- 'ADD' 'COLUMN'? IfNotExists? ColumnDefinition
DropColumn <- 'DROP' 'COLUMN'? IfExists? NestedColumnName DropBehavior?
AlterColumn <- 'ALTER' 'COLUMN'? NestedColumnName AlterColumnEntry
RenameColumn <- 'RENAME' 'COLUMN'? NestedColumnName 'TO' Identifier
NestedColumnName <- (Identifier '.')* ColumnName
RenameAlter <- 'RENAME' 'TO' Identifier
SetPartitionedBy <- 'SET' 'PARTITIONED' 'BY' Parens(List(Expression))
ResetPartitionedBy <- 'RESET' 'PARTITIONED' 'BY'
SetSortedBy <- 'SET' 'SORTED' 'BY' Parens(OrderByExpressions)
ResetSortedBy <- 'RESET' 'SORTED' 'BY'
AlterColumnEntry <- AddOrDropDefault / ChangeNullability / AlterType
AddOrDropDefault <- AddDefault / DropDefault
AddDefault <- 'SET' 'DEFAULT' Expression
DropDefault <- 'DROP' 'DEFAULT'
ChangeNullability <- ('DROP' / 'SET') 'NOT' 'NULL'
AlterType <- SetData? 'TYPE' Type? UsingExpression?
SetData <- 'SET' 'DATA'?
UsingExpression <- 'USING' Expression
AlterViewStmt <- 'VIEW' IfExists? BaseTableName RenameAlter
AlterSequenceStmt <- 'SEQUENCE' IfExists? QualifiedSequenceName AlterSequenceOptions
QualifiedSequenceName <- CatalogQualification? SchemaQualification? SequenceName
AlterSequenceOptions <- RenameAlter / SetSequenceOption
SetSequenceOption <- List(SequenceOption)
AlterDatabaseStmt <- 'DATABASE' IfExists? Identifier RenameDatabaseAlter
RenameDatabaseAlter <- 'RENAME' 'TO' Identifier

View File

@@ -0,0 +1,3 @@
AnalyzeStatement <- 'ANALYZE' 'VERBOSE'? AnalyzeTarget?
AnalyzeTarget <- QualifiedName Parens(List(Name))?
Name <- ColId ('.' ColLabel)*

View File

@@ -0,0 +1,6 @@
AttachStatement <- 'ATTACH' OrReplace? IfNotExists? Database? DatabasePath AttachAlias? AttachOptions?
Database <- 'DATABASE'
DatabasePath <- StringLiteral
AttachAlias <- 'AS' ColId
AttachOptions <- Parens(GenericCopyOptionList)

View File

@@ -0,0 +1 @@
CallStatement <- 'CALL' TableFunctionName TableFunctionArguments

View File

@@ -0,0 +1 @@
CheckpointStatement <- 'FORCE'? 'CHECKPOINT' CatalogName?

View File

@@ -0,0 +1,5 @@
CommentStatement <- 'COMMENT' 'ON' CommentOnType ColumnReference 'IS' CommentValue
CommentOnType <- 'TABLE' / 'SEQUENCE' / 'FUNCTION' / ('MACRO' 'TABLE'?) / 'VIEW' / 'DATABASE' / 'INDEX' / 'SCHEMA' / 'TYPE' / 'COLUMN'
CommentValue <- 'NULL' / StringLiteral

View File

@@ -0,0 +1,133 @@
# Top-level entry point: a statement is one of the supported statement kinds,
# tried in order.
Statement <-
CreateStatement /
SelectStatement /
SetStatement /
PragmaStatement /
CallStatement /
InsertStatement /
DropStatement /
CopyStatement /
ExplainStatement /
UpdateStatement /
PrepareStatement /
ExecuteStatement /
AlterStatement /
TransactionStatement /
DeleteStatement /
AttachStatement /
UseStatement /
DetachStatement /
CheckpointStatement /
VacuumStatement /
ResetStatement /
ExportStatement /
ImportStatement /
CommentStatement /
DeallocateStatement /
TruncateStatement /
LoadStatement /
InstallStatement /
AnalyzeStatement /
MergeIntoStatement
# Named identifier categories; the distinct rule names drive context-aware
# suggestions (catalog vs schema vs table vs column, ...).
CatalogName <- Identifier
SchemaName <- Identifier
ReservedSchemaName <- Identifier
TableName <- Identifier
ReservedTableName <- Identifier
ReservedIdentifier <- Identifier
ColumnName <- Identifier
ReservedColumnName <- Identifier
IndexName <- Identifier
SettingName <- Identifier
PragmaName <- Identifier
FunctionName <- Identifier
ReservedFunctionName <- Identifier
TableFunctionName <- Identifier
ConstraintName <- ColIdOrString
SequenceName <- Identifier
CollationName <- Identifier
CopyOptionName <- ColLabel
SecretName <- ColId
# NOTE(review): every part of this token is optional, so NumberLiteral can
# match the empty string (and a lone '+'/'-'/'.') — confirm this is intended.
NumberLiteral <- < [+-]?[0-9]*([.][0-9]*)? >
# NOTE(review): no handling of doubled quotes ('') inside string literals — verify.
StringLiteral <- '\'' [^\']* '\''
Type <- (TimeType / IntervalType / BitType / RowType / MapType / UnionType / NumericType / SetofType / SimpleType) ArrayBounds*
SimpleType <- (QualifiedTypeName / CharacterType) TypeModifiers?
CharacterType <- ('CHARACTER' 'VARYING'?) /
('CHAR' 'VARYING'?) /
('NATIONAL' 'CHARACTER' 'VARYING'?) /
('NATIONAL' 'CHAR' 'VARYING'?) /
('NCHAR' 'VARYING'?) /
'VARCHAR'
# INTERVAL type: either a parenthesized precision (INTERVAL (n)) or an
# optional interval unit (INTERVAL [YEAR ...]).
# Reordered: Interval? is optional, so the bare form always succeeds right
# after consuming 'INTERVAL'; under ordered-choice matching the
# Parens(NumberLiteral) alternative was unreachable and 'INTERVAL (5)' could
# never parse. Trying the parenthesized form first preserves the old language
# and additionally makes that form reachable.
IntervalType <- ('INTERVAL' Parens(NumberLiteral)) / ('INTERVAL' Interval?)
# Interval unit keywords; each accepts the singular and plural spelling.
YearKeyword <- 'YEAR' / 'YEARS'
MonthKeyword <- 'MONTH' / 'MONTHS'
DayKeyword <- 'DAY' / 'DAYS'
HourKeyword <- 'HOUR' / 'HOURS'
MinuteKeyword <- 'MINUTE' / 'MINUTES'
SecondKeyword <- 'SECOND' / 'SECONDS'
MillisecondKeyword <- 'MILLISECOND' / 'MILLISECONDS'
MicrosecondKeyword <- 'MICROSECOND' / 'MICROSECONDS'
WeekKeyword <- 'WEEK' / 'WEEKS'
QuarterKeyword <- 'QUARTER' / 'QUARTERS'
DecadeKeyword <- 'DECADE' / 'DECADES'
CenturyKeyword <- 'CENTURY' / 'CENTURIES'
MillenniumKeyword <- 'MILLENNIUM' / 'MILLENNIA'
# Interval unit: either a compound range ('YEAR TO MONTH', 'DAY TO SECOND',
# ...) or a single unit keyword.
# Reordered: the compound 'X TO Y' alternatives must come before the bare
# keywords. With the single keywords first, ordered-choice matching committed
# to e.g. YearKeyword and 'INTERVAL YEAR TO MONTH' failed on the trailing
# 'TO MONTH'. Longest-match-first preserves the old language and makes the
# compound forms reachable.
Interval <- (YearKeyword 'TO' MonthKeyword) /
(DayKeyword 'TO' HourKeyword) /
(DayKeyword 'TO' MinuteKeyword) /
(DayKeyword 'TO' SecondKeyword) /
(HourKeyword 'TO' MinuteKeyword) /
(HourKeyword 'TO' SecondKeyword) /
(MinuteKeyword 'TO' SecondKeyword) /
YearKeyword /
MonthKeyword /
DayKeyword /
HourKeyword /
MinuteKeyword /
SecondKeyword /
MillisecondKeyword /
MicrosecondKeyword /
WeekKeyword /
QuarterKeyword /
DecadeKeyword /
CenturyKeyword /
MillenniumKeyword
# Remaining type grammar: numeric, nested (ROW/STRUCT, MAP, UNION), array
# bounds and time/timestamp types, plus shared helper macros.
BitType <- 'BIT' 'VARYING'? Parens(List(Expression))?
NumericType <- 'INT' /
'INTEGER' /
'SMALLINT' /
'BIGINT' /
'REAL' /
'BOOLEAN' /
('FLOAT' Parens(NumberLiteral)?) /
('DOUBLE' 'PRECISION') /
('DECIMAL' TypeModifiers?) /
('DEC' TypeModifiers?) /
('NUMERIC' TypeModifiers?)
QualifiedTypeName <- CatalogQualification? SchemaQualification? TypeName
TypeModifiers <- Parens(List(Expression)?)
RowType <- RowOrStruct Parens(List(ColIdType))
UnionType <- 'UNION' Parens(List(ColIdType))
SetofType <- 'SETOF' Type
MapType <- 'MAP' Parens(List(Type))
ColIdType <- ColId Type
ArrayBounds <- ('[' NumberLiteral? ']') / 'ARRAY'
TimeType <- TimeOrTimestamp TypeModifiers? TimeZone?
TimeOrTimestamp <- 'TIME' / 'TIMESTAMP'
TimeZone <- WithOrWithout 'TIME' 'ZONE'
WithOrWithout <- 'WITH' / 'WITHOUT'
RowOrStruct <- 'ROW' / 'STRUCT'
# internal definitions
%whitespace <- [ \t\n\r]*
# List(D): comma-separated list of D with an optional trailing comma.
List(D) <- D (',' D)* ','?
# Parens(D): D wrapped in parentheses.
Parens(D) <- '(' D ')'

View File

@@ -0,0 +1,30 @@
# COPY statement: table <-> file transfer, query-to-file export, or
# database-to-database copy.
CopyStatement <- 'COPY' (CopyTable / CopySelect / CopyFromDatabase)
CopyTable <- BaseTableName InsertColumnList? FromOrTo CopyFileName CopyOptions?
FromOrTo <- 'FROM' / 'TO'
CopySelect <- Parens(SelectStatement) 'TO' CopyFileName CopyOptions?
# NOTE(review): Expression is tried first, so the later alternatives are
# largely shadowed by it — confirm this ordering is intentional.
CopyFileName <- Expression / StringLiteral / Identifier / (Identifier '.' ColId)
CopyOptions <- 'WITH'? (Parens(GenericCopyOptionList) / (SpecializedOptions*))
# Postgres-style (non-parenthesized) COPY options.
SpecializedOptions <-
'BINARY' / 'FREEZE' / 'OIDS' / 'CSV' / 'HEADER' /
SpecializedStringOption /
('ENCODING' StringLiteral) /
('FORCE' 'QUOTE' StarOrColumnList) /
('PARTITION' 'BY' StarOrColumnList) /
('FORCE' 'NOT'? 'NULL' ColumnList)
SpecializedStringOption <- ('DELIMITER' / 'NULL' / 'QUOTE' / 'ESCAPE') 'AS'? StringLiteral
StarOrColumnList <- '*' / ColumnList
GenericCopyOptionList <- List(GenericCopyOption)
GenericCopyOption <- GenericCopyOptionName Expression?
# FIXME: should not need to hard-code options here
GenericCopyOptionName <- 'ARRAY' / 'NULL' / 'ANALYZE' / CopyOptionName
COPY FROM DATABASE src TO dst [(SCHEMA|DATA)] is handled below.
CopyFromDatabase <- 'FROM' 'DATABASE' ColId 'TO' ColId CopyDatabaseFlag?
CopyDatabaseFlag <- Parens(SchemaOrData)
SchemaOrData <- 'SCHEMA' / 'DATA'

View File

@@ -0,0 +1,10 @@
# CREATE [UNIQUE] INDEX ... ON table [USING type] (elements) [WITH ...] [WHERE ...]
CreateIndexStmt <- Unique? 'INDEX' IfNotExists? IndexName? 'ON' BaseTableName IndexType? Parens(List(IndexElement)) WithList? WhereClause?
WithList <- 'WITH' Parens(List(RelOption)) / Oids
Oids <- ('WITH' / 'WITHOUT') 'OIDS'
IndexElement <- Expression DescOrAsc? NullsFirstOrLast?
Unique <- 'UNIQUE'
IndexType <- 'USING' Identifier
# Postgres-style relation options: dotted name with an optional '=' value.
RelOption <- ColLabel ('.' ColLabel)* ('=' DefArg)?
DefArg <- FuncType / ReservedKeyword / StringLiteral / NumberLiteral / 'NONE'
FuncType <- Type / ('SETOF'? TypeFuncName '%' 'TYPE')

View File

@@ -0,0 +1,11 @@
# CREATE MACRO/FUNCTION with one or more overload definitions; a macro body is
# either a scalar expression or 'TABLE' followed by a SELECT.
CreateMacroStmt <- MacroOrFunction IfNotExists? QualifiedName List(MacroDefinition)
MacroOrFunction <- 'MACRO' / 'FUNCTION'
MacroDefinition <- Parens(MacroParameters?) 'AS' (TableMacroDefinition / ScalarMacroDefinition)
MacroParameters <- List(MacroParameter)
MacroParameter <- NamedParameter / (TypeFuncName Type?)
ScalarMacroDefinition <- Expression
TableMacroDefinition <- 'TABLE' SelectStatement

View File

@@ -0,0 +1 @@
# CREATE SCHEMA [IF NOT EXISTS] name
CreateSchemaStmt <- 'SCHEMA' IfNotExists? QualifiedName

View File

@@ -0,0 +1,3 @@
# CREATE SECRET [IF NOT EXISTS] [name] [IN storage] (options)
CreateSecretStmt <- 'SECRET' IfNotExists? SecretName? SecretStorageSpecifier? Parens(GenericCopyOptionList)
SecretStorageSpecifier <- 'IN' Identifier

View File

@@ -0,0 +1,20 @@
# CREATE SEQUENCE with any number of trailing options (CYCLE, INCREMENT BY,
# MIN/MAXVALUE, START WITH, OWNED BY).
CreateSequenceStmt <- 'SEQUENCE' IfNotExists? QualifiedName SequenceOption*
SequenceOption <-
SeqSetCycle /
SeqSetIncrement /
SeqSetMinMax /
SeqNoMinMax /
SeqStartWith /
SeqOwnedBy
SeqSetCycle <- 'NO'? 'CYCLE'
SeqSetIncrement <- 'INCREMENT' 'BY'? Expression
SeqSetMinMax <- SeqMinOrMax Expression
SeqNoMinMax <- 'NO' SeqMinOrMax
SeqStartWith <- 'START' 'WITH'? Expression
SeqOwnedBy <- 'OWNED' 'BY' QualifiedName
SeqMinOrMax <- 'MINVALUE' / 'MAXVALUE'

View File

@@ -0,0 +1,69 @@
# CREATE statement dispatcher plus the full CREATE TABLE grammar, qualified
# name rules and the shared identifier/keyword-category rules.
CreateStatement <- 'CREATE' OrReplace? Temporary? (CreateTableStmt / CreateMacroStmt / CreateSequenceStmt / CreateTypeStmt / CreateSchemaStmt / CreateViewStmt / CreateIndexStmt / CreateSecretStmt)
OrReplace <- 'OR' 'REPLACE'
Temporary <- 'TEMP' / 'TEMPORARY' / 'PERSISTENT'
# Either CREATE TABLE ... AS <select> or an explicit column/constraint list.
CreateTableStmt <- 'TABLE' IfNotExists? QualifiedName (CreateTableAs / CreateColumnList) CommitAction?
CreateTableAs <- IdentifierList? 'AS' SelectStatement WithData?
WithData <- 'WITH' 'NO'? 'DATA'
IdentifierList <- Parens(List(Identifier))
CreateColumnList <- Parens(CreateTableColumnList)
IfNotExists <- 'IF' 'NOT' 'EXISTS'
# Qualified names: [catalog.[schema.]]name, with string literals allowed for
# the final component.
QualifiedName <- CatalogReservedSchemaIdentifier / SchemaReservedIdentifierOrStringLiteral / IdentifierOrStringLiteral
SchemaReservedIdentifierOrStringLiteral <- SchemaQualification ReservedIdentifierOrStringLiteral
CatalogReservedSchemaIdentifier <- CatalogQualification ReservedSchemaQualification ReservedIdentifierOrStringLiteral
IdentifierOrStringLiteral <- Identifier / StringLiteral
ReservedIdentifierOrStringLiteral <- ReservedIdentifier / StringLiteral
CatalogQualification <- CatalogName '.'
SchemaQualification <- SchemaName '.'
ReservedSchemaQualification <- ReservedSchemaName '.'
TableQualification <- TableName '.'
ReservedTableQualification <- ReservedTableName '.'
CreateTableColumnList <- List(CreateTableColumnElement)
CreateTableColumnElement <- ColumnDefinition / TopLevelConstraint
ColumnDefinition <- DottedIdentifier TypeOrGenerated ColumnConstraint*
TypeOrGenerated <- Type? GeneratedColumn?
# Column-level constraints.
ColumnConstraint <- NotNullConstraint / UniqueConstraint / PrimaryKeyConstraint / DefaultValue / CheckConstraint / ForeignKeyConstraint / ColumnCollation / ColumnCompression
NotNullConstraint <- 'NOT'? 'NULL'
UniqueConstraint <- 'UNIQUE'
PrimaryKeyConstraint <- 'PRIMARY' 'KEY'
DefaultValue <- 'DEFAULT' Expression
CheckConstraint <- 'CHECK' Parens(Expression)
ForeignKeyConstraint <- 'REFERENCES' BaseTableName Parens(ColumnList)? KeyActions?
ColumnCollation <- 'COLLATE' Expression
ColumnCompression <- 'USING' 'COMPRESSION' ColIdOrString
KeyActions <- UpdateAction? DeleteAction?
UpdateAction <- 'ON' 'UPDATE' KeyAction
DeleteAction <- 'ON' 'DELETE' KeyAction
KeyAction <- ('NO' 'ACTION') / 'RESTRICT' / 'CASCADE' / ('SET' 'NULL') / ('SET' 'DEFAULT')
# Table-level constraints, optionally named via CONSTRAINT <name>.
TopLevelConstraint <- ConstraintNameClause? TopLevelConstraintList
TopLevelConstraintList <- TopPrimaryKeyConstraint / CheckConstraint / TopUniqueConstraint / TopForeignKeyConstraint
ConstraintNameClause <- 'CONSTRAINT' Identifier
TopPrimaryKeyConstraint <- 'PRIMARY' 'KEY' ColumnIdList
TopUniqueConstraint <- 'UNIQUE' ColumnIdList
TopForeignKeyConstraint <- 'FOREIGN' 'KEY' ColumnIdList ForeignKeyConstraint
ColumnIdList <- Parens(List(ColId))
# Identifiers: quoted, or a plain word that is not a reserved keyword.
PlainIdentifier <- !ReservedKeyword <[a-z_]i[a-z0-9_]i*>
QuotedIdentifier <- '"' [^"]* '"'
DottedIdentifier <- Identifier ('.' Identifier)*
Identifier <- QuotedIdentifier / PlainIdentifier
# Postgres-style keyword categories: which keyword classes may appear as a
# column id, function name, type name or column label.
ColId <- UnreservedKeyword / ColumnNameKeyword / Identifier
ColIdOrString <- ColId / StringLiteral
FuncName <- UnreservedKeyword / FuncNameKeyword / Identifier
TypeFuncName <- UnreservedKeyword / TypeNameKeyword / FuncNameKeyword / Identifier
TypeName <- UnreservedKeyword / TypeNameKeyword / Identifier
ColLabel <- ReservedKeyword / UnreservedKeyword / ColumnNameKeyword / FuncNameKeyword / TypeNameKeyword / Identifier
ColLabelOrString <- ColLabel / StringLiteral
GeneratedColumn <- Generated? 'AS' Parens(Expression) GeneratedColumnType?
Generated <- 'GENERATED' AlwaysOrByDefault?
AlwaysOrByDefault <- 'ALWAYS' / ('BY' 'DEFAULT')
GeneratedColumnType <- 'VIRTUAL' / 'STORED'
CommitAction <- 'ON' 'COMMIT' PreserveOrDelete
PreserveOrDelete <- ('PRESERVE' / 'DELETE') 'ROWS'

View File

@@ -0,0 +1,4 @@
# CREATE TYPE name AS (ENUM (...) | existing type). The ENUM-over-SELECT form
# is tried first; on failure the literal string list is tried.
CreateTypeStmt <- 'TYPE' IfNotExists? QualifiedName 'AS' CreateType
CreateType <- ('ENUM' Parens(SelectStatement)) /
('ENUM' Parens(List(StringLiteral))) /
Type

View File

@@ -0,0 +1 @@
# CREATE [RECURSIVE] VIEW [IF NOT EXISTS] name [(columns)] AS <select>
CreateViewStmt <- 'RECURSIVE'? 'VIEW' IfNotExists? QualifiedName InsertColumnList? 'AS' SelectStatement

View File

@@ -0,0 +1 @@
# DEALLOCATE [PREPARE] <prepared statement name>
DeallocateStatement <- 'DEALLOCATE' 'PREPARE'? Identifier

View File

@@ -0,0 +1,4 @@
# DELETE FROM target [USING ...] [WHERE ...] [RETURNING ...] and TRUNCATE.
DeleteStatement <- WithClause? 'DELETE' 'FROM' TargetOptAlias DeleteUsingClause? WhereClause? ReturningClause?
TruncateStatement <- 'TRUNCATE' 'TABLE'? BaseTableName
TargetOptAlias <- BaseTableName 'AS'? ColId?
DeleteUsingClause <- 'USING' List(TableRef)

View File

@@ -0,0 +1,9 @@
# SHOW / DESCRIBE / SUMMARIZE in their four forms: SHOW TABLES FROM, a full
# query, SHOW ALL TABLES, or a single (optional) object name.
DescribeStatement <- ShowTables / ShowSelect / ShowAllTables / ShowQualifiedName
ShowSelect <- ShowOrDescribeOrSummarize SelectStatement
ShowAllTables <- ShowOrDescribe 'ALL' 'TABLES'
ShowQualifiedName <- ShowOrDescribeOrSummarize (BaseTableName / StringLiteral)?
ShowTables <- ShowOrDescribe 'TABLES' 'FROM' QualifiedName
ShowOrDescribeOrSummarize <- ShowOrDescribe / 'SUMMARIZE'
ShowOrDescribe <- 'SHOW' / 'DESCRIBE' / 'DESC'

View File

@@ -0,0 +1 @@
# DETACH [DATABASE] [IF EXISTS] <catalog name>
DetachStatement <- 'DETACH' Database? IfExists? CatalogName

View File

@@ -0,0 +1,33 @@
# DROP statement: one entry kind (table/view, macro, schema, index, sequence,
# collation, type, secret) plus an optional CASCADE/RESTRICT behavior.
DropStatement <- 'DROP' DropEntries DropBehavior?
DropEntries <-
DropTable /
DropTableFunction /
DropFunction /
DropSchema /
DropIndex /
DropSequence /
DropCollation /
DropType /
DropSecret
DropTable <- TableOrView IfExists? List(BaseTableName)
DropTableFunction <- 'MACRO' 'TABLE' IfExists? List(TableFunctionName)
DropFunction <- FunctionType IfExists? List(FunctionIdentifier)
DropSchema <- 'SCHEMA' IfExists? List(QualifiedSchemaName)
DropIndex <- 'INDEX' IfExists? List(QualifiedIndexName)
QualifiedIndexName <- CatalogQualification? SchemaQualification? IndexName
DropSequence <- 'SEQUENCE' IfExists? List(QualifiedSequenceName)
DropCollation <- 'COLLATION' IfExists? List(CollationName)
DropType <- 'TYPE' IfExists? List(QualifiedTypeName)
DropSecret <- Temporary? 'SECRET' IfExists? SecretName DropSecretStorage?
TableOrView <- 'TABLE' / 'VIEW' / ('MATERIALIZED' 'VIEW')
FunctionType <- 'MACRO' / 'FUNCTION'
DropBehavior <- 'CASCADE' / 'RESTRICT'
IfExists <- 'IF' 'EXISTS'
QualifiedSchemaName <- CatalogQualification? SchemaName
DropSecretStorage <- 'FROM' Identifier

View File

@@ -0,0 +1 @@
# EXECUTE <prepared statement name> [(arguments)]
ExecuteStatement <- 'EXECUTE' Identifier TableFunctionArguments?

View File

@@ -0,0 +1,3 @@
# EXPLAIN [ANALYZE] [(options)] <statement>
ExplainStatement <- 'EXPLAIN' 'ANALYZE'? ExplainOptions? Statement
ExplainOptions <- Parens(GenericCopyOptionList)

View File

@@ -0,0 +1,5 @@
# EXPORT DATABASE [source TO] 'path' [(options)] and IMPORT DATABASE 'path'.
ExportStatement <- 'EXPORT' 'DATABASE' ExportSource? StringLiteral Parens(GenericCopyOptionList)?
ExportSource <- CatalogName 'TO'
ImportStatement <- 'IMPORT' 'DATABASE' StringLiteral

View File

@@ -0,0 +1,150 @@
# Expression grammar: column references, function calls, literals, CASE,
# window frames, and the primary-expression dispatcher.
ColumnReference <- CatalogReservedSchemaTableColumnName / SchemaReservedTableColumnName / TableReservedColumnName / ColumnName
CatalogReservedSchemaTableColumnName <- CatalogQualification ReservedSchemaQualification ReservedTableQualification ReservedColumnName
SchemaReservedTableColumnName <- SchemaQualification ReservedTableQualification ReservedColumnName
TableReservedColumnName <- TableQualification ReservedColumnName
# Function call with optional DISTINCT/ALL, ORDER BY, IGNORE NULLS inside the
# parentheses and the usual trailing clauses (WITHIN GROUP, FILTER, OVER).
FunctionExpression <- FunctionIdentifier Parens(DistinctOrAll? List(FunctionArgument)? OrderByClause? IgnoreNulls?) WithinGroupClause? FilterClause? ExportClause? OverClause?
FunctionIdentifier <- CatalogReservedSchemaFunctionName / SchemaReservedFunctionName / FunctionName
CatalogReservedSchemaFunctionName <- CatalogQualification ReservedSchemaQualification? ReservedFunctionName
SchemaReservedFunctionName <- SchemaQualification ReservedFunctionName
DistinctOrAll <- 'DISTINCT' / 'ALL'
ExportClause <- 'EXPORT_STATE'
WithinGroupClause <- 'WITHIN' 'GROUP' Parens(OrderByClause)
FilterClause <- 'FILTER' Parens('WHERE'? Expression)
IgnoreNulls <- ('IGNORE' 'NULLS') / ('RESPECT' 'NULLS')
ParenthesisExpression <- Parens(List(Expression))
LiteralExpression <- StringLiteral / NumberLiteral / ConstantLiteral
ConstantLiteral <- NullLiteral / TrueLiteral / FalseLiteral
NullLiteral <- 'NULL'
TrueLiteral <- 'TRUE'
FalseLiteral <- 'FALSE'
CastExpression <- CastOrTryCast Parens(Expression 'AS' Type)
CastOrTryCast <- 'CAST' / 'TRY_CAST'
# [qualifier.]* with optional EXCLUDE / REPLACE / RENAME modifiers.
StarExpression <- (ColId '.')* '*' ExcludeList? ReplaceList? RenameList?
ExcludeList <- 'EXCLUDE' (Parens(List(ExcludeName)) / ExcludeName)
ExcludeName <- DottedIdentifier / ColIdOrString
ReplaceList <- 'REPLACE' (Parens(List(ReplaceEntry)) / ReplaceEntry)
ReplaceEntry <- Expression 'AS' ColumnReference
RenameList <- 'RENAME' (Parens(List(RenameEntry)) / RenameEntry)
RenameEntry <- ColumnReference 'AS' Identifier
SubqueryExpression <- 'NOT'? 'EXISTS'? SubqueryReference
CaseExpression <- 'CASE' Expression? CaseWhenThen CaseWhenThen* CaseElse? 'END'
CaseWhenThen <- 'WHEN' Expression 'THEN' Expression
CaseElse <- 'ELSE' Expression
# e.g. DATE '1992-01-01' — a type name followed by a string literal.
TypeLiteral <- ColId StringLiteral
IntervalLiteral <- 'INTERVAL' IntervalParameter IntervalUnit?
IntervalParameter <- StringLiteral / NumberLiteral / Parens(Expression)
IntervalUnit <- ColId
# Window framing (ROWS/RANGE/GROUPS ... BETWEEN ... AND ...).
FrameClause <- Framing FrameExtent WindowExcludeClause?
Framing <- 'ROWS' / 'RANGE' / 'GROUPS'
FrameExtent <- ('BETWEEN' FrameBound 'AND' FrameBound) / FrameBound
FrameBound <- ('UNBOUNDED' 'PRECEDING') / ('UNBOUNDED' 'FOLLOWING') / ('CURRENT' 'ROW') / (Expression 'PRECEDING') / (Expression 'FOLLOWING')
WindowExcludeClause <- 'EXCLUDE' WindowExcludeElement
WindowExcludeElement <- ('CURRENT' 'ROW') / 'GROUP' / 'TIES' / ('NO' 'OTHERS')
OverClause <- 'OVER' WindowFrame
WindowFrame <- WindowFrameDefinition / Identifier / Parens(Identifier)
WindowFrameDefinition <- Parens(BaseWindowName? WindowFrameContents) / Parens(WindowFrameContents)
WindowFrameContents <- WindowPartition? OrderByClause? FrameClause?
BaseWindowName <- Identifier
WindowPartition <- 'PARTITION' 'BY' List(Expression)
PrefixExpression <- PrefixOperator Expression
PrefixOperator <- 'NOT' / '-' / '+' / '~'
ListExpression <- 'ARRAY'? (BoundedListExpression / SelectStatement)
BoundedListExpression <- '[' List(Expression)? ']'
StructExpression <- '{' List(StructField)? '}'
StructField <- Expression ':' Expression
MapExpression <- 'MAP' StructExpression
GroupingExpression <- GroupingOrGroupingId Parens(List(Expression))
GroupingOrGroupingId <- 'GROUPING' / 'GROUPING_ID'
# Prepared-statement parameters: '?', '$1', '$name'.
Parameter <- '?' / NumberedParameter / ColLabelParameter
NumberedParameter <- '$' NumberLiteral
ColLabelParameter <- '$' ColLabel
PositionalExpression <- '#' NumberLiteral
DefaultExpression <- 'DEFAULT'
ListComprehensionExpression <- '[' Expression 'FOR' List(Expression) ListComprehensionFilter? ']'
ListComprehensionFilter <- 'IF' Expression
# Primary expression dispatcher; ordered so that keyword-introduced forms are
# tried before the generic function call / column reference.
SingleExpression <-
LiteralExpression /
Parameter /
SubqueryExpression /
SpecialFunctionExpression /
ParenthesisExpression /
IntervalLiteral /
TypeLiteral /
CaseExpression /
StarExpression /
CastExpression /
GroupingExpression /
MapExpression /
FunctionExpression /
ColumnReference /
PrefixExpression /
ListComprehensionExpression /
ListExpression /
StructExpression /
PositionalExpression /
DefaultExpression
OperatorLiteral <- <[\+\-\*\/\%\^\<\>\=\~\!\@\&\|\`]+>
LikeOperator <- 'NOT'? LikeOrSimilarTo
LikeOrSimilarTo <- 'LIKE' / 'ILIKE' / 'GLOB' / ('SIMILAR' 'TO')
InOperator <- 'NOT'? 'IN'
IsOperator <- 'IS' 'NOT'? DistinctFrom?
DistinctFrom <- 'DISTINCT' 'FROM'
ConjunctionOperator <- 'OR' / 'AND'
# Comparison operators, longest-match-first. Reordered: with '=' listed before
# '==', ordered-choice matching committed to '=' and left the second '='
# unconsumed, making '==' unreachable; multi-character operators now precede
# their single-character prefixes.
ComparisonOperator <- '==' / '<=' / '>=' / '<>' / '!=' / '<' / '>' / '='
# Binary/postfix operators, indirection suffixes and the special function
# forms with dedicated syntax (EXTRACT, SUBSTRING, TRIM, ...).
BetweenOperator <- 'NOT'? 'BETWEEN'
CollateOperator <- 'COLLATE'
LambdaOperator <- '->'
EscapeOperator <- 'ESCAPE'
AtTimeZoneOperator <- 'AT' 'TIME' 'ZONE'
PostfixOperator <- '!'
AnyAllOperator <- ComparisonOperator AnyOrAll
AnyOrAll <- 'ANY' / 'ALL'
Operator <-
AnyAllOperator /
ConjunctionOperator /
LikeOperator /
InOperator /
IsOperator /
BetweenOperator /
CollateOperator /
LambdaOperator /
EscapeOperator /
AtTimeZoneOperator /
OperatorLiteral
CastOperator <- '::' Type
DotOperator <- '.' (FunctionExpression / ColLabel)
NotNull <- 'NOT' 'NULL'
# Suffixes that can be chained after a primary expression.
Indirection <- CastOperator / DotOperator / SliceExpression / NotNull / PostfixOperator
BaseExpression <- SingleExpression Indirection*
# Binary operators are parsed as a flat chain (no precedence in the grammar).
Expression <- BaseExpression RecursiveExpression*
RecursiveExpression <- (Operator Expression)
SliceExpression <- '[' SliceBound ']'
SliceBound <- Expression? (':' (Expression / '-')?)? (':' Expression?)?
SpecialFunctionExpression <- CoalesceExpression / UnpackExpression / ColumnsExpression / ExtractExpression / LambdaExpression / NullIfExpression / PositionExpression / RowExpression / SubstringExpression / TrimExpression
CoalesceExpression <- 'COALESCE' Parens(List(Expression))
UnpackExpression <- 'UNPACK' Parens(Expression)
ColumnsExpression <- '*'? 'COLUMNS' Parens(Expression)
ExtractExpression <- 'EXTRACT' Parens(Expression 'FROM' Expression)
LambdaExpression <- 'LAMBDA' List(ColIdOrString) ':' Expression
NullIfExpression <- 'NULLIF' Parens(Expression ',' Expression)
PositionExpression <- 'POSITION' Parens(Expression)
RowExpression <- 'ROW' Parens(List(Expression))
SubstringExpression <- 'SUBSTRING' Parens(SubstringParameters / List(Expression))
SubstringParameters <- Expression 'FROM' NumberLiteral 'FOR' NumberLiteral
TrimExpression <- 'TRIM' Parens(TrimDirection? TrimSource? List(Expression))
TrimDirection <- 'BOTH' / 'LEADING' / 'TRAILING'
TrimSource <- Expression? 'FROM'

View File

@@ -0,0 +1,27 @@
# INSERT [OR action] INTO target [BY NAME|POSITION] [(columns)] values
# [ON CONFLICT ...] [RETURNING ...]
InsertStatement <- WithClause? 'INSERT' OrAction? 'INTO' InsertTarget ByNameOrPosition? InsertColumnList? InsertValues OnConflictClause? ReturningClause?
# Conflict action shorthand: INSERT OR REPLACE / INSERT OR IGNORE.
# Fixed precedence: sequence binds tighter than '/', so the unparenthesized
# form meant ('OR' 'REPLACE') / 'IGNORE' — which rejected 'OR IGNORE' and
# accepted a bare 'IGNORE'. 'OR' must apply to both alternatives.
OrAction <- 'OR' ('REPLACE' / 'IGNORE')
# Insert mode: INSERT INTO ... BY NAME / BY POSITION.
# Fixed precedence: sequence binds tighter than '/', so the unparenthesized
# form meant ('BY' 'NAME') / 'POSITION' — which rejected 'BY POSITION' and
# accepted a bare 'POSITION'. 'BY' must apply to both alternatives.
ByNameOrPosition <- 'BY' ('NAME' / 'POSITION')
# Insert target, column list, value source and ON CONFLICT handling.
InsertTarget <- BaseTableName InsertAlias?
InsertAlias <- 'AS' Identifier
ColumnList <- List(ColId)
InsertColumnList <- Parens(ColumnList)
InsertValues <- SelectStatement / DefaultValues
DefaultValues <- 'DEFAULT' 'VALUES'
OnConflictClause <- 'ON' 'CONFLICT' OnConflictTarget? OnConflictAction
OnConflictTarget <- OnConflictExpressionTarget / OnConflictIndexTarget
OnConflictExpressionTarget <- Parens(List(ColId)) WhereClause?
OnConflictIndexTarget <- 'ON' 'CONSTRAINT' ConstraintName
OnConflictAction <- OnConflictUpdate / OnConflictNothing
OnConflictUpdate <- 'DO' 'UPDATE' 'SET' UpdateSetClause WhereClause?
OnConflictNothing <- 'DO' 'NOTHING'
ReturningClause <- 'RETURNING' TargetList

View File

@@ -0,0 +1,4 @@
# LOAD <extension> and [FORCE] INSTALL <extension> [FROM source] [version].
LoadStatement <- 'LOAD' ColIdOrString
InstallStatement <- 'FORCE'? 'INSTALL' Identifier FromSource? VersionNumber?
FromSource <- 'FROM' (Identifier / StringLiteral)
VersionNumber <- Identifier

View File

@@ -0,0 +1,21 @@
# MERGE INTO target USING source ON|USING ... WHEN [NOT] MATCHED ... THEN ...
MergeIntoStatement <- WithClause? 'MERGE' 'INTO' TargetOptAlias MergeIntoUsingClause MergeMatch* ReturningClause?
MergeIntoUsingClause <- 'USING' TableRef JoinQualifier
MergeMatch <- MatchedClause / NotMatchedClause
MatchedClause <- 'WHEN' 'MATCHED' AndExpression? 'THEN' MatchedClauseAction
MatchedClauseAction <- UpdateMatchClause / DeleteMatchClause / InsertMatchClause / DoNothingMatchClause / ErrorMatchClause
UpdateMatchClause <- 'UPDATE' (UpdateMatchSetClause / ByNameOrPosition?)
DeleteMatchClause <- 'DELETE'
InsertMatchClause <- 'INSERT' (InsertValuesList / DefaultValues / InsertByNameOrPosition)?
InsertByNameOrPosition <- ByNameOrPosition? '*'?
InsertValuesList <- InsertColumnList? 'VALUES' Parens(List(Expression))
DoNothingMatchClause <- 'DO' 'NOTHING'
ErrorMatchClause <- 'ERROR' Expression?
UpdateMatchSetClause <- 'SET' (UpdateSetClause / '*')
AndExpression <- 'AND' Expression
NotMatchedClause <- 'WHEN' 'NOT' 'MATCHED' BySourceOrTarget? AndExpression? 'THEN' MatchedClauseAction
BySourceOrTarget <- 'BY' ('SOURCE' / 'TARGET')

View File

@@ -0,0 +1,18 @@
# Top-level PIVOT / UNPIVOT statement forms (the table-ref variants live in
# the select grammar).
PivotStatement <- PivotKeyword TableRef PivotOn? PivotUsing? GroupByClause?
PivotOn <- 'ON' PivotColumnList
PivotUsing <- 'USING' TargetList
PivotColumnList <- List(Expression)
PivotKeyword <- 'PIVOT' / 'PIVOT_WIDER'
UnpivotKeyword <- 'UNPIVOT' / 'PIVOT_LONGER'
UnpivotStatement <- UnpivotKeyword TableRef 'ON' TargetList IntoNameValues?
IntoNameValues <- 'INTO' 'NAME' ColIdOrString ValueOrValues List(Identifier)
ValueOrValues <- 'VALUE' / 'VALUES'
IncludeExcludeNulls <- ('INCLUDE' / 'EXCLUDE') 'NULLS'
UnpivotHeader <- ColIdOrString / Parens(List(ColIdOrString))

View File

@@ -0,0 +1,5 @@
# PRAGMA name = value  or  PRAGMA name[(arguments)]
PragmaStatement <- 'PRAGMA' (PragmaAssign / PragmaFunction)
PragmaAssign <- SettingName '=' VariableList
PragmaFunction <- PragmaName PragmaParameters?
PragmaParameters <- List(Expression)

View File

@@ -0,0 +1,3 @@
# PREPARE name [(types)] AS <statement>
PrepareStatement <- 'PREPARE' Identifier TypeList? 'AS' Statement
TypeList <- Parens(List(Type))

View File

@@ -0,0 +1,126 @@
# SELECT grammar: set operations, the simple-select core, table references
# with joins/pivots, sampling, grouping and ordering.
SelectStatement <- SelectOrParens (SetopClause SelectStatement)* ResultModifiers
SetopClause <- ('UNION' / 'EXCEPT' / 'INTERSECT') DistinctOrAll? ByName?
ByName <- 'BY' 'NAME'
SelectOrParens <- BaseSelect / Parens(SelectStatement)
BaseSelect <- WithClause? (OptionalParensSimpleSelect / ValuesClause / DescribeStatement / TableStatement / PivotStatement / UnpivotStatement) ResultModifiers
ResultModifiers <- OrderByClause? LimitClause? OffsetClause?
TableStatement <- 'TABLE' BaseTableName
OptionalParensSimpleSelect <- Parens(SimpleSelect) / SimpleSelect
SimpleSelect <- SelectFrom WhereClause? GroupByClause? HavingClause? WindowClause? QualifyClause? SampleClause?
# DuckDB allows FROM-first queries: the SELECT clause may precede or follow FROM.
SelectFrom <- (SelectClause FromClause?) / (FromClause SelectClause?)
WithStatement <- ColIdOrString InsertColumnList? UsingKey? 'AS' Materialized? SubqueryReference
UsingKey <- 'USING' 'KEY' Parens(List(ColId))
Materialized <- 'NOT'? 'MATERIALIZED'
WithClause <- 'WITH' Recursive? List(WithStatement)
Recursive <- 'RECURSIVE'
SelectClause <- 'SELECT' DistinctClause? TargetList
TargetList <- List(AliasedExpression)
ColumnAliases <- Parens(List(ColIdOrString))
DistinctClause <- ('DISTINCT' DistinctOn?) / 'ALL'
DistinctOn <- 'ON' Parens(List(Expression))
# Table references.
InnerTableRef <- ValuesRef / TableFunction / TableSubquery / BaseTableRef / ParensTableRef
TableRef <- InnerTableRef JoinOrPivot* TableAlias?
TableSubquery <- Lateral? SubqueryReference TableAlias?
BaseTableRef <- TableAliasColon? BaseTableName TableAlias? AtClause?
TableAliasColon <- ColIdOrString ':'
ValuesRef <- ValuesClause TableAlias?
ParensTableRef <- TableAliasColon? Parens(TableRef)
JoinOrPivot <- JoinClause / TablePivotClause / TableUnpivotClause
TablePivotClause <- 'PIVOT' Parens(TargetList 'FOR' PivotValueLists GroupByClause?) TableAlias?
TableUnpivotClause <- 'UNPIVOT' IncludeExcludeNulls? Parens(UnpivotHeader 'FOR' PivotValueLists) TableAlias?
PivotHeader <- BaseExpression
PivotValueLists <- PivotValueList PivotValueList*
PivotValueList <- PivotHeader 'IN' PivotTargetList
PivotTargetList <- Identifier / Parens(TargetList)
Lateral <- 'LATERAL'
BaseTableName <- CatalogReservedSchemaTable / SchemaReservedTable / TableName
SchemaReservedTable <- SchemaQualification ReservedTableName
CatalogReservedSchemaTable <- CatalogQualification ReservedSchemaQualification ReservedTableName
# Table functions, optionally LATERAL or introduced by an 'alias:' prefix.
TableFunction <- TableFunctionLateralOpt / TableFunctionAliasColon
TableFunctionLateralOpt <- Lateral? QualifiedTableFunction TableFunctionArguments WithOrdinality? TableAlias?
TableFunctionAliasColon <- TableAliasColon QualifiedTableFunction TableFunctionArguments WithOrdinality?
WithOrdinality <- 'WITH' 'ORDINALITY'
QualifiedTableFunction <- CatalogQualification? SchemaQualification? TableFunctionName
TableFunctionArguments <- Parens(List(FunctionArgument)?)
FunctionArgument <- NamedParameter / Expression
NamedParameter <- TypeName Type? NamedParameterAssignment Expression
NamedParameterAssignment <- ':=' / '=>'
TableAlias <- 'AS'? (Identifier / StringLiteral) ColumnAliases?
# Time-travel clause: AT (VERSION|TIMESTAMP => expr).
AtClause <- 'AT' Parens(AtSpecifier)
AtSpecifier <- AtUnit '=>' Expression
AtUnit <- 'VERSION' / 'TIMESTAMP'
JoinClause <- RegularJoinClause / JoinWithoutOnClause
RegularJoinClause <- 'ASOF'? JoinType? 'JOIN' TableRef JoinQualifier
JoinWithoutOnClause <- JoinPrefix 'JOIN' TableRef
JoinQualifier <- OnClause / UsingClause
OnClause <- 'ON' Expression
UsingClause <- 'USING' Parens(List(ColumnName))
OuterJoinType <- 'FULL' / 'LEFT' / 'RIGHT'
JoinType <- (OuterJoinType 'OUTER'?) / 'SEMI' / 'ANTI' / 'INNER'
JoinPrefix <- 'CROSS' / ('NATURAL' JoinType?) / 'POSITIONAL'
FromClause <- 'FROM' List(TableRef)
WhereClause <- 'WHERE' Expression
GroupByClause <- 'GROUP' 'BY' GroupByExpressions
HavingClause <- 'HAVING' Expression
QualifyClause <- 'QUALIFY' Expression
# Sampling: TABLESAMPLE / USING SAMPLE with function- or count-style entries.
SampleClause <- (TableSample / UsingSample) SampleEntry
UsingSample <- 'USING' 'SAMPLE'
TableSample <- 'TABLESAMPLE'
WindowClause <- 'WINDOW' List(WindowDefinition)
WindowDefinition <- Identifier 'AS' WindowFrameDefinition
SampleEntry <- SampleEntryFunction / SampleEntryCount
SampleEntryCount <- SampleCount Parens(SampleProperties)?
SampleEntryFunction <- SampleFunction? Parens(SampleCount) RepeatableSample?
SampleFunction <- ColId
SampleProperties <- ColId (',' NumberLiteral)?
RepeatableSample <- 'REPEATABLE' Parens(NumberLiteral)
SampleCount <- Expression SampleUnit?
SampleUnit <- '%' / 'PERCENT' / 'ROWS'
GroupByExpressions <- GroupByList / 'ALL'
GroupByList <- List(GroupByExpression)
GroupByExpression <- EmptyGroupingItem / CubeOrRollupClause / GroupingSetsClause / Expression
EmptyGroupingItem <- '(' ')'
CubeOrRollupClause <- CubeOrRollup Parens(List(Expression))
CubeOrRollup <- 'CUBE' / 'ROLLUP'
GroupingSetsClause <- 'GROUPING' 'SETS' Parens(GroupByList)
SubqueryReference <- Parens(SelectStatement)
OrderByExpression <- Expression DescOrAsc? NullsFirstOrLast?
DescOrAsc <- 'DESC' / 'DESCENDING' / 'ASC' / 'ASCENDING'
# Null ordering: NULLS FIRST / NULLS LAST.
# Fixed precedence: sequence binds tighter than '/', so the unparenthesized
# form meant ('NULLS' 'FIRST') / 'LAST' — which rejected 'NULLS LAST' and
# accepted a bare 'LAST'. 'NULLS' must apply to both alternatives.
NullsFirstOrLast <- 'NULLS' ('FIRST' / 'LAST')
# Ordering, limit/offset, select-list aliasing and VALUES lists.
OrderByClause <- 'ORDER' 'BY' OrderByExpressions
OrderByExpressions <- List(OrderByExpression) / OrderByAll
OrderByAll <- 'ALL' DescOrAsc? NullsFirstOrLast?
LimitClause <- 'LIMIT' LimitValue
OffsetClause <- 'OFFSET' OffsetValue
LimitValue <- 'ALL' / (NumberLiteral 'PERCENT') / (Expression '%'?)
OffsetValue <- Expression RowOrRows?
RowOrRows <- 'ROW' / 'ROWS'
# Aliases: 'alias: expr', 'expr AS alias', or 'expr alias'.
AliasedExpression <- (ColId ':' Expression) / (Expression 'AS' ColLabelOrString) / (Expression Identifier?)
ValuesClause <- 'VALUES' List(ValuesExpressions)
ValuesExpressions <- Parens(List(Expression))

View File

@@ -0,0 +1,19 @@
# SET / RESET statement: scoped settings, SET VARIABLE, and SET TIME ZONE.
SetStatement <- 'SET' (StandardAssignment / SetTimeZone)
StandardAssignment <- (SetVariable / SetSetting) SetAssignment
SetTimeZone <- 'TIME' 'ZONE' Expression
SetSetting <- SettingScope? SettingName
SetVariable <- VariableScope Identifier
VariableScope <- 'VARIABLE'
SettingScope <- LocalScope / SessionScope / GlobalScope
LocalScope <- 'LOCAL'
SessionScope <- 'SESSION'
GlobalScope <- 'GLOBAL'
SetAssignment <- VariableAssign VariableList
VariableAssign <- '=' / 'TO'
VariableList <- List(Expression)
ResetStatement <- 'RESET' (SetVariable / SetSetting)

View File

@@ -0,0 +1,11 @@
# BEGIN/START, COMMIT/END and ABORT/ROLLBACK transaction statements.
TransactionStatement <- BeginTransaction / RollbackTransaction / CommitTransaction
BeginTransaction <- StartOrBegin Transaction? ReadOrWrite?
RollbackTransaction <- AbortOrRollback Transaction?
CommitTransaction <- CommitOrEnd Transaction?
StartOrBegin <- 'START' / 'BEGIN'
Transaction <- 'WORK' / 'TRANSACTION'
ReadOrWrite <- 'READ' ('ONLY' / 'WRITE')
AbortOrRollback <- 'ABORT' / 'ROLLBACK'
CommitOrEnd <- 'COMMIT' / 'END'

View File

@@ -0,0 +1,6 @@
# UPDATE target SET assignments [FROM ...] [WHERE ...] [RETURNING ...]
UpdateStatement <- WithClause? 'UPDATE' UpdateTarget UpdateSetClause FromClause? WhereClause? ReturningClause?
# NOTE(review): the first alternative is the second with UpdateAlias? absent,
# so it is redundant (though harmless) — consider collapsing.
UpdateTarget <- (BaseTableName 'SET') / (BaseTableName UpdateAlias? 'SET')
UpdateAlias <- 'AS'? ColId
UpdateSetClause <- List(UpdateSetElement) / (Parens(List(ColumnName)) '=' Expression)
UpdateSetElement <- ColumnName '=' Expression

View File

@@ -0,0 +1,3 @@
# USE catalog.schema / USE schema / USE catalog
UseStatement <- 'USE' UseTarget
UseTarget <- (CatalogName '.' ReservedSchemaName) / SchemaName / CatalogName

View File

@@ -0,0 +1,12 @@
# VACUUM with either Postgres-style legacy flags or a parenthesized option
# list, plus an optional qualified target.
VacuumStatement <- 'VACUUM' (VacuumLegacyOptions AnalyzeStatement / VacuumLegacyOptions QualifiedTarget / VacuumLegacyOptions / VacuumParensOptions QualifiedTarget?)?
VacuumLegacyOptions <- OptFull OptFreeze OptVerbose
VacuumParensOptions <- Parens(List(VacuumOption))
VacuumOption <- 'ANALYZE' / 'VERBOSE' / 'FREEZE' / 'FULL' / Identifier
OptFull <- 'FULL'?
OptFreeze <- 'FREEZE'?
OptVerbose <- 'VERBOSE'?
QualifiedTarget <- QualifiedName OptNameList
OptNameList <- Parens(List(Name))?

View File

@@ -0,0 +1,13 @@
#pragma once
#include "duckdb/common/enums/set_scope.hpp"
#include "duckdb/common/string.hpp"
namespace duckdb {
//! Name/scope pair describing a configuration setting referenced by a
//! SET/RESET statement; the scope defaults to AUTOMATIC when none is given.
struct SettingInfo {
string name;
SetScope scope = SetScope::AUTOMATIC; // Default value is defined here
};
} // namespace duckdb

View File

@@ -0,0 +1,22 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// autocomplete_extension.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb.hpp"
namespace duckdb {
//! DuckDB extension entry point for SQL autocompletion.
class AutocompleteExtension : public Extension {
public:
//! Invoked on LOAD; registers the extension's functions with the database.
void Load(ExtensionLoader &loader) override;
//! Human-readable extension name.
std::string Name() override;
//! Extension version string.
std::string Version() const override;
};
} // namespace duckdb

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,29 @@
#pragma once
#include "duckdb/common/case_insensitive_map.hpp"
#include "duckdb/common/string.hpp"
namespace duckdb {
//! Postgres-style keyword classification used by the PEG grammar:
//! unreserved, fully reserved, usable as type/function name, or usable as
//! column name; KEYWORD_NONE marks non-keywords.
enum class PEGKeywordCategory : uint8_t {
KEYWORD_NONE,
KEYWORD_UNRESERVED,
KEYWORD_RESERVED,
KEYWORD_TYPE_FUNC,
KEYWORD_COL_NAME
};
//! Singleton lookup helper mapping keyword text to its PEGKeywordCategory,
//! backed by one case-insensitive set per category.
class PEGKeywordHelper {
public:
//! Returns the process-wide instance.
static PEGKeywordHelper &Instance();
//! Returns true if `text` belongs to the given keyword category.
bool KeywordCategoryType(const string &text, PEGKeywordCategory type) const;
//! Populates the per-category keyword sets (idempotent via `initialized`).
void InitializeKeywordMaps();
private:
PEGKeywordHelper();
//! Guards against repeated initialization of the keyword sets.
bool initialized;
case_insensitive_set_t reserved_keyword_map;
case_insensitive_set_t unreserved_keyword_map;
case_insensitive_set_t colname_keyword_map;
case_insensitive_set_t typefunc_keyword_map;
};
} // namespace duckdb

View File

@@ -0,0 +1,185 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// matcher.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb/common/string_util.hpp"
#include "duckdb/common/vector.hpp"
#include "duckdb/common/reference_map.hpp"
#include "transformer/parse_result.hpp"
namespace duckdb {
class ParseResultAllocator;
class Matcher;
class MatcherAllocator;
//! What kind of completion should be offered at the current cursor position.
enum class SuggestionState : uint8_t {
	SUGGEST_KEYWORD,
	SUGGEST_CATALOG_NAME,
	SUGGEST_SCHEMA_NAME,
	SUGGEST_TABLE_NAME,
	SUGGEST_TYPE_NAME,
	SUGGEST_COLUMN_NAME,
	SUGGEST_FILE_NAME,
	SUGGEST_VARIABLE,
	SUGGEST_SCALAR_FUNCTION_NAME,
	SUGGEST_TABLE_FUNCTION_NAME,
	SUGGEST_PRAGMA_NAME,
	SUGGEST_SETTING_NAME,
	SUGGEST_RESERVED_VARIABLE
};

//! Classifies a candidate; affects how quoting/case sensitivity are handled.
enum class CandidateType { KEYWORD, IDENTIFIER, LITERAL };

//! A single completion candidate together with its ranking information.
struct AutoCompleteCandidate {
	// NOLINTNEXTLINE: allow implicit conversion from string
	AutoCompleteCandidate(string candidate_p, int32_t score_bonus = 0,
	                      CandidateType candidate_type = CandidateType::IDENTIFIER)
	    : candidate(std::move(candidate_p)), score_bonus(score_bonus), candidate_type(candidate_type) {
	}
	// NOLINTNEXTLINE: allow implicit conversion from const char*
	AutoCompleteCandidate(const char *candidate_p, int32_t score_bonus = 0,
	                      CandidateType candidate_type = CandidateType::IDENTIFIER)
	    : AutoCompleteCandidate(string(candidate_p), score_bonus, candidate_type) {
	}

	//! The candidate text itself
	string candidate;
	//! The higher the score bonus, the more likely this candidate will be chosen
	int32_t score_bonus;
	//! The type of candidate we are suggesting - this modifies how we handle quoting/case sensitivity
	CandidateType candidate_type;
	//! Extra char to push at the back
	char extra_char = '\0';
	//! Suggestion position
	idx_t suggestion_pos = 0;
};

//! Final suggestion returned to the caller: replacement text plus a position
//! (presumably the offset in the input it applies at - confirm with caller).
struct AutoCompleteSuggestion {
	AutoCompleteSuggestion(string text_p, idx_t pos) : text(std::move(text_p)), pos(pos) {
	}
	string text;
	idx_t pos;
};
//! Result of attempting to match a matcher against the token stream.
enum class MatchResultType { SUCCESS, FAIL };
//! Whether a matcher's suggestions are optional or mandatory at this position.
enum class SuggestionType { OPTIONAL, MANDATORY };
//! Token categories produced by the tokenizer (currently only words).
enum class TokenType { WORD };

//! A single token of the tokenized SQL input.
struct MatcherToken {
	// NOLINTNEXTLINE: allow implicit conversion from text
	MatcherToken(string text_p, idx_t offset_p) : text(std::move(text_p)), offset(offset_p) {
		length = text.length();
	}

	TokenType type = TokenType::WORD;
	//! The token text
	string text;
	//! Byte offset of the token in the original input
	idx_t offset = 0;
	//! Length of `text` (set from text.length() on construction)
	idx_t length = 0;
};

//! A suggestion emitted during matching: either a concrete keyword candidate or
//! a category (table name, column name, ...) to be expanded later.
struct MatcherSuggestion {
	// NOLINTNEXTLINE: allow implicit conversion from auto-complete candidate
	MatcherSuggestion(AutoCompleteCandidate keyword_p)
	    : keyword(std::move(keyword_p)), type(SuggestionState::SUGGEST_KEYWORD) {
	}
	// NOLINTNEXTLINE: allow implicit conversion from suggestion state
	MatcherSuggestion(SuggestionState type, char extra_char = '\0') : keyword(""), type(type), extra_char(extra_char) {
	}

	//! Literal suggestion
	AutoCompleteCandidate keyword;
	SuggestionState type;
	char extra_char = '\0';
};

//! Mutable state threaded through a match: the token stream, the current token
//! cursor, collected suggestions and the allocator for parse results.
struct MatchState {
	MatchState(vector<MatcherToken> &tokens, vector<MatcherSuggestion> &suggestions, ParseResultAllocator &allocator)
	    : tokens(tokens), suggestions(suggestions), token_index(0), allocator(allocator) {
	}
	//! NOTE(review): copying shares tokens/suggestions and the token cursor but
	//! starts with an empty added_suggestions set - looks intentional, confirm.
	MatchState(MatchState &state)
	    : tokens(state.tokens), suggestions(state.suggestions), token_index(state.token_index),
	      allocator(state.allocator) {
	}

	vector<MatcherToken> &tokens;
	vector<MatcherSuggestion> &suggestions;
	//! Matchers that have already contributed suggestions for this state
	reference_set_t<const Matcher> added_suggestions;
	//! Index of the next token to match
	idx_t token_index;
	ParseResultAllocator &allocator;

	void AddSuggestion(MatcherSuggestion suggestion);
};
//! Concrete kind of a Matcher node in the grammar-derived matcher tree.
enum class MatcherType { KEYWORD, LIST, OPTIONAL, CHOICE, REPEAT, VARIABLE, STRING_LITERAL, NUMBER_LITERAL, OPERATOR };

//! Base class for grammar matchers: each node matches a portion of the token
//! stream and can emit auto-complete suggestions for the current position.
class Matcher {
public:
	explicit Matcher(MatcherType type) : type(type) {
	}
	virtual ~Matcher() = default;

	//! Match this matcher against the tokens in `state`
	virtual MatchResultType Match(MatchState &state) const = 0;
	//! Like Match, but also builds a ParseResult tree (null when matching fails)
	virtual optional_ptr<ParseResult> MatchParseResult(MatchState &state) const = 0;
	//! Adds this matcher's suggestions to the state
	virtual SuggestionType AddSuggestion(MatchState &state) const;
	virtual SuggestionType AddSuggestionInternal(MatchState &state) const = 0;
	//! Human-readable representation (for debugging)
	virtual string ToString() const = 0;
	void Print() const;

	//! Constructs the root matcher of the full grammar
	static Matcher &RootMatcher(MatcherAllocator &allocator);

	MatcherType Type() const {
		return type;
	}
	void SetName(string name_p) {
		name = std::move(name_p);
	}
	string GetName() const;

public:
	//! Checked downcast to a concrete matcher type
	template <class TARGET>
	TARGET &Cast() {
		if (type != TARGET::TYPE) {
			throw InternalException("Failed to cast matcher to type - matcher type mismatch");
		}
		return reinterpret_cast<TARGET &>(*this);
	}
	template <class TARGET>
	const TARGET &Cast() const {
		if (type != TARGET::TYPE) {
			throw InternalException("Failed to cast matcher to type - matcher type mismatch");
		}
		return reinterpret_cast<const TARGET &>(*this);
	}

protected:
	MatcherType type;
	//! Name assigned via SetName (may be empty)
	string name;
};

//! Arena-style owner of matchers: every allocated matcher stays alive for the
//! allocator's lifetime, so plain references can be handed out.
class MatcherAllocator {
public:
	//! Takes ownership of `matcher` and returns a stable reference to it
	Matcher &Allocate(unique_ptr<Matcher> matcher);

private:
	vector<unique_ptr<Matcher>> matchers;
};

//! Arena-style owner of ParseResult nodes (same pattern as MatcherAllocator).
class ParseResultAllocator {
public:
	optional_ptr<ParseResult> Allocate(unique_ptr<ParseResult> parse_result);

private:
	vector<unique_ptr<ParseResult>> parse_results;
};
} // namespace duckdb

View File

@@ -0,0 +1,66 @@
#pragma once
#include "duckdb/common/case_insensitive_map.hpp"
#include "duckdb/common/string_map_set.hpp"
namespace duckdb {
//! Kinds of rules in the PEG grammar definition.
enum class PEGRuleType {
	LITERAL,   // literal rule ('Keyword'i)
	REFERENCE, // reference to another rule (Rule)
	OPTIONAL,  // optional rule (Rule?)
	OR,        // or rule (Rule1 / Rule2)
	REPEAT     // repeat rule (Rule1*)
};

//! Kinds of tokens encountered while scanning the grammar text itself.
enum class PEGTokenType {
	LITERAL,       // literal token ('Keyword'i)
	REFERENCE,     // reference token (Rule)
	OPERATOR,      // operator token (e.g. '/', '?', '(' or ')')
	FUNCTION_CALL, // start of function call (i.e. Function(...))
	REGEX          // regular expression ([ \t\n\r] or <[a-z_]i[a-z0-9_]i>)
};

//! A single token of a grammar rule definition.
//! NOTE: `text` is a non-owning string_t view into the grammar source.
struct PEGToken {
	PEGTokenType type;
	string_t text;
};

//! A parsed grammar rule: parameter names (for parameterized rules such as
//! List(...) or Parens(...)) plus the token sequence of its definition.
struct PEGRule {
	//! Maps parameter name to parameter index
	string_map_t<idx_t> parameters;
	vector<PEGToken> tokens;

	//! Resets the rule for reuse while scanning
	void Clear() {
		parameters.clear();
		tokens.clear();
	}
};

//! Parses PEG grammar text into a map of rules keyed (case-insensitively) by rule name.
struct PEGParser {
public:
	void ParseRules(const char *grammar);
	void AddRule(string_t rule_name, PEGRule rule);

	case_insensitive_map_t<PEGRule> rules;
};

//! Scanner states used while parsing grammar text.
enum class PEGParseState {
	RULE_NAME,      // Rule name
	RULE_SEPARATOR, // look for <-
	RULE_DEFINITION // part of rule definition
};
//! Returns true if `c` is one of the single-character PEG grammar operators:
//! choice '/', optional '?', grouping '(' ')', repetition '*', negation '!'.
inline bool IsPEGOperator(char c) {
	return c == '/' || c == '?' || c == '(' || c == ')' || c == '*' || c == '!';
}
} // namespace duckdb

View File

@@ -0,0 +1,54 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// tokenizer.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "matcher.hpp"
namespace duckdb {
//! Lexer states while scanning SQL input.
enum class TokenizeState {
	STANDARD = 0,
	SINGLE_LINE_COMMENT,
	MULTI_LINE_COMMENT,
	QUOTED_IDENTIFIER,
	STRING_LITERAL,
	KEYWORD,
	NUMERIC,
	OPERATOR,
	DOLLAR_QUOTED_STRING
};

//! Splits a SQL string into MatcherTokens. Subclasses customize what happens at
//! statement boundaries and with the trailing (possibly incomplete) token.
class BaseTokenizer {
public:
	BaseTokenizer(const string &sql, vector<MatcherToken> &tokens);
	virtual ~BaseTokenizer() = default;

public:
	//! Appends sql[start, end) to the token list
	void PushToken(idx_t start, idx_t end);
	//! Tokenizes the whole input; returns success/failure - TODO confirm failure semantics
	bool TokenizeInput();

	//! Invoked when a statement boundary is reached at position `pos`
	virtual void OnStatementEnd(idx_t pos);
	//! Invoked with the trailing token: the state the lexer ended in, its text and start position
	virtual void OnLastToken(TokenizeState state, string last_word, idx_t last_pos) = 0;

	//! Checks for a multi-character operator at `pos`; on success sets `op_len`
	bool IsSpecialOperator(idx_t pos, idx_t &op_len) const;
	static bool IsSingleByteOperator(char c);
	static bool CharacterIsInitialNumber(char c);
	static bool CharacterIsNumber(char c);
	static bool CharacterIsControlFlow(char c);
	static bool CharacterIsKeyword(char c);
	static bool CharacterIsOperator(char c);
	//! Whether `c` may appear in a dollar-quoted string tag ($tag$...$tag$)
	bool IsValidDollarTagCharacter(char c);

protected:
	//! The SQL text being tokenized (not owned)
	const string &sql;
	//! Output token list (not owned)
	vector<MatcherToken> &tokens;
};
} // namespace duckdb

View File

@@ -0,0 +1,325 @@
#pragma once
#include "duckdb/common/arena_linked_list.hpp"
#include "duckdb/common/exception.hpp"
#include "duckdb/common/optional_ptr.hpp"
#include "duckdb/common/string.hpp"
#include "duckdb/common/types/string_type.hpp"
#include "duckdb/parser/parsed_expression.hpp"
namespace duckdb {
class PEGTransformer; // Forward declaration
enum class ParseResultType : uint8_t {
	LIST,
	OPTIONAL,
	REPEAT,
	CHOICE,
	EXPRESSION,
	IDENTIFIER,
	KEYWORD,
	OPERATOR,
	STATEMENT,
	EXTENSION,
	NUMBER,
	STRING,
	INVALID
};

//! Returns the upper-case name of a ParseResultType (used in error messages and tree dumps).
inline const char *ParseResultToString(ParseResultType type) {
	// Name table indexed by the enum's underlying value; order mirrors the declaration above.
	static constexpr const char *TYPE_NAMES[] = {"LIST",    "OPTIONAL",   "REPEAT",  "CHOICE",   "EXPRESSION",
	                                             "IDENTIFIER", "KEYWORD", "OPERATOR", "STATEMENT", "EXTENSION",
	                                             "NUMBER", "STRING", "INVALID"};
	const auto index = static_cast<uint8_t>(type);
	if (index >= sizeof(TYPE_NAMES) / sizeof(TYPE_NAMES[0])) {
		// Out-of-range values map to INVALID, matching the original switch's fallthrough
		return "INVALID";
	}
	return TYPE_NAMES[index];
}
//! Base class for nodes in the tree produced by matching the PEG grammar
//! against a token stream; concrete subclasses correspond to ParseResultType.
class ParseResult {
public:
	explicit ParseResult(ParseResultType type) : type(type) {
	}
	virtual ~ParseResult() = default;

	//! Checked downcast. A TARGET whose TYPE is INVALID acts as a wildcard and
	//! skips the type check. Throws InternalException on a mismatch.
	template <class TARGET>
	TARGET &Cast() {
		if (TARGET::TYPE != ParseResultType::INVALID && type != TARGET::TYPE) {
			// Report the node's actual type first, the requested type second
			// (the original had these arguments swapped).
			throw InternalException("Failed to cast parse result of type %s to type %s for rule %s",
			                        ParseResultToString(type), ParseResultToString(TARGET::TYPE), name);
		}
		return reinterpret_cast<TARGET &>(*this);
	}

	ParseResultType type;
	//! Name of the grammar rule that produced this node (may be empty)
	string name;

	//! Renders this node into `ss` as one line of a tree dump; `visited` breaks
	//! repeated expansion of shared nodes, `indent`/`is_last` select the prefix.
	virtual void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                              const std::string &indent, bool is_last) const {
		ss << indent << (is_last ? "└─" : "├─") << " " << ParseResultToString(type);
		if (!name.empty()) {
			ss << " (" << name << ")";
		}
	}

	// The public entry point
	std::string ToString() const {
		std::stringstream ss;
		std::unordered_set<const ParseResult *> visited;
		// The root is always the "last" element at its level
		ToStringInternal(ss, visited, "", true);
		return ss.str();
	}
};
//! Leaf node wrapping an identifier token.
struct IdentifierParseResult : ParseResult {
	static constexpr ParseResultType TYPE = ParseResultType::IDENTIFIER;
	//! The identifier text
	string identifier;

	explicit IdentifierParseResult(string identifier_p) : ParseResult(TYPE), identifier(std::move(identifier_p)) {
	}

	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		ParseResult::ToStringInternal(ss, visited, indent, is_last);
		ss << ": \"" << identifier << "\"\n";
	}
};

//! Leaf node wrapping a matched keyword token.
struct KeywordParseResult : ParseResult {
	static constexpr ParseResultType TYPE = ParseResultType::KEYWORD;
	//! The keyword text
	string keyword;

	explicit KeywordParseResult(string keyword_p) : ParseResult(TYPE), keyword(std::move(keyword_p)) {
	}

	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		ParseResult::ToStringInternal(ss, visited, indent, is_last);
		ss << ": \"" << keyword << "\"\n";
	}
};
//! Interior node holding an ordered list of child results
//! (one per matched element of a sequence rule).
struct ListParseResult : ParseResult {
	static constexpr ParseResultType TYPE = ParseResultType::LIST;

public:
	explicit ListParseResult(vector<optional_ptr<ParseResult>> results_p, string name_p)
	    : ParseResult(TYPE), children(std::move(results_p)) {
		name = name_p;
	}

	//! Returns a copy of the child pointer vector
	vector<optional_ptr<ParseResult>> GetChildren() const {
		return children;
	}
	//! Bounds-checked child access
	optional_ptr<ParseResult> GetChild(idx_t index) {
		if (index >= children.size()) {
			throw InternalException("Child index out of bounds");
		}
		return children[index];
	}
	//! Bounds- and type-checked child access
	template <class T>
	T &Child(idx_t index) {
		auto child_ptr = GetChild(index);
		return child_ptr->Cast<T>();
	}

	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		ss << indent << (is_last ? "└─" : "├─");
		// Shared nodes are expanded once; repeats are printed as a stub
		if (visited.count(this)) {
			ss << " List (" << name << ") [... already printed ...]\n";
			return;
		}
		visited.insert(this);
		ss << " " << ParseResultToString(type);
		if (!name.empty()) {
			ss << " (" << name << ")";
		}
		ss << " [" << children.size() << " children]\n";
		std::string child_indent = indent + (is_last ? " " : "");
		for (size_t i = 0; i < children.size(); ++i) {
			if (children[i]) {
				children[i]->ToStringInternal(ss, visited, child_indent, i == children.size() - 1);
			} else {
				ss << child_indent << (i == children.size() - 1 ? "└─" : "├─") << " [nullptr]\n";
			}
		}
	}

private:
	vector<optional_ptr<ParseResult>> children;
};
//! Interior node produced by repetition rules (Rule*): zero or more children.
struct RepeatParseResult : ParseResult {
	static constexpr ParseResultType TYPE = ParseResultType::REPEAT;
	vector<optional_ptr<ParseResult>> children;

	explicit RepeatParseResult(vector<optional_ptr<ParseResult>> results_p)
	    : ParseResult(TYPE), children(std::move(results_p)) {
	}

	//! Bounds- and type-checked child access
	template <class T>
	T &Child(idx_t index) {
		if (index >= children.size()) {
			throw InternalException("Child index out of bounds");
		}
		return children[index]->Cast<T>();
	}

	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		ss << indent << (is_last ? "└─" : "├─");
		// Shared nodes are expanded once; repeats are printed as a stub
		if (visited.count(this)) {
			ss << " Repeat (" << name << ") [... already printed ...]\n";
			return;
		}
		visited.insert(this);
		ss << " " << ParseResultToString(type);
		if (!name.empty()) {
			ss << " (" << name << ")";
		}
		ss << " [" << children.size() << " children]\n";
		std::string child_indent = indent + (is_last ? " " : "");
		for (size_t i = 0; i < children.size(); ++i) {
			if (children[i]) {
				children[i]->ToStringInternal(ss, visited, child_indent, i == children.size() - 1);
			} else {
				ss << child_indent << (i == children.size() - 1 ? "└─" : "├─") << " [nullptr]\n";
			}
		}
	}
};
//! Node produced by optional rules (Rule?): holds the matched result or nothing.
struct OptionalParseResult : ParseResult {
	static constexpr ParseResultType TYPE = ParseResultType::OPTIONAL;
	//! The wrapped result; nullptr when the optional did not match
	optional_ptr<ParseResult> optional_result;

	explicit OptionalParseResult() : ParseResult(TYPE), optional_result(nullptr) {
	}
	explicit OptionalParseResult(optional_ptr<ParseResult> result_p) : ParseResult(TYPE), optional_result(result_p) {
		name = result_p->name;
	}

	bool HasResult() const {
		return optional_result != nullptr;
	}

	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		if (HasResult()) {
			// The optional node has a value, so we "collapse" it by just printing its child.
			// We pass the same indentation and is_last status, so it takes the place of the Optional node.
			optional_result->ToStringInternal(ss, visited, indent, is_last);
		} else {
			// The optional node is empty, which is useful information, so we print it.
			ss << indent << (is_last ? "└─" : "├─") << " " << ParseResultToString(type) << " [empty]\n";
		}
	}
};

//! Node produced by choice rules (A / B / ...): the matched alternative plus
//! the index of the alternative that matched.
class ChoiceParseResult : public ParseResult {
public:
	static constexpr ParseResultType TYPE = ParseResultType::CHOICE;

	explicit ChoiceParseResult(optional_ptr<ParseResult> parse_result_p, idx_t selected_idx_p)
	    : ParseResult(TYPE), result(parse_result_p), selected_idx(selected_idx_p) {
		name = parse_result_p->name;
	}

	//! The result of the chosen alternative
	optional_ptr<ParseResult> result;
	//! Index of the alternative that matched
	idx_t selected_idx;

	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		if (result) {
			// The choice was resolved. We print a marker and then print the child below it.
			ss << indent << (is_last ? "└─" : "├─") << " [" << ParseResultToString(type) << " (idx: " << selected_idx
			   << ")] ->\n";
			// The child is now on a new indentation level and is the only child of our marker.
			std::string child_indent = indent + (is_last ? " " : "");
			result->ToStringInternal(ss, visited, child_indent, true);
		} else {
			// The choice had no result.
			ss << indent << (is_last ? "└─" : "├─") << " " << ParseResultToString(type) << " [no result]\n";
		}
	}
};
//! Leaf node wrapping a numeric literal token (stored as the original text).
class NumberParseResult : public ParseResult {
public:
	static constexpr ParseResultType TYPE = ParseResultType::NUMBER;

	explicit NumberParseResult(string number_p) : ParseResult(TYPE), number(std::move(number_p)) {
	}

	//! The number as written in the input
	string number;

	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		ParseResult::ToStringInternal(ss, visited, indent, is_last);
		ss << ": " << number << "\n";
	}
};

//! Leaf node wrapping a string literal token.
class StringLiteralParseResult : public ParseResult {
public:
	static constexpr ParseResultType TYPE = ParseResultType::STRING;

	explicit StringLiteralParseResult(string string_p) : ParseResult(TYPE), result(std::move(string_p)) {
	}

	//! The string literal's contents
	string result;

	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		ParseResult::ToStringInternal(ss, visited, indent, is_last);
		ss << ": \"" << result << "\"\n";
	}
};

//! Leaf node wrapping an operator token.
class OperatorParseResult : public ParseResult {
public:
	static constexpr ParseResultType TYPE = ParseResultType::OPERATOR;

	explicit OperatorParseResult(string operator_p) : ParseResult(TYPE), operator_token(std::move(operator_p)) {
	}

	//! The operator text
	string operator_token;

	void ToStringInternal(std::stringstream &ss, std::unordered_set<const ParseResult *> &visited,
	                      const std::string &indent, bool is_last) const override {
		ParseResult::ToStringInternal(ss, visited, indent, is_last);
		ss << ": " << operator_token << "\n";
	}
};
} // namespace duckdb

View File

@@ -0,0 +1,208 @@
#pragma once
#include "tokenizer.hpp"
#include "parse_result.hpp"
#include "transform_enum_result.hpp"
#include "transform_result.hpp"
#include "ast/setting_info.hpp"
#include "duckdb/function/macro_function.hpp"
#include "duckdb/parser/expression/case_expression.hpp"
#include "duckdb/parser/expression/function_expression.hpp"
#include "duckdb/parser/expression/parameter_expression.hpp"
#include "duckdb/parser/expression/window_expression.hpp"
#include "duckdb/parser/parsed_data/create_type_info.hpp"
#include "duckdb/parser/parsed_data/transaction_info.hpp"
#include "duckdb/parser/statement/copy_database_statement.hpp"
#include "duckdb/parser/statement/set_statement.hpp"
#include "duckdb/parser/statement/create_statement.hpp"
#include "duckdb/parser/tableref/basetableref.hpp"
#include "parser/peg_parser.hpp"
#include "duckdb/storage/arena_allocator.hpp"
#include "duckdb/parser/query_node/select_node.hpp"
#include "duckdb/parser/statement/drop_statement.hpp"
#include "duckdb/parser/statement/insert_statement.hpp"
namespace duckdb {
// Forward declare
struct QualifiedName;
struct MatcherToken;
//! Per-transform state: the token stream being transformed and a cursor into it.
struct PEGTransformerState {
	explicit PEGTransformerState(const vector<MatcherToken> &tokens_p) : tokens(tokens_p), token_index(0) {
	}
	//! Tokens being transformed (not owned)
	const vector<MatcherToken> &tokens;
	//! Index of the next token to consume
	idx_t token_index;
};
class PEGTransformer {
public:
using AnyTransformFunction =
std::function<unique_ptr<TransformResultValue>(PEGTransformer &, optional_ptr<ParseResult>)>;
PEGTransformer(ArenaAllocator &allocator, PEGTransformerState &state,
const case_insensitive_map_t<AnyTransformFunction> &transform_functions,
const case_insensitive_map_t<PEGRule> &grammar_rules,
const case_insensitive_map_t<unique_ptr<TransformEnumValue>> &enum_mappings)
: allocator(allocator), state(state), grammar_rules(grammar_rules), transform_functions(transform_functions),
enum_mappings(enum_mappings) {
}
public:
template <typename T>
T Transform(optional_ptr<ParseResult> parse_result) {
auto it = transform_functions.find(parse_result->name);
if (it == transform_functions.end()) {
throw NotImplementedException("No transformer function found for rule '%s'", parse_result->name);
}
auto &func = it->second;
unique_ptr<TransformResultValue> base_result = func(*this, parse_result);
if (!base_result) {
throw InternalException("Transformer for rule '%s' returned a nullptr.", parse_result->name);
}
auto *typed_result_ptr = dynamic_cast<TypedTransformResult<T> *>(base_result.get());
if (!typed_result_ptr) {
throw InternalException("Transformer for rule '" + parse_result->name + "' returned an unexpected type.");
}
return std::move(typed_result_ptr->value);
}
template <typename T>
T Transform(ListParseResult &parse_result, idx_t child_index) {
auto child_parse_result = parse_result.GetChild(child_index);
return Transform<T>(child_parse_result);
}
template <typename T>
T TransformEnum(optional_ptr<ParseResult> parse_result) {
auto enum_rule_name = parse_result->name;
auto rule_value = enum_mappings.find(enum_rule_name);
if (rule_value == enum_mappings.end()) {
throw ParserException("Enum transform failed: could not find mapping for '%s'", enum_rule_name);
}
auto *typed_enum_ptr = dynamic_cast<TypedTransformEnumResult<T> *>(rule_value->second.get());
if (!typed_enum_ptr) {
throw InternalException("Enum mapping for rule '%s' has an unexpected type.", enum_rule_name);
}
return typed_enum_ptr->value;
}
template <typename T>
void TransformOptional(ListParseResult &list_pr, idx_t child_idx, T &target) {
auto &opt = list_pr.Child<OptionalParseResult>(child_idx);
if (opt.HasResult()) {
target = Transform<T>(opt.optional_result);
}
}
// Make overloads return raw pointers, as ownership is handled by the ArenaAllocator.
template <class T, typename... Args>
T *Make(Args &&...args) {
return allocator.Make<T>(std::forward<Args>(args)...);
}
void ClearParameters();
static void ParamTypeCheck(PreparedParamType last_type, PreparedParamType new_type);
void SetParam(const string &name, idx_t index, PreparedParamType type);
bool GetParam(const string &name, idx_t &index, PreparedParamType type);
public:
ArenaAllocator &allocator;
PEGTransformerState &state;
const case_insensitive_map_t<PEGRule> &grammar_rules;
const case_insensitive_map_t<AnyTransformFunction> &transform_functions;
const case_insensitive_map_t<unique_ptr<TransformEnumValue>> &enum_mappings;
case_insensitive_map_t<idx_t> named_parameter_map;
idx_t prepared_statement_parameter_index = 0;
PreparedParamType last_param_type = PreparedParamType::INVALID;
};
//! Singleton factory owning the parsed grammar rules, the per-rule transform
//! functions and the enum mappings; exposes the top-level Transform() entry
//! point that turns a matched token stream into a SQLStatement.
class PEGTransformerFactory {
public:
	static PEGTransformerFactory &GetInstance();
	explicit PEGTransformerFactory();
	//! Matches `tokens` starting at `root_rule` and transforms the result into a statement
	static unique_ptr<SQLStatement> Transform(vector<MatcherToken> &tokens, const char *root_rule = "Statement");

private:
	//! Registers an enum mapping for a rule name; throws if already registered
	template <typename T>
	void RegisterEnum(const string &rule_name, T value) {
		auto existing_rule = enum_mappings.find(rule_name);
		if (existing_rule != enum_mappings.end()) {
			throw InternalException("EnumRule %s already exists", rule_name);
		}
		enum_mappings[rule_name] = make_uniq<TypedTransformEnumResult<T>>(value);
	}
	//! Registers a transform function for a rule name, wrapping its return value
	//! in a TypedTransformResult; throws if already registered
	template <class FUNC>
	void Register(const string &rule_name, FUNC function) {
		auto existing_rule = sql_transform_functions.find(rule_name);
		if (existing_rule != sql_transform_functions.end()) {
			throw InternalException("Rule %s already exists", rule_name);
		}
		sql_transform_functions[rule_name] =
		    [function](PEGTransformer &transformer,
		               optional_ptr<ParseResult> parse_result) -> unique_ptr<TransformResultValue> {
			auto result_value = function(transformer, parse_result);
			return make_uniq<TypedTransformResult<decltype(result_value)>>(std::move(result_value));
		};
	}

	PEGTransformerFactory(const PEGTransformerFactory &) = delete;

	static unique_ptr<SQLStatement> TransformStatement(PEGTransformer &, optional_ptr<ParseResult> list);
	// common.gram
	static unique_ptr<ParsedExpression> TransformNumberLiteral(PEGTransformer &transformer,
	                                                           optional_ptr<ParseResult> parse_result);
	static string TransformStringLiteral(PEGTransformer &transformer, optional_ptr<ParseResult> parse_result);
	// expression.gram
	static unique_ptr<ParsedExpression> TransformBaseExpression(PEGTransformer &transformer,
	                                                            optional_ptr<ParseResult> parse_result);
	static unique_ptr<ParsedExpression> TransformExpression(PEGTransformer &transformer,
	                                                        optional_ptr<ParseResult> parse_result);
	static unique_ptr<ParsedExpression> TransformConstantLiteral(PEGTransformer &transformer,
	                                                             optional_ptr<ParseResult> parse_result);
	static unique_ptr<ParsedExpression> TransformLiteralExpression(PEGTransformer &transformer,
	                                                               optional_ptr<ParseResult> parse_result);
	static unique_ptr<ParsedExpression> TransformSingleExpression(PEGTransformer &transformer,
	                                                              optional_ptr<ParseResult> parse_result);
	// use.gram
	static unique_ptr<SQLStatement> TransformUseStatement(PEGTransformer &transformer,
	                                                      optional_ptr<ParseResult> parse_result);
	static QualifiedName TransformUseTarget(PEGTransformer &transformer, optional_ptr<ParseResult> parse_result);
	// set.gram
	static unique_ptr<SQLStatement> TransformResetStatement(PEGTransformer &transformer,
	                                                        optional_ptr<ParseResult> parse_result);
	static vector<unique_ptr<ParsedExpression>> TransformSetAssignment(PEGTransformer &transformer,
	                                                                   optional_ptr<ParseResult> parse_result);
	static SettingInfo TransformSetSetting(PEGTransformer &transformer, optional_ptr<ParseResult> parse_result);
	static unique_ptr<SQLStatement> TransformSetStatement(PEGTransformer &transformer,
	                                                      optional_ptr<ParseResult> parse_result);
	static unique_ptr<SQLStatement> TransformSetTimeZone(PEGTransformer &transformer,
	                                                     optional_ptr<ParseResult> parse_result);
	static SettingInfo TransformSetVariable(PEGTransformer &transformer, optional_ptr<ParseResult> parse_result);
	static unique_ptr<SetVariableStatement> TransformStandardAssignment(PEGTransformer &transformer,
	                                                                    optional_ptr<ParseResult> parse_result);
	static vector<unique_ptr<ParsedExpression>> TransformVariableList(PEGTransformer &transformer,
	                                                                  optional_ptr<ParseResult> parse_result);

	//! Helper functions
	static vector<optional_ptr<ParseResult>> ExtractParseResultsFromList(optional_ptr<ParseResult> parse_result);

private:
	//! Parses the inlined grammar into rules
	PEGParser parser;
	//! Transform functions keyed by rule name
	case_insensitive_map_t<PEGTransformer::AnyTransformFunction> sql_transform_functions;
	//! Enum values keyed by rule name
	case_insensitive_map_t<unique_ptr<TransformEnumValue>> enum_mappings;
};
} // namespace duckdb

View File

@@ -0,0 +1,15 @@
#pragma once
namespace duckdb {
struct TransformEnumValue {
virtual ~TransformEnumValue() = default;
};
template <class T>
struct TypedTransformEnumResult : public TransformEnumValue {
explicit TypedTransformEnumResult(T value_p) : value(std::move(value_p)) {
}
T value;
};
} // namespace duckdb

View File

@@ -0,0 +1,16 @@
#pragma once
namespace duckdb {
struct TransformResultValue {
virtual ~TransformResultValue() = default;
};
template <class T>
struct TypedTransformResult : public TransformResultValue {
explicit TypedTransformResult(T value_p) : value(std::move(value_p)) {
}
T value;
};
} // namespace duckdb

View File

@@ -0,0 +1,167 @@
import os
import argparse
from pathlib import Path
# Command-line interface: by default the script regenerates the inlined C++
# grammar header; --print dumps the grammar to stdout instead, and
# --grammar-file writes a plain .gram file instead of the header.
parser = argparse.ArgumentParser(description='Inline the auto-complete PEG grammar files')
parser.add_argument(
    '--print', action='store_true', help='Print the grammar instead of writing to a file', default=False
)
parser.add_argument(
    '--grammar-file',
    action='store_true',
    help='Write the grammar to a .gram file instead of a C++ header',
    default=False,
)
args = parser.parse_args()

# Source/destination paths, all relative to this script's directory
autocomplete_dir = Path(__file__).parent
statements_dir = os.path.join(autocomplete_dir, 'grammar', 'statements')
keywords_dir = os.path.join(autocomplete_dir, 'grammar', 'keywords')
target_file = os.path.join(autocomplete_dir, 'include', 'inlined_grammar.hpp')
contents = ""

# Maps filenames to string categories
FILENAME_TO_CATEGORY = {
    "reserved_keyword.list": "RESERVED_KEYWORD",
    "unreserved_keyword.list": "UNRESERVED_KEYWORD",
    "column_name_keyword.list": "COL_NAME_KEYWORD",
    "func_name_keyword.list": "TYPE_FUNC_NAME_KEYWORD",
    "type_name_keyword.list": "TYPE_FUNC_NAME_KEYWORD",
}

# Maps category names to their C++ map variable names
CPP_MAP_NAMES = {
    "RESERVED_KEYWORD": "reserved_keyword_map",
    "UNRESERVED_KEYWORD": "unreserved_keyword_map",
    "COL_NAME_KEYWORD": "colname_keyword_map",
    "TYPE_FUNC_NAME_KEYWORD": "typefunc_keyword_map",
}

# Use a dictionary of sets to collect keywords for each category, preventing duplicates
keyword_sets = {category: set() for category in CPP_MAP_NAMES.keys()}

# --- Validation and Loading (largely unchanged) ---
# For validation during the loading phase
reserved_set = set()
unreserved_set = set()
def load_keywords(filepath):
    """Read a keyword list file; return lowercased, stripped, non-empty lines."""
    keywords = []
    with open(filepath, "r") as handle:
        for raw_line in handle:
            word = raw_line.strip()
            if word:
                keywords.append(word.lower())
    return keywords
# Load every recognized keyword list and validate that no keyword appears in
# both the RESERVED and UNRESERVED categories. Note that the conflict check
# also matches the keyword's own set, so duplicates within a single
# reserved/unreserved list are rejected with the same message.
for filename in os.listdir(keywords_dir):
    if filename not in FILENAME_TO_CATEGORY:
        continue
    category = FILENAME_TO_CATEGORY[filename]
    keywords = load_keywords(os.path.join(keywords_dir, filename))
    for kw in keywords:
        # Validation logic remains the same to enforce rules
        if category == "RESERVED_KEYWORD":
            if kw in reserved_set or kw in unreserved_set:
                print(f"Keyword '{kw}' has conflicting RESERVED/UNRESERVED categories")
                exit(1)
            reserved_set.add(kw)
        elif category == "UNRESERVED_KEYWORD":
            if kw in reserved_set or kw in unreserved_set:
                print(f"Keyword '{kw}' has conflicting RESERVED/UNRESERVED categories")
                exit(1)
            unreserved_set.add(kw)
        # Add the keyword to the appropriate set
        keyword_sets[category].add(kw)
# --- C++ Code Generation ---
# Emit keyword_map.cpp, which populates the PEGKeywordHelper category sets with
# one insert per keyword. The file is fully regenerated on every run.
output_path = os.path.join(autocomplete_dir, "keyword_map.cpp")
with open(output_path, "w") as f:
    f.write("/* THIS FILE WAS AUTOMATICALLY GENERATED BY inline_grammar.py */\n")
    f.write("#include \"keyword_helper.hpp\"\n\n")
    f.write("namespace duckdb {\n")
    f.write("void PEGKeywordHelper::InitializeKeywordMaps() {\n")
    # Re-entry guard in the generated function. Note: no stray ';' after the
    # closing brace, and no leftover editorial comment in the generated file.
    f.write("\tif (initialized) {\n\t\treturn;\n\t}\n")
    f.write("\tinitialized = true;\n\n")

    # Get the total number of categories to handle the last item differently
    num_categories = len(keyword_sets)

    # Iterate through each category and generate code for each map
    for i, (category, keywords) in enumerate(keyword_sets.items()):
        cpp_map_name = CPP_MAP_NAMES[category]
        f.write(f"\t// Populating {cpp_map_name}\n")
        # Sort keywords for deterministic output
        for kw in sorted(keywords):
            # Populate the C++ set with insert
            f.write(f'\t{cpp_map_name}.insert("{kw}");\n')
        # Add a newline for all but the last block
        if i < num_categories - 1:
            f.write("\n")

    f.write("}\n")
    f.write("} // namespace duckdb\n")
print(f"Successfully generated {output_path}")
def filename_to_upper_camel(file):
    """Convert a snake_case filename (extension dropped) to UpperCamelCase.

    e.g. 'column_name_keyword.list' -> 'ColumnNameKeyword'
    """
    stem = os.path.splitext(file)[0]
    return ''.join(part.capitalize() for part in stem.split('_'))
# Build the full grammar text: first one PEG rule per keyword list
# (e.g. reserved_keyword.list -> "ReservedKeyword <- 'all' / 'analyse' / ..."),
# then the contents of every statement grammar file.
for file in os.listdir(keywords_dir):
    if not file.endswith('.list'):
        continue
    rule_name = filename_to_upper_camel(file)
    rule = f"{rule_name} <- "
    with open(os.path.join(keywords_dir, file), 'r') as f:
        lines = [f"'{line.strip()}'" for line in f if line.strip()]
    rule += " /\n".join(lines) + "\n"
    contents += rule

# Statement grammars are appended verbatim; anything else in the directory is an error
for file in os.listdir(statements_dir):
    if not file.endswith('.gram'):
        raise Exception(f"File {file} does not end with .gram")
    with open(os.path.join(statements_dir, file), 'r') as f:
        contents += f.read() + "\n"

# --print: dump the assembled grammar to stdout and stop (writes no files
# beyond the keyword map generated above)
if args.print:
    print(contents)
    exit(0)

# --grammar-file: write a plain .gram file next to the header location and stop
if args.grammar_file:
    grammar_file = target_file.replace('.hpp', '.gram')
    with open(grammar_file, 'w+') as f:
        f.write(contents)
    exit(0)
def get_grammar_bytes(contents, add_null_terminator=True):
    """Render grammar text as adjacent C++ string-literal lines.

    Backslashes and double quotes are escaped, each line gets an explicit
    "\\n" terminator, and empty lines are dropped.
    NOTE(review): add_null_terminator is accepted but currently unused - the
    char-array initializer in the generated header already receives an
    implicit terminating '\\0' from the string literal.
    """
    rendered = []
    for line in contents.split('\n'):
        if not line:
            continue
        escaped = line.replace('\\', '\\\\').replace('"', '\\"')
        rendered.append('\t"' + escaped + '\\n"\n')
    return ''.join(rendered)
# Default mode: write the grammar as an inlined C++ char-array constant.
# The triple-quoted strings below are emitted verbatim into the header.
with open(target_file, 'w+') as f:
    f.write(
        '''/* THIS FILE WAS AUTOMATICALLY GENERATED BY inline_grammar.py */
#pragma once
namespace duckdb {
const char INLINED_PEG_GRAMMAR[] = {
'''
        + get_grammar_bytes(contents)
        + '''
};
} // namespace duckdb
'''
    )

View File

@@ -0,0 +1,35 @@
#include "keyword_helper.hpp"
namespace duckdb {
//! Meyers singleton: constructed (and keyword maps populated) on first use.
PEGKeywordHelper &PEGKeywordHelper::Instance() {
	static PEGKeywordHelper instance;
	return instance;
}

//! Constructor eagerly populates the keyword category sets.
PEGKeywordHelper::PEGKeywordHelper() {
	InitializeKeywordMaps();
}
bool PEGKeywordHelper::KeywordCategoryType(const std::string &text, const PEGKeywordCategory type) const {
switch (type) {
case PEGKeywordCategory::KEYWORD_RESERVED: {
auto it = reserved_keyword_map.find(text);
return it != reserved_keyword_map.end();
}
case PEGKeywordCategory::KEYWORD_UNRESERVED: {
auto it = unreserved_keyword_map.find(text);
return it != unreserved_keyword_map.end();
}
case PEGKeywordCategory::KEYWORD_TYPE_FUNC: {
auto it = typefunc_keyword_map.find(text);
return it != typefunc_keyword_map.end();
}
case PEGKeywordCategory::KEYWORD_COL_NAME: {
auto it = colname_keyword_map.find(text);
return it != colname_keyword_map.end();
}
default:
return false;
}
}
} // namespace duckdb

View File

@@ -0,0 +1,513 @@
/* THIS FILE WAS AUTOMATICALLY GENERATED BY inline_grammar.py */
#include "keyword_helper.hpp"
namespace duckdb {
// Populates the four keyword category sets. Idempotent: subsequent calls are
// no-ops, so the helper can be initialized from multiple call sites.
// NOTE(review): this file is generated by inline_grammar.py - the data-table
// form below should ideally be emitted by the generator itself.
void PEGKeywordHelper::InitializeKeywordMaps() {
	if (initialized) {
		return;
	}
	initialized = true;
	// Populating reserved_keyword_map
	static const char *const RESERVED_KEYWORDS[] = {
	    "all", "analyse", "analyze", "and", "any", "array", "as", "asc",
	    "asymmetric", "both", "case", "cast", "check", "collate", "column", "constraint",
	    "create", "default", "deferrable", "desc", "describe", "distinct", "do", "else",
	    "end", "except", "false", "fetch", "for", "foreign", "from", "group",
	    "having", "in", "initially", "intersect", "into", "lambda", "lateral", "leading",
	    "limit", "not", "null", "offset", "on", "only", "or", "order",
	    "pivot", "pivot_longer", "pivot_wider", "placing", "primary", "qualify", "references", "returning",
	    "select", "show", "some", "summarize", "symmetric", "table", "then", "to",
	    "trailing", "true", "union", "unique", "unpivot", "using", "variadic", "when",
	    "where", "window", "with"};
	for (auto keyword : RESERVED_KEYWORDS) {
		reserved_keyword_map.insert(keyword);
	}
	// Populating unreserved_keyword_map
	static const char *const UNRESERVED_KEYWORDS[] = {
	    "abort", "absolute", "access", "action", "add", "admin", "after", "aggregate",
	    "also", "alter", "always", "assertion", "assignment", "attach", "attribute", "backward",
	    "before", "begin", "cache", "call", "called", "cascade", "cascaded", "catalog",
	    "centuries", "century", "chain", "characteristics", "checkpoint", "class", "close", "cluster",
	    "comment", "comments", "commit", "committed", "compression", "configuration", "conflict", "connection",
	    "constraints", "content", "continue", "conversion", "copy", "cost", "csv", "cube",
	    "current", "cursor", "cycle", "data", "database", "day", "days", "deallocate",
	    "decade", "decades", "declare", "defaults", "deferred", "definer", "delete", "delimiter",
	    "delimiters", "depends", "detach", "dictionary", "disable", "discard", "document", "domain",
	    "double", "drop", "each", "enable", "encoding", "encrypted", "enum", "error",
	    "escape", "event", "exclude", "excluding", "exclusive", "execute", "explain", "export",
	    "export_state", "extension", "extensions", "external", "family", "filter", "first", "following",
	    "force", "forward", "function", "functions", "global", "grant", "granted", "groups",
	    "handler", "header", "hold", "hour", "hours", "identity", "if", "ignore",
	    "immediate", "immutable", "implicit", "import", "include", "including", "increment", "index",
	    "indexes", "inherit", "inherits", "inline", "input", "insensitive", "insert", "install",
	    "instead", "invoker", "isolation", "json", "key", "label", "language", "large",
	    "last", "leakproof", "level", "listen", "load", "local", "location", "lock",
	    "locked", "logged", "macro", "mapping", "match", "matched", "materialized", "maxvalue",
	    "merge", "method", "microsecond", "microseconds", "millennia", "millennium", "millisecond", "milliseconds",
	    "minute", "minutes", "minvalue", "mode", "month", "months", "move", "name",
	    "names", "new", "next", "no", "nothing", "notify", "nowait", "nulls",
	    "object", "of", "off", "oids", "old", "operator", "option", "options",
	    "ordinality", "others", "over", "overriding", "owned", "owner", "parallel", "parser",
	    "partial", "partition", "partitioned", "passing", "password", "percent", "persistent", "plans",
	    "policy", "pragma", "preceding", "prepare", "prepared", "preserve", "prior", "privileges",
	    "procedural", "procedure", "program", "publication", "quarter", "quarters", "quote", "range",
	    "read", "reassign", "recheck", "recursive", "ref", "referencing", "refresh", "reindex",
	    "relative", "release", "rename", "repeatable", "replace", "replica", "reset", "respect",
	    "restart", "restrict", "returns", "revoke", "role", "rollback", "rollup", "rows",
	    "rule", "sample", "savepoint", "schema", "schemas", "scope", "scroll", "search",
	    "second", "seconds", "secret", "security", "sequence", "sequences", "serializable", "server",
	    "session", "set", "sets", "share", "simple", "skip", "snapshot", "sorted",
	    "source", "sql", "stable", "standalone", "start", "statement", "statistics", "stdin",
	    "stdout", "storage", "stored", "strict", "strip", "subscription", "sysid", "system",
	    "tables", "tablespace", "target", "temp", "template", "temporary", "text", "ties",
	    "transaction", "transform", "trigger", "truncate", "trusted", "type", "types", "unbounded",
	    "uncommitted", "unencrypted", "unknown", "unlisten", "unlogged", "until", "update", "use",
	    "user", "vacuum", "valid", "validate", "validator", "value", "variable", "varying",
	    "version", "view", "views", "virtual", "volatile", "week", "weeks", "whitespace",
	    "within", "without", "work", "wrapper", "write", "xml", "year", "years",
	    "yes", "zone"};
	for (auto keyword : UNRESERVED_KEYWORDS) {
		unreserved_keyword_map.insert(keyword);
	}
	// Populating colname_keyword_map
	static const char *const COLNAME_KEYWORDS[] = {
	    "between", "bigint", "bit", "boolean", "char", "character", "coalesce", "columns",
	    "dec", "decimal", "exists", "extract", "float", "generated", "grouping", "grouping_id",
	    "inout", "int", "integer", "interval", "map", "national", "nchar", "none",
	    "nullif", "numeric", "out", "overlay", "position", "precision", "real", "row",
	    "setof", "smallint", "struct", "substring", "time", "timestamp", "treat", "trim",
	    "try_cast", "values", "varchar", "xmlattributes", "xmlconcat", "xmlelement", "xmlexists", "xmlforest",
	    "xmlnamespaces", "xmlparse", "xmlpi", "xmlroot", "xmlserialize", "xmltable"};
	for (auto keyword : COLNAME_KEYWORDS) {
		colname_keyword_map.insert(keyword);
	}
	// Populating typefunc_keyword_map
	static const char *const TYPEFUNC_KEYWORDS[] = {
	    "anti", "asof", "at", "authorization", "binary", "by", "collation", "columns",
	    "concurrently", "cross", "freeze", "full", "generated", "glob", "ilike", "inner",
	    "is", "isnull", "join", "left", "like", "map", "natural", "notnull",
	    "outer", "overlaps", "positional", "right", "semi", "similar", "struct", "tablesample",
	    "try_cast", "unpack", "verbose"};
	for (auto keyword : TYPEFUNC_KEYWORDS) {
		typefunc_keyword_map.insert(keyword);
	}
}
} // namespace duckdb

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,4 @@
# Build the PEG parser as an object library and append its objects to the
# autocomplete extension's source list in the parent scope.
add_library_unity(duckdb_peg_parser OBJECT peg_parser.cpp)
set(AUTOCOMPLETE_EXTENSION_FILES
    ${AUTOCOMPLETE_EXTENSION_FILES} $<TARGET_OBJECTS:duckdb_peg_parser>
    PARENT_SCOPE)

View File

@@ -0,0 +1,194 @@
#include "parser/peg_parser.hpp"
namespace duckdb {
// Registers a parsed rule under its name. Rule names must be unique within
// the grammar; a duplicate indicates a malformed grammar file.
void PEGParser::AddRule(string_t rule_name, PEGRule rule) {
	if (rules.find(rule_name.GetString()) != rules.end()) {
		throw InternalException("Failed to parse grammar - duplicate rule name %s", rule_name.GetString());
	}
	rules.emplace(rule_name, std::move(rule));
}
// Parses a PEG grammar (null-terminated text) into the parser's rule map.
// The grammar is a sequence of rules of the form:
//     RuleName <- tokens...
// optionally preceded by a parameter list "RuleName(Param) <- ...".
// Tokens are literals ('...'), rule references, regexes ([...] or <...>) and
// single-character operators. '#' starts a line comment. A rule ends at a
// newline, unless brackets are still open or the last token was '/'.
void PEGParser::ParseRules(const char *grammar) {
	string_t rule_name;
	PEGRule rule;
	PEGParseState parse_state = PEGParseState::RULE_NAME;
	// number of currently-open parentheses within the rule definition
	idx_t bracket_count = 0;
	// true when the most recently consumed definition token was '/'
	bool in_or_clause = false;
	// look for the rules
	idx_t c = 0;
	while (grammar[c]) {
		if (grammar[c] == '#') {
			// comment - ignore until EOL
			while (grammar[c] && !StringUtil::CharacterIsNewline(grammar[c])) {
				c++;
			}
			continue;
		}
		if (parse_state == PEGParseState::RULE_DEFINITION && StringUtil::CharacterIsNewline(grammar[c]) &&
		    bracket_count == 0 && !in_or_clause && !rule.tokens.empty()) {
			// if we see a newline while we are parsing a rule definition we can complete the rule
			AddRule(rule_name, std::move(rule));
			rule_name = string_t();
			rule.Clear();
			// look for the subsequent rule
			parse_state = PEGParseState::RULE_NAME;
			c++;
			continue;
		}
		if (StringUtil::CharacterIsSpace(grammar[c])) {
			// skip whitespace
			c++;
			continue;
		}
		switch (parse_state) {
		case PEGParseState::RULE_NAME: {
			// look for alpha-numerics
			idx_t start_pos = c;
			if (grammar[c] == '%') {
				// rules can start with % (%whitespace)
				c++;
			}
			while (grammar[c] && StringUtil::CharacterIsAlphaNumeric(grammar[c])) {
				c++;
			}
			if (c == start_pos) {
				throw InternalException("Failed to parse grammar - expected an alpha-numeric rule name (pos %d)", c);
			}
			// note: rule_name is a non-owning view into the grammar text
			rule_name = string_t(grammar + start_pos, c - start_pos);
			rule.Clear();
			parse_state = PEGParseState::RULE_SEPARATOR;
			break;
		}
		case PEGParseState::RULE_SEPARATOR: {
			if (grammar[c] == '(') {
				// "RuleName(Param)" - parse the parameter list (a single parameter)
				if (!rule.parameters.empty()) {
					throw InternalException("Failed to parse grammar - multiple parameters at position %d", c);
				}
				// parameter
				c++;
				idx_t parameter_start = c;
				while (grammar[c] && StringUtil::CharacterIsAlphaNumeric(grammar[c])) {
					c++;
				}
				if (parameter_start == c) {
					throw InternalException("Failed to parse grammar - expected a parameter at position %d", c);
				}
				rule.parameters.insert(
				    make_pair(string_t(grammar + parameter_start, c - parameter_start), rule.parameters.size()));
				if (grammar[c] != ')') {
					throw InternalException("Failed to parse grammar - expected closing bracket at position %d", c);
				}
				c++;
				// stay in RULE_SEPARATOR - the "<-" must still follow
			} else {
				if (grammar[c] != '<' || grammar[c + 1] != '-') {
					throw InternalException("Failed to parse grammar - expected a rule definition (<-) (pos %d)", c);
				}
				c += 2;
				parse_state = PEGParseState::RULE_DEFINITION;
			}
			break;
		}
		case PEGParseState::RULE_DEFINITION: {
			// we parse either:
			// (1) a literal ('Keyword'i)
			// (2) a rule reference (Rule)
			// (3) an operator ( '(' '/' '?' '*' ')')
			in_or_clause = false;
			if (grammar[c] == '\'') {
				// parse literal
				c++;
				idx_t literal_start = c;
				while (grammar[c] && grammar[c] != '\'') {
					if (grammar[c] == '\\') {
						// escape
						c++;
					}
					c++;
				}
				if (!grammar[c]) {
					throw InternalException("Failed to parse grammar - did not find closing ' (pos %d)", c);
				}
				PEGToken token;
				token.text = string_t(grammar + literal_start, c - literal_start);
				token.type = PEGTokenType::LITERAL;
				rule.tokens.push_back(token);
				c++;
			} else if (StringUtil::CharacterIsAlphaNumeric(grammar[c])) {
				// alphanumeric character - this is a rule reference
				idx_t rule_start = c;
				while (grammar[c] && StringUtil::CharacterIsAlphaNumeric(grammar[c])) {
					c++;
				}
				PEGToken token;
				token.text = string_t(grammar + rule_start, c - rule_start);
				if (grammar[c] == '(') {
					// this is a function call
					c++;
					bracket_count++;
					token.type = PEGTokenType::FUNCTION_CALL;
				} else {
					token.type = PEGTokenType::REFERENCE;
				}
				rule.tokens.push_back(token);
			} else if (grammar[c] == '[' || grammar[c] == '<') {
				// regular expression- [^"] or <...>
				// NOTE(review): an unterminated regex (missing ']'/'>') leaves
				// grammar[c] == 0 here and c++ below steps past the terminator -
				// looks unreachable for well-formed grammars, but confirm
				idx_t rule_start = c;
				char final_char = grammar[c] == '[' ? ']' : '>';
				while (grammar[c] && grammar[c] != final_char) {
					if (grammar[c] == '\\') {
						// handle escapes
						c++;
					}
					if (grammar[c]) {
						c++;
					}
				}
				c++;
				PEGToken token;
				token.text = string_t(grammar + rule_start, c - rule_start);
				token.type = PEGTokenType::REGEX;
				rule.tokens.push_back(token);
			} else if (IsPEGOperator(grammar[c])) {
				if (grammar[c] == '(') {
					bracket_count++;
				} else if (grammar[c] == ')') {
					if (bracket_count == 0) {
						throw InternalException("Failed to parse grammar - unclosed bracket at position %d in rule %s",
						                        c, rule_name.GetString());
					}
					bracket_count--;
				} else if (grammar[c] == '/') {
					in_or_clause = true;
				}
				// operator - operators are always length 1
				PEGToken token;
				token.text = string_t(grammar + c, 1);
				token.type = PEGTokenType::OPERATOR;
				rule.tokens.push_back(token);
				c++;
			} else {
				throw InternalException("Unrecognized rule contents in rule %s (character %s)", rule_name.GetString(),
				                        string(1, grammar[c]));
			}
		}
			// NOTE(review): RULE_DEFINITION falls through into the empty default
			// case below - harmless, but an explicit break would be clearer
		default:
			break;
		}
		if (!grammar[c]) {
			break;
		}
	}
	// end of input - validate the final state and flush the last rule
	if (parse_state == PEGParseState::RULE_SEPARATOR) {
		throw InternalException("Failed to parse grammar - rule %s does not have a definition", rule_name.GetString());
	}
	if (parse_state == PEGParseState::RULE_DEFINITION) {
		if (rule.tokens.empty()) {
			throw InternalException("Failed to parse grammar - rule %s is empty", rule_name.GetString());
		}
		AddRule(rule_name, std::move(rule));
	}
}
} // namespace duckdb

View File

@@ -0,0 +1,394 @@
#include "tokenizer.hpp"
#include "duckdb/common/printer.hpp"
#include "duckdb/common/string_util.hpp"
namespace duckdb {
// Tokenizer over "sql"; produced tokens are appended to the caller-owned
// "tokens" vector. Both references must outlive the tokenizer.
BaseTokenizer::BaseTokenizer(const string &sql, vector<MatcherToken> &tokens) : sql(sql), tokens(tokens) {
}
static bool OperatorEquals(const char *str, const char *op, idx_t len, idx_t &op_len) {
for (idx_t i = 0; i < len; i++) {
if (str[i] != op[i]) {
return false;
}
}
op_len = len;
return true;
}
// Checks whether a multi-byte operator starts at "pos"; if so, its length is
// reported through "op_len".
bool BaseTokenizer::IsSpecialOperator(idx_t pos, idx_t &op_len) const {
	const char *op_start = sql.c_str() + pos;
	// the only 3-byte operator ("->>") needs three characters of input
	if (pos + 2 < sql.size() && OperatorEquals(op_start, "->>", 3, op_len)) {
		return true;
	}
	if (pos + 1 >= sql.size()) {
		// 2-byte operators are out-of-bounds
		return false;
	}
	static const char *const TWO_BYTE_OPERATORS[] = {"::", ":=", "->", "**", "//"};
	for (auto op : TWO_BYTE_OPERATORS) {
		if (OperatorEquals(op_start, op, 2, op_len)) {
			return true;
		}
	}
	return false;
}
// Characters that always form a complete one-character token on their own.
bool BaseTokenizer::IsSingleByteOperator(char c) {
	for (const char *op = "(){}[],?$+-#"; *op; op++) {
		if (*op == c) {
			return true;
		}
	}
	return false;
}
// Characters that can start a numeric literal (a digit or a leading '.').
bool BaseTokenizer::CharacterIsInitialNumber(char c) {
	return (c >= '0' && c <= '9') || c == '.';
}
// Characters that may appear inside a numeric literal: in addition to the
// initial characters, exponent markers, signs and digit group separators.
bool BaseTokenizer::CharacterIsNumber(char c) {
	if (CharacterIsInitialNumber(c)) {
		return true;
	}
	return c == 'e' || c == 'E' || c == '-' || c == '+' || c == '_';
}
// Characters that trigger a tokenizer state change (quotes, comment starts,
// statement separator, dot).
bool BaseTokenizer::CharacterIsControlFlow(char c) {
	return c == '\'' || c == '-' || c == ';' || c == '"' || c == '.';
}
// A keyword character is anything that is not an operator, whitespace or a
// control-flow character.
bool BaseTokenizer::CharacterIsKeyword(char c) {
	return !IsSingleByteOperator(c) && !StringUtil::CharacterIsOperator(c) && !StringUtil::CharacterIsSpace(c) &&
	       !CharacterIsControlFlow(c);
}
// Operator characters that can extend a multi-character operator token
// (excludes single-byte operators and control-flow characters).
bool BaseTokenizer::CharacterIsOperator(char c) {
	return !IsSingleByteOperator(c) && !CharacterIsControlFlow(c) && StringUtil::CharacterIsOperator(c);
}
// Appends sql[start, end) as a token positioned at "start"; empty ranges
// produce no token.
void BaseTokenizer::PushToken(idx_t start, idx_t end) {
	if (start >= end) {
		return;
	}
	tokens.emplace_back(sql.substr(start, end - start), start);
}
// Characters allowed inside a dollar-quote tag ($tag$...$tag$): ASCII letters
// and the high-bit byte range '\200'-'\377' (multi-byte UTF-8 continuation
// bytes are accepted wholesale).
bool BaseTokenizer::IsValidDollarTagCharacter(char c) {
	return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '\200' && c <= '\377');
}
bool BaseTokenizer::TokenizeInput() {
auto state = TokenizeState::STANDARD;
idx_t last_pos = 0;
string dollar_quote_marker;
for (idx_t i = 0; i < sql.size(); i++) {
auto c = sql[i];
switch (state) {
case TokenizeState::STANDARD:
if (c == '\'') {
state = TokenizeState::STRING_LITERAL;
last_pos = i;
break;
}
if (c == '"') {
state = TokenizeState::QUOTED_IDENTIFIER;
last_pos = i;
break;
}
if (c == ';') {
// end of statement
OnStatementEnd(i);
last_pos = i + 1;
break;
}
if (c == '$') {
// Dollar-quoted string statement
if (i + 1 >= sql.size()) {
// We need more than a single dollar
break;
}
if (sql[i + 1] >= '0' && sql[i + 1] <= '9') {
// $[numeric] is a parameter, not a dollar-quoted string
break;
}
// Dollar-quoted string
last_pos = i;
// Scan until next $
idx_t next_dollar = 0;
for (idx_t idx = i + 1; idx < sql.size(); idx++) {
if (sql[idx] == '$') {
next_dollar = idx;
break;
}
if (!IsValidDollarTagCharacter(sql[idx])) {
break;
}
}
if (next_dollar == 0) {
break;
}
state = TokenizeState::DOLLAR_QUOTED_STRING;
last_pos = i;
i = next_dollar;
if (i < sql.size()) {
// Found a complete marker, store it.
idx_t marker_start = last_pos + 1;
dollar_quote_marker = string(sql.begin() + marker_start, sql.begin() + i);
}
break;
}
if (c == '-' && i + 1 < sql.size() && sql[i + 1] == '-') {
i++;
state = TokenizeState::SINGLE_LINE_COMMENT;
break;
}
if (c == '/' && i + 1 < sql.size() && sql[i + 1] == '*') {
i++;
state = TokenizeState::MULTI_LINE_COMMENT;
break;
}
if (StringUtil::CharacterIsSpace(c)) {
// space character - skip
last_pos = i + 1;
break;
}
idx_t op_len;
if (IsSpecialOperator(i, op_len)) {
// special operator - push the special operator
tokens.emplace_back(sql.substr(i, op_len), last_pos);
i += op_len - 1;
last_pos = i + 1;
break;
}
if (IsSingleByteOperator(c)) {
// single-byte operator - directly push the token
tokens.emplace_back(string(1, c), last_pos);
last_pos = i + 1;
break;
}
if (CharacterIsInitialNumber(c)) {
// parse a numeric literal
state = TokenizeState::NUMERIC;
last_pos = i;
break;
}
if (StringUtil::CharacterIsOperator(c)) {
state = TokenizeState::OPERATOR;
last_pos = i;
break;
}
state = TokenizeState::KEYWORD;
last_pos = i;
break;
case TokenizeState::NUMERIC:
// numeric literal - check if this is still numeric
if (!CharacterIsNumber(c)) {
// not a number - return to standard state
// number must END with initial number
// i.e. we accept "_" in numbers (1_1), but "1_" is tokenized as the number "1" followed by the keyword
// "_" backtrack until it does
while (!CharacterIsInitialNumber(sql[i - 1])) {
i--;
}
PushToken(last_pos, i);
state = TokenizeState::STANDARD;
last_pos = i;
i--;
}
break;
case TokenizeState::OPERATOR:
// operator literal - check if this is still an operator
if (!CharacterIsOperator(c)) {
// not an operator - return to standard state
PushToken(last_pos, i);
state = TokenizeState::STANDARD;
last_pos = i;
i--;
}
break;
case TokenizeState::KEYWORD:
// keyword - check if this is still a keyword
if (!CharacterIsKeyword(c)) {
// not a keyword - return to standard state
PushToken(last_pos, i);
state = TokenizeState::STANDARD;
last_pos = i;
i--;
}
break;
case TokenizeState::STRING_LITERAL:
if (c == '\'') {
if (i + 1 < sql.size() && sql[i + 1] == '\'') {
// escaped - skip escape
i++;
} else {
PushToken(last_pos, i + 1);
last_pos = i + 1;
state = TokenizeState::STANDARD;
}
}
break;
case TokenizeState::QUOTED_IDENTIFIER:
if (c == '"') {
if (i + 1 < sql.size() && sql[i + 1] == '"') {
// escaped - skip escape
i++;
} else {
PushToken(last_pos, i + 1);
last_pos = i + 1;
state = TokenizeState::STANDARD;
}
}
break;
case TokenizeState::SINGLE_LINE_COMMENT:
if (c == '\n' || c == '\r') {
last_pos = i + 1;
state = TokenizeState::STANDARD;
}
break;
case TokenizeState::MULTI_LINE_COMMENT:
if (c == '*' && i + 1 < sql.size() && sql[i + 1] == '/') {
i++;
last_pos = i + 1;
state = TokenizeState::STANDARD;
}
break;
case TokenizeState::DOLLAR_QUOTED_STRING: {
// Dollar-quoted string -- all that will get us out is a $[marker]$
if (c != '$') {
break;
}
if (i + 1 >= sql.size()) {
// No room for the final dollar
break;
}
// Skip to the next dollar symbol
idx_t start = i + 1;
idx_t end = start;
while (end < sql.size() && sql[end] != '$') {
end++;
}
if (end >= sql.size()) {
// No final dollar, continue as normal
break;
}
if (end - start != dollar_quote_marker.size()) {
// Length mismatch, cannot match
break;
}
if (sql.compare(start, dollar_quote_marker.size(), dollar_quote_marker) != 0) {
// marker mismatch
break;
}
// Marker found! Revert to standard state
size_t full_marker_len = dollar_quote_marker.size() + 2;
string quoted = sql.substr(last_pos, (start + dollar_quote_marker.size() + 1) - last_pos);
quoted = "'" + quoted.substr(full_marker_len, quoted.size() - 2 * full_marker_len) + "'";
tokens.emplace_back(quoted, full_marker_len);
dollar_quote_marker = string();
state = TokenizeState::STANDARD;
i = end;
last_pos = i + 1;
break;
}
default:
throw InternalException("unrecognized tokenize state");
}
}
// finished processing - check the final state
switch (state) {
case TokenizeState::STRING_LITERAL:
last_pos++;
break;
case TokenizeState::SINGLE_LINE_COMMENT:
case TokenizeState::MULTI_LINE_COMMENT:
// no suggestions in comments
return false;
default:
break;
}
string last_word = sql.substr(last_pos, sql.size() - last_pos);
OnLastToken(state, std::move(last_word), last_pos);
return true;
}
void BaseTokenizer::OnStatementEnd(idx_t pos) {
	// A statement terminator was reached - discard the tokens collected so far,
	// since autocomplete only needs the tokens of the statement currently being typed.
	// The terminator position is unused by this default implementation.
	tokens.clear();
}
} // namespace duckdb

View File

@@ -0,0 +1,12 @@
# Build the PEG transformer sources as a unity object library so their object
# files can be folded directly into the autocomplete extension targets.
add_library_unity(
duckdb_peg_transformer
OBJECT
peg_transformer.cpp
peg_transformer_factory.cpp
transform_common.cpp
transform_expression.cpp
transform_set.cpp
transform_use.cpp)
# Append the compiled objects to the extension's source list in the parent
# scope so both the static and loadable extension builds pick them up.
set(AUTOCOMPLETE_EXTENSION_FILES
${AUTOCOMPLETE_EXTENSION_FILES} $<TARGET_OBJECTS:duckdb_peg_transformer>
PARENT_SCOPE)

View File

@@ -0,0 +1,47 @@
#include "transformer/peg_transformer.hpp"
#include "duckdb/parser/statement/set_statement.hpp"
#include "duckdb/common/string_util.hpp"
namespace duckdb {
void PEGTransformer::ParamTypeCheck(PreparedParamType last_type, PreparedParamType new_type) {
	// Mixing positional/auto-increment and named parameters within one statement is
	// not supported: once a parameter style has been seen, all later parameters must
	// use the same style.
	if (last_type == PreparedParamType::INVALID) {
		// no parameter has been seen yet - any style is acceptable
		return;
	}
	const bool last_is_named = last_type == PreparedParamType::NAMED;
	const bool new_is_named = new_type == PreparedParamType::NAMED;
	if (last_is_named != new_is_named) {
		throw NotImplementedException("Mixing named and positional parameters is not supported yet");
	}
}
bool PEGTransformer::GetParam(const string &identifier, idx_t &index, PreparedParamType type) {
	// Look up a previously registered named parameter. "index" is only written on
	// success; returns whether the identifier was found.
	ParamTypeCheck(last_param_type, type);
	const auto match = named_parameter_map.find(identifier);
	const bool known = match != named_parameter_map.end();
	if (known) {
		index = match->second;
	}
	return known;
}
void PEGTransformer::SetParam(const string &identifier, idx_t index, PreparedParamType type) {
	// Register a named parameter under "identifier". The caller must ensure the
	// identifier has not been registered before.
	ParamTypeCheck(last_param_type, type);
	last_param_type = type;
	D_ASSERT(named_parameter_map.find(identifier) == named_parameter_map.end());
	named_parameter_map.emplace(identifier, index);
}
void PEGTransformer::ClearParameters() {
	// Reset prepared-statement parameter tracking so the transformer can be reused
	// for a new statement.
	// NOTE(review): last_param_type is not reset here - confirm whether parameter
	// style should carry over between statements.
	prepared_statement_parameter_index = 0;
	named_parameter_map.clear();
}
} // namespace duckdb

View File

@@ -0,0 +1,116 @@
#include "transformer/peg_transformer.hpp"
#include "matcher.hpp"
#include "duckdb/common/to_string.hpp"
#include "duckdb/parser/sql_statement.hpp"
#include "duckdb/parser/tableref/showref.hpp"
namespace duckdb {
unique_ptr<SQLStatement> PEGTransformerFactory::TransformStatement(PEGTransformer &transformer,
                                                                   optional_ptr<ParseResult> parse_result) {
	// Statement is a list whose first child is a choice over the concrete statement
	// rules; dispatch transformation to whichever alternative matched.
	auto &statement_choice = parse_result->Cast<ListParseResult>().Child<ChoiceParseResult>(0);
	return transformer.Transform<unique_ptr<SQLStatement>>(statement_choice.result);
}
unique_ptr<SQLStatement> PEGTransformerFactory::Transform(vector<MatcherToken> &tokens, const char *root_rule) {
	// Match the token stream against the root grammar rule, then transform the
	// resulting parse tree into a SQLStatement.
	vector<MatcherSuggestion> suggestions;
	ParseResultAllocator parse_result_allocator;
	MatchState state(tokens, suggestions, parse_result_allocator);
	MatcherAllocator allocator;
	auto &matcher = Matcher::RootMatcher(allocator);
	auto match_result = matcher.MatchParseResult(state);
	if (match_result == nullptr || state.token_index < state.tokens.size()) {
		// TODO(dtenwolde) add error handling
		// parsing failed, or the matcher did not consume the full token stream -
		// build a numbered token listing for the error message
		string token_list;
		for (idx_t i = 0; i < tokens.size(); i++) {
			if (!token_list.empty()) {
				token_list += "\n";
			}
			if (i < 10) {
				token_list += " ";
			}
			token_list += to_string(i) + ":" + tokens[i].text;
		}
		// guard against indexing past the end: when match_result is null the matcher
		// may have consumed every token before failing
		string current_token =
		    state.token_index < tokens.size() ? tokens[state.token_index].text : "(end of input)";
		throw ParserException("Failed to parse query - did not consume all tokens (got to token %d - %s)\nTokens:\n%s",
		                      state.token_index, current_token, token_list);
	}
	match_result->name = root_rule;
	ArenaAllocator transformer_allocator(Allocator::DefaultAllocator());
	PEGTransformerState transformer_state(tokens);
	auto &factory = GetInstance();
	PEGTransformer transformer(transformer_allocator, transformer_state, factory.sql_transform_functions,
	                           factory.parser.rules, factory.enum_mappings);
	// transform exactly once: the original transformed twice and discarded the first
	// result, duplicating all transformation work (and any side effects)
	return transformer.Transform<unique_ptr<SQLStatement>>(match_result);
}
// Registers a transform function under its grammar rule name: the macro stringifies
// the function name and strips the leading "Transform" prefix (9 characters) so
// e.g. TransformUseStatement is registered for rule "UseStatement".
#define REGISTER_TRANSFORM(FUNCTION) Register(string(#FUNCTION).substr(9), &FUNCTION)
PEGTransformerFactory &PEGTransformerFactory::GetInstance() {
	// Meyers singleton: constructed on first use, thread-safe since C++11.
	static PEGTransformerFactory instance;
	return instance;
}
PEGTransformerFactory::PEGTransformerFactory() {
	// Populate the rule-name -> transform-function table and the enum mappings used
	// by TransformEnum. Registrations are grouped by the grammar file the rule
	// originates from.
	REGISTER_TRANSFORM(TransformStatement);
	// common.gram
	REGISTER_TRANSFORM(TransformNumberLiteral);
	REGISTER_TRANSFORM(TransformStringLiteral);
	// expression.gram
	REGISTER_TRANSFORM(TransformBaseExpression);
	REGISTER_TRANSFORM(TransformExpression);
	REGISTER_TRANSFORM(TransformLiteralExpression);
	REGISTER_TRANSFORM(TransformSingleExpression);
	REGISTER_TRANSFORM(TransformConstantLiteral);
	// use.gram
	REGISTER_TRANSFORM(TransformUseStatement);
	REGISTER_TRANSFORM(TransformUseTarget);
	// set.gram
	REGISTER_TRANSFORM(TransformResetStatement);
	REGISTER_TRANSFORM(TransformSetAssignment);
	REGISTER_TRANSFORM(TransformSetSetting);
	REGISTER_TRANSFORM(TransformSetStatement);
	REGISTER_TRANSFORM(TransformSetTimeZone);
	REGISTER_TRANSFORM(TransformSetVariable);
	REGISTER_TRANSFORM(TransformStandardAssignment);
	REGISTER_TRANSFORM(TransformVariableList);
	// scope keywords map onto SetScope values
	RegisterEnum<SetScope>("LocalScope", SetScope::LOCAL);
	RegisterEnum<SetScope>("GlobalScope", SetScope::GLOBAL);
	RegisterEnum<SetScope>("SessionScope", SetScope::SESSION);
	RegisterEnum<SetScope>("VariableScope", SetScope::VARIABLE);
	// constant keyword literals map onto concrete Values
	RegisterEnum<Value>("FalseLiteral", Value(false));
	RegisterEnum<Value>("TrueLiteral", Value(true));
	RegisterEnum<Value>("NullLiteral", Value());
}
vector<optional_ptr<ParseResult>>
PEGTransformerFactory::ExtractParseResultsFromList(optional_ptr<ParseResult> parse_result) {
	// List(D) <- D (',' D)* ','?
	// Flattens a comma-separated list into the parse results of its D elements.
	vector<optional_ptr<ParseResult>> result;
	auto &list_pr = parse_result->Cast<ListParseResult>();
	result.push_back(list_pr.GetChild(0));
	// bind by reference - the original copied the optional/repeat nodes, which is
	// both wasteful and inconsistent with the rest of the transformer code
	auto &opt_child = list_pr.Child<OptionalParseResult>(1);
	if (opt_child.HasResult()) {
		auto &repeat_result = opt_child.optional_result->Cast<RepeatParseResult>();
		for (auto &child : repeat_result.children) {
			// each repeat child is (',' D) - take the D at index 1
			auto &list_child = child->Cast<ListParseResult>();
			result.push_back(list_child.GetChild(1));
		}
	}
	return result;
}
} // namespace duckdb

View File

@@ -0,0 +1,82 @@
#include "duckdb/common/operator/cast_operators.hpp"
#include "duckdb/common/types/decimal.hpp"
#include "transformer/peg_transformer.hpp"
namespace duckdb {
// NumberLiteral <- < [+-]?[0-9]*([.][0-9]*)? >
unique_ptr<ParsedExpression> PEGTransformerFactory::TransformNumberLiteral(PEGTransformer &transformer,
                                                                           optional_ptr<ParseResult> parse_result) {
	// bind by reference - str_val below points into the parse result's number string,
	// so copying the node here (as the original did with a by-value "auto") is
	// unnecessary and inconsistent with the other transform functions
	auto &literal_pr = parse_result->Cast<NumberParseResult>();
	string_t str_val(literal_pr.number);
	// single scan to decide which numeric type to attempt
	bool try_cast_as_integer = true;
	bool try_cast_as_decimal = true;
	optional_idx decimal_position = optional_idx::Invalid();
	idx_t num_underscores = 0;
	idx_t num_integer_underscores = 0;
	for (idx_t i = 0; i < str_val.GetSize(); i++) {
		if (literal_pr.number[i] == '.') {
			// decimal point: cast as either decimal or double
			try_cast_as_integer = false;
			decimal_position = i;
		}
		if (literal_pr.number[i] == 'e' || literal_pr.number[i] == 'E') {
			// found exponent, cast as double
			try_cast_as_integer = false;
			try_cast_as_decimal = false;
		}
		if (literal_pr.number[i] == '_') {
			// underscores are digit separators (e.g. 1_000); they do not count
			// towards the decimal width/scale
			num_underscores++;
			if (!decimal_position.IsValid()) {
				num_integer_underscores++;
			}
		}
	}
	if (try_cast_as_integer) {
		// try increasingly wide integer types: BIGINT -> HUGEINT -> UHUGEINT
		int64_t bigint_value;
		if (TryCast::Operation<string_t, int64_t>(str_val, bigint_value)) {
			return make_uniq<ConstantExpression>(Value::BIGINT(bigint_value));
		}
		hugeint_t hugeint_value;
		if (TryCast::Operation<string_t, hugeint_t>(str_val, hugeint_value)) {
			return make_uniq<ConstantExpression>(Value::HUGEINT(hugeint_value));
		}
		uhugeint_t uhugeint_value;
		if (TryCast::Operation<string_t, uhugeint_t>(str_val, uhugeint_value)) {
			return make_uniq<ConstantExpression>(Value::UHUGEINT(uhugeint_value));
		}
	}
	// the decimal point (and an optional leading '-') occupy characters that do not
	// contribute to the decimal width
	idx_t decimal_offset = literal_pr.number[0] == '-' ? 3 : 2;
	if (try_cast_as_decimal && decimal_position.IsValid() &&
	    str_val.GetSize() - num_underscores < Decimal::MAX_WIDTH_DECIMAL + decimal_offset) {
		// figure out the width/scale based on the decimal position
		auto width = NumericCast<uint8_t>(str_val.GetSize() - 1 - num_underscores);
		auto scale = NumericCast<uint8_t>(width - decimal_position.GetIndex() + num_integer_underscores);
		if (literal_pr.number[0] == '-') {
			width--;
		}
		if (width <= Decimal::MAX_WIDTH_DECIMAL) {
			// we can cast the value as a decimal
			Value val = Value(str_val);
			val = val.DefaultCastAs(LogicalType::DECIMAL(width, scale));
			return make_uniq<ConstantExpression>(std::move(val));
		}
	}
	// fallback: there is an exponent, or the value is too wide for integer/decimal
	double dbl_value = Cast::Operation<string_t, double>(str_val);
	return make_uniq<ConstantExpression>(Value::DOUBLE(dbl_value));
}
// StringLiteral <- '\'' [^\']* '\''
string PEGTransformerFactory::TransformStringLiteral(PEGTransformer &transformer,
                                                     optional_ptr<ParseResult> parse_result) {
	// The parse result already holds the unquoted string contents; return them directly.
	return parse_result->Cast<StringLiteralParseResult>().result;
}
} // namespace duckdb

View File

@@ -0,0 +1,118 @@
#include "transformer/peg_transformer.hpp"
#include "duckdb/parser/expression/comparison_expression.hpp"
#include "duckdb/parser/expression/between_expression.hpp"
#include "duckdb/parser/expression/operator_expression.hpp"
#include "duckdb/parser/expression/cast_expression.hpp"
namespace duckdb {
// BaseExpression <- SingleExpression Indirection*
unique_ptr<ParsedExpression> PEGTransformerFactory::TransformBaseExpression(PEGTransformer &transformer,
                                                                            optional_ptr<ParseResult> parse_result) {
	auto &list_pr = parse_result->Cast<ListParseResult>();
	auto expr = transformer.Transform<unique_ptr<ParsedExpression>>(list_pr.Child<ListParseResult>(0));
	// bind by reference - the original copied the optional/repeat nodes (and each
	// child per iteration), which is wasteful and inconsistent with the rest of
	// this file
	auto &indirection_opt = list_pr.Child<OptionalParseResult>(1);
	if (indirection_opt.HasResult()) {
		auto &indirection_repeat = indirection_opt.optional_result->Cast<RepeatParseResult>();
		for (auto &child : indirection_repeat.children) {
			auto indirection_expr = transformer.Transform<unique_ptr<ParsedExpression>>(child);
			// splice the accumulated expression into the indirection, depending on
			// what kind of indirection matched
			if (indirection_expr->GetExpressionClass() == ExpressionClass::CAST) {
				// e.g. "expr::TYPE" - expr becomes the cast child
				auto cast_expr = unique_ptr_cast<ParsedExpression, CastExpression>(std::move(indirection_expr));
				cast_expr->child = std::move(expr);
				expr = std::move(cast_expr);
			} else if (indirection_expr->GetExpressionClass() == ExpressionClass::OPERATOR) {
				// expr becomes the first operand of the operator
				auto operator_expr = unique_ptr_cast<ParsedExpression, OperatorExpression>(std::move(indirection_expr));
				operator_expr->children.insert(operator_expr->children.begin(), std::move(expr));
				expr = std::move(operator_expr);
			} else if (indirection_expr->GetExpressionClass() == ExpressionClass::FUNCTION) {
				// expr becomes the last argument of the function
				auto function_expr = unique_ptr_cast<ParsedExpression, FunctionExpression>(std::move(indirection_expr));
				function_expr->children.push_back(std::move(expr));
				expr = std::move(function_expr);
			}
			// NOTE(review): other expression classes leave "expr" unchanged and drop
			// the indirection - confirm no other classes can be produced here
		}
	}
	return expr;
}
// Expression <- BaseExpression RecursiveExpression*
unique_ptr<ParsedExpression> PEGTransformerFactory::TransformExpression(PEGTransformer &transformer,
                                                                        optional_ptr<ParseResult> parse_result) {
	auto &list_pr = parse_result->Cast<ListParseResult>();
	auto &base_expr_pr = list_pr.Child<ListParseResult>(0);
	unique_ptr<ParsedExpression> base_expr = transformer.Transform<unique_ptr<ParsedExpression>>(base_expr_pr);
	auto &indirection_pr = list_pr.Child<OptionalParseResult>(1);
	if (indirection_pr.HasResult()) {
		// bind by reference - the original copied the RepeatParseResult node and
		// declared an "expr_children" vector that was never used; both removed
		auto &repeat_expression_pr = indirection_pr.optional_result->Cast<RepeatParseResult>();
		for (auto &child : repeat_expression_pr.children) {
			auto expr = transformer.Transform<unique_ptr<ParsedExpression>>(child);
			// fold the accumulated expression into the recursive tail, depending on
			// what kind of expression matched
			if (expr->expression_class == ExpressionClass::COMPARISON) {
				// base_expr becomes the left-hand side of the comparison
				auto compare_expr = unique_ptr_cast<ParsedExpression, ComparisonExpression>(std::move(expr));
				compare_expr->left = std::move(base_expr);
				base_expr = std::move(compare_expr);
			} else if (expr->expression_class == ExpressionClass::FUNCTION) {
				// base_expr becomes the first argument of the function
				auto func_expr = unique_ptr_cast<ParsedExpression, FunctionExpression>(std::move(expr));
				func_expr->children.insert(func_expr->children.begin(), std::move(base_expr));
				base_expr = std::move(func_expr);
			} else if (expr->expression_class == ExpressionClass::LAMBDA) {
				// base_expr becomes the lambda's left-hand side
				auto lambda_expr = unique_ptr_cast<ParsedExpression, LambdaExpression>(std::move(expr));
				lambda_expr->lhs = std::move(base_expr);
				base_expr = std::move(lambda_expr);
			} else if (expr->expression_class == ExpressionClass::BETWEEN) {
				// base_expr becomes the BETWEEN input
				auto between_expr = unique_ptr_cast<ParsedExpression, BetweenExpression>(std::move(expr));
				between_expr->input = std::move(base_expr);
				base_expr = std::move(between_expr);
			} else {
				// fallback: wrap both in an operator expression of the tail's type
				base_expr = make_uniq<OperatorExpression>(expr->type, std::move(base_expr), std::move(expr));
			}
		}
	}
	return base_expr;
}
// LiteralExpression <- StringLiteral / NumberLiteral / 'NULL' / 'TRUE' / 'FALSE'
unique_ptr<ParsedExpression> PEGTransformerFactory::TransformLiteralExpression(PEGTransformer &transformer,
                                                                               optional_ptr<ParseResult> parse_result) {
	auto &matched = parse_result->Cast<ListParseResult>().Child<ChoiceParseResult>(0);
	// string literals transform to a plain string - wrap it in a constant expression;
	// every other alternative already yields an expression
	if (matched.name == "StringLiteral") {
		auto str = transformer.Transform<string>(matched.result);
		return make_uniq<ConstantExpression>(Value(str));
	}
	return transformer.Transform<unique_ptr<ParsedExpression>>(matched.result);
}
unique_ptr<ParsedExpression> PEGTransformerFactory::TransformConstantLiteral(PEGTransformer &transformer,
                                                                             optional_ptr<ParseResult> parse_result) {
	// Keyword constants (NULL/TRUE/FALSE) map straight onto a registered Value.
	auto &constant_choice = parse_result->Cast<ListParseResult>().Child<ChoiceParseResult>(0);
	auto constant_value = transformer.TransformEnum<Value>(constant_choice.result);
	return make_uniq<ConstantExpression>(std::move(constant_value));
}
// SingleExpression <- LiteralExpression /
//                     Parameter /
//                     SubqueryExpression /
//                     SpecialFunctionExpression /
//                     ParenthesisExpression /
//                     IntervalLiteral /
//                     TypeLiteral /
//                     CaseExpression /
//                     StarExpression /
//                     CastExpression /
//                     GroupingExpression /
//                     MapExpression /
//                     FunctionExpression /
//                     ColumnReference /
//                     PrefixExpression /
//                     ListComprehensionExpression /
//                     ListExpression /
//                     StructExpression /
//                     PositionalExpression /
//                     DefaultExpression
unique_ptr<ParsedExpression> PEGTransformerFactory::TransformSingleExpression(PEGTransformer &transformer,
                                                                              optional_ptr<ParseResult> parse_result) {
	// Dispatch to whichever expression alternative the choice matched.
	auto &alternative = parse_result->Cast<ListParseResult>().Child<ChoiceParseResult>(0);
	return transformer.Transform<unique_ptr<ParsedExpression>>(alternative.result);
}
} // namespace duckdb

View File

@@ -0,0 +1,93 @@
#include "transformer/peg_transformer.hpp"
namespace duckdb {
// ResetStatement <- 'RESET' (SetVariable / SetSetting)
unique_ptr<SQLStatement> PEGTransformerFactory::TransformResetStatement(PEGTransformer &transformer,
                                                                        optional_ptr<ParseResult> parse_result) {
	// child 0 is the RESET keyword; child 1 holds the (SetVariable / SetSetting) choice
	auto &target_pr = parse_result->Cast<ListParseResult>().Child<ListParseResult>(1);
	auto &target_choice = target_pr.Child<ChoiceParseResult>(0);
	auto setting = transformer.Transform<SettingInfo>(target_choice.result);
	return make_uniq<ResetVariableStatement>(setting.name, setting.scope);
}
// SetAssignment <- VariableAssign VariableList
vector<unique_ptr<ParsedExpression>>
PEGTransformerFactory::TransformSetAssignment(PEGTransformer &transformer, optional_ptr<ParseResult> parse_result) {
	// the assignment operator itself carries no information; transform child 1,
	// the list of assigned values
	auto &assignment_pr = parse_result->Cast<ListParseResult>();
	return transformer.Transform<vector<unique_ptr<ParsedExpression>>>(assignment_pr, 1);
}
// SetSetting <- SettingScope? SettingName
SettingInfo PEGTransformerFactory::TransformSetSetting(PEGTransformer &transformer,
                                                       optional_ptr<ParseResult> parse_result) {
	auto &list_pr = parse_result->Cast<ListParseResult>();
	auto &optional_scope_pr = list_pr.Child<OptionalParseResult>(0);
	SettingInfo result;
	result.name = list_pr.Child<IdentifierParseResult>(1).identifier;
	// use HasResult() (consistent with the rest of the file) and bind the scope
	// nodes by reference instead of copying them
	if (optional_scope_pr.HasResult()) {
		auto &setting_scope = optional_scope_pr.optional_result->Cast<ListParseResult>();
		auto &scope_value = setting_scope.Child<ChoiceParseResult>(0);
		result.scope = transformer.TransformEnum<SetScope>(scope_value);
	}
	// if no scope was specified, result.scope keeps SettingInfo's default
	return result;
}
// SetStatement <- 'SET' (StandardAssignment / SetTimeZone)
unique_ptr<SQLStatement> PEGTransformerFactory::TransformSetStatement(PEGTransformer &transformer,
                                                                      optional_ptr<ParseResult> parse_result) {
	// child 0 is the SET keyword; child 1 holds the assignment-or-timezone choice
	auto &body_pr = parse_result->Cast<ListParseResult>().Child<ListParseResult>(1);
	auto &alternative = body_pr.Child<ChoiceParseResult>(0);
	return transformer.Transform<unique_ptr<SetVariableStatement>>(alternative);
}
// SetTimeZone <- 'TIME' 'ZONE' Expression
// Stub: SET TIME ZONE is matched by the grammar but not yet transformed.
unique_ptr<SQLStatement> PEGTransformerFactory::TransformSetTimeZone(PEGTransformer &transformer,
                                                                     optional_ptr<ParseResult> parse_result) {
	throw NotImplementedException("Rule 'SetTimeZone' has not been implemented yet");
}
// SetVariable <- VariableScope Identifier
SettingInfo PEGTransformerFactory::TransformSetVariable(PEGTransformer &transformer,
                                                        optional_ptr<ParseResult> parse_result) {
	// child 0 carries the scope keyword, child 1 the variable name
	auto &variable_pr = parse_result->Cast<ListParseResult>();
	SettingInfo info;
	info.scope = transformer.TransformEnum<SetScope>(variable_pr.Child<ListParseResult>(0));
	info.name = variable_pr.Child<IdentifierParseResult>(1).identifier;
	return info;
}
// StandardAssignment <- (SetVariable / SetSetting) SetAssignment
unique_ptr<SetVariableStatement>
PEGTransformerFactory::TransformStandardAssignment(PEGTransformer &transformer,
                                                   optional_ptr<ParseResult> parse_result) {
	auto &choice_pr = parse_result->Cast<ChoiceParseResult>();
	auto &list_pr = choice_pr.result->Cast<ListParseResult>();
	auto &first_sub_rule = list_pr.Child<ListParseResult>(0);
	auto &setting_or_var_pr = first_sub_rule.Child<ChoiceParseResult>(0);
	SettingInfo setting_info = transformer.Transform<SettingInfo>(setting_or_var_pr.result);
	auto &set_assignment_pr = list_pr.Child<ListParseResult>(1);
	auto values = transformer.Transform<vector<unique_ptr<ParsedExpression>>>(set_assignment_pr);
	// SET takes exactly one value; the original indexed values[0] unchecked, which
	// silently dropped extra values (e.g. "SET threads=1,2") and would be undefined
	// behavior on an empty list
	if (values.size() != 1) {
		throw ParserException("SET expects a single value, got %d values", values.size());
	}
	return make_uniq<SetVariableStatement>(setting_info.name, std::move(values[0]), setting_info.scope);
}
// VariableList <- List(Expression)
vector<unique_ptr<ParsedExpression>>
PEGTransformerFactory::TransformVariableList(PEGTransformer &transformer, optional_ptr<ParseResult> parse_result) {
	// flatten the comma-separated list, then transform each element into an expression
	auto &variable_list_pr = parse_result->Cast<ListParseResult>();
	auto entries = ExtractParseResultsFromList(variable_list_pr.Child<ListParseResult>(0));
	vector<unique_ptr<ParsedExpression>> expressions;
	expressions.reserve(entries.size());
	for (auto &entry : entries) {
		expressions.push_back(transformer.Transform<unique_ptr<ParsedExpression>>(entry));
	}
	return expressions;
}
} // namespace duckdb

View File

@@ -0,0 +1,51 @@
#include "transformer/peg_transformer.hpp"
#include "duckdb/parser/sql_statement.hpp"
namespace duckdb {
// UseStatement <- 'USE' UseTarget
unique_ptr<SQLStatement> PEGTransformerFactory::TransformUseStatement(PEGTransformer &transformer,
                                                                      optional_ptr<ParseResult> parse_result) {
	auto &use_pr = parse_result->Cast<ListParseResult>();
	auto qualified_name = transformer.Transform<QualifiedName>(use_pr, 1);
	// USE is sugar for SET schema='...': build the (optionally schema-qualified)
	// target string from the transformed name
	string target;
	if (IsInvalidSchema(qualified_name.schema)) {
		target = qualified_name.name;
	} else {
		target = qualified_name.schema + "." + qualified_name.name;
	}
	auto target_expr = make_uniq<ConstantExpression>(Value(target));
	return make_uniq<SetVariableStatement>("schema", std::move(target_expr), SetScope::AUTOMATIC);
}
// UseTarget <- (CatalogName '.' ReservedSchemaName) / SchemaName / CatalogName
QualifiedName PEGTransformerFactory::TransformUseTarget(PEGTransformer &transformer,
                                                        optional_ptr<ParseResult> parse_result) {
	auto &list_pr = parse_result->Cast<ListParseResult>();
	auto &choice_pr = list_pr.Child<ChoiceParseResult>(0);
	QualifiedName result;
	if (choice_pr.result->type == ParseResultType::LIST) {
		// qualified form: catalog '.' schema - collect the identifier children
		vector<string> entries;
		// bind by reference - the original copied the whole ListParseResult node
		auto &use_target_children = choice_pr.result->Cast<ListParseResult>();
		for (auto &child : use_target_children.GetChildren()) {
			if (child->type == ParseResultType::IDENTIFIER) {
				entries.push_back(child->Cast<IdentifierParseResult>().identifier);
			}
		}
		if (entries.size() == 2) {
			result.catalog = INVALID_CATALOG;
			result.schema = entries[0];
			result.name = entries[1];
		} else {
			throw InternalException("Invalid amount of entries for use statement");
		}
	} else if (choice_pr.result->type == ParseResultType::IDENTIFIER) {
		// unqualified form: a single schema or catalog name
		result.name = choice_pr.result->Cast<IdentifierParseResult>().identifier;
	} else {
		throw InternalException("Unexpected parse result type encountered in UseTarget");
	}
	return result;
}
} // namespace duckdb