Files
email-tracker/external/duckdb/test/helpers/test_helpers.cpp
2025-10-24 19:21:19 -05:00

384 lines
11 KiB
C++

// #define CATCH_CONFIG_RUNNER
#include "catch.hpp"
#include "duckdb/common/file_system.hpp"
#include "duckdb/common/value_operations/value_operations.hpp"
#include "compare_result.hpp"
#include "duckdb/main/query_result.hpp"
#include "test_helpers.hpp"
#include "duckdb/parser/parsed_data/copy_info.hpp"
#include "duckdb/main/client_context.hpp"
#include "duckdb/execution/operator/csv_scanner/string_value_scanner.hpp"
#include "duckdb/common/case_insensitive_map.hpp"
#include "test_config.hpp"
#include "pid.hpp"
#include "duckdb/function/table/read_csv.hpp"
#include "duckdb/storage/storage_info.hpp"
#include <cmath>
#include <fstream>
using namespace std;
//! Default name of the test directory; GetTestDirectory returns it when no custom directory has been set
#define TESTING_DIRECTORY_NAME "duckdb_unittest_tempdir"
namespace duckdb {
//! Test directory chosen through SetTestDirectory; while empty, TESTING_DIRECTORY_NAME is used instead
static string custom_test_directory;
//! Names registered through AddRequire and queried by IsRequired (stored in a case_insensitive_set_t)
static case_insensitive_set_t required_requires;
//! When false, ClearTestDirectory returns without removing anything; changed through SetDeleteTestPath
static bool delete_test_path = true;
//! Returns true if the query result carries no error.
//! When the result has an error, its message is printed to stderr and false is returned.
bool NO_FAIL(QueryResult &result) {
	const bool failed = result.HasError();
	if (failed) {
		fprintf(stderr, "Query failed with message: %s\n", result.GetError().c_str());
	}
	return !failed;
}
//! Overload taking ownership of the result; forwards to NO_FAIL(QueryResult &)
bool NO_FAIL(duckdb::unique_ptr<QueryResult> result) {
	QueryResult &query_result = *result;
	return NO_FAIL(query_result);
}
//! Removes the directory at `path` if it exists, using a local file system.
//! Any exception raised while checking or removing is swallowed (best-effort cleanup).
void TestDeleteDirectory(string path) {
	auto local_fs = FileSystem::CreateLocal();
	try {
		if (!local_fs->DirectoryExists(path)) {
			return;
		}
		local_fs->RemoveDirectory(path);
	} catch (...) {
		// cleanup is best-effort: failures are intentionally ignored
	}
}
//! Attempts to remove the file at `path` through a local file system.
//! Any exception raised by the removal is swallowed (best-effort cleanup).
void TestDeleteFile(string path) {
	auto local_fs = FileSystem::CreateLocal();
	try {
		local_fs->TryRemoveFile(path);
	} catch (...) {
		// cleanup is best-effort: failures are intentionally ignored
	}
}
//! Sets the working directory to `path` via FileSystem::SetWorkingDirectory
void TestChangeDirectory(string path) {
// set the base path for the tests
FileSystem::SetWorkingDirectory(path);
}
//! Returns the working directory as reported by FileSystem::GetWorkingDirectory
string TestGetCurrentDirectory() {
return FileSystem::GetWorkingDirectory();
}
//! Deletes the database file at `path` together with its "<path>.wal" companion file.
//! Does nothing when a custom test directory has been configured.
void DeleteDatabase(string path) {
	const bool has_custom_directory = !custom_test_directory.empty();
	if (has_custom_directory) {
		return;
	}
	TestDeleteFile(path);
	const string wal_path = path + ".wal";
	TestDeleteFile(wal_path);
}
//! Creates the directory `path` through a local file system
void TestCreateDirectory(string path) {
	FileSystem::CreateLocal()->CreateDirectory(path);
}
//! Joins `path1` and `path2` using the local file system's JoinPath and returns the result
string TestJoinPath(string path1, string path2) {
	return FileSystem::CreateLocal()->JoinPath(path1, path2);
}
//! Overrides the test directory; an empty `path` restores the default behaviour of GetTestDirectory
void SetTestDirectory(string path) {
	custom_test_directory.assign(path);
}
//! Registers `require` in the set of required names consulted by IsRequired
void AddRequire(string require) {
	required_requires.emplace(require);
}
//! Returns true if `require` was previously registered through AddRequire
bool IsRequired(string require) {
	return required_requires.find(require) != required_requires.end();
}
//! Returns the custom test directory if one was set, and TESTING_DIRECTORY_NAME otherwise
string GetTestDirectory() {
	return custom_test_directory.empty() ? string(TESTING_DIRECTORY_NAME) : custom_test_directory;
}
//! Returns the directory in which the current test run stores its files, creating it when missing.
//! With the default test directory a sub-directory named after the process id is used;
//! a user-specified test directory is used as-is.
string TestDirectoryPath() {
	auto local_fs = FileSystem::CreateLocal();
	// creates `dir` unless it is already present
	auto ensure_directory = [&](const string &dir) {
		if (!local_fs->DirectoryExists(dir)) {
			local_fs->CreateDirectory(dir);
		}
	};

	const auto base_directory = GetTestDirectory();
	ensure_directory(base_directory);

	// add the PID to the test directory - but only if it was not specified explicitly by the user
	string result = custom_test_directory.empty() ? local_fs->JoinPath(base_directory, to_string(getpid()))
	                                              : base_directory;
	ensure_directory(result);
	return result;
}
//! Sets the flag that ClearTestDirectory checks (via DeleteTestPath) before removing anything
void SetDeleteTestPath(bool delete_path) {
delete_test_path = delete_path;
}
//! Returns the flag last set through SetDeleteTestPath (true by default)
bool DeleteTestPath() {
return delete_test_path;
}
void ClearTestDirectory() {
if (!DeleteTestPath()) {
return;
}
duckdb::unique_ptr<FileSystem> fs = FileSystem::CreateLocal();
auto test_dir = TestDirectoryPath();
// try to clear any files we created in the test directory
fs->ListFiles(test_dir, [&](const string &file, bool is_dir) {
auto full_path = fs->JoinPath(test_dir, file);
try {
if (is_dir) {
fs->RemoveDirectory(full_path);
} else {
fs->RemoveFile(full_path);
}
} catch (...) {
// skip
}
});
}
//! Returns `suffix` joined onto the test directory path (which is created when missing)
string TestCreatePath(string suffix) {
	const auto base = TestDirectoryPath();
	return TestJoinPath(base, suffix);
}
//! Returns true if `error` contains at least one of the strings in `internal_error_messages`
bool TestIsInternalError(unordered_set<string> &internal_error_messages, const string &error) {
	bool matched = false;
	for (auto it = internal_error_messages.begin(); !matched && it != internal_error_messages.end(); ++it) {
		matched = StringUtil::Contains(error, *it);
	}
	return matched;
}
//! Builds the DBConfig used by tests from the global TestConfiguration.
//! Checkpoint behaviour and free-block trimming depend on the DUCKDB_ALTERNATIVE_VERIFY and
//! DUCKDB_RUN_SLOW_VERIFIERS compile-time switches; the remaining options are copied from the
//! test configuration when they are set there.
unique_ptr<DBConfig> GetTestConfig() {
	auto &settings = TestConfiguration::Get();
	auto config = make_uniq<DBConfig>();
	auto &options = config->options;

#ifndef DUCKDB_ALTERNATIVE_VERIFY
	options.checkpoint_wal_size = settings.GetCheckpointWALSize();
	options.checkpoint_on_shutdown = settings.GetCheckpointOnShutdown();
#else
	options.checkpoint_on_shutdown = false;
#endif
	options.abort_on_wal_failure = true;
#ifdef DUCKDB_RUN_SLOW_VERIFIERS
	// This mode isn't slow, but we want test coverage both when it's enabled
	// and when it's not, so we enable only when DUCKDB_RUN_SLOW_VERIFIERS is set.
	options.trim_free_blocks = true;
#endif
	options.allow_unsigned_extensions = true;

	// optional overrides: only applied when the test configuration provides a value
	auto version = settings.GetStorageVersion();
	if (!version.empty()) {
		options.serialization_compatibility = SerializationCompatibility::FromString(version);
	}
	auto thread_limit = settings.GetMaxThreads();
	if (thread_limit.IsValid()) {
		options.maximum_threads = thread_limit.GetIndex();
	}
	auto alloc_size = settings.GetBlockAllocSize();
	if (alloc_size.IsValid()) {
		// validate the requested size before storing it in the config
		Storage::VerifyBlockAllocSize(alloc_size.GetIndex());
		options.default_block_alloc_size = alloc_size.GetIndex();
	}

	options.debug_initialize = settings.GetDebugInitialize();
	options.set_variables.emplace("debug_verify_vector", EnumUtil::ToString(settings.GetVectorVerification()));
	return config;
}
//! Checks that column `column_number` of a materialized result starts with the expected `values`.
//! Returns false (printing the result or an error to aid debugging) when:
//!  - the result is a stream result or carries an error,
//!  - the number of column names differs from the number of column types,
//!  - `values` is empty but the result has rows, or the result is empty but `values` is not,
//!  - `column_number` is out of range,
//!  - more values are expected than the result has rows,
//!  - any expected value differs from the result value (two NULLs count as equal).
//! NOTE: rows beyond `values.size()` are not inspected.
bool CHECK_COLUMN(QueryResult &result_, size_t column_number, vector<duckdb::Value> values) {
	if (result_.type == QueryResultType::STREAM_RESULT) {
		fprintf(stderr, "Unexpected stream query result in CHECK_COLUMN\n");
		return false;
	}
	auto &result = (MaterializedQueryResult &)result_;
	if (result.HasError()) {
		fprintf(stderr, "Query failed with message: %s\n", result.GetError().c_str());
		return false;
	}
	if (result.names.size() != result.types.size()) {
		// the number of column names and column types disagree
		result.Print();
		return false;
	}
	const idx_t row_count = result.RowCount();
	if (values.empty()) {
		// no values expected: the result must be empty as well
		if (row_count != 0) {
			result.Print();
			return false;
		}
		return true;
	}
	if (row_count == 0) {
		result.Print();
		return false;
	}
	if (column_number >= result.types.size()) {
		result.Print();
		return false;
	}
	if (values.size() > row_count) {
		// guard against reading past the last row of the result in the loop below
		fprintf(stderr, "Expected %llu values but the result only has %llu rows\n",
		        (unsigned long long)values.size(), (unsigned long long)row_count);
		result.Print();
		return false;
	}
	for (idx_t row_idx = 0; row_idx < values.size(); row_idx++) {
		auto value = result.GetValue(column_number, row_idx);
		// NULL <> NULL, hence special handling
		if (value.IsNull() && values[row_idx].IsNull()) {
			continue;
		}
		if (!Value::DefaultValuesAreEqual(value, values[row_idx])) {
			result.Print();
			return false;
		}
	}
	return true;
}
//! Overload for owned results: a stream result is first materialized in place (replacing `result`),
//! then the check is delegated to CHECK_COLUMN(QueryResult &, ...)
bool CHECK_COLUMN(duckdb::unique_ptr<duckdb::QueryResult> &result, size_t column_number, vector<duckdb::Value> values) {
	const bool is_stream = result->type == QueryResultType::STREAM_RESULT;
	if (is_stream) {
		result = static_cast<StreamQueryResult &>(*result).Materialize();
	}
	QueryResult &materialized = *result;
	return CHECK_COLUMN(materialized, column_number, values);
}
//! Overload for owned materialized results; delegates to CHECK_COLUMN(QueryResult &, ...)
bool CHECK_COLUMN(duckdb::unique_ptr<duckdb::MaterializedQueryResult> &result, size_t column_number,
                  vector<duckdb::Value> values) {
	QueryResult &base_result = *result;
	return CHECK_COLUMN(base_result, column_number, values);
}
//! Compares a materialized query result against the expected `csv` text.
//! Returns an empty string when they match; otherwise returns the query error
//! (also printed to stderr) or the mismatch description produced by compare_result.
string compare_csv(duckdb::QueryResult &result, string csv, bool header) {
	D_ASSERT(result.type == QueryResultType::MATERIALIZED_RESULT);
	auto &query_result = static_cast<MaterializedQueryResult &>(result);
	if (query_result.HasError()) {
		string query_error = query_result.GetError();
		fprintf(stderr, "Query failed with message: %s\n", query_error.c_str());
		return query_error;
	}
	string mismatch;
	const bool matches = compare_result(csv, query_result.Collection(), query_result.types, header, mismatch);
	return matches ? string() : mismatch;
}
//! Compares `collection` against the expected `csv` text using the collection's own types.
//! Returns an empty string on a match and the mismatch description otherwise.
string compare_csv_collection(duckdb::ColumnDataCollection &collection, string csv, bool header) {
	string mismatch;
	const bool matches = compare_result(csv, collection, collection.Types(), header, mismatch);
	return matches ? string() : mismatch;
}
//! Produces a human-readable description of the differences between two chunks.
//! `left` is labelled "Result" and `right` is labelled "Expect".
//! Returns a message when the column counts or row counts differ. Otherwise returns one section per
//! column that differs: equal values are shown as "_", differing values are printed, and columns whose
//! types differ are shown as "..." after the type name. Returns an empty string when nothing differs.
string show_diff(DataChunk &left, DataChunk &right) {
	if (left.ColumnCount() != right.ColumnCount()) {
		return StringUtil::Format("Different column counts: %d vs %d", (int)left.ColumnCount(),
		                          (int)right.ColumnCount());
	}
	if (left.size() != right.size()) {
		return StringUtil::Format("Different sizes: %zu vs %zu", left.size(), right.size());
	}
	string difference;
	for (size_t i = 0; i < left.ColumnCount(); i++) {
		bool has_differences = false;
		auto &left_vector = left.data[i];
		auto &right_vector = right.data[i];
		string left_column = StringUtil::Format("Result\n------\n%s [", left_vector.GetType().ToString().c_str());
		string right_column = StringUtil::Format("Expect\n------\n%s [", right_vector.GetType().ToString().c_str());
		if (left_vector.GetType() == right_vector.GetType()) {
			for (size_t j = 0; j < left.size(); j++) {
				auto left_value = left_vector.GetValue(j);
				auto right_value = right_vector.GetValue(j);
				if (!Value::DefaultValuesAreEqual(left_value, right_value)) {
					left_column += left_value.ToString() + ",";
					right_column += right_value.ToString() + ",";
					has_differences = true;
				} else {
					left_column += "_,";
					right_column += "_,";
				}
			}
		} else {
			// the column types differ: values are not compared, but the mismatch itself must be
			// reported, otherwise the type headers built above would be silently discarded
			left_column += "...";
			right_column += "...";
			has_differences = true;
		}
		left_column += "]\n";
		right_column += "]\n";
		if (has_differences) {
			difference += StringUtil::Format("Difference in column %d:\n", i);
			difference += left_column + "\n" + right_column + "\n";
		}
	}
	return difference;
}
//! Compares a pipe-delimited CSV text with the given ColumnDataCollection.
//! The CSV text is written to "__test_csv_path.csv" inside the test directory, parsed back with the
//! CSV scanner using `sql_types` as column types, and the parsed rows are compared to `collection`.
//! \param csv the expected data as pipe-delimited CSV text (quote and escape character are both '"')
//! \param collection the data to verify
//! \param sql_types the column types used to parse the CSV
//! \param has_header whether the first CSV line is a header
//! \param error_message receives a description when the file cannot be written, the CSV cannot be
//!        parsed, or the contents differ
//! \return true if the parsed CSV equals `collection`, false otherwise
bool compare_result(string csv, ColumnDataCollection &collection, vector<LogicalType> sql_types, bool has_header,
                    string &error_message) {
	D_ASSERT(collection.Count() == 0 || collection.Types().size() == sql_types.size());
	// create the csv on disk
	auto csv_path = TestCreatePath("__test_csv_path.csv");
	{
		ofstream csv_file(csv_path);
		csv_file << csv;
		csv_file.close();
		// failbit is set when opening, writing or closing the file failed
		if (csv_file.fail()) {
			error_message = "Could not write CSV file: " + csv_path;
			return false;
		}
	}
	// set up the CSV reader
	CSVReaderOptions options;
	options.auto_detect = false;
	options.dialect_options.state_machine_options.delimiter = {"|"};
	options.dialect_options.header = has_header;
	options.dialect_options.state_machine_options.quote = '\"';
	options.dialect_options.state_machine_options.escape = '\"';
	options.file_path = csv_path;
	options.dialect_options.num_cols = sql_types.size();
	// set up the intermediate result chunk
	DataChunk parsed_result;
	parsed_result.Initialize(Allocator::DefaultAllocator(), sql_types);
	// the scanner needs a client context, so a local database instance and connection are created
	DuckDB db;
	Connection con(db);
	MultiFileOptions file_options;
	auto scanner_ptr = StringValueScanner::GetCSVScanner(*con.context, options, file_options);
	auto &scanner = *scanner_ptr;
	ColumnDataCollection csv_data_collection(*con.context, sql_types);
	while (!scanner.FinishedIterator()) {
		// parse a chunk from the CSV file
		try {
			parsed_result.Reset();
			scanner.Flush(parsed_result);
		} catch (const std::exception &ex) {
			error_message = "Could not parse CSV: " + string(ex.what());
			return false;
		}
		if (parsed_result.size() == 0) {
			break;
		}
		csv_data_collection.Append(parsed_result);
	}
	// ResultEquals fills error_message itself when the collections differ
	return ColumnDataCollection::ResultEquals(collection, csv_data_collection, error_message);
}
} // namespace duckdb