518 lines
18 KiB
C++
518 lines
18 KiB
C++
#include "catch.hpp"
|
|
#include "duckdb.hpp"
|
|
#include "duckdb/common/common.hpp"
|
|
#include "duckdb/common/local_file_system.hpp"
|
|
#include "duckdb/common/virtual_file_system.hpp"
|
|
#include "duckdb/main/materialized_query_result.hpp"
|
|
#include "test_config.hpp"
|
|
#include "test_helpers.hpp"
|
|
|
|
#include <iostream>
|
|
#include <shared_mutex>
|
|
|
|
using namespace duckdb;
|
|
|
|
bool g_enable_verbose_output = false;
|
|
bool g_enable_info_output = true;
|
|
|
|
#define PRINT_VERBOSE(x) \
|
|
do { \
|
|
if (g_enable_verbose_output) \
|
|
std::cout << x << std::endl; \
|
|
} while (0)
|
|
|
|
#define PRINT_INFO(x) \
|
|
do { \
|
|
if (g_enable_info_output) \
|
|
std::cout << x << std::endl; \
|
|
} while (0)
|
|
|
|
bool ends_with(const std::string &str, const std::string &suffix) {
|
|
// Ensure str is at least as long as suffix
|
|
if (str.length() < suffix.length()) {
|
|
return false;
|
|
}
|
|
|
|
// Compare the ending characters
|
|
return str.compare(str.length() - suffix.length(), suffix.length(), suffix) == 0;
|
|
}
|
|
|
|
class FaultInjectionFileSystem : public duckdb::LocalFileSystem {
|
|
public:
|
|
enum FaultInjectionSite {
|
|
WRITE = 0,
|
|
FSYNC = 1,
|
|
};
|
|
|
|
void Write(duckdb::FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override {
|
|
PRINT_VERBOSE("FS write offset=" << location << " bytes=" << nr_bytes);
|
|
if (is_db_file(handle)) {
|
|
ThrowInjectedFaultIfSet(FaultInjectionSite::WRITE);
|
|
}
|
|
return duckdb::LocalFileSystem::Write(handle, buffer, nr_bytes, location);
|
|
}
|
|
|
|
void FileSync(duckdb::FileHandle &handle) override {
|
|
PRINT_VERBOSE("FS fsync " << handle.GetPath() << " file_size=" << handle.GetFileSize());
|
|
if (is_db_file(handle)) {
|
|
ThrowInjectedFaultIfSet(FaultInjectionSite::FSYNC);
|
|
}
|
|
return duckdb::LocalFileSystem::FileSync(handle);
|
|
}
|
|
|
|
void RemoveFile(const duckdb::string &filename,
|
|
duckdb::optional_ptr<duckdb::FileOpener> opener = nullptr) override {
|
|
PRINT_VERBOSE("FS remove " << filename);
|
|
return duckdb::LocalFileSystem::RemoveFile(filename, opener);
|
|
}
|
|
|
|
void Truncate(duckdb::FileHandle &handle, int64_t new_size) override {
|
|
PRINT_VERBOSE("FS truncate " << handle.GetPath() << " from " << handle.GetFileSize() << " to " << new_size);
|
|
return duckdb::LocalFileSystem::Truncate(handle, new_size);
|
|
}
|
|
|
|
// In linux - trim() is equivalent to zeroing out a range (albeit in a much more efficient manner). Let's
|
|
// reproduce this behavior regardless of whether the current environment supports it.
|
|
bool Trim(duckdb::FileHandle &handle, idx_t offset_bytes, idx_t length_bytes) override {
|
|
PRINT_VERBOSE("FS trim " << handle.GetPath() << " offset=" << offset_bytes << " bytes=" << length_bytes);
|
|
|
|
std::string nulls(length_bytes, '\0');
|
|
duckdb::LocalFileSystem::Write(handle, (void *)nulls.data(), length_bytes, offset_bytes);
|
|
return true;
|
|
}
|
|
|
|
// Will inject a single occurrence of a fault
|
|
void InjectFault(FaultInjectionSite site) {
|
|
std::lock_guard<std::mutex> l(fault_m_);
|
|
// Make sure this is not called twice - as we will drop a fault
|
|
REQUIRE(faults.insert(site).second);
|
|
}
|
|
|
|
protected:
|
|
void ThrowInjectedFaultIfSet(FaultInjectionSite site) {
|
|
std::lock_guard<std::mutex> l(fault_m_);
|
|
auto it = faults.find(site);
|
|
if (it != faults.end()) {
|
|
faults.erase(it);
|
|
PRINT_VERBOSE("Injecting fault");
|
|
throw duckdb::IOException("Injected fault");
|
|
}
|
|
}
|
|
|
|
bool is_wal_file(const duckdb::FileHandle &handle) {
|
|
return ends_with(handle.GetPath(), ".db.wal");
|
|
}
|
|
|
|
bool is_db_file(const duckdb::FileHandle &handle) {
|
|
return ends_with(handle.GetPath(), ".db");
|
|
}
|
|
|
|
bool is_wal_or_db_file(const duckdb::FileHandle &handle) {
|
|
return is_db_file(handle) || is_wal_file(handle);
|
|
}
|
|
|
|
private:
|
|
std::mutex fault_m_;
|
|
std::unordered_set<FaultInjectionSite> faults;
|
|
};
|
|
|
|
// This implementation of duckdb::FileSystem will cache writes to the database file in memory until fsync is called.
|
|
// It expects all read ranges to be perfectly aligned with previous writes.
|
|
class LazyFlushFileSystem : public FaultInjectionFileSystem {
|
|
public:
|
|
~LazyFlushFileSystem() {
|
|
if (!unflushed_chunks.empty()) {
|
|
PRINT_INFO("Unflushed chunks on shutdown for db file");
|
|
}
|
|
}
|
|
|
|
void Write(duckdb::FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override {
|
|
PRINT_VERBOSE("FS write offset=" << location << " bytes=" << nr_bytes);
|
|
|
|
// We only perform positional writes for the db file
|
|
REQUIRE(is_db_file(handle));
|
|
|
|
std::unique_lock<std::mutex> l(m_);
|
|
|
|
ThrowInjectedFaultIfSet(FaultInjectionSite::WRITE);
|
|
|
|
// Store the data in memory until fsync occurs
|
|
PRINT_VERBOSE("Caching chunk " << location << " bytes " << nr_bytes);
|
|
|
|
// TODO: be lazy - don't handle partial overwrites
|
|
REQUIRE(!partially_overlaps_existing_chunk(unflushed_chunks, location, nr_bytes));
|
|
|
|
auto it = unflushed_chunks.find(location);
|
|
if (it != unflushed_chunks.end()) {
|
|
// Check that if there is an existing chunk - it's size matches exactly
|
|
REQUIRE(it->second.size() == nr_bytes);
|
|
it->second = std::string((char *)buffer, nr_bytes);
|
|
} else {
|
|
unflushed_chunks.emplace(location, std::string((char *)buffer, nr_bytes));
|
|
}
|
|
}
|
|
|
|
int64_t Write(duckdb::FileHandle &handle, void *buffer, int64_t nr_bytes) override {
|
|
// Check appends only occur on the WAL
|
|
REQUIRE(is_wal_file(handle));
|
|
|
|
return duckdb::LocalFileSystem::Write(handle, buffer, nr_bytes);
|
|
}
|
|
|
|
void Read(duckdb::FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override {
|
|
REQUIRE(is_db_file(handle));
|
|
|
|
{
|
|
// TODO: shared_lock
|
|
std::unique_lock<std::mutex> l(m_);
|
|
|
|
// We don't handle partial overlaps for now.
|
|
REQUIRE(!partially_overlaps_existing_chunk(unflushed_chunks, location, nr_bytes));
|
|
|
|
auto it = unflushed_chunks.find(location);
|
|
if (it != unflushed_chunks.end()) {
|
|
PRINT_VERBOSE("FS read cached chunk at offset=" << location << " bytes=" << nr_bytes);
|
|
const auto &data = it->second;
|
|
// Assume block-aligned reads
|
|
REQUIRE(data.size() == nr_bytes);
|
|
memcpy(buffer, data.data(), nr_bytes);
|
|
return;
|
|
}
|
|
}
|
|
|
|
PRINT_VERBOSE("FS read disk chunk at offset=" << location << " bytes=" << nr_bytes);
|
|
return duckdb::LocalFileSystem::Read(handle, buffer, nr_bytes, location);
|
|
}
|
|
|
|
int64_t Read(duckdb::FileHandle &handle, void *buffer, int64_t nr_bytes) override {
|
|
PRINT_VERBOSE("FS read at end of file, bytes=" << nr_bytes);
|
|
REQUIRE(is_wal_or_db_file(handle));
|
|
|
|
if (is_db_file(handle)) {
|
|
// Just make sure we don't miss the unflushed chunks
|
|
REQUIRE(unflushed_chunks.empty());
|
|
}
|
|
return duckdb::LocalFileSystem::Read(handle, buffer, nr_bytes);
|
|
}
|
|
|
|
void FileSync(duckdb::FileHandle &handle) override {
|
|
PRINT_VERBOSE("FS fsync " << handle.GetPath() << " file_size=" << handle.GetFileSize());
|
|
|
|
REQUIRE(is_wal_or_db_file(handle));
|
|
|
|
if (!is_db_file(handle)) {
|
|
return duckdb::LocalFileSystem::FileSync(handle);
|
|
}
|
|
|
|
std::unique_lock<std::mutex> l(m_);
|
|
|
|
ThrowInjectedFaultIfSet(FaultInjectionSite::FSYNC);
|
|
|
|
for (const auto &location_and_data : unflushed_chunks) {
|
|
auto location = location_and_data.first;
|
|
const auto &data = location_and_data.second;
|
|
PRINT_VERBOSE("Flushing chunk " << location << " size=" << data.size());
|
|
duckdb::LocalFileSystem::Write(handle, (void *)data.data(), data.size(), location);
|
|
}
|
|
unflushed_chunks.clear();
|
|
|
|
duckdb::LocalFileSystem::FileSync(handle);
|
|
}
|
|
|
|
bool Trim(duckdb::FileHandle &handle, idx_t offset_bytes, idx_t length_bytes) override {
|
|
REQUIRE(is_db_file(handle));
|
|
|
|
std::unique_lock<std::mutex> l(m_);
|
|
|
|
// This is just simpler to implement
|
|
REQUIRE(unflushed_chunks.count(offset_bytes) == 0);
|
|
|
|
return FaultInjectionFileSystem::Trim(handle, offset_bytes, length_bytes);
|
|
}
|
|
|
|
void Truncate(duckdb::FileHandle &handle, int64_t new_size) override {
|
|
std::unique_lock<std::mutex> l(m_);
|
|
|
|
if (is_db_file(handle)) {
|
|
REQUIRE(unflushed_chunks.empty());
|
|
}
|
|
|
|
return duckdb::LocalFileSystem::Truncate(handle, new_size);
|
|
}
|
|
|
|
private:
|
|
// Lock for modifying unflushed_chunks:
|
|
// 1. Adding to unflushed_chunks on write
|
|
// 2. Flushing unflushed_chunks on fsync
|
|
// 3. Reading from unflushed_chunks
|
|
std::mutex m_;
|
|
std::map<idx_t, std::string> unflushed_chunks;
|
|
|
|
bool partially_overlaps_existing_chunk(const std::map<idx_t, std::string> &chunks, idx_t offset, size_t length) {
|
|
idx_t end = offset + length;
|
|
for (const auto &off_data : chunks) {
|
|
auto off = off_data.first;
|
|
const auto &data = off_data.second;
|
|
idx_t chunk_end = off + data.size();
|
|
|
|
// Check for any overlap
|
|
bool overlap = offset < chunk_end && off < end;
|
|
|
|
// Exclude full containment and exact match
|
|
bool exact_match = (offset == off && length == data.size());
|
|
|
|
if (overlap && !exact_match)
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
};
|
|
|
|
template <class ResultT>
|
|
void validate(ResultT &r, std::string expected_err_message = "") {
|
|
// For debugging
|
|
bool expected_err = !expected_err_message.empty();
|
|
if (expected_err != r.HasError() && r.HasError()) {
|
|
PRINT_INFO("Unexpected: query failed with " << r.GetError());
|
|
}
|
|
REQUIRE(expected_err == r.HasError());
|
|
if (r.HasError()) {
|
|
REQUIRE(r.GetError().find(expected_err_message) != std::string::npos);
|
|
}
|
|
}
|
|
|
|
void cleanup_db_file(const std::string &filename) {
|
|
bool removed_or_missing = std::remove(filename.c_str()) == 0 || errno == ENOENT;
|
|
REQUIRE(removed_or_missing);
|
|
}
|
|
|
|
TEST_CASE("simple fault injection storage test", "[storage][.]") {
|
|
if (!TestConfiguration::TestRunStorageFuzzer()) {
|
|
SKIP_TEST("storage-fuzzer not enabled");
|
|
return;
|
|
}
|
|
duckdb::DBConfig config;
|
|
|
|
LazyFlushFileSystem *raw_fs = new LazyFlushFileSystem();
|
|
config.file_system = duckdb::make_uniq<duckdb::VirtualFileSystem>(duckdb::unique_ptr<LazyFlushFileSystem>(raw_fs));
|
|
|
|
std::string file_path = TestCreatePath("pig.db");
|
|
|
|
cleanup_db_file(file_path);
|
|
|
|
{
|
|
duckdb::DuckDB db(file_path, &config);
|
|
|
|
duckdb::Connection con(db);
|
|
|
|
validate(*con.Query("CREATE TABLE IF NOT EXISTS t(i INTEGER)"));
|
|
validate(*con.Query("INSERT INTO t SELECT * FROM RANGE(0, 1000)"));
|
|
validate(*con.Query("INSERT INTO t SELECT * FROM RANGE(0, 1000000)"));
|
|
|
|
auto res = con.Query("SELECT count(*) FROM t");
|
|
validate(*res);
|
|
REQUIRE(res->GetValue(0, 0).ToString() == "1001000");
|
|
|
|
// Writes are ok - fsync are not ok
|
|
raw_fs->InjectFault(LazyFlushFileSystem::FaultInjectionSite::FSYNC);
|
|
|
|
validate(*con.Query("INSERT INTO t SELECT * FROM RANGE(0, 1000000)"),
|
|
"TransactionContext Error: Failed to commit: Injected fault");
|
|
|
|
// Check that the tx was rolled back
|
|
auto res2 = con.Query("SELECT count(*) FROM t");
|
|
validate(*res2);
|
|
REQUIRE(res2->GetValue(0, 0).ToString() == "1001000");
|
|
}
|
|
{
|
|
duckdb::DuckDB db(file_path, &config);
|
|
duckdb::Connection con(db);
|
|
|
|
auto res = con.Query("SELECT count(*) FROM t");
|
|
validate(*res);
|
|
REQUIRE(res->GetValue(0, 0).ToString() == "1001000");
|
|
}
|
|
}
|
|
|
|
enum ActionType {
|
|
// This action will simply flip the setting true -> false or false -> true
|
|
TOGGLE_SKIP_CHECKPOINTS_ON_COMMIT = 0,
|
|
SMALL_WRITE = 2,
|
|
LARGE_WRITE = 3,
|
|
LARGE_WRITE_WITH_FAULT = 4,
|
|
UPDATE = 5,
|
|
DELETE = 6,
|
|
RESET_TABLE = 7,
|
|
};
|
|
|
|
TEST_CASE("fuzzed storage test", "[storage][.]") {
|
|
if (!TestConfiguration::TestRunStorageFuzzer()) {
|
|
SKIP_TEST("storage-fuzzer not enabled");
|
|
return;
|
|
}
|
|
// DuckDB Configurations
|
|
duckdb::DBConfig config;
|
|
config.options.set_variables["debug_skip_checkpoint_on_commit"] = duckdb::Value(true);
|
|
config.options.trim_free_blocks = true;
|
|
config.options.checkpoint_on_shutdown = false;
|
|
|
|
std::string file_path = TestCreatePath("pig.db");
|
|
|
|
cleanup_db_file(file_path);
|
|
|
|
{
|
|
duckdb::DuckDB db(file_path, &config);
|
|
duckdb::Connection con(db);
|
|
validate(*con.Query("CREATE TABLE IF NOT EXISTS t(i INTEGER)"));
|
|
}
|
|
|
|
std::map<double, ActionType> pct_to_action = {{0.1, ActionType::TOGGLE_SKIP_CHECKPOINTS_ON_COMMIT},
|
|
{0.3, ActionType::LARGE_WRITE},
|
|
{0.5, ActionType::SMALL_WRITE},
|
|
{0.7, ActionType::UPDATE},
|
|
{0.85, ActionType::DELETE},
|
|
{1.0, ActionType::LARGE_WRITE_WITH_FAULT}};
|
|
|
|
// Randomly generated sequence of actions
|
|
std::vector<ActionType> actions = {};
|
|
|
|
int NUM_ACTIONS = 30;
|
|
for (int i = 0; i < NUM_ACTIONS; i++) {
|
|
double selection = (rand() % 100) / 100.0;
|
|
for (const auto &prob_type : pct_to_action) {
|
|
auto prob = prob_type.first;
|
|
auto type = prob_type.second;
|
|
if (selection > prob) {
|
|
continue;
|
|
}
|
|
actions.push_back(type);
|
|
break;
|
|
}
|
|
}
|
|
actions.push_back(RESET_TABLE);
|
|
for (int i = 0; i < NUM_ACTIONS; i++) {
|
|
double selection = (rand() % 100) / 100.0;
|
|
for (const auto &prob_type : pct_to_action) {
|
|
auto prob = prob_type.first;
|
|
auto type = prob_type.second;
|
|
if (selection > prob) {
|
|
continue;
|
|
}
|
|
actions.push_back(type);
|
|
break;
|
|
}
|
|
}
|
|
|
|
uint64_t offset = 0;
|
|
bool skip_checkpoint_on_commit = true;
|
|
std::string expected_checksum = "";
|
|
duckdb::unique_ptr<QueryResult> previous_result;
|
|
for (const auto &action : actions) {
|
|
// Note: the injected file system has to be reset each time. DuckDB construction seems to be std::move'ing them
|
|
|
|
/*
|
|
LazyFlushFileSystem *raw_fs = new LazyFlushFileSystem();
|
|
config.file_system =
|
|
duckdb::make_uniq<duckdb::VirtualFileSystem>(duckdb::unique_ptr<LazyFlushFileSystem>(raw_fs));
|
|
*/
|
|
|
|
FaultInjectionFileSystem *raw_fs = new FaultInjectionFileSystem();
|
|
config.file_system =
|
|
duckdb::make_uniq<duckdb::VirtualFileSystem>(duckdb::unique_ptr<FaultInjectionFileSystem>(raw_fs));
|
|
|
|
duckdb::DuckDB db(file_path, &config);
|
|
duckdb::Connection con(db);
|
|
|
|
// Compute a checksum
|
|
if (!expected_checksum.empty()) {
|
|
auto checksum = con.Query("SELECT bit_xor(hash(i)) FROM t");
|
|
validate(*checksum);
|
|
auto computed_checksum = checksum->GetValue(0, 0).ToString();
|
|
PRINT_INFO("Verifying checksum computed=" << computed_checksum << ", actual=" << expected_checksum);
|
|
if (computed_checksum != expected_checksum) {
|
|
auto result = con.Query("SELECT * FROM t ORDER BY ALL");
|
|
string error;
|
|
ColumnDataCollection::ResultEquals(previous_result->Cast<MaterializedQueryResult>().Collection(),
|
|
result->Cast<MaterializedQueryResult>().Collection(), error);
|
|
Printer::PrintF("Checksum failure\nResult comparison:\n%s", error);
|
|
REQUIRE(computed_checksum == expected_checksum);
|
|
}
|
|
}
|
|
previous_result = con.Query("SELECT * FROM t ORDER BY ALL");
|
|
|
|
switch (action) {
|
|
case ActionType::TOGGLE_SKIP_CHECKPOINTS_ON_COMMIT:
|
|
skip_checkpoint_on_commit = !skip_checkpoint_on_commit;
|
|
PRINT_INFO("Setting skip commit=" << skip_checkpoint_on_commit);
|
|
config.options.set_variables["debug_skip_checkpoint_on_commit"] = duckdb::Value(skip_checkpoint_on_commit);
|
|
break;
|
|
case ActionType::SMALL_WRITE: {
|
|
std::string small_insert = "INSERT INTO t SELECT * FROM RANGE(" + std::to_string(offset) + ", " +
|
|
std::to_string(offset + 100) + ")";
|
|
PRINT_INFO("RUN: " << small_insert);
|
|
validate(*con.Query(small_insert));
|
|
offset += 100;
|
|
break;
|
|
}
|
|
case ActionType::LARGE_WRITE: {
|
|
std::string large_insert = "INSERT INTO t SELECT * FROM RANGE(" + std::to_string(offset) + ", " +
|
|
std::to_string(offset + 1000 * 1000) + ")";
|
|
PRINT_INFO("RUN: " << large_insert);
|
|
validate(*con.Query(large_insert));
|
|
offset += 1000 * 1000;
|
|
break;
|
|
}
|
|
case ActionType::UPDATE: {
|
|
if (offset != 0) {
|
|
uint64_t begin = rand() % offset;
|
|
uint64_t length = rand() % (offset - begin);
|
|
std::string update_query = "UPDATE t SET i = i * 2 WHERE i > " + std::to_string(begin) + " and i <" +
|
|
std::to_string(begin + length);
|
|
|
|
PRINT_INFO("RUN: " << update_query);
|
|
validate(*con.Query(update_query));
|
|
}
|
|
break;
|
|
}
|
|
case ActionType::DELETE: {
|
|
if (offset != 0) {
|
|
uint64_t begin = rand() % offset;
|
|
uint64_t length = rand() % (offset - begin);
|
|
std::string delete_query =
|
|
"DELETE FROM t WHERE i > " + std::to_string(begin) + " and i <" + std::to_string(begin + length);
|
|
|
|
PRINT_INFO("RUN: " << delete_query);
|
|
validate(*con.Query(delete_query));
|
|
break;
|
|
}
|
|
}
|
|
case ActionType::LARGE_WRITE_WITH_FAULT: {
|
|
raw_fs->InjectFault(LazyFlushFileSystem::FaultInjectionSite::FSYNC);
|
|
std::string large_insert = "INSERT INTO t SELECT * FROM RANGE(" + std::to_string(offset) + ", " +
|
|
std::to_string(offset + 1000 * 1000) + ")";
|
|
|
|
PRINT_INFO("RUN with fault: " << large_insert);
|
|
validate(*con.Query(large_insert), "Injected fault");
|
|
break;
|
|
}
|
|
case ActionType::RESET_TABLE: {
|
|
std::string replace_query = "CREATE OR REPLACE TABLE t(i INTEGER)";
|
|
PRINT_INFO("RUN with fault: " << replace_query);
|
|
validate(*con.Query(replace_query));
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Compute a checksum (unless we injected a fault - which will invalidate the database)
|
|
if (action != ActionType::LARGE_WRITE_WITH_FAULT) {
|
|
auto checksum = con.Query("SELECT bit_xor(hash(i)) FROM t");
|
|
validate(*checksum);
|
|
expected_checksum = checksum->GetValue(0, 0).ToString();
|
|
|
|
PRINT_INFO("Computed new checksum: " << expected_checksum);
|
|
} else {
|
|
PRINT_INFO("Keeping old checksum due to faults: " << expected_checksum);
|
|
}
|
|
}
|
|
}
|