Files
email-tracker/external/duckdb/test/common/test_local_file_urls.test
2025-10-24 19:21:19 -05:00

99 lines
3.4 KiB
SQL

# name: test/common/test_local_file_urls.test
# description: Reading, globbing and secret-scope matching for local files addressed through file: URLs
# group: [common]

# Note: __WORKING_DIRECTORY__ will be replaced with the full path to the working dir of the tests (root of duckdb repo)
# Each variable below holds that directory rewritten as a file URL: backslashes are turned into
# forward slashes and leading slashes are trimmed, so the URL prefix supplies the only separator.

statement ok
SET VARIABLE work_dir_no_host='file:/' || ltrim(replace('__WORKING_DIRECTORY__', '\', '/'), '/')

statement ok
SET VARIABLE work_dir_triple_slash='file:///' || ltrim(replace('__WORKING_DIRECTORY__', '\', '/'), '/')

statement ok
SET VARIABLE work_dir_localhost='file://localhost/' || ltrim(replace('__WORKING_DIRECTORY__', '\', '/'), '/')

# testing file:/some/path/to/duckdb/repo
query II
SELECT * FROM read_csv_auto(getvariable('work_dir_no_host') || '/data/csv/normalize.csv');
----
John	ipsum

# testing file:///some/path/to/duckdb/repo
query II
SELECT * FROM read_csv_auto(getvariable('work_dir_triple_slash') || '/data/csv/normalize.csv');
----
John	ipsum

# testing file://localhost/some/path/to/duckdb/repo
query II
SELECT * FROM read_csv_auto(getvariable('work_dir_localhost') || '/data/csv/normalize.csv');
----
John	ipsum

# The glob checks below slice the returned path to the length of the URL prefix that was passed in
# (6 = 'file:/', 8 = 'file:///', 17 = 'file://localhost/') to verify the prefix is kept in the output.

# Test glob with file:/some/path
query II
SELECT file[:6], parse_filename(file) FROM glob(getvariable('work_dir_no_host') || '/data/*/bad_date_timestamp_mix.csv')
----
file:/	bad_date_timestamp_mix.csv

# Test glob with file:///some/path
query II
SELECT file[:8], parse_filename(file) FROM glob(getvariable('work_dir_triple_slash') || '/data/*/bad_date_timestamp_mix.csv')
----
file:///	bad_date_timestamp_mix.csv

# Test glob with file://localhost/some/path/to/duckdb/repo
query II
SELECT file[:17], parse_filename(file) FROM glob(getvariable('work_dir_localhost') || '/data/*/bad_date_timestamp_mix.csv')
----
file://localhost/	bad_date_timestamp_mix.csv

# The multi-file scans below apply the same prefix-length slices to the filename column.

# Test scanning multiple files using glob with file:/some/path
query III
SELECT id, filename[:6], parse_filename(filename) FROM read_csv_auto(getvariable('work_dir_no_host') || '/data/csv/hive-partitioning/simple/*/*/test.csv', filename=1) ORDER BY id
----
1	file:/	test.csv
2	file:/	test.csv

# Test scanning multiple files using glob with file:///some/path
query III
SELECT id, filename[:8], parse_filename(filename) FROM read_csv_auto(getvariable('work_dir_triple_slash') || '/data/csv/hive-partitioning/simple/*/*/test.csv', filename=1) ORDER BY id
----
1	file:///	test.csv
2	file:///	test.csv

# Test scanning multiple files using glob with file://localhost/some/path
query III
SELECT id, filename[:17], parse_filename(filename) FROM read_csv_auto(getvariable('work_dir_localhost') || '/data/csv/hive-partitioning/simple/*/*/test.csv', filename=1) ORDER BY id
----
1	file://localhost/	test.csv
2	file://localhost/	test.csv

require noforcestorage

# Ensure secret scopes are matched correctly against file:// URLs.
# Two secrets are scoped to a file URL prefix; the third has no explicit scope.
statement ok
create secret secret_file_url_tripleslash (TYPE HTTP, scope 'file:///');

statement ok
create secret secret_file_url_localhost (TYPE HTTP, scope 'file://localhost/');

statement ok
create secret secret_without_file_path (TYPE HTTP);

# A file:/// path selects the secret scoped to 'file:///'
query I
SELECT name FROM which_secret(getvariable('work_dir_triple_slash') || '/data/csv/hive-partitioning/simple/*/*/test.csv', 'http')
----
secret_file_url_tripleslash

# A file://localhost/ path selects the secret scoped to 'file://localhost/'
query I
SELECT name FROM which_secret(getvariable('work_dir_localhost') || '/data/csv/hive-partitioning/simple/*/*/test.csv', 'http')
----
secret_file_url_localhost

# Raw paths (no file: scheme) match neither URL-scoped secret, so the unscoped secret is returned
query I
SELECT name FROM which_secret('__WORKING_DIRECTORY__/data/csv/hive-partitioning/simple/*/*/test.csv', 'http')
----
secret_without_file_path