Files
email-tracker/external/duckdb/test/sql/copy/parquet/parquet2.test
2025-10-24 19:21:19 -05:00

286 lines
2.3 KiB
SQL

# name: test/sql/copy/parquet/parquet2.test
# description: Issue #2261: TPC-H Q6 fails on Parquet input
# group: [parquet]
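# How data/parquet-testing/p2.parquet was generated (PySpark, Parquet writer version v2):
# from pyspark.sql import SparkSession
# from pyspark.sql.functions import rand
# from pyspark.sql.types import *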
#
# spark = SparkSession.builder.master("local").config("spark.hadoop.parquet.writer.version", "v2").getOrCreate()
# sc = spark.sparkContext
#
# ref = spark.range(42, 10000, 2).toDF("id").orderBy(rand())
# ref.show(10)
#
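# ref.write.parquet("p2.parquet")
#
# NOTE: the script above only produces the `id` column. The `id_with_null` and `id_int`
# columns queried below are not covered by it - TODO document how they were added.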
require parquet
# Tail read: skip the first 4968 rows and return whatever is left.
# The aggregate check in this file expects count(id) = 4979, so 11 rows remain.
# There is no ORDER BY: the expected values rely on the scan returning rows in a
# stable order (presumably the order stored in the file).
query I
SELECT id FROM 'data/parquet-testing/p2.parquet' OFFSET 4968;
----
1436
2596
4774
4402
5378
5372
8658
808
5876
7214
9816

# Head read: the first 10 values of `id`.
# There is no ORDER BY, so the expected values rely on a stable scan order
# (presumably the order stored in the file).
query I
SELECT id FROM 'data/parquet-testing/p2.parquet' LIMIT 10;
----
2644
8534
3276
5264
5766
6018
2080
576
1350
9312

# Larger head read: the first 100 values of `id`.
# The first 10 expected values are the same as in the LIMIT 10 check.
# There is no ORDER BY, so the expected values rely on a stable scan order.
query I
SELECT id FROM 'data/parquet-testing/p2.parquet' LIMIT 100;
----
2644
8534
3276
5264
5766
6018
2080
576
1350
9312
8898
1126
6704
2836
390
4440
7582
4386
4482
6866
7814
7246
8998
8454
2004
7770
7590
9092
7586
4762
5672
6782
3968
8102
726
3384
3232
9628
4460
556
1368
560
4116
4294
988
1404
8380
862
9172
3964
5728
8018
8052
8786
8828
8140
4044
324
7102
5898
6848
174
5240
4834
1354
5080
2386
7402
8508
2006
1270
4936
4682
436
6056
7772
2792
982
7028
8964
6632
4062
8260
9494
6260
8850
9238
7968
9430
8156
9388
478
4478
3400
370
130
552
7614
1234
5302

# Nullable column: the first 100 values of `id_with_null`.
# Compared with the 100-row `id` check, every non-NULL value here equals `id` at the
# same position. The NULLs sit exactly on the rows where `id` is a multiple of 10
# (2080, 1350, 390, ...). Whether that rule holds for the whole file is not shown
# here - TODO confirm against the generator.
query I
SELECT id_with_null FROM 'data/parquet-testing/p2.parquet' LIMIT 100;
----
2644
8534
3276
5264
5766
6018
NULL
576
NULL
9312
8898
1126
6704
2836
NULL
NULL
7582
4386
4482
6866
7814
7246
8998
8454
2004
NULL
NULL
9092
7586
4762
5672
6782
3968
8102
726
3384
3232
9628
NULL
556
1368
NULL
4116
4294
988
1404
NULL
862
9172
3964
5728
8018
8052
8786
8828
NULL
4044
324
7102
5898
6848
174
NULL
4834
1354
NULL
2386
7402
8508
2006
NULL
4936
4682
436
6056
7772
2792
982
7028
8964
6632
4062
NULL
9494
NULL
NULL
9238
7968
NULL
8156
9388
478
4478
NULL
NULL
NULL
552
7614
1234
5302

# Whole-file aggregates over `id` and `id_with_null`.
# The `id` figures agree with spark.range(42, 10000, 2) from the header: 4979 even
# numbers from 42 to 9998, which sum to 24994580.
# count() skips NULLs, so 4979 - 3984 = 995 rows have a NULL `id_with_null`.
# The single expected row lists the eight values in select-list order.
query IIIIIIII
SELECT
    min(id),
    max(id),
    sum(id),
    count(id),
    min(id_with_null),
    max(id_with_null),
    sum(id_with_null),
    count(id_with_null)
FROM 'data/parquet-testing/p2.parquet';
----
42	9998	24994580	4979	42	9998	19999680	3984

# Whole-file aggregates over `id_int`.
# The expected min, max, sum and count are the same figures the file expects for
# `id_with_null` (42, 9998, 19999680, 3984).
# NOTE(review): presumably `id_int` is a narrower-integer copy of `id_with_null` -
# confirm against the generator.
query IIII
SELECT
    min(id_int),
    max(id_int),
    sum(id_int),
    count(id_int)
FROM 'data/parquet-testing/p2.parquet';
----
42	9998	19999680	3984

# Regression check taken from bug 2882: read every row of a second Parquet file.
# The expected output is a single column holding 0 through 6, in that order.
# NOTE(review): judging by the file name it is snappy-compressed and was written by
# arrow2 - confirm.
# `*` is kept because the column name is not visible from this test.
query I
SELECT * FROM 'data/parquet-testing/7-set.snappy.arrow2.parquet';
----
0
1
2
3
4
5
6
