26 lines
877 B
Python
26 lines
877 B
Python
# Quick sanity check of the dataset stored under `base_path`:
#   1. report whether train.csv is present and preview its first rows,
#   2. resolve the first entry of its 'path' column to a parquet file and
#      report whether that file is present,
#   3. load that parquet file and print a short summary of its contents.
# The existence checks are informational only; a missing file still raises
# from the subsequent pandas read call.
import os

import pandas as pd

base_path = "asl_kaggle"
train_csv_path = os.path.join(base_path, "train.csv")

# Check if train.csv exists
print(f"train.csv exists: {os.path.exists(train_csv_path)}")

# Check first few rows
train_df = pd.read_csv(train_csv_path)
print("\nFirst few rows:")
print(train_df.head())

# Check if parquet files exist (the 'path' value of the first row is joined
# onto base_path)
sample_path = os.path.join(base_path, train_df.iloc[0]["path"])
print(f"\nFirst parquet file exists: {os.path.exists(sample_path)}")
print(f"Full path: {sample_path}")

# Test extraction on one file: the same file that was located above, so the
# already-computed path is reused instead of being rebuilt.
test_path = sample_path
print(f"Testing: {test_path}")

df = pd.read_parquet(test_path)
print(f"\nDataFrame shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")
print(f"Types present: {df['type'].unique()}")
print(f"Frames: {len(df['frame'].unique())}")
print("\nFirst few rows:")
print(df.head(10))