soy cooked pt 2

This commit is contained in:
2026-01-19 23:21:35 -06:00
parent 16a4f1d2b9
commit 2cbd178ba8

View File

@@ -2,7 +2,7 @@ import os
import json import json
import math import math
import numpy as np import numpy as np
import pandas as pd import polars as pl
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
@@ -56,7 +56,7 @@ TOTAL_POINTS_PER_FRAME = NUM_HAND_POINTS + NUM_FACE_POINTS
# =============================== # ===============================
# ENHANCED DATA EXTRACTION (FIXED) # ENHANCED DATA EXTRACTION (POLARS)
# =============================== # ===============================
def extract_multi_landmarks(path, min_valid_frames=3): def extract_multi_landmarks(path, min_valid_frames=3):
""" """
@@ -64,13 +64,13 @@ def extract_multi_landmarks(path, min_valid_frames=3):
Returns: dict with 'landmarks', 'left_hand_valid', 'right_hand_valid', 'face_valid' Returns: dict with 'landmarks', 'left_hand_valid', 'right_hand_valid', 'face_valid'
""" """
try: try:
df = pd.read_parquet(path) df = pl.read_parquet(path)
seq = [] seq = []
left_valid_frames = [] left_valid_frames = []
right_valid_frames = [] right_valid_frames = []
face_valid_frames = [] face_valid_frames = []
all_types = df["type"].unique() all_types = df.select("type").unique().to_series().to_list()
# Check if we have at least one of the required types # Check if we have at least one of the required types
has_data = any(t in all_types for t in ["left_hand", "right_hand", "face"]) has_data = any(t in all_types for t in ["left_hand", "right_hand", "face"])
@@ -78,13 +78,13 @@ def extract_multi_landmarks(path, min_valid_frames=3):
return None return None
# Get all frames (might not start at 0) # Get all frames (might not start at 0)
frames = sorted(df["frame"].unique()) frames = sorted(df.select("frame").unique().to_series().to_list())
if len(frames) < min_valid_frames: if len(frames) < min_valid_frames:
return None return None
for frame in frames: for frame in frames:
frame_df = df[df["frame"] == frame] frame_df = df.filter(pl.col("frame") == frame)
frame_points = np.full((TOTAL_POINTS_PER_FRAME, 3), np.nan, dtype=np.float32) frame_points = np.full((TOTAL_POINTS_PER_FRAME, 3), np.nan, dtype=np.float32)
pos = 0 pos = 0
@@ -93,48 +93,54 @@ def extract_multi_landmarks(path, min_valid_frames=3):
face_valid = False face_valid = False
# Left hand (need at least 10 valid points) # Left hand (need at least 10 valid points)
left = frame_df[frame_df["type"] == "left_hand"] left = frame_df.filter(pl.col("type") == "left_hand")
if len(left) > 0: if left.height > 0:
valid_count = 0 valid_count = 0
for i in range(21): for i in range(21):
row = left[left["landmark_index"] == i] row = left.filter(pl.col("landmark_index") == i)
if len(row) > 0 and row[['x', 'y', 'z']].notna().all().all(): if row.height > 0:
frame_points[pos] = row[['x', 'y', 'z']].values[0] coords = row.select(["x", "y", "z"]).row(0)
if all(c is not None for c in coords):
frame_points[pos] = coords
valid_count += 1 valid_count += 1
pos += 1 pos += 1
left_valid = (valid_count >= 10) # Relaxed from 15 left_valid = (valid_count >= 10)
else: else:
pos += 21 pos += 21
# Right hand (need at least 10 valid points) # Right hand (need at least 10 valid points)
right = frame_df[frame_df["type"] == "right_hand"] right = frame_df.filter(pl.col("type") == "right_hand")
if len(right) > 0: if right.height > 0:
valid_count = 0 valid_count = 0
for i in range(21): for i in range(21):
row = right[right["landmark_index"] == i] row = right.filter(pl.col("landmark_index") == i)
if len(row) > 0 and row[['x', 'y', 'z']].notna().all().all(): if row.height > 0:
frame_points[pos] = row[['x', 'y', 'z']].values[0] coords = row.select(["x", "y", "z"]).row(0)
if all(c is not None for c in coords):
frame_points[pos] = coords
valid_count += 1 valid_count += 1
pos += 1 pos += 1
right_valid = (valid_count >= 10) # Relaxed from 15 right_valid = (valid_count >= 10)
else: else:
pos += 21 pos += 21
# Face (need at least 30% of selected landmarks) # Face (need at least 30% of selected landmarks)
face = frame_df[frame_df["type"] == "face"] face = frame_df.filter(pl.col("type") == "face")
if len(face) > 0: if face.height > 0:
valid_count = 0 valid_count = 0
for idx in IMPORTANT_FACE_INDICES: for idx in IMPORTANT_FACE_INDICES:
row = face[face["landmark_index"] == idx] row = face.filter(pl.col("landmark_index") == idx)
if len(row) > 0 and row[['x', 'y', 'z']].notna().all().all(): if row.height > 0:
frame_points[pos] = row[['x', 'y', 'z']].values[0] coords = row.select(["x", "y", "z"]).row(0)
if all(c is not None for c in coords):
frame_points[pos] = coords
valid_count += 1 valid_count += 1
pos += 1 pos += 1
face_valid = (valid_count >= len(IMPORTANT_FACE_INDICES) * 0.3) # Relaxed from 0.5 face_valid = (valid_count >= len(IMPORTANT_FACE_INDICES) * 0.3)
# Accept frame if we have at least 20% valid data overall # Accept frame if we have at least 20% valid data overall
valid_ratio = 1 - np.isnan(frame_points).mean() valid_ratio = 1 - np.isnan(frame_points).mean()
if valid_ratio >= 0.20: # Relaxed from 0.40 if valid_ratio >= 0.20:
frame_points = np.nan_to_num(frame_points, nan=0.0) frame_points = np.nan_to_num(frame_points, nan=0.0)
seq.append(frame_points) seq.append(frame_points)
left_valid_frames.append(left_valid) left_valid_frames.append(left_valid)
@@ -328,9 +334,9 @@ class ModalityAwareTransformer(nn.Module):
def load_kaggle_asl_data(base_path):
    """Read the Kaggle ASL training metadata with Polars.

    Parameters
    ----------
    base_path : str
        Root directory of the dataset; must contain ``train.csv``.

    Returns
    -------
    tuple
        ``(train_df, None)`` where ``train_df`` is a Polars DataFrame
        loaded from ``train.csv``. The second slot is always ``None`` and
        is kept only so existing ``train_df, _ = ...`` call sites keep
        working.
    """
    csv_path = os.path.join(base_path, "train.csv")
    metadata = pl.read_csv(csv_path)
    return metadata, None
@@ -345,10 +351,10 @@ def main():
print("\nLoading metadata...") print("\nLoading metadata...")
train_df, _ = load_kaggle_asl_data(base_path) train_df, _ = load_kaggle_asl_data(base_path)
print(f"Total samples in train.csv: {len(train_df)}") print(f"Total samples in train.csv: {train_df.height}")
# Convert to simple tuples for multiprocessing compatibility # Convert to simple tuples for multiprocessing compatibility
rows = [(row['path'], row['sign']) for _, row in train_df.iterrows()] rows = [(row[0], row[1]) for row in train_df.select(["path", "sign"]).iter_rows()]
print("\nProcessing sequences with BOTH hands + FACE (enhanced)...") print("\nProcessing sequences with BOTH hands + FACE (enhanced)...")
print("This may take a few minutes...") print("This may take a few minutes...")