soy cooked

2026-01-19 22:40:05 -06:00
parent 9256050292
commit 16a4f1d2b9
2 changed files with 72 additions and 18 deletions
--- a/actual_test.py
+++ b/actual_test.py
@@ -0,0 +1,26 @@
 # Sanity-check script for the dataset under `asl_kaggle`: confirms that
 # train.csv and the first parquet file it references exist, then prints a
 # short preview of each so the expected columns can be eyeballed.
 import os
 import pandas as pd
 base_path = "asl_kaggle"
 train_csv_path = os.path.join(base_path, "train.csv")
 # Check if train.csv exists
 train_csv_exists = os.path.exists(train_csv_path)
 print(f"train.csv exists: {train_csv_exists}")
 if not train_csv_exists:
     # Stop with a readable message instead of a pandas traceback.
     raise SystemExit(f"Cannot continue: {train_csv_path} not found")
 # Check first few rows
 train_df = pd.read_csv(train_csv_path)
 print("\nFirst few rows:")
 print(train_df.head())
 if train_df.empty:
     # iloc[0] below would raise IndexError on an empty frame.
     raise SystemExit(f"Cannot continue: {train_csv_path} has no rows")
 # Check if parquet files exist (only the first listed file is probed)
 sample_path = os.path.join(base_path, train_df.iloc[0]['path'])
 sample_exists = os.path.exists(sample_path)
 print(f"\nFirst parquet file exists: {sample_exists}")
 print(f"Full path: {sample_path}")
 if not sample_exists:
     raise SystemExit(f"Cannot continue: {sample_path} not found")
 # Test extraction on one file (the same file that was just checked)
 test_path = sample_path
 print(f"Testing: {test_path}")
 df = pd.read_parquet(test_path)
 print(f"\nDataFrame shape: {df.shape}")
 print(f"Columns: {df.columns.tolist()}")
 # The 'type' and 'frame' columns are required here; a KeyError on either
 # line means the parquet schema differs from what this script expects.
 print(f"Types present: {df['type'].unique()}")
 print(f"Frames: {len(df['frame'].unique())}")
 print("\nFirst few rows:")
 print(df.head(10))
--- a/training.py
+++ b/training.py
@@ -56,9 +56,9 @@ TOTAL_POINTS_PER_FRAME = NUM_HAND_POINTS + NUM_FACE_POINTS
 # ===============================
-# ENHANCED DATA EXTRACTION
+# ENHANCED DATA EXTRACTION (FIXED)
 # ===============================
-def extract_multi_landmarks(path, min_valid_frames=5):
+def extract_multi_landmarks(path, min_valid_frames=3):
    """
    Extract both hands + selected face landmarks with modality flags
    Returns: dict with 'landmarks', 'left_hand_valid', 'right_hand_valid', 'face_valid'
@@ -71,12 +71,16 @@ def extract_multi_landmarks(path, min_valid_frames=5):
        face_valid_frames = []
        all_types = df["type"].unique()
-        if "left_hand" in all_types or "right_hand" in all_types or "face" in all_types:
+        # Check if we have at least one of the required types
-            frames = sorted(df["frame"].unique())
+        has_data = any(t in all_types for t in ["left_hand", "right_hand", "face"])
-        else:
+
        if not has_data:
            return None
-        if frames is None or len(frames) < min_valid_frames:
+        # Get all frames (might not start at 0)
        frames = sorted(df["frame"].unique())
        if len(frames) < min_valid_frames:
            return None
        for frame in frames:
@@ -88,9 +92,9 @@ def extract_multi_landmarks(path, min_valid_frames=5):
            right_valid = False
            face_valid = False
-            # Left hand
+            # Left hand (need at least 10 valid points)
            left = frame_df[frame_df["type"] == "left_hand"]
-            if len(left) >= 15:
+            if len(left) > 0:
                valid_count = 0
                for i in range(21):
                    row = left[left["landmark_index"] == i]
@@ -98,13 +102,13 @@ def extract_multi_landmarks(path, min_valid_frames=5):
                        frame_points[pos] = row[['x', 'y', 'z']].values[0]
                        valid_count += 1
                    pos += 1
-                left_valid = (valid_count >= 15)
+                left_valid = (valid_count >= 10)  # Relaxed from 15
            else:
                pos += 21
-            # Right hand
+            # Right hand (need at least 10 valid points)
            right = frame_df[frame_df["type"] == "right_hand"]
-            if len(right) >= 15:
+            if len(right) > 0:
                valid_count = 0
                for i in range(21):
                    row = right[right["landmark_index"] == i]
@@ -112,11 +116,11 @@ def extract_multi_landmarks(path, min_valid_frames=5):
                        frame_points[pos] = row[['x', 'y', 'z']].values[0]
                        valid_count += 1
                    pos += 1
-                right_valid = (valid_count >= 15)
+                right_valid = (valid_count >= 10)  # Relaxed from 15
            else:
                pos += 21
-            # Face
+            # Face (need at least 30% of selected landmarks)
            face = frame_df[frame_df["type"] == "face"]
            if len(face) > 0:
                valid_count = 0
@@ -126,10 +130,11 @@ def extract_multi_landmarks(path, min_valid_frames=5):
                        frame_points[pos] = row[['x', 'y', 'z']].values[0]
                        valid_count += 1
                    pos += 1
-                face_valid = (valid_count >= len(IMPORTANT_FACE_INDICES) * 0.5)
+                face_valid = (valid_count >= len(IMPORTANT_FACE_INDICES) * 0.3)  # Relaxed from 0.5
            # Accept frame if we have at least 20% valid data overall
            valid_ratio = 1 - np.isnan(frame_points).mean()
-            if valid_ratio >= 0.40:
+            if valid_ratio >= 0.20:  # Relaxed from 0.40
                frame_points = np.nan_to_num(frame_points, nan=0.0)
                seq.append(frame_points)
                left_valid_frames.append(left_valid)
@@ -146,7 +151,9 @@ def extract_multi_landmarks(path, min_valid_frames=5):
            'face_valid': np.array(face_valid_frames)
        }
-    except Exception:
+    except Exception as e:
        # Uncomment for debugging:
        # print(f"Error processing {path}: {e}")
        return None
@@ -338,10 +345,14 @@ def main():
    print("\nLoading metadata...")
    train_df, _ = load_kaggle_asl_data(base_path)
    print(f"Total samples in train.csv: {len(train_df)}")
    # Convert to simple tuples for multiprocessing compatibility
    rows = [(row['path'], row['sign']) for _, row in train_df.iterrows()]
    print("\nProcessing sequences with BOTH hands + FACE (enhanced)...")
    print("This may take a few minutes...")
    with Pool(cpu_count()) as pool:
        results = list(tqdm(
            pool.imap(
@@ -354,17 +365,28 @@ def main():
        ))
    X_list, frame_masks_list, modality_masks_list, y_list = [], [], [], []
    failed_count = 0
    for feat, frame_mask, modality_mask, sign in results:
        if feat is not None and frame_mask is not None:
            X_list.append(feat)
            frame_masks_list.append(frame_mask)
            modality_masks_list.append(modality_mask)
            y_list.append(sign)
        else:
            failed_count += 1
    if not X_list:
-        print("No valid sequences extracted!")
+        print(f"\n❌ No valid sequences extracted!")
        print(f"Failed to process: {failed_count}/{len(results)} files")
        print("\nTroubleshooting tips:")
        print("1. Check that parquet files contain 'left_hand', 'right_hand', or 'face' types")
        print("2. Verify files have at least 3 frames")
        print("3. Ensure landmark data is not all NaN")
        return
    print(f"\n✓ Successfully processed: {len(X_list)}/{len(results)} files")
    print(f"✗ Failed: {failed_count}/{len(results)} files")
    X = np.stack(X_list)
    frame_masks = np.stack(frame_masks_list)
    modality_masks = np.stack(modality_masks_list)
@@ -455,6 +477,10 @@ def main():
    best_acc = 0.0
    epochs = 70
    print("\n" + "=" * 60)
    print("TRAINING START")
    print("=" * 60)
    for epoch in range(epochs):
        model.train()
        total_loss = correct = total = 0
@@ -508,7 +534,9 @@ def main():
        else:
            print()
-    print(f"\nBest test accuracy: {best_acc:.2f}%")
+    print("\n" + "=" * 60)
    print(f"TRAINING COMPLETE - Best test accuracy: {best_acc:.2f}%")
    print("=" * 60)
 if __name__ == "__main__":