soy cooked
This commit is contained in:
26
actual_test.py
Normal file
26
actual_test.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""Sanity-check the Kaggle ASL dataset layout.

Verifies that train.csv exists under ``base_path``, previews its first rows,
then confirms the first referenced parquet file is present and parseable and
prints its basic structure (shape, columns, landmark types, frame count).
"""
import os

import pandas as pd  # moved to top of file with the other imports

# Root directory of the downloaded Kaggle ASL dataset.
base_path = "asl_kaggle"

# Check if train.csv exists
print(f"train.csv exists: {os.path.exists(os.path.join(base_path, 'train.csv'))}")

# Check first few rows
train_df = pd.read_csv(os.path.join(base_path, "train.csv"))
print("\nFirst few rows:")
print(train_df.head())

# Check if parquet files exist — the 'path' column is assumed to hold a
# path relative to base_path (TODO confirm against train.csv schema).
sample_path = os.path.join(base_path, train_df.iloc[0]['path'])
print(f"\nFirst parquet file exists: {os.path.exists(sample_path)}")
print(f"Full path: {sample_path}")

# Test extraction on one file — same first sample as above, so reuse the
# already-computed path instead of rebuilding it.
test_path = sample_path
print(f"Testing: {test_path}")

df = pd.read_parquet(test_path)
print(f"\nDataFrame shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")
print(f"Types present: {df['type'].unique()}")
print(f"Frames: {len(df['frame'].unique())}")
print("\nFirst few rows:")
print(df.head(10))
|
||||
64
training.py
64
training.py
@@ -56,9 +56,9 @@ TOTAL_POINTS_PER_FRAME = NUM_HAND_POINTS + NUM_FACE_POINTS
|
||||
|
||||
|
||||
# ===============================
|
||||
# ENHANCED DATA EXTRACTION
|
||||
# ENHANCED DATA EXTRACTION (FIXED)
|
||||
# ===============================
|
||||
def extract_multi_landmarks(path, min_valid_frames=5):
|
||||
def extract_multi_landmarks(path, min_valid_frames=3):
|
||||
"""
|
||||
Extract both hands + selected face landmarks with modality flags
|
||||
Returns: dict with 'landmarks', 'left_hand_valid', 'right_hand_valid', 'face_valid'
|
||||
@@ -71,12 +71,16 @@ def extract_multi_landmarks(path, min_valid_frames=5):
|
||||
face_valid_frames = []
|
||||
|
||||
all_types = df["type"].unique()
|
||||
if "left_hand" in all_types or "right_hand" in all_types or "face" in all_types:
|
||||
frames = sorted(df["frame"].unique())
|
||||
else:
|
||||
# Check if we have at least one of the required types
|
||||
has_data = any(t in all_types for t in ["left_hand", "right_hand", "face"])
|
||||
|
||||
if not has_data:
|
||||
return None
|
||||
|
||||
if frames is None or len(frames) < min_valid_frames:
|
||||
# Get all frames (might not start at 0)
|
||||
frames = sorted(df["frame"].unique())
|
||||
|
||||
if len(frames) < min_valid_frames:
|
||||
return None
|
||||
|
||||
for frame in frames:
|
||||
@@ -88,9 +92,9 @@ def extract_multi_landmarks(path, min_valid_frames=5):
|
||||
right_valid = False
|
||||
face_valid = False
|
||||
|
||||
# Left hand
|
||||
# Left hand (need at least 10 valid points)
|
||||
left = frame_df[frame_df["type"] == "left_hand"]
|
||||
if len(left) >= 15:
|
||||
if len(left) > 0:
|
||||
valid_count = 0
|
||||
for i in range(21):
|
||||
row = left[left["landmark_index"] == i]
|
||||
@@ -98,13 +102,13 @@ def extract_multi_landmarks(path, min_valid_frames=5):
|
||||
frame_points[pos] = row[['x', 'y', 'z']].values[0]
|
||||
valid_count += 1
|
||||
pos += 1
|
||||
left_valid = (valid_count >= 15)
|
||||
left_valid = (valid_count >= 10) # Relaxed from 15
|
||||
else:
|
||||
pos += 21
|
||||
|
||||
# Right hand
|
||||
# Right hand (need at least 10 valid points)
|
||||
right = frame_df[frame_df["type"] == "right_hand"]
|
||||
if len(right) >= 15:
|
||||
if len(right) > 0:
|
||||
valid_count = 0
|
||||
for i in range(21):
|
||||
row = right[right["landmark_index"] == i]
|
||||
@@ -112,11 +116,11 @@ def extract_multi_landmarks(path, min_valid_frames=5):
|
||||
frame_points[pos] = row[['x', 'y', 'z']].values[0]
|
||||
valid_count += 1
|
||||
pos += 1
|
||||
right_valid = (valid_count >= 15)
|
||||
right_valid = (valid_count >= 10) # Relaxed from 15
|
||||
else:
|
||||
pos += 21
|
||||
|
||||
# Face
|
||||
# Face (need at least 30% of selected landmarks)
|
||||
face = frame_df[frame_df["type"] == "face"]
|
||||
if len(face) > 0:
|
||||
valid_count = 0
|
||||
@@ -126,10 +130,11 @@ def extract_multi_landmarks(path, min_valid_frames=5):
|
||||
frame_points[pos] = row[['x', 'y', 'z']].values[0]
|
||||
valid_count += 1
|
||||
pos += 1
|
||||
face_valid = (valid_count >= len(IMPORTANT_FACE_INDICES) * 0.5)
|
||||
face_valid = (valid_count >= len(IMPORTANT_FACE_INDICES) * 0.3) # Relaxed from 0.5
|
||||
|
||||
# Accept frame if we have at least 20% valid data overall
|
||||
valid_ratio = 1 - np.isnan(frame_points).mean()
|
||||
if valid_ratio >= 0.40:
|
||||
if valid_ratio >= 0.20: # Relaxed from 0.40
|
||||
frame_points = np.nan_to_num(frame_points, nan=0.0)
|
||||
seq.append(frame_points)
|
||||
left_valid_frames.append(left_valid)
|
||||
@@ -146,7 +151,9 @@ def extract_multi_landmarks(path, min_valid_frames=5):
|
||||
'face_valid': np.array(face_valid_frames)
|
||||
}
|
||||
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
# Uncomment for debugging:
|
||||
# print(f"Error processing {path}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
@@ -338,10 +345,14 @@ def main():
|
||||
print("\nLoading metadata...")
|
||||
train_df, _ = load_kaggle_asl_data(base_path)
|
||||
|
||||
print(f"Total samples in train.csv: {len(train_df)}")
|
||||
|
||||
# Convert to simple tuples for multiprocessing compatibility
|
||||
rows = [(row['path'], row['sign']) for _, row in train_df.iterrows()]
|
||||
|
||||
print("\nProcessing sequences with BOTH hands + FACE (enhanced)...")
|
||||
print("This may take a few minutes...")
|
||||
|
||||
with Pool(cpu_count()) as pool:
|
||||
results = list(tqdm(
|
||||
pool.imap(
|
||||
@@ -354,17 +365,28 @@ def main():
|
||||
))
|
||||
|
||||
X_list, frame_masks_list, modality_masks_list, y_list = [], [], [], []
|
||||
failed_count = 0
|
||||
for feat, frame_mask, modality_mask, sign in results:
|
||||
if feat is not None and frame_mask is not None:
|
||||
X_list.append(feat)
|
||||
frame_masks_list.append(frame_mask)
|
||||
modality_masks_list.append(modality_mask)
|
||||
y_list.append(sign)
|
||||
else:
|
||||
failed_count += 1
|
||||
|
||||
if not X_list:
|
||||
print("No valid sequences extracted!")
|
||||
print(f"\n❌ No valid sequences extracted!")
|
||||
print(f"Failed to process: {failed_count}/{len(results)} files")
|
||||
print("\nTroubleshooting tips:")
|
||||
print("1. Check that parquet files contain 'left_hand', 'right_hand', or 'face' types")
|
||||
print("2. Verify files have at least 3 frames")
|
||||
print("3. Ensure landmark data is not all NaN")
|
||||
return
|
||||
|
||||
print(f"\n✓ Successfully processed: {len(X_list)}/{len(results)} files")
|
||||
print(f"✗ Failed: {failed_count}/{len(results)} files")
|
||||
|
||||
X = np.stack(X_list)
|
||||
frame_masks = np.stack(frame_masks_list)
|
||||
modality_masks = np.stack(modality_masks_list)
|
||||
@@ -455,6 +477,10 @@ def main():
|
||||
best_acc = 0.0
|
||||
epochs = 70
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("TRAINING START")
|
||||
print("=" * 60)
|
||||
|
||||
for epoch in range(epochs):
|
||||
model.train()
|
||||
total_loss = correct = total = 0
|
||||
@@ -508,7 +534,9 @@ def main():
|
||||
else:
|
||||
print()
|
||||
|
||||
print(f"\nBest test accuracy: {best_acc:.2f}%")
|
||||
print("\n" + "=" * 60)
|
||||
print(f"TRAINING COMPLETE - Best test accuracy: {best_acc:.2f}%")
|
||||
print("=" * 60)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user