diff --git a/actual_test.py b/actual_test.py
new file mode 100644
index 0000000..739888a
--- /dev/null
+++ b/actual_test.py
@@ -0,0 +1,57 @@
+"""Sanity-check the Kaggle ASL dataset layout under ``asl_kaggle``.
+
+Run directly (``python actual_test.py``). The checks live in ``main()``
+behind a ``__main__`` guard because this file name matches pytest's
+``*_test.py`` discovery pattern, so pytest imports it during collection.
+"""
+import os
+import sys
+
+import pandas as pd
+
+BASE_PATH = "asl_kaggle"
+
+
+def main(base_path=BASE_PATH):
+    """Report on train.csv and the first parquet file it lists.
+
+    Returns 0 when every check could run, 1 as soon as a required file is
+    missing or train.csv has no rows.
+    """
+    # Check if train.csv exists
+    csv_path = os.path.join(base_path, "train.csv")
+    csv_exists = os.path.exists(csv_path)
+    print(f"train.csv exists: {csv_exists}")
+    if not csv_exists:
+        return 1
+
+    # Check first few rows
+    train_df = pd.read_csv(csv_path)
+    print("\nFirst few rows:")
+    print(train_df.head())
+    if train_df.empty:
+        print("\ntrain.csv has no rows; cannot locate a parquet file.")
+        return 1
+
+    # Check if parquet files exist
+    sample_path = os.path.join(base_path, train_df.iloc[0]["path"])
+    sample_exists = os.path.exists(sample_path)
+    print(f"\nFirst parquet file exists: {sample_exists}")
+    print(f"Full path: {sample_path}")
+    if not sample_exists:
+        return 1
+
+    # Test extraction on one file
+    print(f"Testing: {sample_path}")
+    df = pd.read_parquet(sample_path)
+    print(f"\nDataFrame shape: {df.shape}")
+    print(f"Columns: {df.columns.tolist()}")
+    print(f"Types present: {df['type'].unique()}")
+    print(f"Frames: {len(df['frame'].unique())}")
+    print("\nFirst few rows:")
+    print(df.head(10))
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/training.py b/training.py
index 4d156dd..b76ee76 100644
--- a/training.py
+++ b/training.py
@@ -56,9 +56,9 @@ TOTAL_POINTS_PER_FRAME = NUM_HAND_POINTS + NUM_FACE_POINTS
 
 
 # ===============================
-# ENHANCED DATA EXTRACTION
+# ENHANCED DATA EXTRACTION (FIXED)
 # ===============================
-def extract_multi_landmarks(path, min_valid_frames=5):
+def extract_multi_landmarks(path, min_valid_frames=3):
     """
     Extract both hands + selected face landmarks with modality flags
     Returns: dict with 'landmarks', 'left_hand_valid', 'right_hand_valid', 'face_valid'
@@ -71,12 +71,16 @@ def extract_multi_landmarks(path, min_valid_frames=5):
         face_valid_frames = []
 
         all_types = df["type"].unique()
-        if "left_hand" in all_types or "right_hand" in all_types or "face" in all_types:
-            frames = sorted(df["frame"].unique())
-        else:
+        # Check if we have at least one of the required types
+        has_data = any(t in all_types for t in ["left_hand", "right_hand", "face"])
+
+        if not has_data:
             return None
 
-        if frames is None or len(frames) < min_valid_frames:
+        # Get all frames (might not start at 0)
+        frames = sorted(df["frame"].unique())
+
+        if len(frames) < min_valid_frames:
             return None
 
         for frame in frames:
@@ -88,9 +92,9 @@ def extract_multi_landmarks(path, min_valid_frames=5):
             right_valid = False
             face_valid = False
 
-            # Left hand
+            # Left hand (need at least 10 valid points)
             left = frame_df[frame_df["type"] == "left_hand"]
-            if len(left) >= 15:
+            if len(left) > 0:
                 valid_count = 0
                 for i in range(21):
                     row = left[left["landmark_index"] == i]
@@ -98,13 +102,13 @@ def extract_multi_landmarks(path, min_valid_frames=5):
                         frame_points[pos] = row[['x', 'y', 'z']].values[0]
                         valid_count += 1
                     pos += 1
-                left_valid = (valid_count >= 15)
+                left_valid = (valid_count >= 10)  # Relaxed from 15
             else:
                 pos += 21
 
-            # Right hand
+            # Right hand (need at least 10 valid points)
             right = frame_df[frame_df["type"] == "right_hand"]
-            if len(right) >= 15:
+            if len(right) > 0:
                 valid_count = 0
                 for i in range(21):
                     row = right[right["landmark_index"] == i]
@@ -112,11 +116,11 @@ def extract_multi_landmarks(path, min_valid_frames=5):
                         frame_points[pos] = row[['x', 'y', 'z']].values[0]
                         valid_count += 1
                     pos += 1
-                right_valid = (valid_count >= 15)
+                right_valid = (valid_count >= 10)  # Relaxed from 15
             else:
                 pos += 21
 
-            # Face
+            # Face (need at least 30% of selected landmarks)
             face = frame_df[frame_df["type"] == "face"]
             if len(face) > 0:
                 valid_count = 0
@@ -126,10 +130,11 @@ def extract_multi_landmarks(path, min_valid_frames=5):
                         frame_points[pos] = row[['x', 'y', 'z']].values[0]
                         valid_count += 1
                     pos += 1
-                face_valid = (valid_count >= len(IMPORTANT_FACE_INDICES) * 0.5)
+                face_valid = (valid_count >= len(IMPORTANT_FACE_INDICES) * 0.3)  # Relaxed from 0.5
 
+            # Accept frame if we have at least 20% valid data overall
             valid_ratio = 1 - np.isnan(frame_points).mean()
-            if valid_ratio >= 0.40:
+            if valid_ratio >= 0.20:  # Relaxed from 0.40
                 frame_points = np.nan_to_num(frame_points, nan=0.0)
                 seq.append(frame_points)
                 left_valid_frames.append(left_valid)
@@ -146,7 +151,9 @@ def extract_multi_landmarks(path, min_valid_frames=5):
             'face_valid': np.array(face_valid_frames)
         }
 
-    except Exception:
+    except Exception as e:
+        # Uncomment for debugging:
+        # print(f"Error processing {path}: {e}")
         return None
 
 
@@ -338,10 +345,14 @@ def main():
     print("\nLoading metadata...")
     train_df, _ = load_kaggle_asl_data(base_path)
 
+    print(f"Total samples in train.csv: {len(train_df)}")
+
     # Convert to simple tuples for multiprocessing compatibility
     rows = [(row['path'], row['sign']) for _, row in train_df.iterrows()]
 
     print("\nProcessing sequences with BOTH hands + FACE (enhanced)...")
+    print("This may take a few minutes...")
+
     with Pool(cpu_count()) as pool:
         results = list(tqdm(
             pool.imap(
@@ -354,17 +365,28 @@ def main():
         ))
 
     X_list, frame_masks_list, modality_masks_list, y_list = [], [], [], []
+    failed_count = 0
     for feat, frame_mask, modality_mask, sign in results:
         if feat is not None and frame_mask is not None:
             X_list.append(feat)
             frame_masks_list.append(frame_mask)
             modality_masks_list.append(modality_mask)
             y_list.append(sign)
+        else:
+            failed_count += 1
 
     if not X_list:
-        print("No valid sequences extracted!")
+        print(f"\n❌ No valid sequences extracted!")
+        print(f"Failed to process: {failed_count}/{len(results)} files")
+        print("\nTroubleshooting tips:")
+        print("1. Check that parquet files contain 'left_hand', 'right_hand', or 'face' types")
+        print("2. Verify files have at least 3 frames")
+        print("3. Ensure landmark data is not all NaN")
         return
 
+    print(f"\n✓ Successfully processed: {len(X_list)}/{len(results)} files")
+    print(f"✗ Failed: {failed_count}/{len(results)} files")
+
     X = np.stack(X_list)
     frame_masks = np.stack(frame_masks_list)
     modality_masks = np.stack(modality_masks_list)
@@ -455,6 +477,10 @@ def main():
     best_acc = 0.0
     epochs = 70
 
+    print("\n" + "=" * 60)
+    print("TRAINING START")
+    print("=" * 60)
+
     for epoch in range(epochs):
         model.train()
         total_loss = correct = total = 0
@@ -508,7 +534,9 @@ def main():
         else:
             print()
 
-    print(f"\nBest test accuracy: {best_acc:.2f}%")
+    print("\n" + "=" * 60)
+    print(f"TRAINING COMPLETE - Best test accuracy: {best_acc:.2f}%")
+    print("=" * 60)
 
 
 if __name__ == "__main__":