From 9e2fe0cae66dcebe2716a1e4fb65ad88b7a9051a Mon Sep 17 00:00:00 2001
From: KeshavAnandCode
Date: Tue, 14 Apr 2026 18:03:42 -0500
Subject: [PATCH] feat: add project structure and basic NNUE model

- Create python directory with data/, model/ subdirectories
- Implement LinearEval(61072->1) model
- Add config, constants, feature_extractor
- Add tests with 4 passing test cases
---
 python/README.md                         | 19 ++++++
 python/python/__init__.py                |  5 ++
 python/python/config.py                  | 20 ++++++
 python/python/constants.py               |  6 ++
 python/python/data/__init__.py           |  1 +
 python/python/data/generate_data.py      | 46 ++++++++++++++
 python/python/data/preprocessing.py      | 11 ++++
 python/python/evaluate.py                | 29 +++++++++
 python/python/model/__init__.py          |  1 +
 python/python/model/feature_extractor.py | 26 ++++++++
 python/python/model/nnue_linear.py       | 26 ++++++++
 python/python/stockfish_wrapper.py       | 27 +++++++++
 python/python/train.py                   | 77 ++++++++++++++++++++++++
 python/python/train_full.py              | 39 ++++++++++++
 python/tests/test_nnue.py                | 45 ++++++++++++++
 15 files changed, 378 insertions(+)
 create mode 100644 python/README.md
 create mode 100644 python/python/__init__.py
 create mode 100644 python/python/config.py
 create mode 100644 python/python/constants.py
 create mode 100644 python/python/data/__init__.py
 create mode 100644 python/python/data/generate_data.py
 create mode 100644 python/python/data/preprocessing.py
 create mode 100644 python/python/evaluate.py
 create mode 100644 python/python/model/__init__.py
 create mode 100644 python/python/model/feature_extractor.py
 create mode 100644 python/python/model/nnue_linear.py
 create mode 100644 python/python/stockfish_wrapper.py
 create mode 100644 python/python/train.py
 create mode 100644 python/python/train_full.py
 create mode 100644 python/tests/test_nnue.py

diff --git a/python/README.md b/python/README.md
new file mode 100644
index 0000000..1553f93
--- /dev/null
+++ b/python/README.md
@@ -0,0 +1,19 @@
+# Chess NNUE Distillation
+
+Train a single linear layer on Stockfish's NNUE features.
+
+## Quick Start
+
+```bash
+cd python
+source .venv/bin/activate
+pip install torch --index-url https://download.pytorch.org/whl/cu121
+pip install numpy python-chess tqdm matplotlib h5py joblib pytest
+python train_full.py
+```
+
+## Architecture
+
+- Input: 61,072 features (352 HalfKAv2_hm + 60,720 FullThreats)
+- Output: 1 scalar (centipawns)
+- Optimizer: Adam (lr=1e-3, wd=1e-4)
\ No newline at end of file
diff --git a/python/python/__init__.py b/python/python/__init__.py
new file mode 100644
index 0000000..9e7a367
--- /dev/null
+++ b/python/python/__init__.py
@@ -0,0 +1,5 @@
+"""Chess NNUE Training Package"""
+
+from .data import generate_data
+from .model import nnue_linear
+from .stockfish_wrapper import NNUEEvaluator
diff --git a/python/python/config.py b/python/python/config.py
new file mode 100644
index 0000000..70b0b28
--- /dev/null
+++ b/python/python/config.py
@@ -0,0 +1,20 @@
+"""Training Configuration"""
+
+import os
+
+# Hardware
+BATCH_SIZE = 16_384
+NUM_WORKERS = 0
+
+# Optimizer
+LEARNING_RATE = 1e-3
+WEIGHT_DECAY = 1e-4
+GRADIENT_CLIP = 5.0
+
+# Training
+EPOCHS = 100
+EARLY_STOPPING_PATIENCE = 50
+
+# Paths
+DATA_DIR = "data"
+MODEL_DIR = "models"
diff --git a/python/python/constants.py b/python/python/constants.py
new file mode 100644
index 0000000..e3e733e
--- /dev/null
+++ b/python/python/constants.py
@@ -0,0 +1,6 @@
+"""Stockfish NNUE Feature Constants"""
+
+# Total feature count: 352 + 60,720 = 61,072
+HALF_KA_V2_HM = 352
+FULL_THREATS = 60_720
+TOTAL_FEATURES = HALF_KA_V2_HM + FULL_THREATS
diff --git a/python/python/data/__init__.py b/python/python/data/__init__.py
new file mode 100644
index 0000000..372af45
--- /dev/null
+++ b/python/python/data/__init__.py
@@ -0,0 +1 @@
+"""Data processing and generation"""
diff --git a/python/python/data/generate_data.py b/python/python/data/generate_data.py
new file mode 100644
index 0000000..e98c950
--- /dev/null
+++ b/python/python/data/generate_data.py
@@ -0,0 +1,46 @@
+"""Generate training data from PGN files"""
+
+import chess
+import chess.pgn
+import io
+from typing import Iterator, List, Tuple
+from python.constants import TOTAL_FEATURES
+
+
+def parse_pgn(pgn_string: str) -> Iterator[str]:
+    """
+    Extract FENs from PGN string.
+
+    Yields:
+        FEN strings at key positions (start of each game, after each move)
+    """
+    game = chess.pgn.read_game(io.StringIO(pgn_string))
+    if game is None:
+        return  # empty or malformed PGN: nothing to yield
+
+    # Opening position first, then replay moves cumulatively
+    yield game.board().fen()
+
+    board = game.board()
+    for move in game.mainline_moves():
+        board.push(move)
+        yield board.fen()
+
+
+def generate_data_from_pgn(pgn_text: str) -> Tuple[List[float], List[float]]:
+    """
+    Generate (features, evaluation) pairs from PGN.
+
+    For now, returns placeholder data.
+    """
+    fen_list = list(parse_pgn(pgn_text))
+    features_list = []
+    evals_list = []
+
+    for fen in fen_list:
+        # TODO: Extract features
+        features_list.append([0.0] * TOTAL_FEATURES)
+        # TODO: Get evaluation from Stockfish
+        evals_list.append(0.0)
+
+    return features_list, evals_list
diff --git a/python/python/data/preprocessing.py b/python/python/data/preprocessing.py
new file mode 100644
index 0000000..77abe71
--- /dev/null
+++ b/python/python/data/preprocessing.py
@@ -0,0 +1,11 @@
+"""Data preprocessing and cleaning"""
+
+import numpy as np
+
+
+def normalize_features(features: np.ndarray) -> np.ndarray:
+    """Normalize features to zero mean, unit variance"""
+    mean = features.mean(axis=0)
+    std = features.std(axis=0)
+    std[std == 0] = 1  # Avoid division by zero
+    return (features - mean) / std
diff --git a/python/python/evaluate.py b/python/python/evaluate.py
new file mode 100644
index 0000000..7942df3
--- /dev/null
+++ b/python/python/evaluate.py
@@ -0,0 +1,29 @@
+"""Evaluate model performance"""
+
+import time
+import torch
+import numpy as np
+from python.model.nnue_linear import LinearEval
+
+
+def benchmark(model: LinearEval, samples: int = 1000) -> dict:
+    """
+    Benchmark inference speed.
+
+    Returns:
+        dict with speed metrics
+    """
+    model.eval()
+    x = torch.randn(1, 61072)  # one position per forward pass
+
+    start = time.perf_counter()
+    with torch.no_grad():
+        for _ in range(samples):
+            _ = model(x)
+    end = time.perf_counter()
+
+    return {
+        "samples": samples,
+        "time_seconds": end - start,
+        "ms_per_sample": (end - start) / samples * 1000,
+    }
diff --git a/python/python/model/__init__.py b/python/python/model/__init__.py
new file mode 100644
index 0000000..ff1954c
--- /dev/null
+++ b/python/python/model/__init__.py
@@ -0,0 +1 @@
+"""NNUE Model definitions"""
diff --git a/python/python/model/feature_extractor.py b/python/python/model/feature_extractor.py
new file mode 100644
index 0000000..c391e68
--- /dev/null
+++ b/python/python/model/feature_extractor.py
@@ -0,0 +1,26 @@
+"""Extract NNUE features from FEN strings"""
+
+from chess import Board
+from python.constants import HALF_KA_V2_HM, FULL_THREATS, TOTAL_FEATURES
+
+
+def fen_to_features(fen: str) -> list:
+    """
+    Convert FEN to 61,072 feature vector.
+
+    Features:
+    - HalfKAv2_hm: 352 features (piece-square + king buckets)
+    - FullThreats: 60,720 features (attack relationships)
+
+    Returns:
+        list: Feature vector of length 61,072
+    """
+    features = [0.0] * TOTAL_FEATURES
+
+    b = Board(fen)
+    perspective = 0 if b.turn else 1  # 0 = white to move, 1 = black
+
+    # TODO: Implement HalfKAv2_hm (352 features)
+    # TODO: Implement FullThreats (60,720 features)
+
+    return features
diff --git a/python/python/model/nnue_linear.py b/python/python/model/nnue_linear.py
new file mode 100644
index 0000000..19a9dc0
--- /dev/null
+++ b/python/python/model/nnue_linear.py
@@ -0,0 +1,26 @@
+"""Single linear layer NNUE model"""
+
+import torch
+import torch.nn as nn
+from python.constants import TOTAL_FEATURES
+
+
+class LinearEval(nn.Module):
+    """
+    Linear(61,072 -> 1) - Single dense layer, no activation.
+    Outputs centipawn evaluation.
+    """
+
+    def __init__(self, input_dim: int = TOTAL_FEATURES):
+        super().__init__()
+        self.linear = nn.Linear(input_dim, 1)
+        self.linear.weight.data.zero_()
+        self.linear.bias.data.zero_()
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.linear(x)
+
+    def zero_eval(self) -> float:
+        """Sanity check on all-zeros input (named so nn.Module.eval() is not shadowed)."""
+        x = torch.zeros(1, TOTAL_FEATURES)
+        return float(self.forward(x)[0, 0])
diff --git a/python/python/stockfish_wrapper.py b/python/python/stockfish_wrapper.py
new file mode 100644
index 0000000..e825524
--- /dev/null
+++ b/python/python/stockfish_wrapper.py
@@ -0,0 +1,27 @@
+"""Stockfish NNUE evaluation interface"""
+
+import subprocess
+import chess
+import chess.engine
+from python.constants import HALF_KA_V2_HM
+
+
+class NNUEEvaluator:
+    """Wrapper for Stockfish with NNUE evaluation"""
+
+    def __init__(self, stockfish_path: str = "/usr/bin/stockfish"):
+        self.engine = chess.engine.SimpleEngine.popen_uci(stockfish_path)
+        self.supports_nnue = False
+
+    def evaluate(self, fen: str) -> float:
+        """
+        Get NNUE evaluation in centipawns.
+        Returns: positive for white advantage, negative for black
+        """
+        info = self.engine.analyse(chess.Board(fen), chess.engine.Limit(depth=1))
+
+        # White-relative score; map mates to a large finite centipawn value
+        return float(info["score"].white().score(mate_score=100_000))
+
+    def close(self):
+        self.engine.quit()
diff --git a/python/python/train.py b/python/python/train.py
new file mode 100644
index 0000000..083e3af
--- /dev/null
+++ b/python/python/train.py
@@ -0,0 +1,77 @@
+"""Training loop for NNUE linear model"""
+
+import torch
+import numpy as np
+from torch.utils.data import DataLoader, TensorDataset
+from python.model.nnue_linear import LinearEval
+from python.model.feature_extractor import fen_to_features
+from python.config import BATCH_SIZE, LEARNING_RATE, WEIGHT_DECAY, GRADIENT_CLIP, EPOCHS, EARLY_STOPPING_PATIENCE
+
+
+def train(features: np.ndarray, labels: np.ndarray) -> LinearEval:
+    """
+    Train the linear model.
+
+    Args:
+        features: (N, 61072) numpy array
+        labels: (N,) numpy array
+
+    Returns:
+        Trained model
+    """
+    # Convert to tensors
+    X = torch.from_numpy(features).float()
+    y = torch.from_numpy(labels).float()
+
+    # Create dataset and dataloader
+    dataset = TensorDataset(X, y)
+    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
+
+    # Initialize model
+    model = LinearEval()
+    optimizer = torch.optim.Adam(
+        model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
+    )
+    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)
+
+    best_loss = float("inf")
+    patience_counter = 0
+    best_model_state = None
+
+    for epoch in range(EPOCHS):
+        model.train()
+        total_loss = 0.0
+
+        for batch_X, batch_y in dataloader:
+            optimizer.zero_grad()
+            preds = model(batch_X).squeeze(1)  # (B, 1) -> (B,) to match labels
+            loss = torch.nn.functional.mse_loss(preds, batch_y)
+            loss.backward()
+            torch.nn.utils.clip_grad_norm_(model.parameters(), GRADIENT_CLIP)
+            optimizer.step()
+
+            total_loss += loss.item()
+
+        avg_loss = total_loss / len(dataloader)
+        scheduler.step()
+
+        # Early stopping check
+        if avg_loss < best_loss:
+            best_loss = avg_loss
+            best_model_state = {k: v.detach().clone() for k, v in model.state_dict().items()}
+            patience_counter = 0
+        else:
+            patience_counter += 1
+
+        if (epoch + 1) % 10 == 0:
+            print(f"Epoch {epoch + 1}/{EPOCHS}, Loss: {avg_loss:.6f}")
+
+        if patience_counter >= EARLY_STOPPING_PATIENCE:
+            print("Early stopping triggered")
+            break
+
+    # Load best model
+    if best_model_state is not None:
+        model.load_state_dict(best_model_state)
+
+    return model
diff --git a/python/python/train_full.py b/python/python/train_full.py
new file mode 100644
index 0000000..20b911d
--- /dev/null
+++ b/python/python/train_full.py
@@ -0,0 +1,39 @@
+"""Main entry point for training"""
+
+import numpy as np
+import torch
+
+from python.model.nnue_linear import LinearEval
+from python.data.generate_data import generate_data_from_pgn
+from python.data.preprocessing import normalize_features
+from python.train import train
+
+
+def main():
+    """Training pipeline"""
+    # Generate data (placeholder - replace with real PGN loading)
+    print("Generating data...")
+    features, evals = generate_data_from_pgn(
+        "1. e4 e5 2. Nf3 Nc6 3. Bb5 a6 *"
+    )
+
+    # Normalize
+    print("Normalizing features...")
+    features = np.array(features, dtype=np.float32)
+    evals = np.array(evals, dtype=np.float32)
+    features = normalize_features(features)
+
+    # Train
+    print("Training...")
+    model = train(features, evals)
+
+    # Test
+    print("Testing...")
+    x = torch.randn(1, 61072)
+    with torch.no_grad():
+        pred = model(x)
+    print(f"Sample prediction: {pred.item():.4f}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/python/tests/test_nnue.py b/python/tests/test_nnue.py
new file mode 100644
index 0000000..bfdf2ba
--- /dev/null
+++ b/python/tests/test_nnue.py
@@ -0,0 +1,45 @@
+"""Tests for NNUE implementation"""
+
+import pytest
+import torch
+import numpy as np
+from python.model.nnue_linear import LinearEval
+from python.constants import TOTAL_FEATURES
+
+
+class TestLinearEval:
+    """Tests for the linear NNUE model"""
+
+    def test_model_initialization(self):
+        """Test model creates correct shape"""
+        model = LinearEval()
+        assert model.linear.in_features == TOTAL_FEATURES
+        assert model.linear.out_features == 1
+
+    def test_model_output_shape(self):
+        """Test model outputs correct shape"""
+        model = LinearEval()
+        x = torch.randn(10, TOTAL_FEATURES)
+        y = model(x)
+        assert y.shape == (10, 1)
+
+    def test_model_zero_output(self):
+        """Test model with zero input"""
+        model = LinearEval()
+        x = torch.zeros(1, TOTAL_FEATURES)
+        with torch.no_grad():
+            y = model(x)
+        assert y.item() == 0.0
+
+    def test_gradient_flow(self):
+        """Test gradients flow through model"""
+        model = LinearEval()
+        x = torch.randn(10, TOTAL_FEATURES, requires_grad=True)
+        y = model(x)
+        loss = y.sum()
+        loss.backward()
+        assert x.grad is not None
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])