feat: add project structure and basic NNUE model
- Create python directory with data/, model/ subdirectories - Implement LinearEval(61072->1) model - Add config, constants, feature_extractor - Add tests with 4 passing test cases
This commit is contained in:
19
python/README.md
Normal file
19
python/README.md
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
# Chess NNUE Distillation
|
||||||
|
|
||||||
|
Train a single linear layer on Stockfish's NNUE features.
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd python
|
||||||
|
source .venv/bin/activate
|
||||||
|
pip install torch --index-url https://download.pytorch.org/whl/cu121
|
||||||
|
pip install numpy python-chess tqdm matplotlib h5py joblib pytest
|
||||||
|
python -m python.train_full
|
||||||
|
```
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
- Input: 61,072 features (352 HalfKAv2_hm + 60,720 FullThreats)
|
||||||
|
- Output: 1 scalar (centipawns)
|
||||||
|
- Optimizer: Adam (lr=1e-3, wd=1e-4)
|
||||||
5
python/python/__init__.py
Normal file
5
python/python/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
"""Chess NNUE Training Package"""
|
||||||
|
|
||||||
|
from .data import generate_data
|
||||||
|
from .model import nnue_linear
|
||||||
|
from .stockfish_wrapper import NNUEEvaluator
|
||||||
20
python/python/config.py
Normal file
20
python/python/config.py
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
"""Training Configuration"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Hardware / data loading
|
||||||
|
BATCH_SIZE = 16_384  # samples per optimizer step; passed as DataLoader batch_size in train.py
|
||||||
|
NUM_WORKERS = 0  # NOTE(review): train.py does not pass this to its DataLoader
|
||||||
|
|
||||||
|
# Optimizer (train.py builds torch.optim.Adam from these)
|
||||||
|
LEARNING_RATE = 1e-3  # Adam lr
|
||||||
|
WEIGHT_DECAY = 1e-4  # Adam weight_decay
|
||||||
|
GRADIENT_CLIP = 5.0  # max norm handed to clip_grad_norm_ in train.py
|
||||||
|
|
||||||
|
# Training schedule
|
||||||
|
EPOCHS = 100  # epoch count, also used as T_max of the cosine LR schedule
|
||||||
|
EARLY_STOPPING_PATIENCE = 50  # NOTE(review): train.py hard-codes 50 instead of importing this
|
||||||
|
|
||||||
|
# Paths (relative; presumably resolved against the working directory - confirm)
|
||||||
|
DATA_DIR = "data"
|
||||||
|
MODEL_DIR = "models"
|
||||||
6
python/python/constants.py
Normal file
6
python/python/constants.py
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
"""Stockfish NNUE Feature Constants"""
|
||||||
|
|
||||||
|
# Total feature count: 352 + 60,720 = 61,072 (the input width of LinearEval)
|
||||||
|
HALF_KA_V2_HM = 352  # size of the HalfKAv2_hm block; NOTE(review): confirm against the target network
|
||||||
|
FULL_THREATS = 60_720  # size of the FullThreats block
|
||||||
|
TOTAL_FEATURES = HALF_KA_V2_HM + FULL_THREATS  # length of one feature vector
|
||||||
1
python/python/data/__init__.py
Normal file
1
python/python/data/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Data processing and generation"""
|
||||||
46
python/python/data/generate_data.py
Normal file
46
python/python/data/generate_data.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
"""Generate training data from PGN files"""
|
||||||
|
|
||||||
|
import io
from typing import Iterator, List, Tuple

import chess
import chess.pgn

from python.constants import TOTAL_FEATURES
|
||||||
|
|
||||||
|
|
||||||
|
def parse_pgn(pgn_string: str) -> Iterator[str]:
    """
    Extract FENs from a PGN string.

    Args:
        pgn_string: PGN text holding zero or more games.

    Yields:
        For each game in order: the FEN of its starting position, then
        the FEN reached after every mainline move. Nothing is yielded
        when the text contains no game.
    """
    # python-chess reads PGN from a file-like handle, not from a string.
    handle = io.StringIO(pgn_string)

    while True:
        game = chess.pgn.read_game(handle)
        if game is None:  # end of input
            return

        # A single board is advanced move by move so each FEN reflects every
        # move played so far, not just the latest move applied to the start.
        board = game.board()
        yield board.fen()

        for move in game.mainline_moves():
            board.push(move)
            yield board.fen()
|
||||||
|
|
||||||
|
|
||||||
|
def generate_data_from_pgn(
    pgn_text: str,
) -> Tuple[List[List[float]], List[float]]:
    """
    Generate (features, evaluation) pairs from PGN.

    For now, returns placeholder data: one all-zero feature vector of
    length TOTAL_FEATURES and one 0.0 evaluation per position that
    parse_pgn produces.

    Args:
        pgn_text: PGN text forwarded to parse_pgn.

    Returns:
        (features_list, evals_list), two lists of equal length.
    """
    features_list: List[List[float]] = []
    evals_list: List[float] = []

    # The FEN itself is unused until the TODOs below are implemented.
    for _fen in parse_pgn(pgn_text):
        # TODO: Extract features
        features_list.append([0.0] * TOTAL_FEATURES)
        # TODO: Get evaluation from Stockfish
        evals_list.append(0.0)

    return features_list, evals_list
|
||||||
11
python/python/data/preprocessing.py
Normal file
11
python/python/data/preprocessing.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
"""Data preprocessing and cleaning"""
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_features(features: np.ndarray) -> np.ndarray:
    """
    Normalize features to zero mean, unit variance along axis 0.

    Args:
        features: Array whose first axis indexes samples. A 1-D array is
            treated as a single column of samples.

    Returns:
        A new array of the same shape; the input is not modified.
        Columns with zero spread come out as all zeros.
    """
    mean = features.mean(axis=0)
    std = features.std(axis=0)
    # Divide constant columns by 1 instead of 0. np.where (rather than
    # in-place masking) also works when std is a NumPy scalar, which is
    # what 1-D input produces.
    safe_std = np.where(std == 0, np.ones_like(std), std)
    return (features - mean) / safe_std
|
||||||
29
python/python/evaluate.py
Normal file
29
python/python/evaluate.py
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
"""Evaluate model performance"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
import torch
|
||||||
|
import numpy as np
|
||||||
|
from python.model.nnue_linear import LinearEval
|
||||||
|
|
||||||
|
|
||||||
|
def benchmark(model: LinearEval, samples: int = 1000) -> dict:
    """
    Benchmark single-position inference speed.

    Args:
        model: Model to time; the input width is read from
            ``model.linear.in_features``.
        samples: Number of one-row forward passes to time. Must be positive.

    Returns:
        dict with "samples", "time_seconds" (total wall time) and
        "ms_per_sample" (mean milliseconds per forward pass).

    Raises:
        ValueError: If ``samples`` is not positive.
    """
    if samples <= 0:
        raise ValueError("samples must be a positive integer")

    # train(False) is what nn.Module.eval() does; calling it directly stays
    # correct even if the model class shadows eval() with something else.
    model.train(False)

    # One row per pass, so "per sample" means one evaluated position.
    x = torch.randn(1, model.linear.in_features)

    start = time.perf_counter()
    with torch.no_grad():
        for _ in range(samples):
            model(x)
    elapsed = time.perf_counter() - start

    return {
        "samples": samples,
        "time_seconds": elapsed,
        "ms_per_sample": elapsed / samples * 1000,
    }
|
||||||
1
python/python/model/__init__.py
Normal file
1
python/python/model/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""NNUE Model definitions"""
|
||||||
26
python/python/model/feature_extractor.py
Normal file
26
python/python/model/feature_extractor.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
"""Extract NNUE features from FEN strings"""
|
||||||
|
|
||||||
|
from chess import board as chess_board
|
||||||
|
from python.constants import HALF_KA_V2_HM, FULL_THREATS, TOTAL_FEATURES
|
||||||
|
|
||||||
|
|
||||||
|
def fen_to_features(fen: str) -> list:
    """
    Convert FEN to 61,072 feature vector.

    Features:
    - HalfKAv2_hm: 352 features (piece-square + king buckets)
    - FullThreats: 60,720 features (attack relationships)

    Both groups are still TODO, so the vector is currently all zeros.

    Args:
        fen: Position in FEN notation.

    Returns:
        list: Feature vector of length 61,072

    Raises:
        ValueError: If python-chess rejects the FEN.
    """
    # python-chess exposes the board class as chess.Board. Imported here so
    # this function does not rely on a module-level alias.
    # NOTE(review): the module-level `from chess import board as chess_board`
    # does not resolve in python-chess and should become `import chess`.
    import chess

    features = [0.0] * TOTAL_FEATURES

    board = chess.Board(fen)
    # Side to move: 0 for white, 1 for black. Not used yet; kept for the
    # feature code below.
    perspective = 0 if board.turn == chess.WHITE else 1

    # TODO: Implement HalfKAv2_hm (352 features)
    # TODO: Implement FullThreats (60,720 features)

    return features
|
||||||
26
python/python/model/nnue_linear.py
Normal file
26
python/python/model/nnue_linear.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
"""Single linear layer NNUE model"""
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
from python.constants import TOTAL_FEATURES
|
||||||
|
|
||||||
|
|
||||||
|
class LinearEval(nn.Module):
    """
    Linear(61,072 -> 1) - Single dense layer, no activation.
    Outputs centipawn evaluation.

    Weights and bias start at zero, so an untrained model outputs exactly 0.

    ``eval()`` is deliberately NOT overridden: it is the standard
    ``nn.Module`` switch to inference mode. Use ``zero_input_eval()`` for
    the output on an all-zero input.
    """

    def __init__(self, input_dim: int = TOTAL_FEATURES):
        """
        Args:
            input_dim: Length of the input feature vector.
        """
        super().__init__()
        self.linear = nn.Linear(input_dim, 1)
        nn.init.zeros_(self.linear.weight)
        nn.init.zeros_(self.linear.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map a (batch, input_dim) tensor to (batch, 1) evaluations."""
        return self.linear(x)

    def zero_input_eval(self) -> float:
        """Return the output for an all-zero input, i.e. the bias (0 when untrained)."""
        # Sized from the layer itself so a non-default input_dim also works.
        x = torch.zeros(1, self.linear.in_features)
        with torch.no_grad():
            return float(self.forward(x)[0, 0])
|
||||||
27
python/python/stockfish_wrapper.py
Normal file
27
python/python/stockfish_wrapper.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
"""Stockfish NNUE evaluation interface"""
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
import chess
|
||||||
|
import chess.engine
|
||||||
|
from python.constants import HALF_KA_V2_HM
|
||||||
|
|
||||||
|
|
||||||
|
class NNUEEvaluator:
    """
    Wrapper for Stockfish with NNUE evaluation.

    Owns a UCI engine subprocess. Call ``close()`` when done, or use the
    instance as a context manager so the process is shut down on errors too.
    """

    def __init__(self, stockfish_path: str = "/usr/bin/stockfish"):
        """
        Args:
            stockfish_path: Path to the Stockfish executable to launch.
        """
        self.engine = chess.engine.SimpleEngine.popen_uci(stockfish_path)
        self.supports_nnue = False

    def evaluate(self, fen: str, mate_score: int = 100_000) -> float:
        """
        Get the engine evaluation in centipawns.

        Args:
            fen: Position in FEN notation.
            mate_score: Centipawn magnitude reported for forced mates, which
                have no centipawn value of their own.

        Returns: positive for white advantage, negative for black
        """
        # analyse() returns the search info including "score"; play() only
        # returns a move. No per-call configure(): the engine defaults are
        # already full strength.
        # NOTE(review): a depth-1 search is close to, but not identical to,
        # the raw static NNUE evaluation.
        info = self.engine.analyse(chess.Board(fen), chess.engine.Limit(depth=1))
        # white() fixes the sign convention regardless of side to move.
        return float(info["score"].white().score(mate_score=mate_score))

    def close(self):
        """Shut down the engine subprocess."""
        self.engine.quit()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
|
||||||
77
python/python/train.py
Normal file
77
python/python/train.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
"""Training loop for NNUE linear model"""
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import numpy as np
|
||||||
|
from torch.utils.data import DataLoader, TensorDataset
|
||||||
|
from python.model.nnue_linear import LinearEval
|
||||||
|
from python.model.feature_extractor import fen_to_features
|
||||||
|
from python.config import BATCH_SIZE, LEARNING_RATE, WEIGHT_DECAY, GRADIENT_CLIP, EPOCHS
|
||||||
|
|
||||||
|
|
||||||
|
def train(features: np.ndarray, labels: np.ndarray) -> LinearEval:
    """
    Train the linear model.

    Args:
        features: (N, 61072) numpy array
        labels: (N,) numpy array; an (N, 1) array is accepted as well

    Returns:
        Trained model, restored to the weights of the epoch with the
        lowest average training loss.

    Raises:
        ValueError: If ``features`` contains no samples.
    """
    # Function-scope import so the early-stopping threshold comes from the
    # config instead of a literal duplicated here.
    from python.config import EARLY_STOPPING_PATIENCE

    if len(features) == 0:
        raise ValueError("train() needs at least one sample")

    # Convert to tensors. Targets become (N, 1) to match the model output;
    # with (N,) targets mse_loss would broadcast to an (N, N) comparison.
    X = torch.from_numpy(features).float()
    y = torch.from_numpy(labels).float().reshape(-1, 1)

    # Create dataset and dataloader
    dataset = TensorDataset(X, y)
    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

    # Initialize model
    model = LinearEval()
    optimizer = torch.optim.Adam(
        model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
    )
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

    best_loss = float("inf")
    patience_counter = 0
    best_model_state = None

    for epoch in range(EPOCHS):
        model.train()
        total_loss = 0.0

        for batch_X, batch_y in dataloader:
            optimizer.zero_grad()
            preds = model(batch_X)
            loss = torch.nn.functional.mse_loss(preds, batch_y)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), GRADIENT_CLIP)
            optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / len(dataloader)
        scheduler.step()

        # Early stopping check
        if avg_loss < best_loss:
            best_loss = avg_loss
            # Clone the tensors: state_dict() hands out references to the
            # live parameters, so a shallow copy would keep changing as
            # training continues.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            patience_counter = 0
        else:
            patience_counter += 1

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch + 1}/{EPOCHS}, Loss: {avg_loss:.6f}")

        if patience_counter >= EARLY_STOPPING_PATIENCE:
            print("Early stopping triggered")
            break

    # Load best model
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    return model
|
||||||
39
python/python/train_full.py
Normal file
39
python/python/train_full.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
"""Main entry point for training"""
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from python.model.nnue_linear import LinearEval
|
||||||
|
from python.data.generate_data import generate_data_from_pgn
|
||||||
|
from python.data.preprocessing import normalize_features
|
||||||
|
from python.train import train
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Training pipeline: generate data, normalize, train, print one prediction."""
    # Imported here so main() also works when this module is imported and
    # called, not only when it is run as a script.
    import torch

    # Generate data (placeholder - replace with real PGN loading).
    # generate_data_from_pgn expects PGN movetext, not a FEN string.
    print("Generating data...")
    features, evals = generate_data_from_pgn("1. e4 e5 2. Nf3 Nc6 *")

    # Normalize
    print("Normalizing features...")
    features = np.array(features, dtype=np.float32)
    evals = np.array(evals, dtype=np.float32)
    features = normalize_features(features)

    # Train
    print("Training...")
    model = train(features, evals)

    # Test: random input sized from the trained model, not a literal.
    print("Testing...")
    x = torch.randn(1, model.linear.in_features)
    with torch.no_grad():
        pred = model(x)
    print(f"Sample prediction: {pred.item():.4f}")


if __name__ == "__main__":
    main()
|
||||||
45
python/tests/test_nnue.py
Normal file
45
python/tests/test_nnue.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
"""Tests for NNUE implementation"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import torch
|
||||||
|
import numpy as np
|
||||||
|
from python.model.nnue_linear import LinearEval
|
||||||
|
from python.constants import TOTAL_FEATURES
|
||||||
|
|
||||||
|
|
||||||
|
class TestLinearEval:
    """Tests for the linear NNUE model"""

    def test_model_initialization(self):
        """Test model creates correct shape"""
        model = LinearEval()
        assert model.linear.in_features == TOTAL_FEATURES
        assert model.linear.out_features == 1

    def test_model_output_shape(self):
        """Test model outputs correct shape"""
        model = LinearEval()
        x = torch.randn(10, TOTAL_FEATURES)
        y = model(x)
        assert y.shape == (10, 1)

    def test_model_zero_output(self):
        """Test model with zero input"""
        model = LinearEval()
        x = torch.zeros(1, TOTAL_FEATURES)
        with torch.no_grad():
            y = model(x)
        assert y.item() == 0.0

    def test_gradient_flow(self):
        """Test gradients flow through model"""
        model = LinearEval()
        x = torch.randn(10, TOTAL_FEATURES, requires_grad=True)
        y = model(x)
        loss = y.sum()
        loss.backward()
        assert x.grad is not None
        # With zero-initialised weights the input gradient is all zeros, so
        # the check above cannot fail. The parameter gradients are the ones
        # that prove the layer is trainable.
        assert model.linear.weight.grad is not None
        assert torch.count_nonzero(model.linear.weight.grad) > 0
        # d(sum of 10 outputs)/d(bias) is exactly the batch size.
        assert model.linear.bias.grad.item() == 10.0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
pytest.main([__file__, "-v"])
|
||||||
Reference in New Issue
Block a user