stable but browser automation needs work

This commit is contained in:
2025-12-19 17:05:23 -06:00
parent 8ea70ef868
commit 35dbfcb8f2
3 changed files with 121 additions and 110 deletions

View File

@@ -114,6 +114,7 @@ void batched_update(std::shared_ptr<clickhouse::Client> client_ptr, std::string
for (auto& future : futures_for_this_batch) {
future.get();
spdlog::debug("finished processing another user at future.get()");
}
spdlog::info("Batch complete: processed {} users", futures_for_this_batch.size());
}

View File

@@ -1,5 +1,8 @@
#include "types.hpp"
#include "clickhouse/base/uuid.h"
#include "clickhouse/columns/date.h"
#include "clickhouse/types/types.h"
#include <cstdint>
#include <spdlog/spdlog.h>
#include <unordered_map>
#include <sstream>
@@ -92,13 +95,11 @@ std::string uuid_to_string(const clickhouse::UUID& u) {
// ============================================================================
// Date Parsing Helper
// ============================================================================
uint16_t parse_date_to_clickhouse(const std::string& date_str) {
uint_fast64_t parse_date_to_clickhouse(const std::string& date_str) {
if (date_str.empty()) {
return 0; // Epoch date
}
// Try to parse MM/DD/YYYY format (matches the "%m/%d/%Y" pattern passed to std::get_time below)
std::tm tm = {};
std::istringstream ss(date_str);
ss >> std::get_time(&tm, "%m/%d/%Y");
@@ -109,10 +110,15 @@ uint16_t parse_date_to_clickhouse(const std::string& date_str) {
}
std::time_t time = std::mktime(&tm);
// ClickHouse Date is days since 1970-01-01
return static_cast<uint16_t>(time / 86400);
if (time == -1) {
spdlog::warn("mktime failed for date: {}, using epoch", date_str);
return 0;
}
return static_cast<uint_fast64_t>(time / 86400); // ClickHouse Date = days since 1970-01-01
}
// ============================================================================
// User Operations
// ============================================================================
@@ -199,17 +205,23 @@ std::optional<clickhouse::UUID> get_user_uuid(const CHClient& client,
// Get or Create Stable IDs
// ============================================================================
std::string get_or_create_class(const CHClient& client,
const std::string& user_id,
const api_utils::ClassGrades& class_data) {
spdlog::debug("get_or_create_class: user={}, class={}", user_id, class_data.className);
std::string get_or_create_class(
const CHClient& client,
const std::string& user_id,
const api_utils::ClassGrades& class_data
) {
spdlog::debug("get_or_create_class: user={}, class={}, category={}",
user_id, class_data.className, class_data.category);
// Try to find existing class
std::string class_id;
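// 1. Fetch existing class_id (latest version) considering category
// NOTE(review): the query has LIMIT 1 but no ORDER BY, so it does not itself guarantee the latest row; confirm.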
client->Select(
"SELECT class_id FROM user_classes "
"SELECT class_id "
"FROM user_classes "
"WHERE user_id = '" + sanitize_clickhouse_string(user_id) + "' "
"AND class_name = '" + sanitize_clickhouse_string(class_data.className) + "' "
"AND category = '" + sanitize_clickhouse_string(class_data.category) + "' "
"LIMIT 1",
[&](const Block& b) {
if (b.GetRowCount() > 0) {
@@ -218,69 +230,66 @@ std::string get_or_create_class(const CHClient& client,
}
);
if (!class_id.empty()) {
// Update last_seen
client->Execute(
"ALTER TABLE user_classes UPDATE "
"teacher = '" + sanitize_clickhouse_string(class_data.teacher) + "', "
"period = '" + sanitize_clickhouse_string(class_data.period) + "', "
"category = '" + sanitize_clickhouse_string(class_data.category) + "', "
"last_seen = now() "
"WHERE class_id = '" + sanitize_clickhouse_string(class_id) + "'"
);
return class_id;
// 2. Insert new class if it doesn't exist
if (class_id.empty()) {
Block insert_block;
auto user_col = std::make_shared<ColumnUUID>();
auto name_col = std::make_shared<ColumnString>();
auto teacher_col = std::make_shared<ColumnString>();
auto period_col = std::make_shared<ColumnString>();
auto category_col = std::make_shared<ColumnString>();
user_col->Append(parse_uuid(user_id));
name_col->Append(class_data.className);
teacher_col->Append(class_data.teacher);
period_col->Append(class_data.period);
category_col->Append(class_data.category);
insert_block.AppendColumn("user_id", user_col);
insert_block.AppendColumn("class_name", name_col);
insert_block.AppendColumn("teacher", teacher_col);
insert_block.AppendColumn("period", period_col);
insert_block.AppendColumn("category", category_col);
client->Insert("user_classes", insert_block);
// 3. Fetch the generated class_id
client->Select(
"SELECT class_id "
"FROM user_classes "
"WHERE user_id = '" + sanitize_clickhouse_string(user_id) + "' "
"AND class_name = '" + sanitize_clickhouse_string(class_data.className) + "' "
"AND category = '" + sanitize_clickhouse_string(class_data.category) + "' "
"LIMIT 1",
[&](const Block& b) {
if (b.GetRowCount() > 0) {
class_id = uuid_to_string(b[0]->As<ColumnUUID>()->At(0));
}
}
);
spdlog::info("Created new class: {} [{}] -> {}", class_data.className, class_data.category, class_id);
}
// Create new class
Block insert_block;
auto user_col = std::make_shared<ColumnUUID>();
auto name_col = std::make_shared<ColumnString>();
auto teacher_col = std::make_shared<ColumnString>();
auto period_col = std::make_shared<ColumnString>();
auto category_col = std::make_shared<ColumnString>();
user_col->Append(parse_uuid(user_id));
name_col->Append(class_data.className);
teacher_col->Append(class_data.teacher);
period_col->Append(class_data.period);
category_col->Append(class_data.category);
insert_block.AppendColumn("user_id", user_col);
insert_block.AppendColumn("class_name", name_col);
insert_block.AppendColumn("teacher", teacher_col);
insert_block.AppendColumn("period", period_col);
insert_block.AppendColumn("category", category_col);
client->Insert("user_classes", insert_block);
// Retrieve the created class_id
client->Select(
"SELECT class_id FROM user_classes "
"WHERE user_id = '" + sanitize_clickhouse_string(user_id) + "' "
"AND class_name = '" + sanitize_clickhouse_string(class_data.className) + "' "
"ORDER BY first_seen DESC LIMIT 1",
[&](const Block& b) {
if (b.GetRowCount() > 0) {
class_id = uuid_to_string(b[0]->As<ColumnUUID>()->At(0));
}
}
);
spdlog::info("Created new class: {} -> {}", class_data.className, class_id);
return class_id;
}
std::string get_or_create_assignment(const CHClient& client,
const std::string& user_id,
const std::string& class_id,
const api_utils::AssignmentGrade& assignment_data) {
spdlog::debug("get_or_create_assignment: class={}, assignment={}",
class_id, assignment_data.name);
// Try to find existing assignment
std::string get_or_create_assignment(
const CHClient& client,
const std::string& user_id,
const std::string& class_id,
const api_utils::AssignmentGrade& assignment_data
) {
spdlog::debug("get_or_create_assignment: class={}, assignment={}", class_id, assignment_data.name);
std::string assignment_id;
// 1. Fetch existing assignment_id
client->Select(
"SELECT assignment_id FROM user_assignments "
"SELECT assignment_id "
"FROM user_assignments "
"WHERE user_id = '" + sanitize_clickhouse_string(user_id) + "' "
"AND class_id = '" + sanitize_clickhouse_string(class_id) + "' "
"AND assignment_name = '" + sanitize_clickhouse_string(assignment_data.name) + "' "
@@ -292,48 +301,48 @@ std::string get_or_create_assignment(const CHClient& client,
}
);
if (!assignment_id.empty()) {
// Update last_seen
return assignment_id;
// 2. Insert new assignment if it doesn't exist
if (assignment_id.empty()) {
Block insert_block;
auto class_col = std::make_shared<ColumnUUID>();
auto user_col = std::make_shared<ColumnUUID>();
auto name_col = std::make_shared<ColumnString>();
auto date_col = std::make_shared<ColumnDate>();
auto major_col = std::make_shared<ColumnUInt8>();
class_col->Append(parse_uuid(class_id));
user_col->Append(parse_uuid(user_id));
name_col->Append(assignment_data.name);
date_col->Append(parse_date_to_clickhouse(assignment_data.dueDate));
major_col->Append(assignment_data.isMajorGrade ? 1 : 0);
insert_block.AppendColumn("class_id", class_col);
insert_block.AppendColumn("user_id", user_col);
insert_block.AppendColumn("assignment_name", name_col);
insert_block.AppendColumn("due_date", date_col);
insert_block.AppendColumn("is_major_grade", major_col);
client->Insert("user_assignments", insert_block);
// 3. Fetch the generated assignment_id
client->Select(
"SELECT assignment_id "
"FROM user_assignments "
"WHERE user_id = '" + sanitize_clickhouse_string(user_id) + "' "
"AND class_id = '" + sanitize_clickhouse_string(class_id) + "' "
"AND assignment_name = '" + sanitize_clickhouse_string(assignment_data.name) + "' "
"LIMIT 1",
[&](const Block& b) {
if (b.GetRowCount() > 0) {
assignment_id = uuid_to_string(b[0]->As<ColumnUUID>()->At(0));
}
}
);
spdlog::info("Created new assignment: {} -> {}", assignment_data.name, assignment_id);
}
// Create new assignment
Block insert_block;
auto class_col = std::make_shared<ColumnUUID>();
auto user_col = std::make_shared<ColumnUUID>();
auto name_col = std::make_shared<ColumnString>();
auto date_col = std::make_shared<ColumnDate>();
auto major_col = std::make_shared<ColumnUInt8>();
class_col->Append(parse_uuid(class_id));
user_col->Append(parse_uuid(user_id));
name_col->Append(assignment_data.name);
date_col->Append(parse_date_to_clickhouse(assignment_data.dueDate));
major_col->Append(assignment_data.isMajorGrade ? 1 : 0);
insert_block.AppendColumn("class_id", class_col);
insert_block.AppendColumn("user_id", user_col);
insert_block.AppendColumn("assignment_name", name_col);
insert_block.AppendColumn("due_date", date_col);
insert_block.AppendColumn("is_major_grade", major_col);
client->Insert("user_assignments", insert_block);
// Retrieve the created assignment_id
client->Select(
"SELECT assignment_id FROM user_assignments "
"WHERE user_id = '" + sanitize_clickhouse_string(user_id) + "' "
"AND class_id = '" + sanitize_clickhouse_string(class_id) + "' "
"AND assignment_name = '" + sanitize_clickhouse_string(assignment_data.name) + "' "
"ORDER BY first_seen DESC LIMIT 1",
[&](const Block& b) {
if (b.GetRowCount() > 0) {
assignment_id = uuid_to_string(b[0]->As<ColumnUUID>()->At(0));
}
}
);
spdlog::info("Created new assignment: {} -> {}", assignment_data.name, assignment_id);
return assignment_id;
}

View File

@@ -1,4 +1,5 @@
#pragma once
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
@@ -186,7 +187,7 @@ std::string get_or_create_assignment(
// ============================================================================
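// Parse date string from API (format: "MM/DD/YYYY") into a ClickHouse Date value:
// days since 1970-01-01. Returns 0 (epoch) for an empty string or when mktime fails.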
uint16_t parse_date_to_clickhouse(const std::string& date_str);
uint_fast64_t parse_date_to_clickhouse(const std::string& date_str);
// Build lookup key for assignments
inline std::string make_assignment_key(