stable but browser automation needs work

This commit is contained in:
2025-12-19 17:05:23 -06:00
parent 8ea70ef868
commit 35dbfcb8f2
3 changed files with 121 additions and 110 deletions

View File

@@ -114,6 +114,7 @@ void batched_update(std::shared_ptr<clickhouse::Client> client_ptr, std::string
for (auto& future : futures_for_this_batch) { for (auto& future : futures_for_this_batch) {
future.get(); future.get();
spdlog::debug("finished processing another user at future.get()");
} }
spdlog::info("Batch complete: processed {} users", futures_for_this_batch.size()); spdlog::info("Batch complete: processed {} users", futures_for_this_batch.size());
} }

View File

@@ -1,5 +1,8 @@
#include "types.hpp" #include "types.hpp"
#include "clickhouse/base/uuid.h" #include "clickhouse/base/uuid.h"
#include "clickhouse/columns/date.h"
#include "clickhouse/types/types.h"
#include <cstdint>
#include <spdlog/spdlog.h> #include <spdlog/spdlog.h>
#include <unordered_map> #include <unordered_map>
#include <sstream> #include <sstream>
@@ -92,13 +95,11 @@ std::string uuid_to_string(const clickhouse::UUID& u) {
// ============================================================================ // ============================================================================
// Date Parsing Helper // Date Parsing Helper
// ============================================================================ // ============================================================================
uint_fast64_t parse_date_to_clickhouse(const std::string& date_str) {
uint16_t parse_date_to_clickhouse(const std::string& date_str) {
if (date_str.empty()) { if (date_str.empty()) {
return 0; // Epoch date return 0; // Epoch date
} }
// Try to parse YYYY-MM-DD format
std::tm tm = {}; std::tm tm = {};
std::istringstream ss(date_str); std::istringstream ss(date_str);
ss >> std::get_time(&tm, "%m/%d/%Y"); ss >> std::get_time(&tm, "%m/%d/%Y");
@@ -109,10 +110,15 @@ uint16_t parse_date_to_clickhouse(const std::string& date_str) {
} }
std::time_t time = std::mktime(&tm); std::time_t time = std::mktime(&tm);
// ClickHouse Date is days since 1970-01-01 if (time == -1) {
return static_cast<uint16_t>(time / 86400); spdlog::warn("mktime failed for date: {}, using epoch", date_str);
return 0;
}
return static_cast<uint_fast64_t>(time / 86400); // ClickHouse Date = days since 1970-01-01
} }
// ============================================================================ // ============================================================================
// User Operations // User Operations
// ============================================================================ // ============================================================================
@@ -199,17 +205,23 @@ std::optional<clickhouse::UUID> get_user_uuid(const CHClient& client,
// Get or Create Stable IDs // Get or Create Stable IDs
// ============================================================================ // ============================================================================
std::string get_or_create_class(const CHClient& client, std::string get_or_create_class(
const std::string& user_id, const CHClient& client,
const api_utils::ClassGrades& class_data) { const std::string& user_id,
spdlog::debug("get_or_create_class: user={}, class={}", user_id, class_data.className); const api_utils::ClassGrades& class_data
) {
spdlog::debug("get_or_create_class: user={}, class={}, category={}",
user_id, class_data.className, class_data.category);
// Try to find existing class
std::string class_id; std::string class_id;
// 1⃣ Fetch an existing class_id matching user_id, class_name and category (no ORDER BY, so any one matching row is returned)
client->Select( client->Select(
"SELECT class_id FROM user_classes " "SELECT class_id "
"FROM user_classes "
"WHERE user_id = '" + sanitize_clickhouse_string(user_id) + "' " "WHERE user_id = '" + sanitize_clickhouse_string(user_id) + "' "
"AND class_name = '" + sanitize_clickhouse_string(class_data.className) + "' " "AND class_name = '" + sanitize_clickhouse_string(class_data.className) + "' "
"AND category = '" + sanitize_clickhouse_string(class_data.category) + "' "
"LIMIT 1", "LIMIT 1",
[&](const Block& b) { [&](const Block& b) {
if (b.GetRowCount() > 0) { if (b.GetRowCount() > 0) {
@@ -218,69 +230,66 @@ std::string get_or_create_class(const CHClient& client,
} }
); );
if (!class_id.empty()) { // 2⃣ Insert new class if it doesn't exist
// Update last_seen if (class_id.empty()) {
client->Execute( Block insert_block;
"ALTER TABLE user_classes UPDATE "
"teacher = '" + sanitize_clickhouse_string(class_data.teacher) + "', " auto user_col = std::make_shared<ColumnUUID>();
"period = '" + sanitize_clickhouse_string(class_data.period) + "', " auto name_col = std::make_shared<ColumnString>();
"category = '" + sanitize_clickhouse_string(class_data.category) + "', " auto teacher_col = std::make_shared<ColumnString>();
"last_seen = now() " auto period_col = std::make_shared<ColumnString>();
"WHERE class_id = '" + sanitize_clickhouse_string(class_id) + "'" auto category_col = std::make_shared<ColumnString>();
);
return class_id; user_col->Append(parse_uuid(user_id));
name_col->Append(class_data.className);
teacher_col->Append(class_data.teacher);
period_col->Append(class_data.period);
category_col->Append(class_data.category);
insert_block.AppendColumn("user_id", user_col);
insert_block.AppendColumn("class_name", name_col);
insert_block.AppendColumn("teacher", teacher_col);
insert_block.AppendColumn("period", period_col);
insert_block.AppendColumn("category", category_col);
client->Insert("user_classes", insert_block);
// 3⃣ Fetch the generated class_id
client->Select(
"SELECT class_id "
"FROM user_classes "
"WHERE user_id = '" + sanitize_clickhouse_string(user_id) + "' "
"AND class_name = '" + sanitize_clickhouse_string(class_data.className) + "' "
"AND category = '" + sanitize_clickhouse_string(class_data.category) + "' "
"LIMIT 1",
[&](const Block& b) {
if (b.GetRowCount() > 0) {
class_id = uuid_to_string(b[0]->As<ColumnUUID>()->At(0));
}
}
);
spdlog::info("Created new class: {} [{}] -> {}", class_data.className, class_data.category, class_id);
} }
// Create new class
Block insert_block;
auto user_col = std::make_shared<ColumnUUID>();
auto name_col = std::make_shared<ColumnString>();
auto teacher_col = std::make_shared<ColumnString>();
auto period_col = std::make_shared<ColumnString>();
auto category_col = std::make_shared<ColumnString>();
user_col->Append(parse_uuid(user_id));
name_col->Append(class_data.className);
teacher_col->Append(class_data.teacher);
period_col->Append(class_data.period);
category_col->Append(class_data.category);
insert_block.AppendColumn("user_id", user_col);
insert_block.AppendColumn("class_name", name_col);
insert_block.AppendColumn("teacher", teacher_col);
insert_block.AppendColumn("period", period_col);
insert_block.AppendColumn("category", category_col);
client->Insert("user_classes", insert_block);
// Retrieve the created class_id
client->Select(
"SELECT class_id FROM user_classes "
"WHERE user_id = '" + sanitize_clickhouse_string(user_id) + "' "
"AND class_name = '" + sanitize_clickhouse_string(class_data.className) + "' "
"ORDER BY first_seen DESC LIMIT 1",
[&](const Block& b) {
if (b.GetRowCount() > 0) {
class_id = uuid_to_string(b[0]->As<ColumnUUID>()->At(0));
}
}
);
spdlog::info("Created new class: {} -> {}", class_data.className, class_id);
return class_id; return class_id;
} }
std::string get_or_create_assignment(const CHClient& client,
const std::string& user_id,
const std::string& class_id,
const api_utils::AssignmentGrade& assignment_data) {
spdlog::debug("get_or_create_assignment: class={}, assignment={}",
class_id, assignment_data.name);
// Try to find existing assignment std::string get_or_create_assignment(
const CHClient& client,
const std::string& user_id,
const std::string& class_id,
const api_utils::AssignmentGrade& assignment_data
) {
spdlog::debug("get_or_create_assignment: class={}, assignment={}", class_id, assignment_data.name);
std::string assignment_id; std::string assignment_id;
// 1⃣ Fetch existing assignment_id
client->Select( client->Select(
"SELECT assignment_id FROM user_assignments " "SELECT assignment_id "
"FROM user_assignments "
"WHERE user_id = '" + sanitize_clickhouse_string(user_id) + "' " "WHERE user_id = '" + sanitize_clickhouse_string(user_id) + "' "
"AND class_id = '" + sanitize_clickhouse_string(class_id) + "' " "AND class_id = '" + sanitize_clickhouse_string(class_id) + "' "
"AND assignment_name = '" + sanitize_clickhouse_string(assignment_data.name) + "' " "AND assignment_name = '" + sanitize_clickhouse_string(assignment_data.name) + "' "
@@ -292,48 +301,48 @@ std::string get_or_create_assignment(const CHClient& client,
} }
); );
if (!assignment_id.empty()) { // 2⃣ Insert new assignment if it doesn't exist
// Update last_seen if (assignment_id.empty()) {
return assignment_id; Block insert_block;
auto class_col = std::make_shared<ColumnUUID>();
auto user_col = std::make_shared<ColumnUUID>();
auto name_col = std::make_shared<ColumnString>();
auto date_col = std::make_shared<ColumnDate>();
auto major_col = std::make_shared<ColumnUInt8>();
class_col->Append(parse_uuid(class_id));
user_col->Append(parse_uuid(user_id));
name_col->Append(assignment_data.name);
date_col->Append(parse_date_to_clickhouse(assignment_data.dueDate));
major_col->Append(assignment_data.isMajorGrade ? 1 : 0);
insert_block.AppendColumn("class_id", class_col);
insert_block.AppendColumn("user_id", user_col);
insert_block.AppendColumn("assignment_name", name_col);
insert_block.AppendColumn("due_date", date_col);
insert_block.AppendColumn("is_major_grade", major_col);
client->Insert("user_assignments", insert_block);
// 3⃣ Fetch the generated assignment_id
client->Select(
"SELECT assignment_id "
"FROM user_assignments "
"WHERE user_id = '" + sanitize_clickhouse_string(user_id) + "' "
"AND class_id = '" + sanitize_clickhouse_string(class_id) + "' "
"AND assignment_name = '" + sanitize_clickhouse_string(assignment_data.name) + "' "
"LIMIT 1",
[&](const Block& b) {
if (b.GetRowCount() > 0) {
assignment_id = uuid_to_string(b[0]->As<ColumnUUID>()->At(0));
}
}
);
spdlog::info("Created new assignment: {} -> {}", assignment_data.name, assignment_id);
} }
// Create new assignment
Block insert_block;
auto class_col = std::make_shared<ColumnUUID>();
auto user_col = std::make_shared<ColumnUUID>();
auto name_col = std::make_shared<ColumnString>();
auto date_col = std::make_shared<ColumnDate>();
auto major_col = std::make_shared<ColumnUInt8>();
class_col->Append(parse_uuid(class_id));
user_col->Append(parse_uuid(user_id));
name_col->Append(assignment_data.name);
date_col->Append(parse_date_to_clickhouse(assignment_data.dueDate));
major_col->Append(assignment_data.isMajorGrade ? 1 : 0);
insert_block.AppendColumn("class_id", class_col);
insert_block.AppendColumn("user_id", user_col);
insert_block.AppendColumn("assignment_name", name_col);
insert_block.AppendColumn("due_date", date_col);
insert_block.AppendColumn("is_major_grade", major_col);
client->Insert("user_assignments", insert_block);
// Retrieve the created assignment_id
client->Select(
"SELECT assignment_id FROM user_assignments "
"WHERE user_id = '" + sanitize_clickhouse_string(user_id) + "' "
"AND class_id = '" + sanitize_clickhouse_string(class_id) + "' "
"AND assignment_name = '" + sanitize_clickhouse_string(assignment_data.name) + "' "
"ORDER BY first_seen DESC LIMIT 1",
[&](const Block& b) {
if (b.GetRowCount() > 0) {
assignment_id = uuid_to_string(b[0]->As<ColumnUUID>()->At(0));
}
}
);
spdlog::info("Created new assignment: {} -> {}", assignment_data.name, assignment_id);
return assignment_id; return assignment_id;
} }

View File

@@ -1,4 +1,5 @@
#pragma once #pragma once
#include <cstdint>
#include <memory> #include <memory>
#include <optional> #include <optional>
#include <string> #include <string>
@@ -186,7 +187,7 @@ std::string get_or_create_assignment(
// ============================================================================ // ============================================================================
// Parse date string from API (expected format: "MM/DD/YYYY") // Parse date string from API (expected format: "MM/DD/YYYY")
uint16_t parse_date_to_clickhouse(const std::string& date_str); uint_fast64_t parse_date_to_clickhouse(const std::string& date_str);
// Build lookup key for assignments // Build lookup key for assignments
inline std::string make_assignment_key( inline std::string make_assignment_key(