ssctopper/db/init.py

146 lines
5.0 KiB
Python

#!/usr/bin/env python3
"""
Database initialization script for SSCTopper.
Creates the database at /tmp/ssctopper.db, applies schema, and seeds the syllabus structure.
"""
import sqlite3
import json
import os
import sys
DB_DIR = os.path.dirname(os.path.abspath(__file__))
DB_PATH = os.environ.get('SSCTOPPER_DB', '/tmp/ssctopper.db')
SCHEMA_PATH = os.path.join(DB_DIR, 'schema.sql')
SYLLABUS_PATH = os.path.join(DB_DIR, 'syllabus.json')
def get_db():
"""Get database connection."""
conn = sqlite3.connect(DB_PATH)
conn.execute("PRAGMA journal_mode=WAL")
conn.execute("PRAGMA foreign_keys=ON")
conn.row_factory = sqlite3.Row
return conn
def init_db(force=False):
"""Initialize database with schema and syllabus data."""
if os.path.exists(DB_PATH):
if force:
os.remove(DB_PATH)
print(f"Removed existing database at {DB_PATH}")
else:
print(f"Database already exists at {DB_PATH}. Use --force to recreate.")
return
conn = sqlite3.connect(DB_PATH)
conn.execute("PRAGMA journal_mode=WAL")
conn.execute("PRAGMA foreign_keys=ON")
cursor = conn.cursor()
# Apply schema
with open(SCHEMA_PATH, 'r') as f:
cursor.executescript(f.read())
print("Schema applied successfully.")
# Load syllabus
with open(SYLLABUS_PATH, 'r') as f:
syllabus = json.load(f)
# Seed syllabus data
stats = {'subjects': 0, 'subtopics': 0, 'topics': 0, 'qtypes': 0}
for subject in syllabus['subjects']:
cursor.execute(
"INSERT INTO subjects (name, tier, description, target_questions) VALUES (?, ?, ?, ?)",
(subject['name'], subject['tier'], subject['description'], subject.get('target_questions', 0))
)
subject_id = cursor.lastrowid
stats['subjects'] += 1
for subtopic in subject.get('subtopics', []):
cursor.execute(
"INSERT INTO subtopics (subject_id, name, description) VALUES (?, ?, ?)",
(subject_id, subtopic['name'], subtopic.get('description', ''))
)
subtopic_id = cursor.lastrowid
stats['subtopics'] += 1
for topic in subtopic.get('topics', []):
cursor.execute(
"INSERT INTO topics (subtopic_id, name, description) VALUES (?, ?, ?)",
(subtopic_id, topic['name'], topic.get('description', ''))
)
topic_id = cursor.lastrowid
stats['topics'] += 1
for qtype in topic.get('question_types', []):
cursor.execute(
"INSERT INTO question_types (topic_id, name) VALUES (?, ?)",
(topic_id, qtype)
)
stats['qtypes'] += 1
conn.commit()
conn.close()
print(f"\n{'='*50}")
print(f"Database initialized at: {DB_PATH}")
print(f"{'='*50}")
print(f" Subjects: {stats['subjects']}")
print(f" Sub-topics: {stats['subtopics']}")
print(f" Topics: {stats['topics']}")
print(f" Question Types: {stats['qtypes']}")
print(f"\nReady for question generation!")
return stats
def get_question_type_id(conn, subject_name, subtopic_name, topic_name, qtype_name):
"""Look up a question_type_id by hierarchical names."""
row = conn.execute("""
SELECT qt.id FROM question_types qt
JOIN topics t ON qt.topic_id = t.id
JOIN subtopics st ON t.subtopic_id = st.id
JOIN subjects s ON st.subject_id = s.id
WHERE s.name = ? AND st.name = ? AND t.name = ? AND qt.name = ?
""", (subject_name, subtopic_name, topic_name, qtype_name)).fetchone()
return row[0] if row else None
def insert_questions_batch(conn, questions):
"""Insert a batch of questions. Each question is a dict with keys:
question_type_id, question_text, option_a, option_b, option_c, option_d,
correct_option, explanation, difficulty
"""
conn.executemany("""
INSERT INTO questions (question_type_id, question_text, option_a, option_b, option_c, option_d,
correct_option, explanation, difficulty)
VALUES (:question_type_id, :question_text, :option_a, :option_b, :option_c, :option_d,
:correct_option, :explanation, :difficulty)
""", questions)
conn.commit()
def get_stats(conn):
"""Get current question statistics."""
stats = {}
rows = conn.execute("""
SELECT s.name, COUNT(q.id) as count
FROM subjects s
LEFT JOIN subtopics st ON st.subject_id = s.id
LEFT JOIN topics t ON t.subtopic_id = st.id
LEFT JOIN question_types qt ON qt.topic_id = t.id
LEFT JOIN questions q ON q.question_type_id = qt.id
GROUP BY s.id
""").fetchall()
for row in rows:
stats[row[0]] = row[1]
total = conn.execute("SELECT COUNT(*) FROM questions").fetchone()[0]
stats['TOTAL'] = total
return stats
if __name__ == '__main__':
force = '--force' in sys.argv
init_db(force=force)