Initial commit for SSCTopper platform

This commit is contained in:
Black Box 2026-03-29 10:58:13 +05:30
commit d3d0b1b3bf
22 changed files with 4907 additions and 0 deletions

29
.gitignore vendored Normal file
View File

@ -0,0 +1,29 @@
# dependencies
/node_modules
/.pnp
.pnp.js
# testing
/coverage
# next.js
/.next/
/out/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# local env files
.env*.local
# database
*.db

Binary file not shown.

145
db/init.py Normal file
View File

@ -0,0 +1,145 @@
#!/usr/bin/env python3
"""
Database initialization script for SSCTopper.

Creates the SQLite database at the path given by the SSCTOPPER_DB environment
variable (default: /tmp/ssctopper.db), applies schema.sql, and seeds the
syllabus structure from syllabus.json.
"""
import sqlite3
import json
import os
import sys
# Directory containing this script; the schema and syllabus files are resolved relative to it.
DB_DIR = os.path.dirname(os.path.abspath(__file__))
# Location of the database file; overridable through the SSCTOPPER_DB environment variable.
DB_PATH = os.environ.get('SSCTOPPER_DB', '/tmp/ssctopper.db')
# SQL script executed by init_db() to create the tables and indexes.
SCHEMA_PATH = os.path.join(DB_DIR, 'schema.sql')
# JSON document with the subject/subtopic/topic/question-type tree that init_db() seeds.
SYLLABUS_PATH = os.path.join(DB_DIR, 'syllabus.json')
def get_db():
    """Open and configure a connection to the database at ``DB_PATH``.

    The connection is switched to WAL journaling, has foreign-key
    enforcement turned on, and returns rows as ``sqlite3.Row`` objects.

    Returns:
        The configured ``sqlite3.Connection``; the caller is responsible
        for closing it.
    """
    connection = sqlite3.connect(DB_PATH)
    # Both pragmas are per-connection settings, so they are issued on every open.
    for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA foreign_keys=ON"):
        connection.execute(pragma)
    connection.row_factory = sqlite3.Row
    return connection
def _remove_db_files():
    """Delete the database file and its WAL/SHM sidecar files, if present."""
    for suffix in ('', '-wal', '-shm'):
        try:
            os.remove(DB_PATH + suffix)
        except FileNotFoundError:
            pass


def _seed_syllabus(cursor, syllabus):
    """Insert the subject -> subtopic -> topic -> question-type tree.

    Args:
        cursor: Cursor on a database that already has the schema applied.
        syllabus: Parsed syllabus document with a top-level ``'subjects'`` list.

    Returns:
        A dict of inserted row counts keyed ``'subjects'``, ``'subtopics'``,
        ``'topics'`` and ``'qtypes'``.
    """
    stats = {'subjects': 0, 'subtopics': 0, 'topics': 0, 'qtypes': 0}
    for subject in syllabus['subjects']:
        cursor.execute(
            "INSERT INTO subjects (name, tier, description, target_questions) VALUES (?, ?, ?, ?)",
            (subject['name'], subject['tier'], subject['description'], subject.get('target_questions', 0))
        )
        # lastrowid is the parent key for the rows inserted one level down.
        subject_id = cursor.lastrowid
        stats['subjects'] += 1
        for subtopic in subject.get('subtopics', []):
            cursor.execute(
                "INSERT INTO subtopics (subject_id, name, description) VALUES (?, ?, ?)",
                (subject_id, subtopic['name'], subtopic.get('description', ''))
            )
            subtopic_id = cursor.lastrowid
            stats['subtopics'] += 1
            for topic in subtopic.get('topics', []):
                cursor.execute(
                    "INSERT INTO topics (subtopic_id, name, description) VALUES (?, ?, ?)",
                    (subtopic_id, topic['name'], topic.get('description', ''))
                )
                topic_id = cursor.lastrowid
                stats['topics'] += 1
                for qtype in topic.get('question_types', []):
                    cursor.execute(
                        "INSERT INTO question_types (topic_id, name) VALUES (?, ?)",
                        (topic_id, qtype)
                    )
                    stats['qtypes'] += 1
    return stats


def init_db(force=False):
    """Create the database, apply the schema, and seed the syllabus data.

    Args:
        force: When True, an existing database (including its ``-wal`` and
            ``-shm`` sidecar files) is deleted and rebuilt. When False and a
            database already exists, nothing is changed.

    Returns:
        A dict of inserted row counts keyed ``'subjects'``, ``'subtopics'``,
        ``'topics'`` and ``'qtypes'``, or ``None`` when an existing database
        was left untouched.

    Raises:
        OSError, sqlite3.Error, json.JSONDecodeError, KeyError: Propagated
            from reading the input files or seeding. The connection is
            closed and the partially built database is removed first, so a
            later run is not blocked by a schema-only file.
    """
    if os.path.exists(DB_PATH):
        if not force:
            print(f"Database already exists at {DB_PATH}. Use --force to recreate.")
            return
        # WAL mode keeps extra files next to the database; a stale WAL must
        # never be paired with a freshly created database file.
        _remove_db_files()
        print(f"Removed existing database at {DB_PATH}")

    conn = sqlite3.connect(DB_PATH)
    completed = False
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA foreign_keys=ON")
        cursor = conn.cursor()

        # Apply schema
        with open(SCHEMA_PATH, 'r', encoding='utf-8') as f:
            cursor.executescript(f.read())
        print("Schema applied successfully.")

        # Load syllabus
        with open(SYLLABUS_PATH, 'r', encoding='utf-8') as f:
            syllabus = json.load(f)

        # Seed syllabus data in a single transaction
        stats = _seed_syllabus(cursor, syllabus)
        conn.commit()
        completed = True
    finally:
        conn.close()
        if not completed:
            # The database did not exist when this attempt started, so
            # discarding the half-built file loses nothing.
            _remove_db_files()

    print(f"\n{'='*50}")
    print(f"Database initialized at: {DB_PATH}")
    print(f"{'='*50}")
    print(f" Subjects: {stats['subjects']}")
    print(f" Sub-topics: {stats['subtopics']}")
    print(f" Topics: {stats['topics']}")
    print(f" Question Types: {stats['qtypes']}")
    print("\nReady for question generation!")
    return stats
def get_question_type_id(conn, subject_name, subtopic_name, topic_name, qtype_name):
    """Resolve a question type's id from its full hierarchical path.

    Args:
        conn: Open database connection.
        subject_name: Name of the subject.
        subtopic_name: Name of the subtopic within that subject.
        topic_name: Name of the topic within that subtopic.
        qtype_name: Name of the question type within that topic.

    Returns:
        The ``question_types.id`` of the matching row, or ``None`` when no
        row matches all four names.
    """
    lookup_sql = """
        SELECT qt.id FROM question_types qt
        JOIN topics t ON qt.topic_id = t.id
        JOIN subtopics st ON t.subtopic_id = st.id
        JOIN subjects s ON st.subject_id = s.id
        WHERE s.name = ? AND st.name = ? AND t.name = ? AND qt.name = ?
    """
    path = (subject_name, subtopic_name, topic_name, qtype_name)
    match = conn.execute(lookup_sql, path).fetchone()
    if not match:
        return None
    return match[0]
def insert_questions_batch(conn, questions):
    """Insert a batch of questions atomically.

    Args:
        conn: Open database connection.
        questions: Iterable of dicts, each with the keys
            ``question_type_id``, ``question_text``, ``option_a``,
            ``option_b``, ``option_c``, ``option_d``, ``correct_option``,
            ``explanation`` and ``difficulty``.

    Raises:
        sqlite3.Error: If any row is rejected (for example by a NOT NULL or
            CHECK constraint). The open transaction is rolled back, so none
            of the batch is stored.
    """
    # The connection context manager commits on success and rolls back on an
    # exception; without it, rows inserted before a failing row would stay in
    # the open transaction and be persisted by the caller's next commit.
    with conn:
        conn.executemany("""
            INSERT INTO questions (question_type_id, question_text, option_a, option_b, option_c, option_d,
            correct_option, explanation, difficulty)
            VALUES (:question_type_id, :question_text, :option_a, :option_b, :option_c, :option_d,
            :correct_option, :explanation, :difficulty)
        """, questions)
def get_stats(conn):
    """Count stored questions per subject and overall.

    Args:
        conn: Open database connection.

    Returns:
        A dict mapping each subject name to its number of questions
        (subjects without questions map to 0), plus a ``'TOTAL'`` key holding
        the number of rows in ``questions``.
    """
    # LEFT JOINs keep subjects that have no questions yet, with a count of 0.
    per_subject_sql = """
        SELECT s.name, COUNT(q.id) as count
        FROM subjects s
        LEFT JOIN subtopics st ON st.subject_id = s.id
        LEFT JOIN topics t ON t.subtopic_id = st.id
        LEFT JOIN question_types qt ON qt.topic_id = t.id
        LEFT JOIN questions q ON q.question_type_id = qt.id
        GROUP BY s.id
    """
    stats = {record[0]: record[1] for record in conn.execute(per_subject_sql).fetchall()}
    total_record = conn.execute("SELECT COUNT(*) FROM questions").fetchone()
    stats['TOTAL'] = total_record[0]
    return stats
if __name__ == '__main__':
    # Script entry point: rebuild an existing database only when --force is on the command line.
    init_db(force='--force' in sys.argv)

83
db/schema.sql Normal file
View File

@ -0,0 +1,83 @@
-- SSCTopper Database Schema
-- Complete SSC CGL Question Bank
--
-- Content hierarchy: subjects -> subtopics -> topics -> question_types -> questions.
-- All statements use IF NOT EXISTS, so the script can be re-run on an existing database.
-- SQLite only enforces the FOREIGN KEY clauses below on connections that have
-- issued PRAGMA foreign_keys=ON.

-- subjects: top level of the hierarchy; one row per exam subject (names are unique).
--   tier             exam tier(s) stored as text (the bundled syllabus uses values such as "1,2")
--   target_questions planned number of questions for the subject; defaults to 0
CREATE TABLE IF NOT EXISTS subjects (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
tier TEXT NOT NULL,
description TEXT,
target_questions INTEGER DEFAULT 0
);
-- subtopics: second level; each row belongs to one subject.
-- A subtopic name must be unique within its subject, but may repeat across subjects.
CREATE TABLE IF NOT EXISTS subtopics (
id INTEGER PRIMARY KEY AUTOINCREMENT,
subject_id INTEGER NOT NULL,
name TEXT NOT NULL,
description TEXT,
FOREIGN KEY (subject_id) REFERENCES subjects(id),
UNIQUE(subject_id, name)
);
-- topics: third level; each row belongs to one subtopic.
-- A topic name must be unique within its subtopic.
CREATE TABLE IF NOT EXISTS topics (
id INTEGER PRIMARY KEY AUTOINCREMENT,
subtopic_id INTEGER NOT NULL,
name TEXT NOT NULL,
description TEXT,
FOREIGN KEY (subtopic_id) REFERENCES subtopics(id),
UNIQUE(subtopic_id, name)
);
-- question_types: fourth level; the kinds of question that can be asked about a topic.
-- A question-type name must be unique within its topic; questions reference these rows.
CREATE TABLE IF NOT EXISTS question_types (
id INTEGER PRIMARY KEY AUTOINCREMENT,
topic_id INTEGER NOT NULL,
name TEXT NOT NULL,
FOREIGN KEY (topic_id) REFERENCES topics(id),
UNIQUE(topic_id, name)
);
-- questions: the question bank; each row is a four-option multiple-choice question.
--   correct_option  letter of the right answer; must be 'A', 'B', 'C' or 'D'
--   difficulty      integer from 1 to 3 (defaults to 1)
--   year_appeared   optional free text
--   created_at      filled in by SQLite at insert time
CREATE TABLE IF NOT EXISTS questions (
id INTEGER PRIMARY KEY AUTOINCREMENT,
question_type_id INTEGER NOT NULL,
question_text TEXT NOT NULL,
option_a TEXT NOT NULL,
option_b TEXT NOT NULL,
option_c TEXT NOT NULL,
option_d TEXT NOT NULL,
correct_option TEXT NOT NULL CHECK(correct_option IN ('A','B','C','D')),
explanation TEXT,
difficulty INTEGER DEFAULT 1 CHECK(difficulty BETWEEN 1 AND 3),
year_appeared TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (question_type_id) REFERENCES question_types(id)
);
-- Performance indexes
-- Cover the foreign-key columns used to walk the hierarchy, plus filtering questions by difficulty.
-- NOTE(review): the UNIQUE(parent_id, name) constraints on subtopics, topics and
-- question_types already create indexes whose leading column is the parent id, so the
-- last three indexes here may be redundant -- confirm with EXPLAIN QUERY PLAN before removing.
CREATE INDEX IF NOT EXISTS idx_questions_type ON questions(question_type_id);
CREATE INDEX IF NOT EXISTS idx_questions_difficulty ON questions(difficulty);
CREATE INDEX IF NOT EXISTS idx_topics_subtopic ON topics(subtopic_id);
CREATE INDEX IF NOT EXISTS idx_subtopics_subject ON subtopics(subject_id);
CREATE INDEX IF NOT EXISTS idx_qtypes_topic ON question_types(topic_id);
-- User Management
-- users: one row per account; both username and email must be unique.
--   password_hash  presumably a salted hash produced by the application, never the
--                  plain password -- verify against the code that writes this column
CREATE TABLE IF NOT EXISTS users (
id INTEGER PRIMARY KEY AUTOINCREMENT,
username TEXT NOT NULL UNIQUE,
email TEXT NOT NULL UNIQUE,
password_hash TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Progress Tracking (Question level)
-- user_answers: one row per answer a user submits to a question. There is no
-- uniqueness constraint on (user_id, question_id), so repeated attempts are all kept.
--   is_correct  whether the submitted answer was right
--   time_taken  time spent on the question; defaults to 0.0 (unit not stated in the
--               schema -- presumably seconds, confirm with the writer of this column)
CREATE TABLE IF NOT EXISTS user_answers (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER NOT NULL,
question_id INTEGER NOT NULL,
is_correct BOOLEAN NOT NULL,
time_taken REAL DEFAULT 0.0,
answered_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES users(id),
FOREIGN KEY (question_id) REFERENCES questions(id)
);
-- Lookup indexes for fetching answers by user and by question.
CREATE INDEX IF NOT EXISTS idx_user_answers_user ON user_answers(user_id);
CREATE INDEX IF NOT EXISTS idx_user_answers_question ON user_answers(question_id);

826
db/syllabus.json Normal file
View File

@ -0,0 +1,826 @@
{
"subjects": [
{
"name": "Quantitative Aptitude",
"tier": "1,2",
"description": "Mathematical skills including arithmetic, algebra, geometry, mensuration, trigonometry, and data interpretation",
"target_questions": 30000,
"subtopics": [
{
"name": "Arithmetic",
"description": "Basic arithmetic operations and number concepts",
"topics": [
{
"name": "Number System",
"description": "Natural numbers, whole numbers, integers, rational & irrational numbers, divisibility, remainders",
"question_types": ["Find the value", "Divisibility test", "Remainder problems", "Unit digit", "LCM/HCF word problems"]
},
{
"name": "HCF and LCM",
"description": "Highest Common Factor and Lowest Common Multiple",
"question_types": ["Find HCF", "Find LCM", "Word problems on HCF/LCM", "Product of two numbers"]
},
{
"name": "Simplification",
"description": "BODMAS, fractions, decimals, surds",
"question_types": ["Simplify expression", "Compare values", "Find missing number"]
},
{
"name": "Decimals and Fractions",
"description": "Conversion, operations on decimals and fractions",
"question_types": ["Convert fraction to decimal", "Operations on fractions", "Recurring decimals", "Ordering fractions"]
}
]
},
{
"name": "Percentage",
"description": "Percentage calculations and applications",
"topics": [
{
"name": "Basic Percentage",
"description": "Finding percentage of a number, expressing as percentage",
"question_types": ["Find X% of Y", "What percent is X of Y", "Percentage change", "Find original value"]
},
{
"name": "Successive Percentage",
"description": "Multiple percentage changes applied successively",
"question_types": ["Net percentage change", "Two successive increases/decreases", "Mixed increase and decrease"]
},
{
"name": "Population and Depreciation",
"description": "Population growth and depreciation of value over time",
"question_types": ["Population after N years", "Depreciation value", "Find rate of growth"]
}
]
},
{
"name": "Profit and Loss",
"description": "Cost price, selling price, profit, loss, discount and marked price",
"topics": [
{
"name": "Basic Profit and Loss",
"description": "Finding profit, loss, cost price, selling price",
"question_types": ["Find profit/loss", "Find profit/loss percentage", "Find CP given SP and profit%", "Find SP given CP and loss%"]
},
{
"name": "Discount and Marked Price",
"description": "Marked price, discount percentage, successive discounts",
"question_types": ["Find selling price after discount", "Find marked price", "Successive discounts", "Find single equivalent discount"]
},
{
"name": "Dishonest Dealings",
"description": "False weights, mixing to gain profit",
"question_types": ["Profit with false weight", "Overall gain/loss in mixing"]
}
]
},
{
"name": "Ratio and Proportion",
"description": "Ratios, proportions, and their applications",
"topics": [
{
"name": "Basic Ratio",
"description": "Simplifying ratios, comparing ratios, dividing in ratio",
"question_types": ["Simplify ratio", "Divide in given ratio", "Find quantity from ratio", "Compare ratios"]
},
{
"name": "Proportion",
"description": "Direct proportion, inverse proportion, mean proportional",
"question_types": ["Find fourth proportional", "Find mean proportional", "Third proportional"]
},
{
"name": "Partnership",
"description": "Profit sharing among partners based on investment and time",
"question_types": ["Divide profit among partners", "Find investment amount", "Find time of investment"]
},
{
"name": "Mixture and Alligation",
"description": "Mixing two or more quantities",
"question_types": ["Find ratio of mixing", "Mean price of mixture", "Replacement problems"]
}
]
},
{
"name": "Average",
"description": "Mean, weighted average and related problems",
"topics": [
{
"name": "Simple Average",
"description": "Average of numbers, runs, marks",
"question_types": ["Find average", "Find sum from average", "Find missing value", "Average after adding/removing"]
},
{
"name": "Weighted Average",
"description": "Average with different weights",
"question_types": ["Weighted mean", "Combined average of groups", "Average speed"]
},
{
"name": "Age Problems",
"description": "Average age related problems",
"question_types": ["Average age of family", "Age ratio problems", "Present/future/past average age"]
}
]
},
{
"name": "Time and Work",
"description": "Work efficiency, combined work, pipes and cisterns",
"topics": [
{
"name": "Basic Time and Work",
"description": "Individual and combined work efficiency",
"question_types": ["Find time to complete work", "Find combined time", "Work done in given days", "Alternate working"]
},
{
"name": "Pipes and Cisterns",
"description": "Filling and emptying tanks through pipes",
"question_types": ["Time to fill tank", "Net filling rate", "Leak problems", "Multiple pipes"]
},
{
"name": "Work and Wages",
"description": "Wages proportional to work done",
"question_types": ["Divide wages", "Find individual wage", "Efficiency-based wages"]
}
]
},
{
"name": "Time, Speed and Distance",
"description": "Speed, distance, time relationships and applications",
"topics": [
{
"name": "Basic Speed and Distance",
"description": "Finding speed, distance, time",
"question_types": ["Find speed", "Find distance", "Find time", "Average speed", "Relative speed"]
},
{
"name": "Trains",
"description": "Problems involving trains passing objects or each other",
"question_types": ["Train passing pole", "Train passing platform", "Two trains passing each other", "Find length of train"]
},
{
"name": "Boats and Streams",
"description": "Upstream and downstream problems",
"question_types": ["Find speed in still water", "Find stream speed", "Time for upstream/downstream journey", "Round trip time"]
},
{
"name": "Races",
"description": "Head start, dead heat, circular track",
"question_types": ["Head start distance", "Dead heat problems", "Meeting point on circular track"]
}
]
},
{
"name": "Interest",
"description": "Simple and compound interest calculations",
"topics": [
{
"name": "Simple Interest",
"description": "SI = PRT/100",
"question_types": ["Find SI", "Find principal", "Find rate", "Find time", "Amount after time"]
},
{
"name": "Compound Interest",
"description": "Interest compounded annually, half-yearly, quarterly",
"question_types": ["Find CI", "Find amount", "Difference between CI and SI", "Half-yearly/quarterly compounding"]
}
]
},
{
"name": "Algebra",
"description": "Algebraic expressions, equations and identities",
"topics": [
{
"name": "Linear Equations",
"description": "One variable and two variable linear equations",
"question_types": ["Solve for x", "Word problems", "System of equations", "Find value of expression"]
},
{
"name": "Quadratic Equations",
"description": "Solving quadratic equations, nature of roots",
"question_types": ["Find roots", "Sum and product of roots", "Nature of roots", "Form equation from roots"]
},
{
"name": "Algebraic Identities",
"description": "Standard identities and their applications",
"question_types": ["Simplify using identity", "Find value of expression", "Factorize"]
},
{
"name": "Surds and Indices",
"description": "Laws of exponents, simplification of surds",
"question_types": ["Simplify expression", "Rationalize denominator", "Compare surds", "Find value"]
}
]
},
{
"name": "Geometry",
"description": "Lines, angles, triangles, circles, quadrilaterals",
"topics": [
{
"name": "Lines and Angles",
"description": "Parallel lines, transversals, angle relationships",
"question_types": ["Find angle value", "Identify angle type", "Parallel line properties"]
},
{
"name": "Triangles",
"description": "Properties, congruence, similarity, centers of triangle",
"question_types": ["Find angle in triangle", "Congruence condition", "Similar triangle ratio", "Centroid/Incenter/Circumcenter problems"]
},
{
"name": "Circles",
"description": "Chord, tangent, arc, sector properties",
"question_types": ["Find angle in circle", "Chord length", "Tangent properties", "Arc/sector angle"]
},
{
"name": "Quadrilaterals and Polygons",
"description": "Properties of parallelogram, rhombus, rectangle, regular polygons",
"question_types": ["Find angle", "Find diagonal", "Properties identification", "Interior/exterior angle sum"]
},
{
"name": "Coordinate Geometry",
"description": "Distance, section formula, area of triangle, equation of line",
"question_types": ["Find distance between points", "Find midpoint", "Area of triangle", "Slope of line"]
}
]
},
{
"name": "Mensuration",
"description": "Area, perimeter, volume, surface area of 2D and 3D shapes",
"topics": [
{
"name": "2D Figures",
"description": "Area and perimeter of plane figures",
"question_types": ["Find area", "Find perimeter", "Find diagonal", "Combined figure area"]
},
{
"name": "3D Figures",
"description": "Volume and surface area of solids",
"question_types": ["Find volume", "Find surface area", "Find curved surface area", "Conversion between shapes"]
}
]
},
{
"name": "Trigonometry",
"description": "Trigonometric ratios, identities, heights and distances",
"topics": [
{
"name": "Trigonometric Ratios",
"description": "sin, cos, tan and their values for standard angles",
"question_types": ["Find value of expression", "Simplify trig expression", "Find angle"]
},
{
"name": "Trigonometric Identities",
"description": "Standard identities and their proofs",
"question_types": ["Prove identity", "Simplify using identity", "Find value given condition"]
},
{
"name": "Heights and Distances",
"description": "Application of trigonometry to find heights and distances",
"question_types": ["Find height of tower", "Find distance", "Angle of elevation/depression", "Two-observer problems"]
}
]
},
{
"name": "Data Interpretation",
"description": "Reading and analyzing data from charts and tables",
"topics": [
{
"name": "Bar Graph",
"description": "Interpreting bar charts",
"question_types": ["Find value", "Calculate ratio", "Find percentage change", "Compare data"]
},
{
"name": "Pie Chart",
"description": "Interpreting pie charts",
"question_types": ["Find degree/value", "Calculate percentage", "Compare sectors"]
},
{
"name": "Line Graph",
"description": "Interpreting line graphs",
"question_types": ["Find trend", "Calculate change", "Average over period"]
},
{
"name": "Table",
"description": "Interpreting tabular data",
"question_types": ["Calculate from table", "Find ratio", "Percentage calculation"]
}
]
},
{
"name": "Statistics",
"description": "Measures of central tendency and dispersion",
"topics": [
{
"name": "Mean, Median and Mode",
"description": "Central tendency measures for grouped and ungrouped data",
"question_types": ["Find mean", "Find median", "Find mode", "Combined mean"]
},
{
"name": "Range and Standard Deviation",
"description": "Measures of spread",
"question_types": ["Find range", "Find standard deviation", "Find variance"]
}
]
}
]
},
{
"name": "General Intelligence and Reasoning",
"tier": "1,2",
"description": "Logical and analytical reasoning covering verbal, non-verbal, and critical thinking",
"target_questions": 25000,
"subtopics": [
{
"name": "Verbal Reasoning",
"description": "Reasoning with words, letters, and numbers",
"topics": [
{
"name": "Analogy",
"description": "Finding relationships between pairs of words/numbers/letters",
"question_types": ["Word analogy", "Number analogy", "Letter analogy", "Mixed analogy"]
},
{
"name": "Classification",
"description": "Finding the odd one out from a group",
"question_types": ["Word classification", "Number classification", "Letter classification", "Mixed classification"]
},
{
"name": "Number Series",
"description": "Finding patterns in number sequences",
"question_types": ["Find next number", "Find missing number", "Find wrong number"]
},
{
"name": "Letter Series",
"description": "Finding patterns in letter sequences",
"question_types": ["Find next letters", "Find missing letters", "Pattern identification"]
},
{
"name": "Alpha-Numeric Series",
"description": "Combined letter-number patterns",
"question_types": ["Find next term", "Find missing term", "Pattern rule"]
},
{
"name": "Coding-Decoding",
"description": "Encoding and decoding messages using rules",
"question_types": ["Letter coding", "Number coding", "Mixed coding", "Conditional coding"]
}
]
},
{
"name": "Logical Reasoning",
"description": "Logical deduction and analytical problems",
"topics": [
{
"name": "Blood Relations",
"description": "Family relationship problems",
"question_types": ["Direct relation", "Coded relation", "Family tree", "Generation problems"]
},
{
"name": "Direction and Distance",
"description": "Navigation and displacement problems",
"question_types": ["Find final direction", "Find distance from start", "Shortest distance", "Shadow-based direction"]
},
{
"name": "Order and Ranking",
"description": "Position-based problems",
"question_types": ["Find rank from top/bottom", "Total number of persons", "Interchange positions", "Between positions"]
},
{
"name": "Syllogism",
"description": "Logical conclusions from given statements",
"question_types": ["All/Some/No conclusions", "Either-or conclusions", "Possibility-based"]
},
{
"name": "Seating Arrangement",
"description": "Linear and circular arrangement puzzles",
"question_types": ["Linear row arrangement", "Circular arrangement", "Two-row arrangement", "Complex multi-variable"]
},
{
"name": "Puzzle",
"description": "Floor, scheduling, and distribution puzzles",
"question_types": ["Floor-based puzzle", "Day/month scheduling", "Distribution puzzle", "Comparison-based ordering"]
}
]
},
{
"name": "Non-Verbal Reasoning",
"description": "Reasoning with figures, patterns and spatial concepts",
"topics": [
{
"name": "Figure Series",
"description": "Finding next figure in a sequence",
"question_types": ["Find next figure", "Find missing figure", "Pattern completion"]
},
{
"name": "Mirror and Water Image",
"description": "Reflection of figures and text",
"question_types": ["Mirror image of figure", "Water image of figure", "Mirror image of text/numbers"]
},
{
"name": "Paper Folding and Cutting",
"description": "Predicting result of folding and cutting paper",
"question_types": ["Find pattern after unfolding", "Number of pieces after cutting"]
},
{
"name": "Embedded Figures",
"description": "Finding figures hidden within complex figures",
"question_types": ["Find embedded figure", "Count embedded shapes"]
},
{
"name": "Dice and Cube",
"description": "Problems on dice faces and cube painting",
"question_types": ["Opposite face of dice", "Adjacent faces", "Painted cube counting"]
}
]
},
{
"name": "Mathematical Reasoning",
"description": "Reasoning using mathematical operations",
"topics": [
{
"name": "Mathematical Operations",
"description": "Substituted operations and symbols",
"question_types": ["Symbol substitution", "Find correct equation", "Balancing equations"]
},
{
"name": "Number Puzzles",
"description": "Missing numbers in grids and figures",
"question_types": ["Find missing number in grid", "Find missing in triangle/circle", "Pattern in figures"]
},
{
"name": "Venn Diagram",
"description": "Representing relationships using Venn diagrams",
"question_types": ["Identify correct Venn diagram", "Count elements in region", "Minimum/maximum in region"]
}
]
},
{
"name": "Critical Thinking",
"description": "Evaluating statements, assumptions and conclusions",
"topics": [
{
"name": "Statement and Conclusion",
"description": "Drawing valid conclusions from statements",
"question_types": ["Which conclusion follows", "Both/neither/either follows"]
},
{
"name": "Statement and Assumption",
"description": "Identifying implicit assumptions",
"question_types": ["Which assumption is implicit", "Both/neither implicit"]
},
{
"name": "Cause and Effect",
"description": "Identifying cause-effect relationships",
"question_types": ["Identify cause", "Identify effect", "Independent/dependent events"]
},
{
"name": "Course of Action",
"description": "Deciding appropriate actions from given situations",
"question_types": ["Which action follows", "Appropriate/inappropriate action"]
}
]
}
]
},
{
"name": "English Language and Comprehension",
"tier": "1,2",
"description": "English vocabulary, grammar, sentence structure, and reading comprehension",
"target_questions": 25000,
"subtopics": [
{
"name": "Vocabulary",
"description": "Word meanings, usage, and recognition",
"topics": [
{
"name": "Synonyms",
"description": "Words with similar meanings",
"question_types": ["Choose synonym", "Most similar meaning in context", "Replace with synonym"]
},
{
"name": "Antonyms",
"description": "Words with opposite meanings",
"question_types": ["Choose antonym", "Most opposite meaning", "Replace with antonym"]
},
{
"name": "One Word Substitution",
"description": "Single word for a group of words or phrase",
"question_types": ["Find one word for phrase", "Identify correct substitution"]
},
{
"name": "Idioms and Phrases",
"description": "Common English idioms and their meanings",
"question_types": ["Meaning of idiom", "Use idiom in context", "Choose correct idiom"]
},
{
"name": "Spelling Correction",
"description": "Identifying correctly/incorrectly spelled words",
"question_types": ["Find misspelled word", "Choose correct spelling", "Correct the spelling"]
},
{
"name": "Foreign Words",
"description": "Commonly used foreign words and phrases in English",
"question_types": ["Meaning of foreign phrase", "Use in context"]
}
]
},
{
"name": "Grammar",
"description": "Rules of English grammar",
"topics": [
{
"name": "Tenses",
"description": "Past, present, future tenses and their forms",
"question_types": ["Fill in correct tense", "Identify tense", "Correct the tense error"]
},
{
"name": "Articles",
"description": "Use of a, an, the",
"question_types": ["Fill in correct article", "Identify article error", "No article needed"]
},
{
"name": "Prepositions",
"description": "Correct use of prepositions",
"question_types": ["Fill in preposition", "Correct preposition error", "Choose appropriate preposition"]
},
{
"name": "Subject-Verb Agreement",
"description": "Matching subjects with correct verb forms",
"question_types": ["Choose correct verb", "Find agreement error", "Correct the sentence"]
},
{
"name": "Modals",
"description": "Can, could, may, might, should, would, etc.",
"question_types": ["Choose correct modal", "Modal usage in context"]
},
{
"name": "Conjunctions",
"description": "Coordinating, subordinating, correlative conjunctions",
"question_types": ["Fill in conjunction", "Choose correct connector"]
}
]
},
{
"name": "Sentence Structure",
"description": "Sentence formation, transformation, and improvement",
"topics": [
{
"name": "Active and Passive Voice",
"description": "Converting between active and passive voice",
"question_types": ["Convert to passive", "Convert to active", "Identify voice"]
},
{
"name": "Direct and Indirect Speech",
"description": "Converting between direct and indirect narration",
"question_types": ["Convert to indirect speech", "Convert to direct speech", "Identify correct conversion"]
},
{
"name": "Sentence Improvement",
"description": "Improving parts of sentences for correctness/style",
"question_types": ["Replace underlined part", "Choose best improvement", "No improvement needed"]
},
{
"name": "Sentence Rearrangement",
"description": "Arranging jumbled sentences or parts in correct order",
"question_types": ["Arrange sentence parts", "Arrange sentences in paragraph", "Find first/last sentence"]
},
{
"name": "Sentence Completion",
"description": "Completing sentences with appropriate words/phrases",
"question_types": ["Fill in the blank (single)", "Fill in the blank (double)", "Choose correct phrase"]
}
]
},
{
"name": "Error Detection",
"description": "Identifying grammatical and usage errors",
"topics": [
{
"name": "Spot the Error",
"description": "Finding errors in given sentences",
"question_types": ["Identify erroneous part", "No error option", "Multiple error identification"]
},
{
"name": "Sentence Correction",
"description": "Correcting given erroneous sentences",
"question_types": ["Choose correct version", "Identify and correct error"]
}
]
},
{
"name": "Comprehension",
"description": "Understanding and analyzing written passages",
"topics": [
{
"name": "Reading Comprehension",
"description": "Answering questions based on passages",
"question_types": ["Main idea", "Inference", "Vocabulary in context", "Detail-based", "Tone/attitude"]
},
{
"name": "Cloze Test",
"description": "Filling blanks in a passage",
"question_types": ["Fill appropriate word", "Grammar-based blank", "Vocabulary-based blank"]
},
{
"name": "Para Jumbles",
"description": "Arranging sentences to form coherent paragraph",
"question_types": ["Arrange in order", "Find opening sentence", "Find closing sentence"]
}
]
}
]
},
{
"name": "General Awareness",
"tier": "1,2",
"description": "General knowledge covering history, geography, polity, economics, science, and current affairs",
"target_questions": 20000,
"subtopics": [
{
"name": "History",
"description": "Indian and world history from ancient to modern times",
"topics": [
{
"name": "Ancient India",
"description": "Indus Valley, Vedic period, Mauryas, Guptas, post-Gupta",
"question_types": ["Who/What/When", "Match the following", "Chronological order", "Identify dynasty/ruler"]
},
{
"name": "Medieval India",
"description": "Delhi Sultanate, Mughal Empire, Vijayanagara, Bhakti/Sufi",
"question_types": ["Who/What/When", "Match ruler with achievement", "Battle identification"]
},
{
"name": "Modern India",
"description": "British rule, freedom movement, post-independence",
"question_types": ["Freedom fighter identification", "Movement/event in order", "Governor General/Viceroy"]
},
{
"name": "World History",
"description": "World wars, revolutions, important treaties",
"question_types": ["Event identification", "Treaty/agreement", "Revolution causes"]
},
{
"name": "Art and Culture",
"description": "Indian art forms, dance, music, architecture, literature",
"question_types": ["Identify art form", "Match dance with state", "Architecture identification"]
}
]
},
{
"name": "Geography",
"description": "Physical, Indian, and world geography",
"topics": [
{
"name": "Physical Geography",
"description": "Earth, atmosphere, lithosphere, hydrosphere",
"question_types": ["Concept identification", "Layer/zone properties", "Natural phenomena"]
},
{
"name": "Indian Geography",
"description": "Rivers, mountains, climate, soil, vegetation, states",
"question_types": ["River system", "Mountain pass/peak", "State capital/boundary", "Climate zone"]
},
{
"name": "World Geography",
"description": "Continents, oceans, countries, capitals",
"question_types": ["Country-capital", "Largest/smallest/longest", "Geographic features"]
},
{
"name": "Climate and Weather",
"description": "Monsoons, cyclones, climate change, seasons",
"question_types": ["Monsoon mechanism", "Climate type", "Weather phenomena"]
},
{
"name": "Resources and Agriculture",
"description": "Natural resources, crops, minerals, industries",
"question_types": ["Crop-region", "Mineral-state", "Industry location", "Resource type"]
}
]
},
{
"name": "Indian Polity",
"description": "Constitution, governance, judiciary, and political system",
"topics": [
{
"name": "Indian Constitution",
"description": "Preamble, fundamental rights, DPSP, duties, amendments",
"question_types": ["Article identification", "Amendment detail", "Right classification", "Feature borrowed from"]
},
{
"name": "Parliament and State Legislature",
"description": "Lok Sabha, Rajya Sabha, state assemblies, bills",
"question_types": ["Functions", "Composition", "Bill passage process", "Speaker/Chairman powers"]
},
{
"name": "Judiciary",
"description": "Supreme Court, High Courts, lower courts, judicial review",
"question_types": ["Jurisdiction", "Appointment process", "Writ identification", "Landmark judgement"]
},
{
"name": "Governance",
"description": "President, PM, Council of Ministers, Governor, CAG",
"question_types": ["Powers and functions", "Appointment", "Removal process", "Emergency provisions"]
},
{
"name": "Panchayati Raj and Local Governance",
"description": "73rd and 74th amendments, three-tier system",
"question_types": ["Structure", "Functions", "Elections", "Constitutional provisions"]
}
]
},
{
"name": "Economics",
"description": "Indian economy, banking, fiscal policy, international trade",
"topics": [
{
"name": "Indian Economy Basics",
"description": "GDP, GNP, NI, sectors of economy, planning",
"question_types": ["Define term", "Sector identification", "Economic indicator", "Five Year Plan"]
},
{
"name": "Banking and Finance",
"description": "RBI, commercial banks, monetary policy, financial markets",
"question_types": ["RBI functions", "Bank rate/repo rate", "Financial term", "Banking instrument"]
},
{
"name": "Budget and Fiscal Policy",
"description": "Union budget, taxes, fiscal deficit, public debt",
"question_types": ["Tax type", "Budget component", "Deficit type", "Revenue/capital"]
},
{
"name": "International Organizations",
"description": "IMF, World Bank, WTO, ASEAN, BRICS, G20",
"question_types": ["Organization HQ", "Member countries", "Functions", "Formation year"]
},
{
"name": "Government Schemes",
"description": "Major government welfare and development schemes",
"question_types": ["Scheme objective", "Launch year", "Ministry responsible", "Beneficiary"]
}
]
},
{
"name": "Science",
"description": "Physics, chemistry, biology, and technology",
"topics": [
{
"name": "Physics",
"description": "Mechanics, heat, light, sound, electricity, magnetism",
"question_types": ["Law/principle identification", "Unit/dimension", "Application of concept", "Inventor/discovery"]
},
{
"name": "Chemistry",
"description": "Elements, compounds, reactions, acids and bases, everyday chemistry",
"question_types": ["Chemical formula", "Reaction type", "Element property", "Everyday chemistry application"]
},
{
"name": "Biology",
"description": "Cell biology, human body, diseases, nutrition, ecology",
"question_types": ["Organ/system function", "Disease-cause", "Vitamin-deficiency", "Ecology concept"]
},
{
"name": "Space and Technology",
"description": "ISRO missions, satellites, recent tech developments",
"question_types": ["Mission identification", "Satellite purpose", "Technology application", "First achievements"]
},
{
"name": "Computer Awareness",
"description": "Computer fundamentals, MS Office, Internet, networking, cybersecurity",
"question_types": ["Term definition", "Shortcut key", "Component function", "Software feature"]
}
]
},
{
"name": "Static GK",
"description": "Fixed factual knowledge that doesn't change frequently",
"topics": [
{
"name": "Books and Authors",
"description": "Famous books and their authors",
"question_types": ["Match book-author", "Identify author", "Award-winning book"]
},
{
"name": "Important Days and Dates",
"description": "National and international important days",
"question_types": ["Date identification", "Theme of the year", "Organizing body"]
},
{
"name": "Awards and Honours",
"description": "Bharat Ratna, Padma awards, Nobel Prize, national awards",
"question_types": ["Award-winner", "Award category", "First recipient", "Recent winner"]
},
{
"name": "Sports",
"description": "Major sports events, trophies, records, personalities",
"question_types": ["Trophy-sport", "Record holder", "Venue/host country", "Player-team"]
},
{
"name": "National Symbols and Firsts",
"description": "National symbols, firsts in India and world",
"question_types": ["Identify symbol", "First person/event", "National identifier"]
}
]
}
]
}
]
}

1
generators/__init__.py Normal file
View File

@ -0,0 +1 @@
# SSCTopper Question Generators

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

89
generators/base.py Normal file
View File

@ -0,0 +1,89 @@
#!/usr/bin/env python3
"""Base utilities for question generation."""
import random
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from db.init import get_db, get_question_type_id, insert_questions_batch
def shuffle_options(correct, wrongs, explanation=""):
    """Lay out one correct answer and three distractors in random order.

    Args:
        correct: The right answer; always included.
        wrongs: List of wrong answers; only the first three are used.
        explanation: Text stored unchanged under ``'explanation'``.

    Returns:
        A dict with ``option_a`` .. ``option_d`` (all converted with
        ``str``), ``correct_option`` (the letter ``'A'``-``'D'`` of the slot
        holding ``correct``) and ``explanation``.

    Raises:
        IndexError: If ``wrongs`` holds fewer than three items.
    """
    candidates = [correct] + wrongs[:3]
    # Shuffle positions instead of values: wherever index 0 lands is the
    # slot of the correct answer, even if a distractor has an equal value.
    order = list(range(4))
    random.shuffle(order)
    result = {
        f'option_{letter}': str(candidates[source])
        for letter, source in zip('abcd', order)
    }
    result['correct_option'] = 'ABCD'[order.index(0)]
    result['explanation'] = explanation
    return result
def make_question(qtype_id, text, correct, wrongs, explanation="", difficulty=1):
    """Assemble a complete question dict.

    The options, correct letter and explanation come from
    ``shuffle_options``; this function adds the question type id, the
    question text and the difficulty on top of that dict and returns it.
    """
    question = shuffle_options(correct, wrongs, explanation)
    question.update(
        question_type_id=qtype_id,
        question_text=text,
        difficulty=difficulty,
    )
    return question
def get_qtid(conn, subject, subtopic, topic, qtype):
    """Short alias that forwards all arguments to ``get_question_type_id``."""
    question_type_id = get_question_type_id(conn, subject, subtopic, topic, qtype)
    return question_type_id
def nearby_wrong(val, spread=None):
    """Return three distinct wrong answers close to ``val``, as strings.

    Args:
        val: The correct numeric value.
        spread: Largest offset to try.  When omitted it is one fifth of
            ``abs(val)`` (at least 1), or 5 when ``val`` is 0.

    Returns:
        A list of three strings, none of which equals ``str(val)``.
    """
    if spread is None:
        spread = 5 if val == 0 else max(1, abs(val) // 5)
    largest_offset = max(1, spread)

    distractors = set()
    # At most 50 random draws; stop early once three values are collected.
    for _ in range(50):
        if len(distractors) >= 3:
            break
        delta = random.randint(1, largest_offset)
        direction = random.choice([-1, 1])
        candidate = val + direction * delta
        if candidate != val:
            distractors.add(candidate)

    # Top up deterministically when the random draws found too few values
    # (e.g. a spread of 1 only ever yields val - 1 and val + 1).
    while len(distractors) < 3:
        distractors.add(val + len(distractors) + 1)
    return [str(d) for d in distractors]
def nearby_wrong_float(val, spread=None, decimals=2):
    """Return three distinct wrong answers close to a float value.

    Args:
        val: The correct numeric value.
        spread: Largest random offset to try.  Defaults to 20% of
            ``abs(val)``, but never less than 0.5.
        decimals: Number of decimal places every candidate is rounded to.

    Returns:
        A list of three distinct strings.  None of them equals ``val``
        rounded to ``decimals`` places.  Randomly drawn candidates are
        always positive; the deterministic fallback values are ``val`` plus
        a multiple of 0.5 and are not sign-checked.
    """
    if spread is None:
        spread = max(0.5, abs(val) * 0.2)
    target = round(val, decimals)
    wrongs = set()
    attempts = 0
    while len(wrongs) < 3 and attempts < 50:
        offset = round(random.uniform(0.1, spread), decimals)
        sign = random.choice([-1, 1])
        w = round(val + sign * offset, decimals)
        if w != target and w > 0:
            wrongs.add(w)
        attempts += 1

    # Fallback: walk upwards in steps of 0.5.  The step counter advances on
    # every pass, so a candidate that collides with an existing value or
    # rounds back onto the correct answer (common with decimals=0) is just
    # skipped.  Deriving the step from len(wrongs) would retry the same
    # value forever.
    step = len(wrongs)
    while len(wrongs) < 3:
        step += 1
        candidate = round(val + step * 0.5, decimals)
        if candidate != target:
            wrongs.add(candidate)
    return [str(w) for w in wrongs]
def frac_str(num, den):
    """Format the fraction ``num/den`` as a string in lowest terms.

    The sign is always carried by the numerator, so ``frac_str(1, -2)``
    returns ``"-1/2"`` and ``frac_str(3, -1)`` returns ``"-3"``.

    Args:
        num: Integer numerator.
        den: Integer denominator.  A zero denominator with a non-zero
            numerator is not rejected and yields ``"1/0"`` or ``"-1/0"``.

    Returns:
        ``"n"`` when the reduced denominator is 1, otherwise ``"n/d"``.

    Raises:
        ZeroDivisionError: If ``num`` and ``den`` are both 0.
    """
    from math import gcd

    # Move the sign onto the numerator first; otherwise a negative
    # denominator never reduces to 1 and leaks into the output ("3/-1").
    if den < 0:
        num, den = -num, -den
    g = gcd(abs(num), abs(den))
    n, d = num // g, den // g
    if d == 1:
        return str(n)
    return f"{n}/{d}"

View File

@ -0,0 +1,545 @@
#!/usr/bin/env python3
"""
English Language & Comprehension Question Generator for SSC CGL.
Generates up to ~27,500 questions covering vocabulary, grammar, sentence structure, error detection, and comprehension (cloze test).
"""
import random
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from generators.base import make_question, get_qtid, get_db, insert_questions_batch
# Subject name passed as the subject argument of get_qtid() by every
# generator in this module.
SUBJECT = "English Language and Comprehension"
# ============ WORD BANKS ============
# SYNONYMS: one tuple per word, shaped (word, synonym, [three wrong options]).
# gen_synonyms() asks for the synonym of `word`, uses `synonym` as the
# correct answer and the list as the distractors.
SYNONYMS = [
("Abundant", "Plentiful", ["Scarce", "Meager", "Rare"]),
("Accurate", "Precise", ["Wrong", "Vague", "Incorrect"]),
("Admire", "Respect", ["Despise", "Hate", "Ignore"]),
("Affluent", "Wealthy", ["Poor", "Needy", "Destitute"]),
("Agile", "Nimble", ["Clumsy", "Slow", "Stiff"]),
("Amiable", "Friendly", ["Hostile", "Rude", "Cold"]),
("Ancient", "Old", ["Modern", "New", "Recent"]),
("Arduous", "Difficult", ["Easy", "Simple", "Effortless"]),
("Audacious", "Bold", ["Timid", "Meek", "Cowardly"]),
("Authentic", "Genuine", ["Fake", "False", "Counterfeit"]),
("Benevolent", "Kind", ["Cruel", "Malicious", "Harsh"]),
("Bizarre", "Strange", ["Normal", "Usual", "Ordinary"]),
("Candid", "Frank", ["Deceptive", "Dishonest", "Sly"]),
("Cautious", "Careful", ["Reckless", "Careless", "Rash"]),
("Comprehend", "Understand", ["Misunderstand", "Confuse", "Ignore"]),
("Conceal", "Hide", ["Reveal", "Expose", "Display"]),
("Contempt", "Scorn", ["Respect", "Admiration", "Regard"]),
("Courage", "Bravery", ["Cowardice", "Fear", "Timidity"]),
("Delight", "Joy", ["Sorrow", "Grief", "Misery"]),
("Diligent", "Hardworking", ["Lazy", "Idle", "Indolent"]),
("Diminish", "Reduce", ["Increase", "Enlarge", "Expand"]),
("Eloquent", "Expressive", ["Inarticulate", "Stammering", "Dull"]),
("Enormous", "Huge", ["Tiny", "Small", "Minute"]),
("Eternal", "Everlasting", ["Temporary", "Brief", "Fleeting"]),
("Exquisite", "Beautiful", ["Ugly", "Plain", "Crude"]),
("Feeble", "Weak", ["Strong", "Powerful", "Mighty"]),
("Ferocious", "Fierce", ["Gentle", "Mild", "Tame"]),
("Frigid", "Cold", ["Hot", "Warm", "Tropical"]),
("Generous", "Liberal", ["Stingy", "Miserly", "Tight"]),
("Gratitude", "Thankfulness", ["Ingratitude", "Ungratefulness", "Resentment"]),
("Halt", "Stop", ["Continue", "Proceed", "Advance"]),
("Hazardous", "Dangerous", ["Safe", "Secure", "Harmless"]),
("Hostile", "Unfriendly", ["Friendly", "Warm", "Kind"]),
("Immense", "Vast", ["Tiny", "Small", "Little"]),
("Impeccable", "Flawless", ["Faulty", "Defective", "Imperfect"]),
("Jubilant", "Joyful", ["Sad", "Gloomy", "Depressed"]),
("Keen", "Eager", ["Reluctant", "Unwilling", "Indifferent"]),
("Laudable", "Praiseworthy", ["Blameworthy", "Shameful", "Disgraceful"]),
("Lucid", "Clear", ["Confusing", "Vague", "Obscure"]),
("Magnificent", "Splendid", ["Ordinary", "Plain", "Dull"]),
("Meticulous", "Careful", ["Careless", "Sloppy", "Negligent"]),
("Mundane", "Ordinary", ["Extraordinary", "Unusual", "Special"]),
("Novice", "Beginner", ["Expert", "Veteran", "Professional"]),
("Obstinate", "Stubborn", ["Flexible", "Yielding", "Compliant"]),
("Opulent", "Luxurious", ["Poor", "Shabby", "Modest"]),
("Pacify", "Calm", ["Agitate", "Provoke", "Irritate"]),
("Prudent", "Wise", ["Foolish", "Reckless", "Imprudent"]),
("Replenish", "Refill", ["Drain", "Empty", "Deplete"]),
("Serene", "Calm", ["Turbulent", "Agitated", "Noisy"]),
("Tedious", "Boring", ["Interesting", "Exciting", "Engaging"]),
("Trivial", "Insignificant", ["Important", "Significant", "Vital"]),
("Ubiquitous", "Everywhere", ["Rare", "Scarce", "Uncommon"]),
("Valiant", "Brave", ["Cowardly", "Timid", "Fearful"]),
("Verbose", "Wordy", ["Concise", "Brief", "Terse"]),
("Wrath", "Anger", ["Calm", "Peace", "Happiness"]),
("Zealous", "Enthusiastic", ["Apathetic", "Indifferent", "Passive"]),
]
# ANTONYMS: (word, antonym) pairs.  Entries carry no distractor list;
# gen_antonyms() draws the wrong options from the antonym column of other
# pairs.  Note that "Barren" is the antonym of both "Fertile" and
# "Prolific", so the antonym column contains a repeated value.
ANTONYMS = [
("Accept", "Reject"), ("Advance", "Retreat"), ("Ancient", "Modern"),
("Arrival", "Departure"), ("Ascend", "Descend"), ("Bold", "Timid"),
("Brave", "Cowardly"), ("Bright", "Dim"), ("Calm", "Agitated"),
("Create", "Destroy"), ("Dawn", "Dusk"), ("Defend", "Attack"),
("Expand", "Contract"), ("Forget", "Remember"), ("Generous", "Miserly"),
("Guilty", "Innocent"), ("Humble", "Proud"), ("Import", "Export"),
("Joy", "Sorrow"), ("Knowledge", "Ignorance"), ("Liberty", "Captivity"),
("Major", "Minor"), ("Natural", "Artificial"), ("Optimist", "Pessimist"),
("Peace", "War"), ("Rapid", "Slow"), ("Rigid", "Flexible"),
("Simple", "Complex"), ("Temporary", "Permanent"), ("Victory", "Defeat"),
("Wisdom", "Folly"), ("Zenith", "Nadir"), ("Transparent", "Opaque"),
("Voluntary", "Compulsory"), ("Shallow", "Deep"), ("Fertile", "Barren"),
("Concord", "Discord"), ("Benign", "Malignant"), ("Prolific", "Barren"),
("Affluent", "Destitute"),
]
# ONE_WORD_SUBS: (phrase, one-word substitute, [three wrong options]).
# A distractor must never be a valid answer to its own phrase.  Three
# entries used to break that rule ("Sleepwalker" for Somnambulist,
# "Aquaphobia" for Hydrophobia, "Impromptu" for Extempore); those options
# were replaced with related words that do not fit the phrase.
ONE_WORD_SUBS = [
    ("A person who loves books", "Bibliophile", ["Bibliographer", "Librarian", "Bookworm"]),
    ("Government by the people", "Democracy", ["Monarchy", "Autocracy", "Oligarchy"]),
    ("One who hates mankind", "Misanthrope", ["Philanthropist", "Misogynist", "Anthropologist"]),
    ("A person who speaks two languages", "Bilingual", ["Polyglot", "Monoglot", "Linguist"]),
    ("A person who walks in sleep", "Somnambulist", ["Insomniac", "Somniloquist", "Narcoleptic"]),
    ("Fear of water", "Hydrophobia", ["Pyrophobia", "Claustrophobia", "Acrophobia"]),
    ("Fear of heights", "Acrophobia", ["Hydrophobia", "Claustrophobia", "Agoraphobia"]),
    ("One who eats human flesh", "Cannibal", ["Carnivore", "Omnivore", "Herbivore"]),
    ("A word that is opposite in meaning", "Antonym", ["Synonym", "Homonym", "Acronym"]),
    ("Killing of a king", "Regicide", ["Homicide", "Genocide", "Fratricide"]),
    ("A place for keeping bees", "Apiary", ["Aviary", "Aquarium", "Nursery"]),
    ("One who knows everything", "Omniscient", ["Omnipresent", "Omnipotent", "Omnivore"]),
    ("Medicine that kills germs", "Antiseptic", ["Antibiotic", "Antidote", "Analgesic"]),
    ("A person who is 100 years old", "Centenarian", ["Octogenarian", "Nonagenarian", "Septuagenarian"]),
    ("Study of stars", "Astronomy", ["Astrology", "Cosmology", "Astrophysics"]),
    ("Government by a single person", "Autocracy", ["Democracy", "Monarchy", "Theocracy"]),
    ("One who does not believe in God", "Atheist", ["Theist", "Agnostic", "Pagan"]),
    ("A speech delivered without preparation", "Extempore", ["Monologue", "Rehearsed", "Deliberate"]),
    ("One who lives on vegetables", "Vegetarian", ["Vegan", "Carnivore", "Omnivore"]),
    ("A place for keeping dead bodies", "Mortuary", ["Cemetery", "Crematorium", "Mausoleum"]),
    ("That which cannot be read", "Illegible", ["Eligible", "Legible", "Indelible"]),
    ("A person who cannot be corrected", "Incorrigible", ["Incurable", "Invincible", "Inevitable"]),
    ("One who is present everywhere", "Omnipresent", ["Omniscient", "Omnipotent", "Omnivore"]),
    ("One who looks on the bright side", "Optimist", ["Pessimist", "Realist", "Fatalist"]),
    ("Study of ancient things", "Archaeology", ["Anthropology", "Paleontology", "Geology"]),
]
# IDIOMS: (idiom, meaning, [three wrong meanings]).  gen_idioms() asks what
# `idiom` means, with `meaning` as the correct option and the list as the
# distractors.
IDIOMS = [
("Break the ice", "To initiate conversation in a social setting", ["To break something", "To melt ice", "To cool down"]),
("Burn the midnight oil", "To work or study late into the night", ["To waste oil", "To start a fire", "To cook at night"]),
("Cry over spilt milk", "To regret something that cannot be undone", ["To cry while drinking milk", "To waste milk", "To be sad about dairy"]),
("Hit the nail on the head", "To be exactly right", ["To do carpentry", "To hurt oneself", "To break something"]),
("A piece of cake", "Something very easy", ["A type of dessert", "A small portion", "A bakery item"]),
("Bite the bullet", "To face a difficult situation bravely", ["To eat ammunition", "To hurt teeth", "To be violent"]),
("Cost an arm and a leg", "Very expensive", ["Physical injury", "Amputation", "Medical procedure"]),
("Let the cat out of the bag", "To reveal a secret", ["To free an animal", "To open a bag", "To go shopping"]),
("Once in a blue moon", "Very rarely", ["During full moon", "Monthly", "Nightly"]),
("Raining cats and dogs", "Raining very heavily", ["Animals falling", "Pet show", "Zoo visit"]),
("Spill the beans", "To reveal secret information", ["To cook", "To waste food", "To plant seeds"]),
("The ball is in your court", "It is your turn to take action", ["Playing tennis", "Court hearing", "Ball game"]),
("Under the weather", "Feeling unwell", ["In rain", "Below clouds", "Bad climate"]),
("Actions speak louder than words", "What you do matters more than what you say", ["Being noisy", "Shouting", "Speaking loudly"]),
("Beat around the bush", "To avoid the main topic", ["Gardening", "Playing in bush", "Walking in forest"]),
("Burning bridges", "Destroying relationships", ["Arson", "Building fire", "Demolition"]),
("Every cloud has a silver lining", "Good things come after bad", ["Weather forecast", "Cloud watching", "Silver mining"]),
("Keep your chin up", "Stay positive", ["Posture advice", "Exercise tip", "Looking upward"]),
("Back to the drawing board", "Start over", ["Art class", "Going backwards", "Drawing pictures"]),
("Barking up the wrong tree", "Making a wrong assumption", ["Disturbing a dog", "Climbing trees", "Forest activity"]),
]
# ============ GENERATORS ============
def gen_synonyms(conn, count=2500):
    """Build ``count`` "choose the synonym" questions from ``SYNONYMS``.

    Entries are drawn at random with replacement.  Returns an empty list
    when the question type id cannot be resolved.
    """
    qtid = get_qtid(conn, SUBJECT, "Vocabulary", "Synonyms", "Choose synonym")
    if not qtid:
        return []
    # Lazy generator: each entry is drawn right before its question is
    # built, so draws and option shuffles alternate one question at a time.
    picks = (random.choice(SYNONYMS) for _ in range(count))
    return [
        make_question(
            qtid,
            f"Choose the synonym of '{word}':",
            syn,
            wrongs,
            f"'{word}' means '{syn}'",
            1,
        )
        for word, syn, wrongs in picks
    ]
def gen_antonyms(conn, count=2500):
    """Build ``count`` "choose the antonym" questions from ``ANTONYMS``.

    Distractors are the antonyms of other pairs.  They are sampled from a
    pool that already excludes the correct answer and the prompt word, so
    every question gets three real, distinct wrong options whenever the
    table is large enough.

    Args:
        conn: Database connection passed on to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of question dicts; empty when the question type id cannot be
        resolved.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Vocabulary", "Antonyms", "Choose antonym")
    if not qtid:
        return questions

    # De-duplicated antonym column ("Barren" occurs twice in the table);
    # dict.fromkeys keeps the first-seen order.
    unique_answers = list(dict.fromkeys(ant for _, ant in ANTONYMS))

    for _ in range(count):
        word, ant = random.choice(ANTONYMS)
        # Filter BEFORE sampling.  Sampling three entries and filtering
        # afterwards can leave fewer than three and can also keep a
        # distractor identical to the correct answer.
        pool = [a for a in unique_answers if a != ant and a != word]
        if len(pool) >= 3:
            other_ants = random.sample(pool, 3)
        else:
            # Best-effort padding, only reachable with a very small table.
            other_ants = (pool + ["None", "All", "Some"])[:3]
        questions.append(make_question(
            qtid,
            f"Choose the antonym of '{word}':",
            ant, other_ants, f"Opposite of '{word}' is '{ant}'", 1))
    return questions
def gen_one_word(conn, count=2000):
    """Build ``count`` one-word-substitution questions from ``ONE_WORD_SUBS``.

    Entries are drawn at random with replacement.  Returns an empty list
    when the question type id cannot be resolved.
    """
    qtid = get_qtid(conn, SUBJECT, "Vocabulary", "One Word Substitution", "Find one word for phrase")
    if not qtid:
        return []
    built = []
    for _ in range(count):
        phrase, answer, distractors = random.choice(ONE_WORD_SUBS)
        prompt = f"One word for: '{phrase}'"
        note = f"'{phrase}' = {answer}"
        built.append(make_question(qtid, prompt, answer, distractors, note, 1))
    return built
def gen_idioms(conn, count=2000):
    """Build ``count`` idiom-meaning questions from ``IDIOMS``.

    Entries are drawn at random with replacement.  Returns an empty list
    when the question type id cannot be resolved.
    """
    qtid = get_qtid(conn, SUBJECT, "Vocabulary", "Idioms and Phrases", "Meaning of idiom")
    if not qtid:
        return []
    built = []
    for _ in range(count):
        idiom, meaning, distractors = random.choice(IDIOMS)
        prompt = f"What does the idiom '{idiom}' mean?"
        note = f"'{idiom}' = {meaning}"
        built.append(make_question(qtid, prompt, meaning, distractors, note, 1))
    return built
def gen_spelling(conn, count=1500):
    """Build ``count`` "choose the correctly spelled word" questions.

    For each question a word is picked at random and three misspellings
    are produced by replacing one interior letter: a vowel becomes a
    consonant from ``'bcdfg'`` and any other letter becomes a vowel.  The
    three misspellings are guaranteed to be distinct from each other and
    from the correct word.

    Args:
        conn: Database connection passed on to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of question dicts; empty when the question type id cannot be
        resolved.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Vocabulary", "Spelling Correction", "Choose correct spelling")
    if not qtid:
        return questions
    words = ["Accommodation", "Achievement", "Acknowledge", "Acquaintance", "Aggressive",
             "Apparently", "Argument", "Assassination", "Beautiful", "Beginning",
             "Believe", "Bureaucracy", "Calendar", "Changeable", "Committed",
             "Conscience", "Conscious", "Definitely", "Dilemma", "Disappear",
             "Disappoint", "Discipline", "Embarrass", "Environment", "Exaggerate",
             "Existence", "Experience", "Fascinate", "February", "Fluorescent",
             "Foreign", "Forty", "Government", "Guarantee", "Harass",
             "Hierarchy", "Humorous", "Hygiene", "Immediately", "Independent",
             "Intelligence", "Jewellery", "Judgement", "Knowledge", "Leisure",
             "License", "Maintenance", "Mediterranean", "Millennium", "Necessary",
             "Noticeable", "Occasion", "Occurrence", "Parliament", "Perseverance",
             "Pneumonia", "Possession", "Privilege", "Pronunciation", "Psychology",
             "Questionnaire", "Receive", "Recommend", "Rhythm", "Schedule",
             "Separate", "Successful", "Supersede", "Surprise", "Threshold",
             "Tomorrow", "Tyranny", "Unnecessary", "Vacuum", "Vegetable",
             "Wednesday", "Weird"]
    for _ in range(count):
        w = random.choice(words)
        # Keep drawing until there are three DISTINCT misspellings; a fixed
        # three draws can repeat the same corruption and yield duplicate
        # options.  Every word above has at least three interior letters,
        # each with five possible replacements, so this always terminates.
        misspells = []
        while len(misspells) < 3:
            idx = random.randint(1, len(w) - 2)  # never the first or last letter
            chars = list(w)
            if chars[idx] in 'aeiou':
                chars[idx] = random.choice('bcdfg')
            else:
                chars[idx] = random.choice('aeiou')
            m = "".join(chars)
            if m != w and m not in misspells:
                misspells.append(m)
        questions.append(make_question(
            qtid,
            "Choose the correctly spelled word:",
            w, misspells, f"Correct spelling: {w}", 1))
    return questions
def gen_tenses(conn, count=1500):
    """Build ``count`` fill-in-the-blank tense questions.

    Each template is ``(sentence, correct form, [three wrong forms],
    tense name)``; the tense name is shown in the explanation.

    Args:
        conn: Database connection passed on to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of question dicts; empty when the question type id cannot be
        resolved.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Grammar", "Tenses", "Fill in correct tense")
    if not qtid:
        return questions
    templates = [
        ("She ___ to school every day.", "goes", ["go", "went", "going"], "Simple Present"),
        ("They ___ playing football yesterday.", "were", ["was", "are", "is"], "Past Continuous"),
        ("He ___ the work by tomorrow.", "will finish", ["finished", "finishes", "finishing"], "Simple Future"),
        ("I ___ this book already.", "have read", ["had read", "read", "reading"], "Present Perfect"),
        ("She ___ dinner when I arrived.", "was cooking", ["cooked", "cooks", "cooking"], "Past Continuous"),
        # "have been" without an -ing verb is Present Perfect, not
        # Present Perfect Continuous.
        ("We ___ here since morning.", "have been", ["are", "were", "was"], "Present Perfect"),
        ("The train ___ before we reached.", "had left", ["left", "leaves", "leaving"], "Past Perfect"),
        ("By next year, I ___ my degree.", "will have completed", ["complete", "completed", "completing"], "Future Perfect"),
        ("He ___ a letter now.", "is writing", ["writes", "wrote", "written"], "Present Continuous"),
        ("They ___ the match last week.", "won", ["win", "wins", "winning"], "Simple Past"),
        ("She ___ the piano since childhood.", "has been playing", ["plays", "played", "play"], "Present Perfect Continuous"),
        ("I ___ you tomorrow.", "will call", ["called", "call", "calling"], "Simple Future"),
    ]
    for _ in range(count):
        q_text, correct, wrongs, tense = random.choice(templates)
        questions.append(make_question(
            qtid, f"Fill in the blank: {q_text}",
            correct, wrongs, f"Tense: {tense}", 1))
    return questions
def gen_articles(conn, count=1000):
    """Build ``count`` fill-in-the-article questions.

    Templates are ``(sentence, correct article, [three wrong options])``
    and are drawn at random with replacement.  Returns an empty list when
    the question type id cannot be resolved.
    """
    qtid = get_qtid(conn, SUBJECT, "Grammar", "Articles", "Fill in correct article")
    if not qtid:
        return []
    bank = (
        ("___ apple a day keeps the doctor away.", "An", ["A", "The", "No article"]),
        ("He is ___ honest man.", "an", ["a", "the", "no article"]),
        ("___ sun rises in the east.", "The", ["A", "An", "No article"]),
        ("She is ___ doctor.", "a", ["an", "the", "no article"]),
        ("I saw ___ elephant in the zoo.", "an", ["a", "the", "no article"]),
        ("___ Ganges is a holy river.", "The", ["A", "An", "No article"]),
        ("He gave me ___ useful tip.", "a", ["an", "the", "no article"]),
        ("___ gold is a precious metal.", "No article", ["A", "An", "The"]),
        ("She is ___ European.", "a", ["an", "the", "no article"]),
        ("I need ___ umbrella.", "an", ["a", "the", "no article"]),
    )
    built = []
    for _ in range(count):
        stem, answer, distractors = random.choice(bank)
        prompt = "Fill in the correct article: " + stem
        built.append(make_question(qtid, prompt, answer, distractors, "Article rule applied", 1))
    return built
def gen_prepositions(conn, count=1000):
    """Build ``count`` fill-in-the-preposition questions.

    Templates are ``(sentence, correct preposition, [three wrong
    options])`` and are drawn at random with replacement.  Returns an empty
    list when the question type id cannot be resolved.
    """
    qtid = get_qtid(conn, SUBJECT, "Grammar", "Prepositions", "Fill in preposition")
    if not qtid:
        return []
    bank = (
        ("The book is ___ the table.", "on", ["in", "at", "by"]),
        ("She arrived ___ Monday.", "on", ["in", "at", "by"]),
        ("He lives ___ Mumbai.", "in", ["on", "at", "by"]),
        ("The meeting is ___ 3 PM.", "at", ["in", "on", "by"]),
        ("I have been waiting ___ morning.", "since", ["for", "from", "by"]),
        ("She is good ___ mathematics.", "at", ["in", "on", "with"]),
        ("He is fond ___ music.", "of", ["with", "in", "at"]),
        ("The cat jumped ___ the wall.", "over", ["on", "in", "at"]),
        ("She is interested ___ painting.", "in", ["on", "at", "by"]),
        ("He walked ___ the park.", "through", ["in", "on", "at"]),
        ("They traveled ___ train.", "by", ["in", "on", "with"]),
        ("The match starts ___ 5 o'clock.", "at", ["in", "on", "by"]),
    )
    built = []
    for _ in range(count):
        stem, answer, distractors = random.choice(bank)
        prompt = "Fill in the correct preposition: " + stem
        note = "Preposition: " + answer
        built.append(make_question(qtid, prompt, answer, distractors, note, 1))
    return built
def gen_voice(conn, count=1500):
    """Build ``count`` active-to-passive conversion questions.

    Templates are ``(active sentence, correct passive, [three wrong
    passives])`` and are drawn at random with replacement.  Returns an
    empty list when the question type id cannot be resolved.
    """
    qtid = get_qtid(conn, SUBJECT, "Sentence Structure", "Active and Passive Voice", "Convert to passive")
    if not qtid:
        return []
    bank = (
        ("She writes a letter.", "A letter is written by her.",
         ["A letter was written by her.", "A letter were written by her.", "A letter has written by her."]),
        ("He plays cricket.", "Cricket is played by him.",
         ["Cricket was played by him.", "Cricket were played by him.", "Cricket has played by him."]),
        ("They are building a house.", "A house is being built by them.",
         ["A house was being built by them.", "A house has been built by them.", "A house is built by them."]),
        ("She cooked food.", "Food was cooked by her.",
         ["Food is cooked by her.", "Food has been cooked by her.", "Food was being cooked by her."]),
        ("I have finished the work.", "The work has been finished by me.",
         ["The work was finished by me.", "The work is finished by me.", "The work had been finished by me."]),
        ("The teacher teaches the students.", "The students are taught by the teacher.",
         ["The students were taught by the teacher.", "The students has been taught by the teacher.", "The students is taught by the teacher."]),
        ("He will write a book.", "A book will be written by him.",
         ["A book would be written by him.", "A book shall be written by him.", "A book is written by him."]),
        ("Ram killed Ravana.", "Ravana was killed by Ram.",
         ["Ravana is killed by Ram.", "Ravana has been killed by Ram.", "Ravana were killed by Ram."]),
    )
    built = []
    for _ in range(count):
        active, passive, distractors = random.choice(bank)
        prompt = f"Convert to passive voice: '{active}'"
        note = f"Passive: {passive}"
        built.append(make_question(qtid, prompt, passive, distractors, note, 1))
    return built
def gen_direct_indirect(conn, count=1500):
    """Build ``count`` direct-to-indirect speech conversion questions.

    Templates are ``(direct speech, correct indirect speech, [three wrong
    conversions])`` and are drawn at random with replacement.  Returns an
    empty list when the question type id cannot be resolved.
    """
    qtid = get_qtid(conn, SUBJECT, "Sentence Structure", "Direct and Indirect Speech", "Convert to indirect speech")
    if not qtid:
        return []
    bank = (
        ('He said, "I am happy."',
         'He said that he was happy.',
         ['He said that he is happy.', 'He said that I am happy.', 'He told that he was happy.']),
        ('She said, "I will come tomorrow."',
         'She said that she would come the next day.',
         ['She said that she will come tomorrow.', 'She told she would come next day.', 'She said she will come.']),
        ('He asked, "Where do you live?"',
         'He asked where I lived.',
         ['He asked where do I live.', 'He asked that where I lived.', 'He asked me where I live.']),
        ('She said, "I have finished my work."',
         'She said that she had finished her work.',
         ['She said that she has finished her work.', 'She told she had finished work.', 'She said she finished her work.']),
        ('The teacher said, "The Earth revolves around the Sun."',
         'The teacher said that the Earth revolves around the Sun.',
         ['The teacher said the Earth revolved around the Sun.', 'The teacher told the Earth revolves around Sun.', 'The teacher said Earth revolving around Sun.']),
    )
    built = []
    for _ in range(count):
        direct, indirect, distractors = random.choice(bank)
        prompt = f"Convert to indirect speech: {direct}"
        note = f"Indirect: {indirect}"
        built.append(make_question(qtid, prompt, indirect, distractors, note, 1))
    return built
def gen_sentence_improvement(conn, count=2000):
    """Build ``count`` sentence-improvement questions (difficulty 2).

    Templates are ``(faulty sentence, correct replacement, [three wrong
    replacements])`` and are drawn at random with replacement.  Returns an
    empty list when the question type id cannot be resolved.
    """
    qtid = get_qtid(conn, SUBJECT, "Sentence Structure", "Sentence Improvement", "Replace underlined part")
    if not qtid:
        return []
    bank = (
        ("He don't know the answer.", "doesn't know", ["don't knows", "didn't knew", "not know"]),
        ("She is more taller than her sister.", "taller", ["most taller", "more tall", "tallest"]),
        ("I am going to market.", "to the market", ["in market", "for market", "at market"]),
        ("He told to me a story.", "told me", ["said to me", "tell me", "telling me"]),
        ("Each of the boys have done their work.", "has done his", ["have done his", "has did their", "have done their"]),
        ("One should do his duty.", "one's duty", ["their duty", "your duty", "our duty"]),
        ("She is knowing the answer.", "knows", ["is know", "was knowing", "has knowing"]),
        ("I am having a car.", "have", ["is having", "has", "having"]),
        ("He prevented me to go.", "from going", ["for going", "about going", "of going"]),
        ("She is elder than me.", "older than I", ["elder than I", "more elder than me", "oldest than me"]),
    )
    built = []
    for _ in range(count):
        faulty, fix, distractors = random.choice(bank)
        prompt = f"Improve the sentence: '{faulty}'"
        note = f"Correct: {fix}"
        built.append(make_question(qtid, prompt, fix, distractors, note, 2))
    return built
def gen_error_detection(conn, count=2500):
    """Build ``count`` spot-the-error questions (difficulty 2).

    Templates are ``(faulty sentence, description of the error, [three
    wrong descriptions])`` and are drawn at random with replacement.
    Returns an empty list when the question type id cannot be resolved.
    """
    qtid = get_qtid(conn, SUBJECT, "Error Detection", "Spot the Error", "Identify erroneous part")
    if not qtid:
        return []
    bank = (
        ("He go to school daily.", "Part A: 'go' should be 'goes'",
         ["Part B: 'to' is wrong", "Part C: 'daily' is wrong", "No error"]),
        ("She don't like ice cream.", "Part A: 'don't' should be 'doesn't'",
         ["Part B: 'like' is wrong", "Part C is wrong", "No error"]),
        ("The news are good.", "Part A: 'are' should be 'is'",
         ["Part B: 'good' is wrong", "Part C is wrong", "No error"]),
        ("Mathematics are my favourite subject.", "Part A: 'are' should be 'is'",
         ["Part B is wrong", "Part C is wrong", "No error"]),
        ("Each of the students have passed.", "Part B: 'have' should be 'has'",
         ["Part A is wrong", "Part C is wrong", "No error"]),
        ("He is more stronger than me.", "Part A: 'more stronger' should be 'stronger'",
         ["Part B is wrong", "Part C is wrong", "No error"]),
        ("I am agree with you.", "Part A: 'am agree' should be 'agree'",
         ["Part B is wrong", "Part C is wrong", "No error"]),
        ("The furniture are expensive.", "Part A: 'are' should be 'is'",
         ["Part B is wrong", "Part C is wrong", "No error"]),
        ("He gave me a advise.", "Part B: 'advise' should be 'advice'",
         ["Part A is wrong", "Part C is wrong", "No error"]),
        ("She discuss about the matter.", "Part A: 'discuss about' should be 'discussed'",
         ["Part B is wrong", "Part C is wrong", "No error"]),
    )
    built = []
    for _ in range(count):
        faulty, diagnosis, distractors = random.choice(bank)
        prompt = f"Find the error in: '{faulty}'"
        note = f"Error: {diagnosis}"
        built.append(make_question(qtid, prompt, diagnosis, distractors, note, 2))
    return built
def gen_fill_blanks(conn, count=2000):
    """Build ``count`` single-blank sentence-completion questions.

    Templates are ``(sentence with a blank, correct word, [three wrong
    words])`` and are drawn at random with replacement.  Returns an empty
    list when the question type id cannot be resolved.
    """
    qtid = get_qtid(conn, SUBJECT, "Sentence Structure", "Sentence Completion", "Fill in the blank (single)")
    if not qtid:
        return []
    bank = (
        ("Hard work is the key to ___.", "success", ["failure", "defeat", "loss"]),
        ("The weather is ___ today.", "pleasant", ["unpleasant", "horrible", "terrible"]),
        ("She showed great ___ in the face of danger.", "courage", ["cowardice", "fear", "hesitation"]),
        ("The doctor ___ the patient carefully.", "examined", ["ignored", "neglected", "avoided"]),
        ("He has a strong ___ for justice.", "passion", ["hatred", "dislike", "disregard"]),
        ("The students were ___ to learn the new topic.", "eager", ["reluctant", "unwilling", "indifferent"]),
        ("Her ___ attitude made everyone uncomfortable.", "arrogant", ["humble", "polite", "modest"]),
        ("The company achieved remarkable ___ this year.", "growth", ["decline", "loss", "failure"]),
        ("He is ___ of solving complex problems.", "capable", ["incapable", "unable", "unfit"]),
        ("The ___ of the river was very strong after the rain.", "current", ["calm", "stillness", "silence"]),
        ("She has an ___ personality that attracts people.", "amiable", ["hostile", "rude", "cold"]),
        ("The government ___ new policies for education.", "introduced", ["removed", "cancelled", "deleted"]),
    )
    built = []
    for _ in range(count):
        stem, answer, distractors = random.choice(bank)
        prompt = "Fill in the blank: " + stem
        note = "Answer: " + answer
        built.append(make_question(qtid, prompt, answer, distractors, note, 1))
    return built
def gen_sentence_rearrangement(conn, count=1500):
    """Build ``count`` sentence-rearrangement questions (difficulty 2).

    Templates are ``([sentence parts], correct sentence, [three wrong
    orderings])``.  For every question the chosen template's parts list is
    shuffled in place and shown joined with ``" / "``.  Returns an empty
    list when the question type id cannot be resolved.
    """
    qtid = get_qtid(conn, SUBJECT, "Sentence Structure", "Sentence Rearrangement", "Arrange sentence parts")
    if not qtid:
        return []
    bank = (
        (["The early bird", "catches", "the worm"],
         "The early bird catches the worm",
         ["catches the worm the early bird", "the worm catches the early bird", "the worm the early bird catches"]),
        (["Knowledge is", "better than", "wealth"],
         "Knowledge is better than wealth",
         ["better than wealth knowledge is", "wealth is better than knowledge", "is knowledge better than wealth"]),
        (["Honesty", "is the", "best policy"],
         "Honesty is the best policy",
         ["is the best policy honesty", "the best policy is honesty", "policy best is the honesty"]),
        (["United we stand", "divided", "we fall"],
         "United we stand divided we fall",
         ["divided we fall united we stand", "we fall divided united we stand", "stand united we divided fall we"]),
        (["Practice makes", "a man", "perfect"],
         "Practice makes a man perfect",
         ["a man perfect practice makes", "makes practice a man perfect", "perfect a man makes practice"]),
    )
    built = []
    for _ in range(count):
        chunks, answer, distractors = random.choice(bank)
        # The template's own list is reordered, so the next shuffle of the
        # same template starts from this order.
        random.shuffle(chunks)
        prompt = "Arrange in correct order: " + " / ".join(chunks)
        note = "Correct order: " + answer
        built.append(make_question(qtid, prompt, answer, distractors, note, 2))
    return built
def gen_cloze_test(conn, count=1500):
    """Build ``count`` cloze-test questions.

    Templates are ``(sentence with a blank, correct word, [three wrong
    words])`` and are drawn at random with replacement.

    Args:
        conn: Database connection passed on to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of question dicts; empty when the question type id cannot be
        resolved.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Comprehension", "Cloze Test", "Fill appropriate word")
    if not qtid:
        return questions
    # Every template carries its distractors as a list, so no per-item
    # patching of a string-valued entry is needed.
    templates = [
        ("Education is the most powerful ___ to change the world.", "weapon", ["weakness", "problem", "barrier"]),
        ("The ___ of success is hard work and dedication.", "foundation", ["destruction", "failure", "absence"]),
        ("A healthy mind lives in a healthy ___.", "body", ["house", "room", "place"]),
        ("Books are the ___ teachers of all time.", "best", ["worst", "slowest", "latest"]),
        ("Time and ___ wait for none.", "tide", ["money", "people", "luck"]),
        ("Prevention is better than ___.", "cure", ["disease", "medicine", "treatment"]),
        ("All that ___ is not gold.", "glitters", ["shines", "sparkles", "reflects"]),
        ("A rolling stone gathers no ___.", "moss", ["grass", "dust", "speed"]),
        ("Where there is a will, there is a ___.", "way", ["wall", "path", "road"]),
        ("Rome was not built in a ___.", "day", ["week", "month", "hour"]),
    ]
    for _ in range(count):
        sentence, correct, wrongs = random.choice(templates)
        questions.append(make_question(
            qtid,
            f"Fill in the blank: {sentence}",
            correct, wrongs, f"Answer: {correct}", 1))
    return questions
def gen_subject_verb(conn, count=1000):
    """Build ``count`` subject-verb agreement questions (difficulty 2).

    Templates are ``(sentence with a blank, correct verb, [three wrong
    verbs])`` and are drawn at random with replacement.  Returns an empty
    list when the question type id cannot be resolved.
    """
    qtid = get_qtid(conn, SUBJECT, "Grammar", "Subject-Verb Agreement", "Choose correct verb")
    if not qtid:
        return []
    bank = (
        ("The team ___ playing well this season.", "is", ["are", "were", "have been"]),
        ("Neither the teacher nor the students ___ present.", "were", ["was", "is", "has been"]),
        ("Each of the boys ___ given a prize.", "was", ["were", "are", "have been"]),
        ("Either you or I ___ going to attend.", "am", ["are", "is", "were"]),
        ("The quality of these apples ___ good.", "is", ["are", "were", "have been"]),
        ("Bread and butter ___ my favourite breakfast.", "is", ["are", "were", "have been"]),
        ("One of my friends ___ from Delhi.", "is", ["are", "were", "have been"]),
        ("The news ___ very surprising.", "was", ["were", "are", "have been"]),
        ("No news ___ good news.", "is", ["are", "were", "have been"]),
        ("Mathematics ___ my favourite subject.", "is", ["are", "were", "have been"]),
    )
    built = []
    for _ in range(count):
        stem, answer, distractors = random.choice(bank)
        prompt = "Choose the correct verb: " + stem
        note = "Correct: " + answer
        built.append(make_question(qtid, prompt, answer, distractors, note, 2))
    return built
def generate_all(conn):
    """Run every English generator, store the results, return the total.

    Each generator is called with its listed count and a per-section line
    is printed.  All questions are then passed to
    ``insert_questions_batch`` in slices of 5000, and the overall total is
    printed and returned.
    """
    plan = (
        ("Synonyms", gen_synonyms, 2500),
        ("Antonyms", gen_antonyms, 2500),
        ("One Word Substitution", gen_one_word, 2000),
        ("Idioms & Phrases", gen_idioms, 2000),
        ("Spelling", gen_spelling, 1500),
        ("Tenses", gen_tenses, 1500),
        ("Articles", gen_articles, 1000),
        ("Prepositions", gen_prepositions, 1000),
        ("Subject-Verb Agreement", gen_subject_verb, 1000),
        ("Active/Passive Voice", gen_voice, 1500),
        ("Direct/Indirect Speech", gen_direct_indirect, 1500),
        ("Sentence Improvement", gen_sentence_improvement, 2000),
        ("Error Detection", gen_error_detection, 2500),
        ("Fill in Blanks", gen_fill_blanks, 2000),
        ("Sentence Rearrangement", gen_sentence_rearrangement, 1500),
        ("Cloze Test", gen_cloze_test, 1500),
    )
    collected = []
    for label, build, how_many in plan:
        produced = build(conn, how_many)
        collected += produced
        print(f" {label}: {len(produced)} questions")

    chunk = 5000
    for start in range(0, len(collected), chunk):
        insert_questions_batch(conn, collected[start:start + chunk])

    # Every produced question was appended to `collected`, so its length is
    # the sum of the per-section counts.
    grand_total = len(collected)
    print(f" TOTAL English: {grand_total}")
    return grand_total
if __name__ == '__main__':
    # Script entry point: open the database, generate and store all English
    # questions, and always release the connection afterwards.
    conn = get_db()
    try:
        print("Generating English Language questions...")
        generate_all(conn)
    finally:
        # Close the connection even when generation or insertion raises.
        conn.close()

354
generators/gk_generator.py Normal file
View File

@ -0,0 +1,354 @@
#!/usr/bin/env python3
"""
General Awareness Question Generator for SSC CGL.
Generates ~20,000+ questions from curated fact banks covering History, Geography,
Polity, Economics, Science, and Static GK.
"""
import random
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from generators.base import make_question, get_qtid, get_db, insert_questions_batch
# Subject name passed to get_qtid() by _generate_from_facts() for every
# question-type lookup in this module.
SUBJECT = "General Awareness"
# ============ FACT BANKS ============
# Fact bank sampled by gen_history() for the "Ancient India" topic.
# Each entry is (question text, correct answer, [three wrong options]).
ANCIENT_INDIA = [
("The Indus Valley Civilization was discovered in", "1921", ["1911", "1931", "1901"]),
("Harappa was discovered by", "Daya Ram Sahni", ["R.D. Banerji", "John Marshall", "Mortimer Wheeler"]),
("Mohenjo-daro was discovered by", "R.D. Banerji", ["Daya Ram Sahni", "John Marshall", "Alexander Cunningham"]),
("The Great Bath was found at", "Mohenjo-daro", ["Harappa", "Lothal", "Kalibangan"]),
("The founder of the Maurya dynasty was", "Chandragupta Maurya", ["Ashoka", "Bindusara", "Bimbisara"]),
("Ashoka embraced Buddhism after the battle of", "Kalinga", ["Hydaspes", "Plassey", "Panipat"]),
("The author of Arthashastra was", "Kautilya", ["Kalidasa", "Banabhatta", "Vishakhadatta"]),
("Who was the court poet of Chandragupta Vikramaditya?", "Kalidasa", ["Banabhatta", "Harisena", "Amarsimha"]),
("The capital of the Chola dynasty was", "Thanjavur", ["Madurai", "Kanchipuram", "Hampi"]),
("The Iron Pillar at Delhi was erected by", "Chandragupta II", ["Ashoka", "Kanishka", "Samudragupta"]),
("Nalanda University was founded by", "Kumaragupta I", ["Ashoka", "Harsha", "Chandragupta II"]),
("The Gupta period is known as the", "Golden Age of India", ["Silver Age", "Bronze Age", "Iron Age"]),
("Sangam literature belongs to", "Tamil Nadu", ["Kerala", "Karnataka", "Andhra Pradesh"]),
("The first Jain council was held at", "Pataliputra", ["Vaishali", "Rajgir", "Vallabhi"]),
("Gautama Buddha attained enlightenment at", "Bodh Gaya", ["Sarnath", "Kushinagar", "Lumbini"]),
("The Ajanta caves are famous for", "Paintings", ["Sculptures", "Architecture", "Inscriptions"]),
("Who built the Sanchi Stupa?", "Ashoka", ["Kanishka", "Harsha", "Chandragupta"]),
("The Vedic period is divided into how many parts?", "Two (Early and Later)", ["Three", "Four", "Five"]),
("The oldest Veda is", "Rigveda", ["Samaveda", "Yajurveda", "Atharvaveda"]),
("Alexander invaded India in", "326 BC", ["321 BC", "331 BC", "316 BC"]),
]
# Fact bank sampled by gen_history() for the "Medieval India" topic.
# Each entry is (question text, correct answer, [three wrong options]).
MEDIEVAL_INDIA = [
("The Delhi Sultanate was established in", "1206", ["1192", "1210", "1290"]),
("Who founded the Mughal Empire in India?", "Babur", ["Akbar", "Humayun", "Shah Jahan"]),
("The Battle of Panipat (1526) was fought between", "Babur and Ibrahim Lodi", ["Akbar and Hemu", "Babur and Rana Sanga", "Humayun and Sher Shah"]),
("Taj Mahal was built by", "Shah Jahan", ["Akbar", "Jahangir", "Aurangzeb"]),
("Akbar founded the religion", "Din-i-Ilahi", ["Islam", "Sikhism", "Jainism"]),
("The court language of the Mughals was", "Persian", ["Urdu", "Arabic", "Hindi"]),
("Who built the Red Fort in Delhi?", "Shah Jahan", ["Akbar", "Aurangzeb", "Babur"]),
("The last Mughal emperor was", "Bahadur Shah Zafar", ["Aurangzeb", "Shah Alam II", "Muhammad Shah"]),
# NOTE(review): the rupee introduced by Sher Shah Suri was itself a silver
# coin, so the "Silver coin" distractor may read as correct - confirm.
("Sher Shah Suri introduced", "Rupee coin", ["Gold coin", "Copper coin", "Silver coin"]),
("Who built Qutub Minar?", "Qutb-ud-din Aibak", ["Iltutmish", "Alauddin Khilji", "Muhammad Tughlaq"]),
("The Bhakti movement was started by", "Ramanuja", ["Kabir", "Nanak", "Tulsidas"]),
("Vijayanagara Empire was founded by", "Harihara and Bukka", ["Krishna Deva Raya", "Rama Raya", "Sangama"]),
("The capital of Vijayanagara was", "Hampi", ["Thanjavur", "Madurai", "Warangal"]),
("Who introduced the Mansabdari system?", "Akbar", ["Babur", "Shah Jahan", "Aurangzeb"]),
("The famous poet Amir Khusrau was associated with", "Alauddin Khilji", ["Akbar", "Babur", "Iltutmish"]),
]
# Fact bank sampled by gen_history() for the "Modern India" topic.
# Each entry is (question text, correct answer, [three wrong options]).
MODERN_INDIA = [
("Who is known as the Father of the Nation?", "Mahatma Gandhi", ["Jawaharlal Nehru", "Subhas Chandra Bose", "B.R. Ambedkar"]),
("The first war of Indian Independence was in", "1857", ["1847", "1867", "1877"]),
("Who gave the slogan 'Do or Die'?", "Mahatma Gandhi", ["Subhas Chandra Bose", "Bal Gangadhar Tilak", "Bhagat Singh"]),
("The Indian National Congress was founded in", "1885", ["1875", "1895", "1905"]),
("Who founded the Indian National Congress?", "A.O. Hume", ["Dadabhai Naoroji", "Surendranath Banerjee", "W.C. Bonnerjee"]),
("The Jallianwala Bagh massacre occurred in", "1919", ["1920", "1918", "1921"]),
("The Salt March took place in", "1930", ["1929", "1931", "1932"]),
("India gained independence on", "15 August 1947", ["26 January 1947", "15 August 1946", "26 January 1950"]),
("Who was the first President of India?", "Dr. Rajendra Prasad", ["Dr. S. Radhakrishnan", "Jawaharlal Nehru", "B.R. Ambedkar"]),
("Who was the first Prime Minister of India?", "Jawaharlal Nehru", ["Sardar Patel", "Dr. Rajendra Prasad", "Mahatma Gandhi"]),
("The Quit India Movement was launched in", "1942", ["1940", "1943", "1945"]),
("Who gave the slogan 'Jai Hind'?", "Subhas Chandra Bose", ["Mahatma Gandhi", "Jawaharlal Nehru", "Bhagat Singh"]),
("The Rowlatt Act was passed in", "1919", ["1918", "1920", "1921"]),
("The Non-Cooperation Movement was launched in", "1920", ["1919", "1921", "1922"]),
("Who was known as the Iron Man of India?", "Sardar Vallabhbhai Patel", ["Subhas Chandra Bose", "Bhagat Singh", "Lal Bahadur Shastri"]),
("The Partition of Bengal took place in", "1905", ["1903", "1907", "1909"]),
("Who founded the Arya Samaj?", "Swami Dayanand Saraswati", ["Swami Vivekananda", "Raja Ram Mohan Roy", "Ramakrishna"]),
("The Lucknow Pact was signed in", "1916", ["1915", "1917", "1918"]),
("Who wrote 'Vande Mataram'?", "Bankim Chandra Chattopadhyay", ["Rabindranath Tagore", "Kazi Nazrul Islam", "Sarojini Naidu"]),
("Who composed the National Anthem of India?", "Rabindranath Tagore", ["Bankim Chandra", "Sarojini Naidu", "Muhammad Iqbal"]),
]
# Fact bank sampled by gen_geography().
# Each entry is (question text, correct answer, [three wrong options]).
GEOGRAPHY_FACTS = [
("The largest continent by area is", "Asia", ["Africa", "North America", "Europe"]),
("The longest river in the world is", "Nile", ["Amazon", "Yangtze", "Mississippi"]),
("The highest peak in the world is", "Mount Everest", ["K2", "Kangchenjunga", "Lhotse"]),
("The largest ocean is", "Pacific Ocean", ["Atlantic Ocean", "Indian Ocean", "Arctic Ocean"]),
("The largest desert in the world is", "Sahara", ["Arabian", "Gobi", "Kalahari"]),
("The longest river in India is", "Ganga", ["Godavari", "Krishna", "Brahmaputra"]),
# NOTE(review): some exam keys treat K2 as India's highest peak, so the
# "K2" distractor may be contested - confirm against the target syllabus.
("The highest peak in India is", "Kangchenjunga", ["Nanda Devi", "K2", "Annapurna"]),
("Which state in India has the longest coastline?", "Gujarat", ["Maharashtra", "Tamil Nadu", "Andhra Pradesh"]),
("The Tropic of Cancer passes through how many Indian states?", "8", ["6", "7", "9"]),
("The largest freshwater lake in India is", "Wular Lake", ["Dal Lake", "Chilika Lake", "Loktak Lake"]),
("Which Indian state is known as the 'Spice Garden of India'?", "Kerala", ["Karnataka", "Tamil Nadu", "Goa"]),
("The Western Ghats are also known as", "Sahyadri", ["Vindhya", "Aravalli", "Nilgiri"]),
("The northernmost point of India is", "Indira Col", ["Kanyakumari", "Rann of Kutch", "Indira Point"]),
("Which river is known as the 'Sorrow of Bengal'?", "Damodar", ["Hooghly", "Teesta", "Mahanadi"]),
("The largest state in India by area is", "Rajasthan", ["Madhya Pradesh", "Maharashtra", "Uttar Pradesh"]),
("The Thar Desert is located in", "Rajasthan", ["Gujarat", "Haryana", "Punjab"]),
("The capital of Arunachal Pradesh is", "Itanagar", ["Shillong", "Imphal", "Kohima"]),
("Chilika Lake is located in", "Odisha", ["Andhra Pradesh", "Tamil Nadu", "Kerala"]),
("The Deccan Plateau lies in", "Southern India", ["Northern India", "Eastern India", "Western India"]),
("The soil best suited for cotton cultivation is", "Black soil", ["Red soil", "Alluvial soil", "Laterite soil"]),
]
# Fact bank sampled by gen_polity().
# Each entry is (question text, correct answer, [three wrong options]).
POLITY_FACTS = [
("The Constitution of India came into effect on", "26 January 1950", ["15 August 1947", "26 November 1949", "15 August 1950"]),
("How many Fundamental Rights are there?", "6", ["5", "7", "8"]),
("Right to Education was added by which amendment?", "86th Amendment", ["42nd Amendment", "44th Amendment", "91st Amendment"]),
("The Preamble declares India as a", "Sovereign Socialist Secular Democratic Republic", ["Federal Republic", "Parliamentary Republic", "Constitutional Monarchy"]),
("Who is known as the Father of Indian Constitution?", "Dr. B.R. Ambedkar", ["Jawaharlal Nehru", "Mahatma Gandhi", "Rajendra Prasad"]),
# NOTE(review): 545 looks like the pre-2020 strength (543 elected + 2
# nominated Anglo-Indian seats, since discontinued) - verify the current figure.
("The total number of members in Lok Sabha is", "545", ["250", "500", "552"]),
("Rajya Sabha members are elected for", "6 years", ["5 years", "4 years", "3 years"]),
("The minimum age to become President of India is", "35 years", ["25 years", "30 years", "40 years"]),
("The Chief Justice of India is appointed by", "The President", ["The Prime Minister", "The Parliament", "The Law Minister"]),
("Which Article deals with the Right to Equality?", "Article 14", ["Article 19", "Article 21", "Article 32"]),
("Article 21 deals with", "Right to Life and Personal Liberty", ["Right to Freedom", "Right to Equality", "Right against Exploitation"]),
("The Directive Principles are in which Part of the Constitution?", "Part IV", ["Part III", "Part V", "Part VI"]),
("How many schedules are there in the Indian Constitution?", "12", ["8", "10", "14"]),
("The Vice President is the ex-officio Chairman of", "Rajya Sabha", ["Lok Sabha", "Parliament", "NITI Aayog"]),
# NOTE(review): Articles 356 and 360 are also emergency provisions, so two
# of the distractors may be defensible answers - consider rewording the
# question (e.g. "National Emergency").
("Emergency provisions are in which Article?", "Article 352", ["Article 356", "Article 360", "Article 370"]),
("The 73rd Amendment is related to", "Panchayati Raj", ["Municipal Corporation", "Fundamental Rights", "DPSP"]),
("Who appoints the Governor of a state?", "The President", ["The Prime Minister", "The Chief Minister", "The Parliament"]),
("The Finance Commission is appointed every", "5 years", ["3 years", "4 years", "6 years"]),
("Judicial review is the power of", "Supreme Court", ["Parliament", "President", "Prime Minister"]),
("The CAG is appointed by", "The President", ["The Prime Minister", "The Parliament", "The Finance Minister"]),
]
# Fact bank sampled by gen_economics().
# Each entry is (question text, correct answer, [three wrong options]).
ECONOMICS_FACTS = [
("The Reserve Bank of India was established in", "1935", ["1947", "1950", "1921"]),
("The current GST council is chaired by", "Union Finance Minister", ["Prime Minister", "RBI Governor", "Revenue Secretary"]),
("Which Five Year Plan focused on rapid industrialization?", "Second Five Year Plan", ["First Plan", "Third Plan", "Fourth Plan"]),
("NITI Aayog was established in", "2015", ["2014", "2016", "2017"]),
("NITI Aayog replaced", "Planning Commission", ["Finance Commission", "UGC", "UPSC"]),
("GDP stands for", "Gross Domestic Product", ["Grand Domestic Product", "General Domestic Product", "Gross Development Product"]),
("The fiscal year in India starts from", "April 1", ["January 1", "March 1", "July 1"]),
("WTO was established in", "1995", ["1947", "1991", "2000"]),
("IMF headquarters is in", "Washington D.C.", ["New York", "Geneva", "London"]),
("World Bank headquarters is in", "Washington D.C.", ["New York", "Geneva", "Paris"]),
("BRICS includes India, Brazil, Russia, China and", "South Africa", ["Sri Lanka", "Singapore", "Saudi Arabia"]),
# NOTE(review): the Central Statistics Office was reportedly merged into the
# National Statistical Office - confirm the currently expected answer.
("National income is calculated by", "Central Statistics Office", ["RBI", "NITI Aayog", "Finance Ministry"]),
("The first bank in India was", "Bank of Hindustan", ["State Bank of India", "Bank of Bombay", "RBI"]),
("SBI was formed from", "Imperial Bank of India", ["Bank of Bengal", "Bank of Bombay", "Bank of Madras"]),
("Repo rate is the rate at which", "RBI lends to commercial banks", ["Banks lend to public", "Government borrows", "FDI flows"]),
("CRR stands for", "Cash Reserve Ratio", ["Central Reserve Ratio", "Cash Recovery Rate", "Credit Reserve Ratio"]),
("The currency of Japan is", "Yen", ["Yuan", "Won", "Baht"]),
("Which organization gives the 'Ease of Doing Business' ranking?", "World Bank", ["IMF", "WTO", "UN"]),
("Make in India was launched in", "2014", ["2015", "2016", "2013"]),
("Digital India was launched in", "2015", ["2014", "2016", "2017"]),
]
# Fact bank sampled by gen_science(); mixes physics, chemistry and biology.
# Each entry is (question text, correct answer, [three wrong options]).
SCIENCE_FACTS = [
("The SI unit of force is", "Newton", ["Joule", "Watt", "Pascal"]),
("The speed of light is approximately", "3 × 10⁸ m/s", ["3 × 10⁶ m/s", "3 × 10¹⁰ m/s", "3 × 10⁵ m/s"]),
("The chemical formula of water is", "H₂O", ["H₂O₂", "HO₂", "H₃O"]),
("Photosynthesis occurs in", "Chloroplast", ["Mitochondria", "Ribosome", "Nucleus"]),
("The powerhouse of the cell is", "Mitochondria", ["Nucleus", "Ribosome", "Chloroplast"]),
("The hardest natural substance is", "Diamond", ["Quartz", "Topaz", "Ruby"]),
("The chemical symbol for gold is", "Au", ["Ag", "Fe", "Go"]),
("Blood is purified in", "Kidneys", ["Liver", "Heart", "Lungs"]),
("The largest organ of the human body is", "Skin", ["Liver", "Brain", "Heart"]),
("The total number of bones in an adult human body is", "206", ["208", "204", "210"]),
("Vitamin C deficiency causes", "Scurvy", ["Rickets", "Beriberi", "Night blindness"]),
("Vitamin D deficiency causes", "Rickets", ["Scurvy", "Beriberi", "Pellagra"]),
("The gas responsible for global warming is", "Carbon dioxide", ["Oxygen", "Nitrogen", "Hydrogen"]),
("Sound travels fastest in", "Solids", ["Liquids", "Gases", "Vacuum"]),
("The pH of pure water is", "7", ["5", "8", "6"]),
("Newton's first law is also known as", "Law of Inertia", ["Law of Acceleration", "Law of Action-Reaction", "Law of Gravity"]),
("The center of an atom is called", "Nucleus", ["Electron", "Proton", "Neutron"]),
("Insulin is produced by", "Pancreas", ["Liver", "Kidney", "Thyroid"]),
("The boiling point of water is", "100°C", ["90°C", "110°C", "120°C"]),
("The chemical formula of common salt is", "NaCl", ["KCl", "NaOH", "HCl"]),
("The study of fungi is called", "Mycology", ["Zoology", "Botany", "Virology"]),
("The gas used in fire extinguishers is", "CO₂", ["O₂", "N₂", "H₂"]),
("The lightest gas is", "Hydrogen", ["Helium", "Oxygen", "Nitrogen"]),
("The element with atomic number 1 is", "Hydrogen", ["Helium", "Lithium", "Carbon"]),
("Malaria is caused by", "Plasmodium", ["Bacteria", "Virus", "Fungus"]),
("The human heart has how many chambers?", "4", ["2", "3", "5"]),
("The instrument used to measure atmospheric pressure is", "Barometer", ["Thermometer", "Hygrometer", "Anemometer"]),
("Who discovered Penicillin?", "Alexander Fleming", ["Louis Pasteur", "Edward Jenner", "Robert Koch"]),
("The study of earthquake is called", "Seismology", ["Geology", "Volcanology", "Meteorology"]),
("DNA stands for", "Deoxyribonucleic Acid", ["Deoxyribose Nucleic Acid", "Dinucleic Acid", "Deoxyribo Amino Acid"]),
]
# Fact bank sampled by gen_computer().
# Each entry is (question text, correct answer, [three wrong options]).
COMPUTER_FACTS = [
("The full form of CPU is", "Central Processing Unit", ["Central Program Unit", "Computer Processing Unit", "Central Process Utility"]),
("RAM stands for", "Random Access Memory", ["Read Access Memory", "Random Alloc Memory", "Read All Memory"]),
("The father of computers is", "Charles Babbage", ["Alan Turing", "John von Neumann", "Tim Berners-Lee"]),
("HTML stands for", "HyperText Markup Language", ["HyperText Machine Language", "High Text Markup Language", "Hyper Transfer Markup Language"]),
("Which shortcut key is used to copy?", "Ctrl + C", ["Ctrl + V", "Ctrl + X", "Ctrl + Z"]),
("Which shortcut key is used to undo?", "Ctrl + Z", ["Ctrl + Y", "Ctrl + X", "Ctrl + C"]),
("The brain of the computer is", "CPU", ["RAM", "Hard Disk", "Monitor"]),
("1 KB equals", "1024 bytes", ["1000 bytes", "512 bytes", "2048 bytes"]),
("An IP address is used to", "Identify a device on a network", ["Store data", "Display web pages", "Send emails"]),
("HTTP stands for", "HyperText Transfer Protocol", ["High Text Transfer Protocol", "Hyper Transfer Text Protocol", "HyperText Transport Protocol"]),
("The extension of a Word document is", ".docx", [".xlsx", ".pptx", ".pdf"]),
("Which software is used for spreadsheets?", "MS Excel", ["MS Word", "MS PowerPoint", "MS Access"]),
("A computer virus is a", "Malicious software program", ["Hardware defect", "Network issue", "Browser plugin"]),
("Wi-Fi stands for", "Wireless Fidelity", ["Wireless Finder", "Wide Fidelity", "Wired Fidelity"]),
("The first search engine on the internet was", "Archie", ["Google", "Yahoo", "Bing"]),
("URL stands for", "Uniform Resource Locator", ["Universal Resource Link", "Uniform Retrieval Locator", "Universal Resource Locator"]),
("LAN stands for", "Local Area Network", ["Large Area Network", "Long Access Network", "Local Access Network"]),
("Which key is used to refresh a web page?", "F5", ["F1", "F2", "F12"]),
("The default file extension for Excel is", ".xlsx", [".docx", ".pptx", ".csv"]),
("Phishing is a type of", "Cyber fraud", ["Computer virus", "Software update", "Network protocol"]),
]
# Fact bank sampled by gen_static_gk(): national symbols, important days,
# books, awards and "firsts".
# Each entry is (question text, correct answer, [three wrong options]).
STATIC_GK = [
("The national bird of India is", "Peacock", ["Sparrow", "Parrot", "Eagle"]),
("The national animal of India is", "Tiger", ["Lion", "Elephant", "Leopard"]),
("The national flower of India is", "Lotus", ["Rose", "Jasmine", "Sunflower"]),
("The national game of India is", "Hockey", ["Cricket", "Football", "Badminton"]),
("The national fruit of India is", "Mango", ["Apple", "Banana", "Guava"]),
("The national river of India is", "Ganga", ["Yamuna", "Godavari", "Brahmaputra"]),
("India's national currency is", "Indian Rupee", ["Dollar", "Pound", "Euro"]),
("World Environment Day is celebrated on", "June 5", ["March 22", "April 22", "October 16"]),
("International Women's Day is observed on", "March 8", ["February 14", "May 1", "June 21"]),
("World Health Day is celebrated on", "April 7", ["March 7", "May 7", "June 7"]),
("Teachers' Day in India is celebrated on", "September 5", ["November 14", "October 2", "January 26"]),
("Children's Day in India is celebrated on", "November 14", ["September 5", "October 2", "January 26"]),
("Republic Day is celebrated on", "January 26", ["August 15", "October 2", "November 14"]),
("Who wrote the book 'Wings of Fire'?", "A.P.J. Abdul Kalam", ["Jawaharlal Nehru", "Mahatma Gandhi", "R.K. Narayan"]),
("Who wrote 'Discovery of India'?", "Jawaharlal Nehru", ["Mahatma Gandhi", "Rabindranath Tagore", "S. Radhakrishnan"]),
("The Nobel Prize for Literature was won by Rabindranath Tagore in", "1913", ["1910", "1920", "1930"]),
("The Olympic Games are held every", "4 years", ["2 years", "3 years", "5 years"]),
("The headquarters of UN is in", "New York", ["Geneva", "London", "Washington D.C."]),
("Who was the first Indian woman to win an Olympic medal?", "Karnam Malleswari", ["P.T. Usha", "Saina Nehwal", "Mary Kom"]),
("The Booker Prize is associated with", "Literature", ["Science", "Peace", "Economics"]),
("The Grammy Award is associated with", "Music", ["Films", "Literature", "Sports"]),
("The first Indian satellite was", "Aryabhata", ["Bhaskara", "INSAT-1A", "Rohini"]),
("ISRO headquarters is in", "Bengaluru", ["Chennai", "Hyderabad", "New Delhi"]),
("Who was the first Indian in space?", "Rakesh Sharma", ["Kalpana Chawla", "Sunita Williams", "Ravish Malhotra"]),
("The Bharat Ratna is the highest", "Civilian award", ["Military award", "Sports award", "Literary award"]),
]
# Fact bank sampled by gen_schemes().
# Each entry is (question text, correct answer, [three wrong options]).
GOVT_SCHEMES = [
("Swachh Bharat Mission was launched in", "2014", ["2015", "2016", "2013"]),
("Pradhan Mantri Jan Dhan Yojana provides", "Bank accounts for all", ["Free healthcare", "Education scholarship", "Housing"]),
("Ayushman Bharat scheme provides", "Health insurance coverage", ["Bank accounts", "Education", "Housing"]),
("PM Kisan scheme provides", "₹6000 per year to farmers", ["Free seeds", "Loan waiver", "Insurance"]),
("Beti Bachao Beti Padhao focuses on", "Girl child welfare and education", ["Boy child education", "Senior citizens", "Farmers"]),
("Start-up India was launched in", "2016", ["2015", "2017", "2014"]),
("Skill India Mission aims at", "Training youth in skills", ["Providing jobs", "Building schools", "Health camps"]),
("MUDRA scheme provides", "Loans for small businesses", ["Education loans", "Home loans", "Car loans"]),
("Ujjwala Yojana provides", "Free LPG connections", ["Free electricity", "Free water", "Free internet"]),
("Atal Pension Yojana is for", "Retirement pension for unorganized sector", ["Health insurance", "Education", "Housing"]),
]
# Fact bank sampled by gen_sports().
# Each entry is (question text, correct answer, [three wrong options]).
SPORTS_FACTS = [
("The Cricket World Cup 2023 was held in", "India", ["England", "Australia", "South Africa"]),
("The FIFA World Cup 2022 was held in", "Qatar", ["Russia", "Brazil", "Japan"]),
("The Ranji Trophy is associated with", "Cricket", ["Football", "Hockey", "Tennis"]),
("The Davis Cup is associated with", "Tennis", ["Cricket", "Football", "Badminton"]),
("The Thomas Cup is associated with", "Badminton", ["Tennis", "Table Tennis", "Cricket"]),
("The Durand Cup is the oldest football tournament in", "Asia", ["Europe", "Africa", "South America"]),
("Wimbledon is played on", "Grass court", ["Clay court", "Hard court", "Carpet court"]),
("A marathon race covers a distance of", "42.195 km", ["40 km", "45 km", "50 km"]),
("The term 'Grand Slam' is used in", "Tennis", ["Cricket", "Football", "Hockey"]),
("How many players are there in a cricket team?", "11", ["9", "13", "15"]),
("How many players are there in a football team?", "11", ["9", "13", "15"]),
("The term 'Checkmate' is used in", "Chess", ["Cricket", "Football", "Hockey"]),
("The Summer Olympics 2024 was held in", "Paris", ["Tokyo", "Los Angeles", "London"]),
("Who holds the record for most centuries in international cricket?", "Sachin Tendulkar", ["Virat Kohli", "Ricky Ponting", "Kumar Sangakkara"]),
("The Dronacharya Award is given to", "Sports coaches", ["Players", "Scientists", "Teachers"]),
]
def _generate_from_facts(conn, facts, subtopic, topic, qtype_name, multiplier=3):
    """Build questions by sampling a fact bank at random.

    ``facts`` is a sequence of (question text, correct answer, wrong
    options) entries.  ``len(facts) * multiplier`` entries are drawn with
    random.choice(), so the same fact can be picked more than once; each
    pick gets a difficulty of 1 or 2 (1 is twice as likely) and is turned
    into a question with make_question().

    Returns an empty list when get_qtid() yields no id for the given
    subtopic/topic/question type.
    """
    qtid = get_qtid(conn, SUBJECT, subtopic, topic, qtype_name)
    if not qtid:
        return []

    def _sample_one():
        # Pick the fact first, then the difficulty.
        stem, answer, distractors = random.choice(facts)
        level = random.choice([1, 1, 2])
        return make_question(qtid, stem, answer, distractors, f"Answer: {answer}", level)

    return [_sample_one() for _ in range(len(facts) * multiplier)]
def gen_history(conn, count_per=600):
    """Return History questions for Ancient, Medieval and Modern India.

    Each of the three fact banks is sampled through _generate_from_facts()
    with a multiplier of 50.  ``count_per`` is accepted but not used here.
    """
    banks = (
        (ANCIENT_INDIA, "Ancient India", "Who/What/When"),
        (MEDIEVAL_INDIA, "Medieval India", "Who/What/When"),
        (MODERN_INDIA, "Modern India", "Freedom fighter identification"),
    )
    collected = []
    for facts, topic, qtype in banks:
        collected.extend(_generate_from_facts(conn, facts, "History", topic, qtype, 50))
    return collected
def gen_geography(conn, count=2000):
    """Return Geography questions sampled from GEOGRAPHY_FACTS.

    Every question is filed under Geography > Indian Geography > "River
    system"; 130 draws are made per fact in the bank.  ``count`` is
    accepted but not used here.
    """
    return _generate_from_facts(
        conn,
        GEOGRAPHY_FACTS,
        subtopic="Geography",
        topic="Indian Geography",
        qtype_name="River system",
        multiplier=130,
    )
def gen_polity(conn, count=2000):
    """Return Polity questions sampled from POLITY_FACTS.

    Every question is filed under Indian Polity > Indian Constitution >
    "Article identification"; 130 draws are made per fact in the bank.
    ``count`` is accepted but not used here.
    """
    return _generate_from_facts(
        conn,
        POLITY_FACTS,
        subtopic="Indian Polity",
        topic="Indian Constitution",
        qtype_name="Article identification",
        multiplier=130,
    )
def gen_economics(conn, count=2000):
    """Return Economics questions sampled from ECONOMICS_FACTS.

    Every question is filed under Economics > Indian Economy Basics >
    "Economic indicator"; 130 draws are made per fact in the bank.
    ``count`` is accepted but not used here.
    """
    return _generate_from_facts(
        conn,
        ECONOMICS_FACTS,
        subtopic="Economics",
        topic="Indian Economy Basics",
        qtype_name="Economic indicator",
        multiplier=130,
    )
def gen_science(conn, count=3000):
    """Return Science questions sampled from SCIENCE_FACTS.

    Every question is filed under Science > Physics > "Law/principle
    identification"; 135 draws are made per fact in the bank.  ``count``
    is accepted but not used here.

    NOTE(review): the bank also holds chemistry and biology facts, which
    end up under the Physics topic as well - confirm this is intended.
    """
    return _generate_from_facts(
        conn,
        SCIENCE_FACTS,
        subtopic="Science",
        topic="Physics",
        qtype_name="Law/principle identification",
        multiplier=135,
    )
def gen_computer(conn, count=2000):
    """Return Computer Awareness questions sampled from COMPUTER_FACTS.

    Every question is filed under Science > Computer Awareness > "Term
    definition"; 100 draws are made per fact in the bank.  ``count`` is
    accepted but not used here.
    """
    return _generate_from_facts(
        conn,
        COMPUTER_FACTS,
        subtopic="Science",
        topic="Computer Awareness",
        qtype_name="Term definition",
        multiplier=100,
    )
def gen_static_gk(conn, count=2000):
    """Return Static GK questions sampled from STATIC_GK.

    Every question is filed under Static GK > National Symbols and Firsts
    > "Identify symbol"; 80 draws are made per fact in the bank.
    ``count`` is accepted but not used here.
    """
    return _generate_from_facts(
        conn,
        STATIC_GK,
        subtopic="Static GK",
        topic="National Symbols and Firsts",
        qtype_name="Identify symbol",
        multiplier=80,
    )
def gen_schemes(conn, count=1000):
    """Return Government Schemes questions sampled from GOVT_SCHEMES.

    Every question is filed under Economics > Government Schemes >
    "Scheme objective"; 100 draws are made per fact in the bank.
    ``count`` is accepted but not used here.
    """
    return _generate_from_facts(
        conn,
        GOVT_SCHEMES,
        subtopic="Economics",
        topic="Government Schemes",
        qtype_name="Scheme objective",
        multiplier=100,
    )
def gen_sports(conn, count=1000):
    """Return Sports questions sampled from SPORTS_FACTS.

    Every question is filed under Static GK > Sports > "Trophy-sport";
    70 draws are made per fact in the bank.  ``count`` is accepted but
    not used here.
    """
    return _generate_from_facts(
        conn,
        SPORTS_FACTS,
        subtopic="Static GK",
        topic="Sports",
        qtype_name="Trophy-sport",
        multiplier=70,
    )
def generate_all(conn):
    """Build every General Awareness question set and persist the result.

    The generators listed below run in order with their default
    arguments, a per-topic tally is printed, and the pooled questions are
    handed to insert_questions_batch() in slices of 5000.

    Returns the total number of questions generated.
    """
    plan = (
        ("History", gen_history),
        ("Geography", gen_geography),
        ("Polity", gen_polity),
        ("Economics", gen_economics),
        ("Science", gen_science),
        ("Computer Awareness", gen_computer),
        ("Static GK", gen_static_gk),
        ("Govt Schemes", gen_schemes),
        ("Sports", gen_sports),
    )

    # Run the generators first so nothing is written until all succeeded.
    pooled = []
    for label, builder in plan:
        produced = builder(conn)
        pooled.extend(produced)
        print(f" {label}: {len(produced)} questions")

    # Write the pooled questions in fixed-size slices.
    chunk = 5000
    start = 0
    while start < len(pooled):
        insert_questions_batch(conn, pooled[start:start + chunk])
        start += chunk

    grand_total = len(pooled)
    print(f" TOTAL General Awareness: {grand_total}")
    return grand_total
if __name__ == '__main__':
    # Script entry point: open the database, generate and store every
    # General Awareness question, then release the connection.
    conn = get_db()
    try:
        print("Generating General Awareness questions...")
        generate_all(conn)
    finally:
        # Close the connection even when generation fails part-way.
        conn.close()

View File

@ -0,0 +1,721 @@
#!/usr/bin/env python3
"""
Quantitative Aptitude Question Generator for SSC CGL.
Generates ~30,000 template-based math questions across all topics.
"""
import random
import math
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from generators.base import make_question, get_qtid, nearby_wrong, nearby_wrong_float, get_db, insert_questions_batch
# Subject name passed to get_qtid() for every question-type lookup in
# this module.
SUBJECT = "Quantitative Aptitude"
def gen_number_system(conn, count=800):
    """Generate Number System questions (Arithmetic) in four equal parts.

    ``count // 4`` questions are produced for each of the question types
    "Unit digit", "Divisibility test", "Remainder problems" and "Find the
    value".  A type is skipped when get_qtid() returns a falsy id for it.

    Returns the list of questions built with make_question().
    """
    questions = []
    per_type = count // 4

    # Unit digit questions
    qtid = get_qtid(conn, SUBJECT, "Arithmetic", "Number System", "Unit digit")
    if qtid:
        for _ in range(per_type):
            base = random.randint(2, 99)
            exp = random.randint(10, 200)
            # Three-argument pow() reduces modulo 10 as it goes, so the
            # full power is never materialised.
            unit = pow(base, exp, 10)
            questions.append(make_question(
                qtid,
                f"What is the unit digit of {base}^{exp}?",
                str(unit), nearby_wrong(unit, 4),
                f"Unit digit of {base}^{exp} = {unit}", random.choice([1, 1, 2])))

    # Divisibility
    qtid = get_qtid(conn, SUBJECT, "Arithmetic", "Number System", "Divisibility test")
    if qtid:
        divs = [2, 3, 4, 5, 6, 7, 8, 9, 11]
        for _ in range(per_type):
            d = random.choice(divs)
            num = random.randint(100, 9999) * d
            # Place each distractor in its own window of d consecutive
            # integers above num and give it a non-zero remainder.  The
            # three options are then pairwise distinct and none of them is
            # divisible by d.  Independent num + randint(1, d - 1) draws
            # could repeat, and always did for d == 2.
            wrongs = [str(num + k * d + random.randint(1, d - 1)) for k in range(3)]
            questions.append(make_question(
                qtid,
                f"Which of the following numbers is divisible by {d}?",
                str(num), wrongs,
                f"{num} ÷ {d} = {num//d}", 1))

    # Remainder
    qtid = get_qtid(conn, SUBJECT, "Arithmetic", "Number System", "Remainder problems")
    if qtid:
        for _ in range(per_type):
            divisor = random.randint(3, 20)
            quotient = random.randint(10, 200)
            remainder = random.randint(0, divisor - 1)
            # Build the dividend from its parts so the remainder is known.
            num = divisor * quotient + remainder
            questions.append(make_question(
                qtid,
                f"What is the remainder when {num} is divided by {divisor}?",
                str(remainder), nearby_wrong(remainder, max(3, divisor)),
                f"{num} = {divisor} × {quotient} + {remainder}", 1))

    # Find the value
    qtid = get_qtid(conn, SUBJECT, "Arithmetic", "Number System", "Find the value")
    if qtid:
        for _ in range(per_type):
            a, b = random.randint(10, 99), random.randint(10, 99)
            ops = [
                (f"{a} × {b}", a * b),
                (f"{a}² - {b}²", a * a - b * b),
                (f"{a}² + {b}²", a * a + b * b),
            ]
            expr, ans = random.choice(ops)
            questions.append(make_question(
                qtid, f"Find the value of {expr}.",
                str(ans), nearby_wrong(ans),
                f"{expr} = {ans}", random.choice([1, 2])))

    return questions
def gen_hcf_lcm(conn, count=500):
    """Generate HCF/LCM questions (Arithmetic).

    For ``count // 3`` random number pairs one "Find HCF" and one "Find
    LCM" question is produced, followed by ``count // 3`` rope-cutting
    word problems.  A question type is skipped when get_qtid() returns a
    falsy id for it.

    Returns the list of questions built with make_question().
    """
    questions = []
    qtid_hcf = get_qtid(conn, SUBJECT, "Arithmetic", "HCF and LCM", "Find HCF")
    qtid_lcm = get_qtid(conn, SUBJECT, "Arithmetic", "HCF and LCM", "Find LCM")
    qtid_word = get_qtid(conn, SUBJECT, "Arithmetic", "HCF and LCM", "Word problems on HCF/LCM")
    per_type = count // 3

    for _ in range(per_type):
        a, b = random.randint(10, 200), random.randint(10, 200)
        hcf = math.gcd(a, b)
        lcm = (a * b) // hcf
        if qtid_hcf:
            questions.append(make_question(
                qtid_hcf,
                f"Find the HCF of {a} and {b}.", str(hcf), nearby_wrong(hcf),
                f"HCF({a}, {b}) = {hcf}", 1))
        if qtid_lcm:
            questions.append(make_question(
                qtid_lcm,
                f"Find the LCM of {a} and {b}.", str(lcm), nearby_wrong(lcm),
                f"LCM({a}, {b}) = {lcm}", 1))

    if qtid_word:
        for _ in range(per_type):
            h = random.randint(5, 30)
            m1, m2 = random.randint(2, 6), random.randint(2, 6)
            # HCF(h*m1, h*m2) equals h only when m1 and m2 are coprime
            # (m1=2, m2=4 would give 2*h), so redraw m2 until they are.
            # Coprime multipliers are also distinct, so the two ropes
            # never have the same length.
            while math.gcd(m1, m2) != 1:
                m2 = random.randint(2, 6)
            a, b = h * m1, h * m2
            questions.append(make_question(
                qtid_word,
                f"Two ropes of lengths {a} cm and {b} cm are to be cut into pieces of equal length. What is the maximum length of each piece?",
                str(h) + " cm", [str(h + i) + " cm" for i in [1, 2, -1]],
                f"HCF({a}, {b}) = {h} cm", 1))

    return questions
def gen_simplification(conn, count=600):
    """Generate ``count`` "Simplify expression" questions (Arithmetic).

    Each question draws three random integers, picks one of five
    expression shapes at random and asks for its value.  Returns an empty
    list when get_qtid() yields no id for the question type.
    """
    qtid = get_qtid(conn, SUBJECT, "Arithmetic", "Simplification", "Simplify expression")
    if not qtid:
        return []

    # (display pattern, evaluator) pairs; the order is significant because
    # random.choice() picks by position.
    shapes = (
        ("{a} + {b} × {c}", lambda a, b, c: a + b * c),
        ("{a} × {b} - {c}", lambda a, b, c: a * b - c),
        ("({a} + {b}) × {c}", lambda a, b, c: (a + b) * c),
        ("{a}² - {b} × {c}", lambda a, b, c: a * a - b * c),
        ("{a} + {b}² - {c}", lambda a, b, c: a + b * b - c),
    )

    built = []
    for _ in range(count):
        a = random.randint(2, 50)
        b = random.randint(2, 50)
        c = random.randint(1, 30)
        pattern, evaluate = random.choice(shapes)
        expr = pattern.format(a=a, b=b, c=c)
        ans = evaluate(a, b, c)
        # Distractors are drawn before the difficulty.
        distractors = nearby_wrong(ans)
        level = random.choice([1, 1, 2])
        built.append(make_question(
            qtid, f"Simplify: {expr}",
            str(ans), distractors, f"{expr} = {ans}", level))
    return built
def gen_percentage(conn, count=2000):
    """Generate Percentage questions, ``count // 4`` per question type.

    Covers "Find X% of Y", "What percent is X of Y", "Percentage change"
    and "Net percentage change".  A type is skipped when get_qtid()
    returns a falsy id for it.

    Returns the list of questions built with make_question().
    """
    questions = []
    per_type = count // 4

    # Basic percentage
    qtid = get_qtid(conn, SUBJECT, "Percentage", "Basic Percentage", "Find X% of Y")
    if qtid:
        for _ in range(per_type):
            pct = random.choice([5, 10, 12, 15, 16, 20, 25, 30, 33, 40, 50, 60, 75])
            val = random.randint(50, 5000)
            ans = val * pct / 100
            # Show whole-number answers without a trailing ".0".
            if ans == int(ans):
                ans = int(ans)
            questions.append(make_question(
                qtid,
                f"What is {pct}% of {val}?", str(ans), nearby_wrong(ans),
                f"{pct}% of {val} = {val} × {pct}/100 = {ans}", 1))

    qtid = get_qtid(conn, SUBJECT, "Percentage", "Basic Percentage", "What percent is X of Y")
    if qtid:
        for _ in range(per_type):
            total = random.randint(50, 1000)
            part = random.randint(1, total)
            pct = round(part * 100 / total, 2)
            # Keep drawing until there are three usable distractors: each
            # must be positive, differ from the answer and not repeat.
            wrongs = []
            while len(wrongs) < 3:
                candidate = round(pct + random.uniform(-10, 10), 2)
                if candidate > 0 and candidate != pct and candidate not in wrongs:
                    wrongs.append(candidate)
            questions.append(make_question(
                qtid,
                f"What percentage is {part} of {total}?", str(pct) + "%",
                [str(w) + "%" for w in wrongs],
                f"{part}/{total} × 100 = {pct}%", 1))

    qtid = get_qtid(conn, SUBJECT, "Percentage", "Basic Percentage", "Percentage change")
    if qtid:
        for _ in range(per_type):
            change = random.randint(5, 50)
            # Choose the starting value so that change% of it is a whole
            # number; otherwise the floor division below would make the
            # stated percentage only approximately true.
            step = 100 // math.gcd(change, 100)
            old = step * random.randint(100 // step, 5000 // step)
            direction = random.choice(["increased", "decreased"])
            delta = old * change // 100
            new_val = old + delta if direction == "increased" else old - delta
            questions.append(make_question(
                qtid,
                f"If a value {direction} from {old} to {new_val}, what is the percentage change?",
                str(change) + "%", [str(change + i) + "%" for i in [2, -3, 5]],
                f"Change = {abs(new_val-old)}/{old} × 100 = {change}%", 1))

    # Successive percentage
    qtid = get_qtid(conn, SUBJECT, "Percentage", "Successive Percentage", "Net percentage change")
    if qtid:
        for _ in range(per_type):
            p1, p2 = random.randint(5, 40), random.randint(5, 40)
            # Two successive increases compound: p1 + p2 + p1*p2/100.
            net = round(p1 + p2 + p1 * p2 / 100, 2)
            questions.append(make_question(
                qtid,
                f"If a price increases by {p1}% and then by {p2}%, what is the net percentage increase?",
                str(net) + "%", nearby_wrong_float(net),
                f"Net = {p1} + {p2} + ({p1}×{p2})/100 = {net}%", 2))

    return questions
def gen_profit_loss(conn, count=2000):
    """Generate Profit and Loss questions, ``count // 3`` per question type.

    Covers "Find profit/loss percentage", "Find selling price after
    discount" and "Find single equivalent discount".  A type is skipped
    when get_qtid() returns a falsy id for it.

    Returns the list of questions built with make_question().
    """
    questions = []
    per_type = count // 3

    qtid = get_qtid(conn, SUBJECT, "Profit and Loss", "Basic Profit and Loss", "Find profit/loss percentage")
    if qtid:
        for _ in range(per_type):
            margin = random.randint(5, 50)
            # Choose a cost price for which margin% is a whole number of
            # rupees; otherwise the floor division below would make the
            # stated percentage only approximately true.
            step = 100 // math.gcd(margin, 100)
            cp = step * random.randint(100 // step, 5000 // step)
            is_profit = random.choice([True, False])
            delta = cp * margin // 100
            sp = cp + delta if is_profit else cp - delta
            word = "profit" if is_profit else "loss"
            questions.append(make_question(
                qtid,
                f"An article bought for ₹{cp} is sold for ₹{sp}. Find the {word} percentage.",
                str(margin) + "%", [str(margin + i) + "%" for i in [2, -3, 5]],
                f"{word.title()} = {abs(sp-cp)}/{cp} × 100 = {margin}%", 1))

    qtid = get_qtid(conn, SUBJECT, "Profit and Loss", "Discount and Marked Price", "Find selling price after discount")
    if qtid:
        for _ in range(per_type):
            d = random.choice([5, 10, 15, 20, 25, 30, 40, 50])
            # Same idea as above: keep the discount a whole number of
            # rupees so the selling price shown is exact.
            step = 100 // math.gcd(d, 100)
            mp = step * random.randint(200 // step, 10000 // step)
            sp = mp - mp * d // 100
            questions.append(make_question(
                qtid,
                f"The marked price of an article is ₹{mp}. If a discount of {d}% is given, find the selling price.",
                f"{sp}", [f"{sp + i*10}" for i in [1, -2, 3]],
                f"SP = {mp} - {d}% of {mp} = ₹{sp}", 1))

    qtid = get_qtid(conn, SUBJECT, "Profit and Loss", "Discount and Marked Price", "Find single equivalent discount")
    if qtid:
        for _ in range(per_type):
            d1, d2 = random.choice([10, 15, 20, 25, 30]), random.choice([5, 10, 15, 20])
            # Successive discounts combine as d1 + d2 - d1*d2/100.
            eq = round(d1 + d2 - d1 * d2 / 100, 2)
            questions.append(make_question(
                qtid,
                f"Find the single equivalent discount for successive discounts of {d1}% and {d2}%.",
                str(eq) + "%", nearby_wrong_float(eq),
                f"Equivalent = {d1} + {d2} - ({d1}×{d2})/100 = {eq}%", 2))

    return questions
def gen_ratio(conn, count=1800):
    """Generate Ratio and Proportion questions.

    Produces about ``count`` questions, split evenly over four question
    types: dividing a sum in a ratio, mean price of a mixture, partnership
    profit sharing, and fourth proportional. A question type is skipped when
    ``get_qtid`` returns a falsy id for it.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Approximate total number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    questions = []
    per_type = count // 4

    qtid = get_qtid(conn, SUBJECT, "Ratio and Proportion", "Basic Ratio", "Divide in given ratio")
    if qtid:
        for _ in range(per_type):
            a, b = random.randint(1, 10), random.randint(1, 10)
            # The total is a multiple of (a + b), so both shares are whole numbers.
            total = (a + b) * random.randint(10, 100)
            part_a, part_b = total * a // (a + b), total * b // (a + b)
            larger = max(part_a, part_b)
            questions.append(make_question(qtid,
                f"Divide ₹{total} in the ratio {a}:{b}. Find the larger share.",
                f"{larger}", [f"{larger + i*10}" for i in [1, -2, 3]],
                f"Larger share = {total} × {max(a,b)}/{a+b} = ₹{larger}", 1))

    qtid = get_qtid(conn, SUBJECT, "Ratio and Proportion", "Mixture and Alligation", "Mean price of mixture")
    if qtid:
        for _ in range(per_type):
            p1, p2 = random.randint(20, 60), random.randint(60, 120)
            q1, q2 = random.randint(1, 10), random.randint(1, 10)
            mean = round((p1*q1 + p2*q2) / (q1+q2), 2)
            questions.append(make_question(qtid,
                f"If {q1} kg of rice at ₹{p1}/kg is mixed with {q2} kg of rice at ₹{p2}/kg, find the price of the mixture per kg.",
                f"{mean}", [f"{round(mean+i,2)}" for i in [2, -3, 5]],
                f"Mean = ({p1}×{q1} + {p2}×{q2})/{q1+q2} = ₹{mean}", 2))

    qtid = get_qtid(conn, SUBJECT, "Ratio and Proportion", "Partnership", "Divide profit among partners")
    if qtid:
        for _ in range(per_type):
            inv_a = random.randint(1000, 10000)
            inv_b = random.randint(1000, 10000)
            profit = random.randint(5000, 50000)
            share_a = round(profit * inv_a / (inv_a + inv_b))
            questions.append(make_question(qtid,
                f"A invests ₹{inv_a} and B invests ₹{inv_b}. Total profit is ₹{profit}. Find A's share.",
                f"{share_a}", [f"{share_a + i*100}" for i in [1, -2, 3]],
                f"A's share = {profit} × {inv_a}/({inv_a}+{inv_b}) = ₹{share_a}", 2))

    qtid = get_qtid(conn, SUBJECT, "Ratio and Proportion", "Proportion", "Find fourth proportional")
    if qtid:
        for _ in range(per_type):
            # Resample until b×c is divisible by a, so the fourth proportional
            # is a whole number. Previously non-divisible triples were dropped,
            # so far fewer than the requested number of questions came out.
            while True:
                a, b, c = random.randint(2, 20), random.randint(2, 20), random.randint(2, 20)
                if (b * c) % a == 0:
                    break
            d = b * c // a
            questions.append(make_question(qtid,
                f"Find the fourth proportional to {a}, {b}, and {c}.",
                str(d), nearby_wrong(d),
                f"a:b = c:d → d = b×c/a = {b}×{c}/{a} = {d}", 1))
    return questions
def gen_average(conn, count=1200):
    """Build Average questions: plain average, average after adding a value,
    and family average age after a new member.

    Each of the three question types receives ``count // 3`` questions and is
    skipped when ``get_qtid`` returns a falsy id.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Approximate total number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    bank = []
    share = count // 3

    simple_id = get_qtid(conn, SUBJECT, "Average", "Simple Average", "Find average")
    if simple_id:
        for _ in range(share):
            size = random.randint(3, 8)
            values = [random.randint(10, 100) for _ in range(size)]
            total = sum(values)
            mean = round(total / size, 2)
            listing = ", ".join(str(v) for v in values)
            bank.append(make_question(
                simple_id,
                f"Find the average of {listing}.",
                str(mean),
                nearby_wrong_float(mean),
                f"Average = {total}/{size} = {mean}",
                1,
            ))

    added_id = get_qtid(conn, SUBJECT, "Average", "Simple Average", "Average after adding/removing")
    if added_id:
        for _ in range(share):
            size = random.randint(5, 15)
            old_mean = random.randint(20, 80)
            # The newcomer is always above the old average, so the average rises.
            extra = random.randint(old_mean + 5, old_mean + 50)
            updated = round((old_mean * size + extra) / (size + 1), 2)
            bank.append(make_question(
                added_id,
                f"The average of {size} numbers is {old_mean}. When a new number {extra} is added, what is the new average?",
                str(updated),
                nearby_wrong_float(updated),
                f"New avg = ({old_mean}×{size} + {extra})/{size+1} = {updated}",
                2,
            ))

    family_id = get_qtid(conn, SUBJECT, "Average", "Age Problems", "Average age of family")
    if family_id:
        for _ in range(share):
            members = random.randint(3, 6)
            mean_age = random.randint(20, 40)
            infant = random.randint(1, 5)
            updated = round((mean_age * members + infant) / (members + 1), 2)
            distractors = []
            for delta in (1, -2, 3):
                distractors.append(str(round(updated + delta, 2)) + " years")
            bank.append(make_question(
                family_id,
                f"The average age of {members} members of a family is {mean_age} years. A baby of {infant} year(s) is born. Find the new average age.",
                str(updated) + " years",
                distractors,
                f"New avg = ({mean_age}×{members} + {infant})/{members+1} = {updated} years",
                1,
            ))
    return bank
def gen_time_work(conn, count=1500):
    """Build Time and Work questions: combined working time, two pipes filling
    a tank, and splitting a wage by efficiency.

    Each of the three question types receives ``count // 3`` questions and is
    skipped when ``get_qtid`` returns a falsy id.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Approximate total number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    def joint_time(x, y):
        # Time for two workers (or pipes) acting together: xy / (x + y).
        return round(x * y / (x + y), 2)

    bank = []
    share = count // 3

    work_id = get_qtid(conn, SUBJECT, "Time and Work", "Basic Time and Work", "Find combined time")
    if work_id:
        for _ in range(share):
            days_a = random.randint(5, 30)
            days_b = random.randint(5, 30)
            both = joint_time(days_a, days_b)
            distractors = [str(round(both + delta, 2)) + " days" for delta in (1, -2, 3)]
            bank.append(make_question(
                work_id,
                f"A can do a work in {days_a} days and B can do it in {days_b} days. In how many days can they do it together?",
                str(both) + " days",
                distractors,
                f"Together = {days_a}×{days_b}/({days_a}+{days_b}) = {both} days",
                2,
            ))

    pipe_id = get_qtid(conn, SUBJECT, "Time and Work", "Pipes and Cisterns", "Time to fill tank")
    if pipe_id:
        for _ in range(share):
            hours_a = random.randint(5, 30)
            hours_b = random.randint(5, 30)
            both = joint_time(hours_a, hours_b)
            distractors = [str(round(both + delta, 2)) + " hours" for delta in (1, -2, 3)]
            bank.append(make_question(
                pipe_id,
                f"Two pipes can fill a tank in {hours_a} hours and {hours_b} hours respectively. How long to fill the tank if both are opened?",
                str(both) + " hours",
                distractors,
                f"Together = {hours_a}×{hours_b}/({hours_a}+{hours_b}) = {both} hours",
                2,
            ))

    wage_id = get_qtid(conn, SUBJECT, "Time and Work", "Work and Wages", "Divide wages")
    if wage_id:
        for _ in range(share):
            da = random.randint(5, 20)
            db = random.randint(5, 20)
            wage = random.randint(1000, 10000)
            # Wages are split in proportion to daily efficiency (1 / days).
            rate_a = 1 / da
            rate_b = 1 / db
            a_part = round(wage * rate_a / (rate_a + rate_b))
            bank.append(make_question(
                wage_id,
                f"A can do a work in {da} days and B in {db} days. For a total wage of ₹{wage}, find A's share.",
                f"{a_part}",
                [f"{a_part + delta*50}" for delta in (1, -2, 3)],
                f"A's share = ₹{wage} × (1/{da}) / (1/{da} + 1/{db}) = ₹{a_part}",
                2,
            ))
    return bank
def gen_speed_distance(conn, count=2000):
    """Build Time, Speed and Distance questions: round-trip average speed,
    train passing a pole, boat speed in still water, and plain distance.

    Each of the four question types receives ``count // 4`` questions and is
    skipped when ``get_qtid`` returns a falsy id.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Approximate total number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    bank = []
    share = count // 4

    avg_id = get_qtid(conn, SUBJECT, "Time, Speed and Distance", "Basic Speed and Distance", "Average speed")
    if avg_id:
        for _ in range(share):
            out_speed = random.randint(20, 80)
            back_speed = random.randint(20, 80)
            # Equal distances each way: harmonic mean of the two speeds.
            mean_speed = round(2 * out_speed * back_speed / (out_speed + back_speed), 2)
            distractors = [str(round(mean_speed + delta, 2)) + " km/h" for delta in (2, -3, 5)]
            bank.append(make_question(
                avg_id,
                f"A person goes from A to B at {out_speed} km/h and returns at {back_speed} km/h. Find the average speed.",
                str(mean_speed) + " km/h",
                distractors,
                f"Avg speed = 2×{out_speed}×{back_speed}/({out_speed}+{back_speed}) = {mean_speed} km/h",
                2,
            ))

    train_id = get_qtid(conn, SUBJECT, "Time, Speed and Distance", "Trains", "Train passing pole")
    if train_id:
        for _ in range(share):
            train_len = random.randint(100, 500)
            kmh = random.randint(36, 144)
            metres_per_sec = round(kmh * 5 / 18, 2)
            seconds = round(train_len / metres_per_sec, 2)
            distractors = [str(kmh + delta) + " km/h" for delta in (4, -6, 9)]
            bank.append(make_question(
                train_id,
                f"A train {train_len}m long passes a pole in {seconds} seconds. Find its speed in km/h.",
                str(kmh) + " km/h",
                distractors,
                f"Speed = {train_len}/{seconds} m/s = {kmh} km/h",
                2,
            ))

    boat_id = get_qtid(conn, SUBJECT, "Time, Speed and Distance", "Boats and Streams", "Find speed in still water")
    if boat_id:
        for _ in range(share):
            still = random.randint(10, 30)
            current = random.randint(2, 8)
            downstream = still + current
            upstream = still - current
            distractors = [str(still + delta) + " km/h" for delta in (1, -2, 3)]
            bank.append(make_question(
                boat_id,
                f"A boat goes {downstream} km/h downstream and {upstream} km/h upstream. Find speed in still water.",
                str(still) + " km/h",
                distractors,
                f"Speed = ({downstream}+{upstream})/2 = {still} km/h",
                1,
            ))

    dist_id = get_qtid(conn, SUBJECT, "Time, Speed and Distance", "Basic Speed and Distance", "Find distance")
    if dist_id:
        for _ in range(share):
            kmh = random.randint(20, 100)
            hours = random.randint(1, 10)
            covered = kmh * hours
            bank.append(make_question(
                dist_id,
                f"A car travels at {kmh} km/h for {hours} hours. Find the distance covered.",
                str(covered) + " km",
                nearby_wrong(covered),
                f"Distance = {kmh} × {hours} = {covered} km",
                1,
            ))
    return bank
def gen_interest(conn, count=1500):
    """Generate Interest questions.

    Produces about ``count`` questions, split evenly over three question
    types: simple interest, compound interest, and the CI − SI difference
    over two years. A question type is skipped when ``get_qtid`` returns a
    falsy id for it.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Approximate total number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    questions = []
    per_type = count // 3

    qtid = get_qtid(conn, SUBJECT, "Interest", "Simple Interest", "Find SI")
    if qtid:
        for _ in range(per_type):
            p = random.choice([1000, 2000, 5000, 8000, 10000, 15000, 20000])
            r = random.choice([4, 5, 6, 8, 10, 12, 15])
            t = random.randint(1, 5)
            si = p * r * t // 100  # exact: every principal is a multiple of 100
            # The "-2" offset subtracts 100; for small answers that produced a
            # zero or negative amount, so use only positive offsets there.
            offsets = [1, -2, 3] if si > 100 else [1, 2, 3]
            questions.append(make_question(qtid,
                f"Find the simple interest on ₹{p} at {r}% per annum for {t} years.",
                f"{si}", [f"{si + i*50}" for i in offsets],
                f"SI = {p}×{r}×{t}/100 = ₹{si}", 1))

    qtid = get_qtid(conn, SUBJECT, "Interest", "Compound Interest", "Find CI")
    if qtid:
        for _ in range(per_type):
            p = random.choice([1000, 2000, 5000, 10000])
            r = random.choice([5, 10, 15, 20])
            t = random.choice([1, 2, 3])
            amt = round(p * (1 + r/100)**t, 2)
            ci = round(amt - p, 2)
            questions.append(make_question(qtid,
                f"Find the compound interest on ₹{p} at {r}% for {t} year(s).",
                f"{ci}", [f"{round(ci+i*20,2)}" for i in [1, -2, 3]],
                f"CI = {p}(1+{r}/100)^{t} - {p} = ₹{ci}", 2))

    qtid = get_qtid(conn, SUBJECT, "Interest", "Compound Interest", "Difference between CI and SI")
    if qtid:
        for _ in range(per_type):
            p = random.choice([1000, 2000, 5000, 10000])
            r = random.choice([5, 10, 15, 20])
            si = p * r * 2 // 100
            ci = round(p * (1 + r/100)**2 - p, 2)
            diff = round(ci - si, 2)
            # The "-2" offset subtracts 10; the smallest differences (2.5, 10)
            # would yield a zero or negative distractor, so stay positive there.
            offsets = [1, -2, 3] if diff > 10 else [1, 2, 3]
            questions.append(make_question(qtid,
                f"Find the difference between CI and SI on ₹{p} at {r}% for 2 years.",
                f"{diff}", [f"{round(diff+i*5,2)}" for i in offsets],
                f"Diff = P×(r/100)² = {p}×({r}/100)² = ₹{diff}", 2))
    return questions
def gen_algebra(conn, count=2000):
    """Generate Algebra questions.

    Produces about ``count`` questions, split evenly over four question
    types: linear equation, (a+b)² identity, integer power, and sum of roots
    of a monic quadratic. A question type is skipped when ``get_qtid``
    returns a falsy id for it.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Approximate total number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    questions = []
    per_type = count // 4

    qtid = get_qtid(conn, SUBJECT, "Algebra", "Linear Equations", "Solve for x")
    if qtid:
        for _ in range(per_type):
            a = random.randint(2, 15)
            x = random.randint(1, 30)
            b = random.randint(1, 50)
            c = a * x + b  # built from the solution, so x is always an integer
            questions.append(make_question(qtid,
                f"Solve: {a}x + {b} = {c}",
                f"x = {x}", [f"x = {x+i}" for i in [1, -2, 3]],
                f"{a}x = {c} - {b} = {c-b}, x = {x}", 1))

    qtid = get_qtid(conn, SUBJECT, "Algebra", "Algebraic Identities", "Find value of expression")
    if qtid:
        for _ in range(per_type):
            a, b = random.randint(2, 20), random.randint(1, 15)
            val = a*a + b*b + 2*a*b  # (a+b)²
            questions.append(make_question(qtid,
                f"If a = {a} and b = {b}, find a² + b² + 2ab.",
                str(val), nearby_wrong(val),
                f"a² + b² + 2ab = (a+b)² = ({a}+{b})² = {val}", 1))

    qtid = get_qtid(conn, SUBJECT, "Algebra", "Surds and Indices", "Find value")
    if qtid:
        for _ in range(per_type):
            base = random.randint(2, 10)
            exp = random.randint(2, 5)
            val = base ** exp
            questions.append(make_question(qtid,
                f"Find the value of {base}^{exp}.",
                str(val), nearby_wrong(val),
                f"{base}^{exp} = {val}", 1))

    qtid = get_qtid(conn, SUBJECT, "Algebra", "Quadratic Equations", "Sum and product of roots")
    if qtid:
        for _ in range(per_type):
            # Build the monic quadratic (x - r1)(x - r2) from chosen integer roots.
            r1, r2 = random.randint(-10, 10), random.randint(-10, 10)
            b = -(r1 + r2)
            c = r1 * r2
            s = r1 + r2
            questions.append(make_question(qtid,
                f"Find the sum of roots of x² + ({b})x + ({c}) = 0.",
                str(s), nearby_wrong(s),
                f"Sum = -b/a = {s}", 1))
    return questions
def gen_geometry(conn, count=2000):
    """Build Geometry questions: third angle of a triangle, tangent length,
    distance between two points, and supplementary angle.

    Each of the four question types receives ``count // 4`` questions and is
    skipped when ``get_qtid`` returns a falsy id.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Approximate total number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    bank = []
    share = count // 4

    triangle_id = get_qtid(conn, SUBJECT, "Geometry", "Triangles", "Find angle in triangle")
    if triangle_id:
        for _ in range(share):
            first = random.randint(20, 80)
            # Capping the second angle at 160 - first leaves at least 20° for the third.
            second = random.randint(20, 160 - first)
            third = 180 - first - second
            bank.append(make_question(
                triangle_id,
                f"In a triangle, two angles are {first}° and {second}°. Find the third angle.",
                f"{third}°",
                [f"{third+delta}°" for delta in (5, -10, 15)],
                f"Third angle = 180 - {first} - {second} = {third}°",
                1,
            ))

    circle_id = get_qtid(conn, SUBJECT, "Geometry", "Circles", "Tangent properties")
    if circle_id:
        for _ in range(share):
            radius = random.randint(3, 20)
            # The external point is always farther from the centre than the radius.
            gap = random.randint(radius + 5, radius + 30)
            tangent_len = round(math.sqrt(gap*gap - radius*radius), 2)
            distractors = [str(round(tangent_len + delta, 2)) + " cm" for delta in (1, -2, 3)]
            bank.append(make_question(
                circle_id,
                f"The radius of a circle is {radius} cm and a point is {gap} cm from the center. Find the length of the tangent.",
                str(tangent_len) + " cm",
                distractors,
                f"Tangent = √({gap}²-{radius}²) = {tangent_len} cm",
                2,
            ))

    coord_id = get_qtid(conn, SUBJECT, "Geometry", "Coordinate Geometry", "Find distance between points")
    if coord_id:
        for _ in range(share):
            x1 = random.randint(-10, 10)
            y1 = random.randint(-10, 10)
            x2 = random.randint(-10, 10)
            y2 = random.randint(-10, 10)
            separation = round(math.sqrt((x2-x1)**2 + (y2-y1)**2), 2)
            bank.append(make_question(
                coord_id,
                f"Find the distance between ({x1}, {y1}) and ({x2}, {y2}).",
                str(separation),
                nearby_wrong_float(separation),
                f"Distance = √[({x2}-{x1})² + ({y2}-{y1})²] = {separation}",
                2,
            ))

    angle_id = get_qtid(conn, SUBJECT, "Geometry", "Lines and Angles", "Find angle value")
    if angle_id:
        for _ in range(share):
            given = random.randint(10, 170)
            supplement = 180 - given
            bank.append(make_question(
                angle_id,
                f"Find the supplement of {given}°.",
                f"{supplement}°",
                [f"{supplement+delta}°" for delta in (5, -10, 15)],
                f"Supplement = 180 - {given} = {supplement}°",
                1,
            ))
    return bank
def gen_mensuration(conn, count=2000):
    """Generate Mensuration questions.

    Produces about ``count`` questions: rectangle area and circle area (both
    under the "Find area" type, ``count // 4`` each), cuboid volume, and cube
    total surface area. A question type is skipped when ``get_qtid`` returns
    a falsy id for it.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Approximate total number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    questions = []
    per_type = count // 4

    qtid = get_qtid(conn, SUBJECT, "Mensuration", "2D Figures", "Find area")
    if qtid:
        for _ in range(per_type):
            l, w = random.randint(5, 50), random.randint(5, 50)
            area = l * w
            questions.append(make_question(qtid,
                f"Find the area of a rectangle with length {l} cm and width {w} cm.",
                f"{area} cm²", [f"{area+i*5} cm²" for i in [1, -2, 3]],
                f"Area = {l} × {w} = {area} cm²", 1))
        for _ in range(per_type):
            r = random.randint(3, 25)
            # The question fixes π = 3.14, so the answer uses 3.14 and not math.pi.
            raw_area = 3.14 * r * r
            area = round(raw_area, 2)
            questions.append(make_question(qtid,
                f"Find the area of a circle with radius {r} cm. (Use π = 3.14)",
                str(area) + " cm²",
                [str(round(raw_area + i * 5, 2)) + " cm²" for i in [1, -2, 3]],
                f"Area = πr² = 3.14 × {r}² = {area} cm²", 1))

    qtid = get_qtid(conn, SUBJECT, "Mensuration", "3D Figures", "Find volume")
    if qtid:
        for _ in range(per_type):
            l, w, h = random.randint(3, 20), random.randint(3, 20), random.randint(3, 20)
            vol = l * w * h
            questions.append(make_question(qtid,
                f"Find the volume of a cuboid with dimensions {l}×{w}×{h} cm.",
                f"{vol} cm³", [f"{vol+i*10} cm³" for i in [1, -2, 3]],
                f"Volume = {l}×{w}×{h} = {vol} cm³", 1))

    qtid = get_qtid(conn, SUBJECT, "Mensuration", "3D Figures", "Find surface area")
    if qtid:
        for _ in range(per_type):
            a = random.randint(3, 20)
            sa = 6 * a * a
            questions.append(make_question(qtid,
                f"Find the total surface area of a cube with side {a} cm.",
                f"{sa} cm²", [f"{sa+i*6} cm²" for i in [1, -2, 3]],
                f"TSA = 6a² = 6×{a}² = {sa} cm²", 1))
    return questions
def gen_trigonometry(conn, count=1500):
    """Generate Trigonometry questions.

    Produces about ``count`` questions, split evenly over three question
    types: value of a standard ratio, the sin² + cos² identity, and height of
    a tower from distance and angle of elevation. A question type is skipped
    when ``get_qtid`` returns a falsy id for it.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Approximate total number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    questions = []
    per_type = count // 3
    # Standard-angle values rounded to three decimals; tan(90°) is not numeric.
    trig_vals = {0: {'sin': 0, 'cos': 1, 'tan': 0},
                 30: {'sin': 0.5, 'cos': 0.866, 'tan': 0.577},
                 45: {'sin': 0.707, 'cos': 0.707, 'tan': 1},
                 60: {'sin': 0.866, 'cos': 0.5, 'tan': 1.732},
                 90: {'sin': 1, 'cos': 0, 'tan': 'undefined'}}

    qtid = get_qtid(conn, SUBJECT, "Trigonometry", "Trigonometric Ratios", "Find value of expression")
    if qtid:
        for _ in range(per_type):
            ang = random.choice([0, 30, 45, 60])
            func = random.choice(['sin', 'cos', 'tan'])
            val = trig_vals[ang][func]
            # Guard against the non-numeric tan(90°) entry should 90 ever be
            # added to the angle choices above.
            if isinstance(val, (int, float)):
                questions.append(make_question(qtid,
                    f"Find the value of {func}({ang}°).",
                    # A zero answer is swapped for 0.5 as the base for distractors.
                    str(val), nearby_wrong_float(val if val != 0 else 0.5),
                    f"{func}({ang}°) = {val}", 1))

    qtid = get_qtid(conn, SUBJECT, "Trigonometry", "Trigonometric Identities", "Find value given condition")
    if qtid:
        for _ in range(per_type):
            ang = random.choice([30, 45, 60])
            # The identity gives exactly 1 for every angle, so nothing is computed.
            questions.append(make_question(qtid,
                f"Find the value of sin²({ang}°) + cos²({ang}°).",
                "1", ["0", "2", "0.5"],
                "sin²θ + cos²θ = 1 (identity)", 1))

    qtid = get_qtid(conn, SUBJECT, "Trigonometry", "Heights and Distances", "Find height of tower")
    if qtid:
        for _ in range(per_type):
            dist = random.randint(20, 100)
            ang = random.choice([30, 45, 60])  # tan is numeric for all of these
            height = round(dist * trig_vals[ang]['tan'], 2)
            questions.append(make_question(qtid,
                f"From a point {dist}m from the base of a tower, the angle of elevation is {ang}°. Find the height.",
                str(height) + " m", [str(round(height+i,2)) + " m" for i in [2, -5, 8]],
                f"h = {dist} × tan({ang}°) = {height} m", 2))
    return questions
def gen_di(conn, count=1500):
    """Generate Data Interpretation questions from an inline five-year table.

    Produces about ``count`` questions, split evenly over two question types:
    total of the table and one year's percentage share of the total. A
    question type is skipped when ``get_qtid`` returns a falsy id for it.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Approximate total number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    questions = []
    per_type = count // 2
    years = ["2019", "2020", "2021", "2022", "2023"]

    qtid = get_qtid(conn, SUBJECT, "Data Interpretation", "Table", "Calculate from table")
    if qtid:
        for _ in range(per_type):
            vals = [random.randint(100, 1000) for _ in range(5)]
            total = sum(vals)
            table = ", ".join(f"{y}: {v}" for y, v in zip(years, vals))
            questions.append(make_question(qtid,
                f"Production (in units) - {table}. Find the total production.",
                str(total), nearby_wrong(total),
                f"Total = {'+'.join(map(str, vals))} = {total}", 1))

    qtid = get_qtid(conn, SUBJECT, "Data Interpretation", "Table", "Percentage calculation")
    if qtid:
        for _ in range(per_type):
            vals = [random.randint(100, 1000) for _ in range(5)]
            idx = random.randint(0, 4)  # which year the question asks about
            total = sum(vals)
            pct = round(vals[idx] * 100 / total, 2)
            table = ", ".join(f"{y}: {v}" for y, v in zip(years, vals))
            questions.append(make_question(qtid,
                f"Sales - {table}. What percentage of total sales occurred in {years[idx]}?",
                str(pct) + "%", [str(round(pct+i,2)) + "%" for i in [2, -3, 5]],
                f"{vals[idx]}/{total} × 100 = {pct}%", 2))
    return questions
def gen_statistics(conn, count=800):
    """Generate Statistics questions on mean, median and mode.

    Produces about ``count`` questions, split evenly over the three question
    types. A question type is skipped when ``get_qtid`` returns a falsy id
    for it.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Approximate total number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    questions = []
    per_type = count // 3

    qtid = get_qtid(conn, SUBJECT, "Statistics", "Mean, Median and Mode", "Find mean")
    if qtid:
        for _ in range(per_type):
            n = random.randint(5, 10)
            nums = sorted([random.randint(1, 100) for _ in range(n)])
            mean = round(sum(nums) / n, 2)
            questions.append(make_question(qtid,
                f"Find the mean of: {', '.join(map(str, nums))}",
                str(mean), nearby_wrong_float(mean),
                f"Mean = {sum(nums)}/{n} = {mean}", 1))

    qtid = get_qtid(conn, SUBJECT, "Statistics", "Mean, Median and Mode", "Find median")
    if qtid:
        for _ in range(per_type):
            n = random.choice([5, 7, 9])  # odd sizes: the median is the middle element
            nums = sorted([random.randint(1, 100) for _ in range(n)])
            median = nums[n // 2]
            questions.append(make_question(qtid,
                f"Find the median of: {', '.join(map(str, nums))}",
                str(median), nearby_wrong(median),
                f"Sorted: {nums}, Median = {median}", 1))

    qtid = get_qtid(conn, SUBJECT, "Statistics", "Mean, Median and Mode", "Find mode")
    if qtid:
        for _ in range(per_type):
            mode_val = random.randint(1, 50)
            # Fillers are distinct and never equal the mode, so the mode is the
            # only repeated value. Independent random fillers could tie or
            # out-number it, making the stated answer wrong.
            fillers = random.sample([v for v in range(1, 101) if v != mode_val], 4)
            nums = [mode_val] * 3 + fillers
            random.shuffle(nums)
            questions.append(make_question(qtid,
                f"Find the mode of: {', '.join(map(str, nums))}",
                str(mode_val), nearby_wrong(mode_val),
                f"Mode = {mode_val} (appears most)", 1))
    return questions
def generate_all(conn):
    """Run every Quantitative Aptitude generator and store the results.

    Each generator is called with its target count, a per-topic tally is
    printed, and the pooled questions are passed to
    ``insert_questions_batch`` in slices of 5000.

    Args:
        conn: Database connection handed to the generators and the inserter.

    Returns:
        Total number of questions generated.
    """
    plan = [
        ("Number System", gen_number_system, 800),
        ("HCF/LCM", gen_hcf_lcm, 500),
        ("Simplification", gen_simplification, 600),
        ("Percentage", gen_percentage, 2500),
        ("Profit & Loss", gen_profit_loss, 2500),
        ("Ratio & Proportion", gen_ratio, 2200),
        ("Average", gen_average, 1500),
        ("Time & Work", gen_time_work, 1800),
        ("Speed & Distance", gen_speed_distance, 2500),
        ("Interest", gen_interest, 2000),
        ("Algebra", gen_algebra, 2500),
        ("Geometry", gen_geometry, 2500),
        ("Mensuration", gen_mensuration, 2500),
        ("Trigonometry", gen_trigonometry, 2000),
        ("Data Interpretation", gen_di, 2000),
        ("Statistics", gen_statistics, 1200),
    ]

    pooled = []
    for label, producer, target in plan:
        produced = producer(conn, target)
        pooled += produced
        print(f"  {label}: {len(produced)} questions")

    # Insert in fixed-size slices rather than in one call.
    chunk = 5000
    offset = 0
    while offset < len(pooled):
        insert_questions_batch(conn, pooled[offset:offset + chunk])
        offset += chunk

    grand_total = len(pooled)
    print(f"  TOTAL Quantitative Aptitude: {grand_total}")
    return grand_total
if __name__ == '__main__':
    # Script entry point: generate and store all Quantitative Aptitude questions.
    conn = get_db()
    try:
        print("Generating Quantitative Aptitude questions...")
        generate_all(conn)
    finally:
        # Close the connection even if generation or insertion raises.
        conn.close()

View File

@ -0,0 +1,510 @@
#!/usr/bin/env python3
"""
General Intelligence & Reasoning Question Generator for SSC CGL.
Generates ~25,000 template-based reasoning questions.
"""
import random
import string
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from generators.base import make_question, get_qtid, nearby_wrong, get_db, insert_questions_batch
SUBJECT = "General Intelligence and Reasoning"
# ============ VERBAL REASONING ============
def gen_number_analogy(conn, count=700):
    """Generate "a : f(a) :: b : ?" number analogy questions.

    The rule f is one of square, cube, double or add 5. Returns an empty list
    when ``get_qtid`` returns a falsy id.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Analogy", "Number analogy")
    if not qtid:
        return questions
    # The rule table does not depend on the operands, so build it once.
    rules = [
        (lambda x: x * x, "square"),
        (lambda x: x * x * x, "cube"),
        (lambda x: x * 2, "double"),
        (lambda x: x + 5, "add 5"),
    ]
    for _ in range(count):
        a = random.randint(2, 20)
        func, rule = random.choice(rules)
        result_a = func(a)
        b = random.randint(2, 20)
        while b == a:  # the second pair must use a different number
            b = random.randint(2, 20)
        result_b = func(b)
        questions.append(make_question(qtid,
            f"{a} : {result_a} :: {b} : ?",
            str(result_b), nearby_wrong(result_b),
            # An arrow separates each number from its result; without it the
            # two numbers ran together (e.g. "525").
            f"Rule: {rule}. {a} → {result_a}, {b} → {result_b}", 1))
    return questions
def gen_letter_analogy(conn, count=500):
    """Generate letter-pair analogy questions based on a fixed alphabet shift.

    A sample pair (letter, letter + shift) is shown and the solver completes a
    second pair that uses the same shift. Returns an empty list when
    ``get_qtid`` returns a falsy id.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Analogy", "Letter analogy")
    if not qtid:
        return questions
    for _ in range(count):
        shift = random.randint(1, 5)
        # Start letters stop at index 20 so letter + shift never passes 'Z'.
        a = random.randint(0, 20)
        pair1 = chr(65 + a) + chr(65 + a + shift)
        b = random.randint(0, 20)
        while b == a:
            b = random.randint(0, 20)
        pair2_q = chr(65 + b)
        pair2_a = chr(65 + b + shift)
        # Distractors are letters adjacent to the correct second letter.
        wrongs = [chr(65 + (b + shift + i) % 26) for i in [1, 2, -1]]
        questions.append(make_question(qtid,
            f"{pair1} : {pair2_q}?",
            pair2_q + pair2_a, [pair2_q + w for w in wrongs],
            # An arrow separates the sample pair from the answer pair; without
            # it the four letters ran together.
            f"Shift by {shift}: {pair1} → {pair2_q}{pair2_a}", 1))
    return questions
def gen_classification(conn, count=1500):
    """Generate odd-one-out questions for numbers and for words.

    Each of the two question types receives ``count // 2`` questions and is
    skipped when ``get_qtid`` returns a falsy id for it.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Approximate total number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    questions = []
    per_type = count // 2

    # Number classification (odd one out)
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Classification", "Number classification")
    if qtid:
        primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]
        squares = [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
        for _ in range(per_type):
            t = random.choice(["even", "odd", "prime", "square"])
            # random.sample keeps the three group members distinct; independent
            # draws could repeat a number and produce duplicate options.
            if t == "even":
                group = random.sample(range(2, 101, 2), 3)
                odd_one = random.randint(1, 50) * 2 + 1
            elif t == "odd":
                group = random.sample(range(1, 100, 2), 3)
                odd_one = random.randint(1, 50) * 2
            elif t == "prime":
                group = random.sample(primes, 3)
                odd_one = random.choice([4, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22])
            else:
                group = random.sample(squares, 3)
                odd_one = random.choice([2, 3, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15])
            all_opts = group + [odd_one]
            random.shuffle(all_opts)
            questions.append(make_question(qtid,
                f"Find the odd one out: {', '.join(map(str, all_opts))}",
                str(odd_one), [str(x) for x in group],
                f"{odd_one} is not {t}", 1))

    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Classification", "Word classification")
    if qtid:
        word_groups = [
            (["Apple", "Mango", "Banana", "Orange"], "Carrot", "Fruits"),
            (["Dog", "Cat", "Lion", "Tiger"], "Eagle", "Mammals"),
            (["Red", "Blue", "Green", "Yellow"], "Square", "Colors"),
            (["Delhi", "Mumbai", "Chennai", "Kolkata"], "India", "Cities"),
            (["Pen", "Pencil", "Marker", "Crayon"], "Book", "Writing tools"),
            (["Piano", "Guitar", "Violin", "Flute"], "Painting", "Instruments"),
            (["January", "March", "May", "July"], "Monday", "Months"),
            (["Mercury", "Venus", "Mars", "Jupiter"], "Moon", "Planets"),
            (["Nile", "Amazon", "Ganges", "Thames"], "Sahara", "Rivers"),
            (["Football", "Cricket", "Tennis", "Hockey"], "Chess", "Outdoor sports"),
        ]
        for _ in range(per_type):
            group, odd, reason = random.choice(word_groups)
            # Draw three of the four members; sampling only the first three
            # meant the fourth word of every group was never shown.
            display = random.sample(group, 3) + [odd]
            random.shuffle(display)
            questions.append(make_question(qtid,
                f"Find the odd one out: {', '.join(display)}",
                odd, [x for x in display if x != odd][:3],
                f"{odd} is not in the category: {reason}", 1))
    return questions
def gen_number_series(conn, count=1500):
    """Generate "find the next number" series questions.

    Each question shows five terms of one of five patterns: arithmetic
    ("add"), geometric ("multiply"), consecutive squares ("square"),
    alternating increments ("alternate"), or steadily growing differences
    ("diff"). Returns an empty list when ``get_qtid`` returns a falsy id.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Number Series", "Find next number")
    if not qtid:
        return questions
    for _ in range(count):
        series_type = random.choice(["add", "multiply", "square", "alternate", "diff"])
        if series_type == "add":
            start = random.randint(1, 50)
            d = random.randint(2, 15)
            series = [start + i * d for i in range(5)]
            ans = start + 5 * d
        elif series_type == "multiply":
            start = random.randint(1, 5)
            r = random.choice([2, 3])
            series = [start * (r ** i) for i in range(5)]
            ans = start * (r ** 5)
        elif series_type == "square":
            start = random.randint(1, 8)
            series = [(start + i) ** 2 for i in range(5)]
            ans = (start + 5) ** 2
        elif series_type == "alternate":
            # Increments alternate +a, +b, +a, +b; the sixth term therefore adds a.
            a, b = random.randint(1, 10), random.randint(1, 10)
            series = [random.randint(1, 20)]
            for i in range(1, 5):
                series.append(series[-1] + (a if i % 2 == 1 else b))
            ans = series[-1] + a
        else:  # increasing difference: +d, +d+1, +d+2, ...
            start = random.randint(1, 10)
            series = [start]
            d = random.randint(1, 5)
            for i in range(4):
                series.append(series[-1] + d + i)
            ans = series[-1] + d + 4
        series_str = ", ".join(map(str, series))
        questions.append(make_question(qtid,
            f"Find the next number in the series: {series_str}, ?",
            str(ans), nearby_wrong(ans),
            f"Pattern: {series_type}. Next = {ans}", random.choice([1, 2])))
    return questions
def gen_coding_decoding(conn, count=2000):
    """Generate coding-decoding questions (letter shift and letter-to-digit).

    Each of the two question types receives ``count // 2`` questions and is
    skipped when ``get_qtid`` returns a falsy id for it.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Approximate total number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    questions = []
    per_type = count // 2

    def caesar(text, offset):
        # Shift every uppercase letter by ``offset`` places, wrapping after Z.
        return "".join(chr((ord(c) - 65 + offset) % 26 + 65) for c in text)

    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Coding-Decoding", "Letter coding")
    if qtid:
        for _ in range(per_type):
            shift = random.randint(1, 5)
            word = random.choice(["COME", "GONE", "HELP", "LOVE", "MIND", "PLAY", "ROSE", "SING", "TALK", "WIND",
                                  "BACK", "DEEP", "FAST", "GIRL", "HOME", "JUST", "KING", "LAMP", "NAME", "OPEN"])
            coded = caesar(word, shift)
            word2 = random.choice(["BALL", "CAKE", "DARK", "EASY", "FISH", "GOOD", "HAND", "IDOL", "JOKE", "KEEP"])
            coded2 = caesar(word2, shift)
            # Distractors use neighbouring shifts, so they always differ from the answer.
            wrongs = [caesar(word2, s) for s in [shift + 1, shift - 1, shift + 2]]
            questions.append(make_question(qtid,
                f"If {word} is coded as {coded}, then {word2} is coded as?",
                coded2, wrongs,
                f"Each letter shifted by +{shift}", 2))

    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Coding-Decoding", "Number coding")
    if qtid:
        for _ in range(per_type):
            word = random.choice(["CAT", "DOG", "SUN", "PEN", "CUP", "BOX", "HAT", "MAP", "JAR", "FAN"])
            # Distinct digits give every letter its own code.
            code = random.sample(range(1, 10), len(word))
            code_str = "".join(map(str, code))
            mapping = {c: str(v) for c, v in zip(word, code)}
            # The target is a rearrangement of the clue word, so every letter
            # has a known code. Unrelated target words contained letters the
            # clue never defined; those got a random digit and the question
            # could not be solved.
            letters = list(word)
            while "".join(letters) == word:
                random.shuffle(letters)
            word2 = "".join(letters)
            code2 = "".join(mapping[c] for c in word2)
            wrongs = [str(int(code2) + i) for i in [11, -22, 33]]
            questions.append(make_question(qtid,
                f"If {word} = {code_str}, then {word2} = ?",
                code2, wrongs,
                f"Letter-to-number mapping from {word}={code_str}", 2))
    return questions
# ============ LOGICAL REASONING ============
def gen_blood_relations(conn, count=1500):
    """Build blood-relation questions from fixed templates, with the
    placeholder people A, B and C replaced by randomly chosen letter names.

    Returns an empty list when ``get_qtid`` returns a falsy id.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    bank = []
    qtid = get_qtid(conn, SUBJECT, "Logical Reasoning", "Blood Relations", "Direct relation")
    if not qtid:
        return bank

    puzzles = [
        ("A is the father of B. B is the sister of C. What is A to C?", "Father", ["Uncle", "Brother", "Grandfather"]),
        ("A is the mother of B. B is the brother of C. What is A to C?", "Mother", ["Aunt", "Sister", "Grandmother"]),
        ("A is the brother of B. B is the son of C. What is A to C?", "Son", ["Nephew", "Brother", "Father"]),
        ("A is the sister of B. B is the daughter of C. What is A to C?", "Daughter", ["Niece", "Sister", "Mother"]),
        ("A is the husband of B. B is the mother of C. What is A to C?", "Father", ["Uncle", "Brother", "Grandfather"]),
        ("A is the wife of B. B is the father of C. What is A to C?", "Mother", ["Aunt", "Sister", "Grandmother"]),
        ("A's father is B's son. What is B to A?", "Grandfather", ["Father", "Uncle", "Brother"]),
        ("A's mother is B's daughter. What is B to A?", "Grandmother", ["Mother", "Aunt", "Sister"]),
        ("A is B's brother's wife. What is A to B?", "Sister-in-law", ["Sister", "Cousin", "Aunt"]),
        ("A is B's father's brother. What is A to B?", "Uncle", ["Father", "Cousin", "Grandfather"]),
    ]
    name_pool = ["P", "Q", "R", "S", "T", "M", "N", "X", "Y", "Z"]

    for _ in range(count):
        wording, answer, distractors = random.choice(puzzles)
        first, second, third = random.sample(name_pool, 3)
        # No pool name is A, B or C, so a single-pass translation gives the
        # same text as replacing the three placeholders one after another.
        renamed = wording.translate(str.maketrans({"A": first, "B": second, "C": third}))
        bank.append(make_question(
            qtid,
            renamed,
            answer,
            distractors,
            f"Following family relationships, the answer is {answer}",
            2,
        ))
    return bank
def gen_direction(conn, count=1200):
    """Build "which way is the person facing" questions: a start direction
    followed by one to three left/right turns.

    Returns an empty list when ``get_qtid`` returns a falsy id.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    bank = []
    qtid = get_qtid(conn, SUBJECT, "Logical Reasoning", "Direction and Distance", "Find final direction")
    if not qtid:
        return bank

    directions = ["North", "South", "East", "West"]
    # Compass points in clockwise order: a right turn steps forward one place
    # and a left turn steps back one place.
    clockwise = ["North", "East", "South", "West"]
    step_for = {"right": 1, "left": -1}

    for _ in range(count):
        start = random.choice(directions)
        turn_total = random.randint(1, 3)
        facing = start
        narrative = [f"starts facing {start}"]
        for _step in range(turn_total):
            side = random.choice(["right", "left"])
            facing = clockwise[(clockwise.index(facing) + step_for[side]) % 4]
            narrative.append(f"turns {side}")
        others = []
        for candidate in directions:
            if candidate != facing:
                others.append(candidate)
        bank.append(make_question(
            qtid,
            f"A person {', '.join(narrative)}. Which direction is the person facing now?",
            facing,
            others[:3],
            f"After turns: {facing}",
            1,
        ))
    return bank
def _ordinal(n):
    """Return ``n`` with its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st)."""
    if 10 <= n % 100 <= 20:
        # 11th, 12th and 13th (and the rest of the teens) always take "th".
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
    return f"{n}{suffix}"


def gen_ranking(conn, count=1200):
    """Generate rank-from-top / rank-from-bottom questions for a single row.

    Uses the relation top + bottom = total + 1. Returns an empty list when
    ``get_qtid`` returns a falsy id.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Logical Reasoning", "Order and Ranking", "Find rank from top/bottom")
    if not qtid:
        return questions
    for _ in range(count):
        total = random.randint(20, 60)
        from_top = random.randint(1, total)
        from_bottom = total - from_top + 1
        ask = random.choice(["top", "bottom"])
        # Positions are written with a proper ordinal suffix; a fixed "th"
        # produced "1th", "2th", "23th" and so on.
        if ask == "top":
            questions.append(make_question(qtid,
                f"In a row of {total} students, a student is {_ordinal(from_bottom)} from the bottom. What is the student's position from the top?",
                str(from_top), nearby_wrong(from_top),
                f"From top = Total - From bottom + 1 = {total} - {from_bottom} + 1 = {from_top}", 1))
        else:
            questions.append(make_question(qtid,
                f"In a row of {total} students, a student is {_ordinal(from_top)} from the top. What is the student's position from the bottom?",
                str(from_bottom), nearby_wrong(from_bottom),
                f"From bottom = Total - From top + 1 = {total} - {from_top} + 1 = {from_bottom}", 1))
    return questions
def gen_syllogism(conn, count=1000):
    """Generate two-statement syllogism questions with one valid conclusion.

    Templates use ``{a}``, ``{b}`` and ``{c}`` placeholders that are filled
    with three distinct category names. Returns an empty list when
    ``get_qtid`` returns a falsy id.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Logical Reasoning", "Syllogism", "All/Some/No conclusions")
    if not qtid:
        return questions
    # Named placeholders replace chained str.replace("A"/"B"/"C"), which also
    # rewrote the "A" in "All" and the capitals inside already-inserted names
    # such as "Birds" and "Cats", corrupting the statements.
    templates = [
        ("All {a} are {b}. All {b} are {c}.", "All {a} are {c}",
         ["No {a} is {c}", "Some {a} are not {c}", "All {c} are {a}"]),
        ("All {a} are {b}. Some {b} are {c}.", "Some {a} may be {c}",
         ["All {a} are {c}", "No {a} is {c}", "All {c} are {a}"]),
        ("No {a} is {b}. All {b} are {c}.", "Some {c} are not {a}",
         ["All {a} are {c}", "No {c} is {a}", "All {c} are {a}"]),
        ("Some {a} are {b}. All {b} are {c}.", "Some {a} are {c}",
         ["All {a} are {c}", "No {a} is {c}", "All {c} are {a}"]),
        ("All {a} are {b}. No {b} is {c}.", "No {a} is {c}",
         ["Some {a} are {c}", "All {a} are {c}", "All {c} are {a}"]),
    ]
    categories = ["dogs", "cats", "birds", "students", "teachers", "doctors", "players", "singers",
                  "dancers", "painters", "writers", "engineers", "lawyers", "flowers", "trees"]
    for _ in range(count):
        template, correct, wrongs = random.choice(templates)
        cats = random.sample(categories, 3)
        names = {"a": cats[0].title(), "b": cats[1].title(), "c": cats[2].title()}
        stmt = template.format(**names)
        ans = correct.format(**names)
        wrong_list = [w.format(**names) for w in wrongs]
        questions.append(make_question(qtid,
            f"Statements: {stmt}\nConclusion: Which follows?",
            ans, wrong_list, "Based on Venn diagram logic", 2))
    return questions
# ============ NON-VERBAL REASONING ============
def gen_mirror_image(conn, count=800):
    """Generate mirror-image questions for numbers.

    The mirror image is modelled as the digit string reversed. Returns an
    empty list when ``get_qtid`` returns a falsy id.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Non-Verbal Reasoning", "Mirror and Water Image", "Mirror image of text/numbers")
    if not qtid:
        return questions
    for _ in range(count):
        num = random.randint(100, 9999)
        mirror = str(num)[::-1]
        # Keep drawing until there are three distinct distractors, none equal
        # to the correct answer. Unchecked draws could repeat each other or
        # hit the mirror value itself (e.g. 1011 + 90 = 1101).
        wrongs = []
        while len(wrongs) < 3:
            candidate = str(num + random.randint(1, 100))
            if candidate != mirror and candidate not in wrongs:
                wrongs.append(candidate)
        questions.append(make_question(qtid,
            f"What is the mirror image of the number {num} when a mirror is placed on the right side?",
            mirror, wrongs,
            # An arrow separates the number from its mirror; without it the
            # two digit strings ran together.
            f"Mirror reverses left-right: {num} → {mirror}", 1))
    return questions
def gen_dice(conn, count=800):
    """Generate "opposite face of a standard die" questions.

    Uses the standard-die rule that opposite faces sum to 7. Returns an empty
    list when ``get_qtid`` returns a falsy id.

    Args:
        conn: Database connection handed to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        List of the values returned by ``make_question``.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Non-Verbal Reasoning", "Dice and Cube", "Opposite face of dice")
    if not qtid:
        return questions
    for _ in range(count):
        # Standard dice: opposite faces sum to 7
        num = random.randint(1, 6)
        opp = 7 - num
        # Distractors: the first three faces that are neither the given face
        # nor its opposite.
        wrongs = [str(x) for x in range(1, 7) if x != num and x != opp][:3]
        questions.append(make_question(qtid,
            f"On a standard die, what number is opposite to {num}?",
            str(opp), wrongs,
            f"On a standard die, opposite faces sum to 7: {num} + {opp} = 7", 1))
    return questions
def gen_cube_painting(conn, count=600):
    """Build painted-cube counting questions.

    A cube of side n (2..6) is painted on every face and cut into n**3 unit
    cubes. With k = n - 2 the counts are: three painted faces -> 8 (corners),
    two -> 12*k (edges), one -> 6*k**2 (face centres), none -> k**3 (interior).
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Non-Verbal Reasoning", "Dice and Cube", "Painted cube counting")
    if not qtid:
        return questions
    for _ in range(count):
        n = random.randint(2, 6)
        inner = n - 2  # zero for n == 2, which makes every non-corner count 0
        painted_counts = {
            "three": 8,
            "two": inner * 12,
            "one": inner ** 2 * 6,
            "no": inner ** 3,
        }
        ask = random.choice(["three", "two", "one", "no"])
        ans = painted_counts[ask]
        question_text = (
            f"A cube of side {n} is painted on all faces and then cut into {n ** 3} unit cubes. "
            f"How many cubes have {ask} face(s) painted?"
        )
        questions.append(make_question(
            qtid, question_text,
            str(ans), nearby_wrong(ans),
            f"For {n}×{n}×{n} cube: {ask} faces painted = {ans}", 2))
    return questions
# ============ MATHEMATICAL REASONING ============
def _eval_symbol_expr(a, op1, b, op2, c):
    """Evaluate ``a op1 b op2 c`` with standard precedence (× and ÷ bind first).

    Returns the integer result, or None when a division would not be exact.
    """
    def apply(op, x, y):
        if op == '+':
            return x + y
        if op == '-':
            return x - y
        if op == '×':
            return x * y
        # '÷': only exact quotients are acceptable answers.
        return x // y if y != 0 and x % y == 0 else None

    high_precedence = ('×', '÷')
    if op2 in high_precedence and op1 not in high_precedence:
        # e.g. a + b × c  ->  a + (b × c)
        right = apply(op2, b, c)
        return None if right is None else apply(op1, a, right)
    # Equal precedence, or op1 binds tighter: plain left-to-right.
    left = apply(op1, a, b)
    return None if left is None else apply(op2, left, c)


def gen_math_operations(conn, count=1000):
    """Generate symbol-substitution arithmetic questions.

    Two symbols are mapped to two distinct operators and the candidate must
    evaluate ``a sym1 b sym2 c``. The answer follows normal operator
    precedence, so the equation printed in the explanation is actually true,
    and operands are redrawn until every division is exact.

    Args:
        conn: Open database connection, used only to resolve the question
            type id through ``get_qtid``.
        count: Number of questions to build.

    Returns:
        A list of question records produced by ``make_question``; empty when
        the question type cannot be resolved.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Mathematical Reasoning", "Mathematical Operations", "Symbol substitution")
    if not qtid:
        return questions
    operators = ['+', '-', '×', '÷']
    symbols = ['@', '#', '$', '&', '*', '!']
    for _ in range(count):
        op_pairs = random.sample(operators, 2)
        sym_pairs = random.sample(symbols, 2)
        result = None
        # Redraw operands until the expression has an exact integer value.
        # Every operator pair has valid operand triples, so this terminates.
        while result is None:
            a, b, c = random.randint(2, 20), random.randint(2, 20), random.randint(2, 20)
            result = _eval_symbol_expr(a, op_pairs[0], b, op_pairs[1], c)
        mapping_text = f"{sym_pairs[0]} means '{op_pairs[0]}' and {sym_pairs[1]} means '{op_pairs[1]}'"
        expr_text = f"{a} {sym_pairs[0]} {b} {sym_pairs[1]} {c}"
        questions.append(make_question(
            qtid,
            f"If {mapping_text}, find: {expr_text}",
            str(result), nearby_wrong(result),
            f"Replace symbols: {a} {op_pairs[0]} {b} {op_pairs[1]} {c} = {result}", 2))
    return questions
def gen_number_puzzles(conn, count=800):
    """Generate "find the missing number" questions based on equal row sums.

    Row 1 is three random integers in [1, 20]; row 2 shows two random
    integers in [1, 15] and hides the third, chosen so both rows have the
    same sum. Draws that would give a non-positive missing number are
    discarded and redrawn, so exactly ``count`` questions are returned.

    Args:
        conn: Open database connection, used only to resolve the question
            type id through ``get_qtid``.
        count: Number of questions to build.

    Returns:
        A list of question records produced by ``make_question``; empty when
        the question type cannot be resolved.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Mathematical Reasoning", "Number Puzzles", "Find missing number in grid")
    if not qtid:
        return questions
    while len(questions) < count:
        r1 = [random.randint(1, 20) for _ in range(3)]
        target = sum(r1)
        r2_a, r2_b = random.randint(1, 15), random.randint(1, 15)
        r2_c = target - r2_a - r2_b
        if r2_c <= 0:
            # The hidden cell must be a positive number; try another draw.
            continue
        questions.append(make_question(
            qtid,
            f"In a grid, row 1 is [{r1[0]}, {r1[1]}, {r1[2]}] (sum={target}). Row 2 is [{r2_a}, {r2_b}, ?]. Find the missing number if row sums are equal.",
            str(r2_c), nearby_wrong(r2_c),
            f"? = {target} - {r2_a} - {r2_b} = {r2_c}", 1))
    return questions
def gen_venn_diagram(conn, count=800):
    """Build two-set Venn questions asking for the "only tea" region.

    Set sizes are drawn from [20, 100] and the overlap from
    [5, min(size_a, size_b)]; the answer is size_a minus the overlap.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Mathematical Reasoning", "Venn Diagram", "Count elements in region")
    if not qtid:
        return questions
    for _ in range(count):
        tea = random.randint(20, 100)
        coffee = random.randint(20, 100)
        overlap = random.randint(5, min(tea, coffee))
        tea_only = tea - overlap
        prompt = (
            f"In a group, {tea} like tea, {coffee} like coffee, and {overlap} like both. "
            f"How many like only tea?"
        )
        questions.append(make_question(
            qtid, prompt,
            str(tea_only), nearby_wrong(tea_only),
            f"Only tea = {tea} - {overlap} = {tea_only}", 1))
    return questions
# ============ CRITICAL THINKING ============
def gen_statement_conclusion(conn, count=800):
    """Build statement/conclusion questions from a fixed pool of templates.

    Each template is (statement, valid conclusion, three invalid
    conclusions); one template is picked at random per question.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Critical Thinking", "Statement and Conclusion", "Which conclusion follows")
    if not qtid:
        return questions
    templates = [
        ("All students who study hard pass the exam.",
         "Some who pass studied hard",
         ["No one studies hard", "Everyone fails", "Studying is not needed"]),
        ("Regular exercise improves health.",
         "People who exercise are healthier",
         ["Exercise is harmful", "Health has no relation to exercise", "Only medicine improves health"]),
        ("Reading improves vocabulary.",
         "People who read more have better vocabulary",
         ["Reading is useless", "Vocabulary cannot be improved", "TV improves vocabulary more"]),
        ("Smoking causes cancer.",
         "Smokers are at higher risk of cancer",
         ["All smokers get cancer", "Cancer has no cause", "Smoking is healthy"]),
        ("Water pollution affects marine life.",
         "Marine life is harmed by water pollution",
         ["Marine life thrives in pollution", "Pollution has no effect", "Only air pollution matters"]),
    ]
    for _ in range(count):
        statement, conclusion, distractors = random.choice(templates)
        prompt = f"Statement: {statement}\nWhich conclusion logically follows?"
        questions.append(make_question(
            qtid, prompt, conclusion, distractors, "Direct logical inference", 2))
    return questions
def gen_letter_series(conn, count=1000):
    """Build "next letter" questions over arithmetic letter progressions.

    A start index (0..15) and a step (1..4) define four shown letters; the
    answer is the fifth. Draws whose fifth letter would fall past 'Z' are
    dropped, so fewer than ``count`` questions may be returned.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Letter Series", "Find next letters")
    if not qtid:
        return questions
    base = 65  # ord('A')
    for _ in range(count):
        start = random.randint(0, 15)
        skip = random.randint(1, 4)
        nxt_idx = start + 4 * skip
        # If the fifth term is inside A..Z, the four shown terms are as well.
        if nxt_idx >= 26:
            continue
        shown = [chr(base + start + step * skip) for step in range(4)]
        ans = chr(base + nxt_idx)
        wrongs = [chr(base + (nxt_idx + delta) % 26) for delta in (1, 2, -1)]
        questions.append(make_question(
            qtid,
            f"Find the next letter: {', '.join(shown)}, ?",
            ans, wrongs,
            f"Skip {skip}: next = {ans}", 1))
    return questions
def generate_all(conn):
    """Run every Reasoning generator, store the results, and return the total.

    Each generator is called with its target count, a per-generator tally is
    printed, and the combined list is written through
    ``insert_questions_batch`` in slices of 5000.
    """
    generators = [
        ("Number Analogy", gen_number_analogy, 700),
        ("Letter Analogy", gen_letter_analogy, 500),
        ("Classification", gen_classification, 2000),
        ("Number Series", gen_number_series, 2000),
        ("Letter Series", gen_letter_series, 1500),
        ("Coding-Decoding", gen_coding_decoding, 2500),
        ("Blood Relations", gen_blood_relations, 2000),
        ("Direction & Distance", gen_direction, 1500),
        ("Order & Ranking", gen_ranking, 1500),
        ("Syllogism", gen_syllogism, 1500),
        ("Mirror Image", gen_mirror_image, 1000),
        ("Dice", gen_dice, 1000),
        ("Cube Painting", gen_cube_painting, 800),
        ("Math Operations", gen_math_operations, 1500),
        ("Number Puzzles", gen_number_puzzles, 1200),
        ("Venn Diagram", gen_venn_diagram, 1200),
        ("Statement & Conclusion", gen_statement_conclusion, 1200),
    ]
    all_questions = []
    for label, builder, target in generators:
        produced = builder(conn, target)
        all_questions += produced
        print(f" {label}: {len(produced)} questions")
    total = len(all_questions)

    batch_size = 5000
    start = 0
    while start < total:
        insert_questions_batch(conn, all_questions[start:start + batch_size])
        start += batch_size
    print(f" TOTAL Reasoning: {total}")
    return total
# Script entry point: generate only the Reasoning questions.
if __name__ == '__main__':
    conn = get_db()
    try:
        print("Generating Reasoning questions...")
        generate_all(conn)
    finally:
        # Release the connection even when a generator raises.
        conn.close()

63
generators/run_all.py Normal file
View File

@ -0,0 +1,63 @@
#!/usr/bin/env python3
"""
Master script to generate all 100,000 SSC CGL questions.
Usage: python3 generators/run_all.py [--force]
"""
import sys
import os
import time
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from db.init import init_db, get_db, get_stats
def main():
    """Run the whole question-generation pipeline and print a summary.

    Steps: initialise the database (recreating it when ``--force`` is on the
    command line), then run the Quantitative Aptitude, Reasoning, English and
    General Awareness generators in that order against one shared connection.
    Generator modules are imported lazily, just before they are run.
    """
    force = '--force' in sys.argv
    start = time.time()
    print("=" * 60)
    print("SSCTopper - Question Generation Pipeline")
    print("=" * 60)

    # Step 1: Initialize DB
    print("\n[1/5] Initializing database...")
    init_db(force=force)
    conn = get_db()
    try:
        # Step 2: Quantitative Aptitude
        print("\n[2/5] Generating Quantitative Aptitude questions...")
        from generators.quant_generator import generate_all as gen_quant
        gen_quant(conn)

        # Step 3: Reasoning
        print("\n[3/5] Generating Reasoning questions...")
        from generators.reasoning_generator import generate_all as gen_reason
        gen_reason(conn)

        # Step 4: English
        print("\n[4/5] Generating English Language questions...")
        from generators.english_generator import generate_all as gen_eng
        gen_eng(conn)

        # Step 5: General Awareness
        print("\n[5/5] Generating General Awareness questions...")
        from generators.gk_generator import generate_all as gen_gk
        gen_gk(conn)

        # Final stats
        stats = get_stats(conn)
    finally:
        # Close the connection even if one of the generators fails.
        conn.close()

    elapsed = time.time() - start
    print("\n" + "=" * 60)
    print("GENERATION COMPLETE")
    print("=" * 60)
    for subject, count in stats.items():
        print(f" {subject}: {count:,}")
    print(f"\n Time taken: {elapsed:.1f} seconds")
    print("=" * 60)


if __name__ == '__main__':
    main()

492
server.py Normal file
View File

@ -0,0 +1,492 @@
#!/usr/bin/env python3
"""
SSCTopper Web Application - Zero-dependency Python web server.
Serves the SSC CGL question bank with syllabus browser and practice interface.
"""
import hashlib
import http.cookies
import http.server
import json
import os
import signal
import sqlite3
import sys
import urllib.error
import urllib.parse
import urllib.request
import uuid
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))
from db.init import DB_PATH, init_db, get_db
# TCP port the server listens on; override with the SSCTOPPER_PORT environment variable.
PORT = int(os.environ.get('SSCTOPPER_PORT', '8080'))
# Directory containing this file; static assets are served from ROOT/static.
ROOT = os.path.dirname(os.path.abspath(__file__))
# OAuth client ID that Google ID tokens must be issued for (checked against the
# token's "aud" claim); override with the GOOGLE_CLIENT_ID environment variable.
GOOGLE_CLIENT_ID = os.environ.get(
    'GOOGLE_CLIENT_ID',
    "273072123939-dd82h4o1rt3k7811sri6qgsig73b3916.apps.googleusercontent.com",
)
class SSCHandler(http.server.BaseHTTPRequestHandler):
    """HTTP request handler for SSCTopper.

    Serves the static front end from ``ROOT/static`` and a JSON API:

    * ``POST /api/auth/signup | login | google`` - account creation and login
    * ``POST /api/user/progress``                - record an answered question
    * ``GET  /api/syllabus | questions | stats | mock-test | user/profile``

    Every request opens its own SQLite connection through ``get_db()`` and
    closes it before returning. Malformed client input is answered with a
    4xx JSON error instead of an unhandled exception.
    """

    # session_id -> user_id. Class attribute, so it is shared by all handler
    # instances; it lives in memory only and is lost when the process exits.
    sessions = {}

    # Upper bound on rows returned by /api/questions and /api/mock-test, so a
    # huge or negative LIMIT cannot dump the whole question bank in one call.
    MAX_PAGE_SIZE = 1000

    # Fixed SQL fragments for the profile ``timeframe`` parameter. User input
    # only selects a key; it is never interpolated into SQL.
    TIME_FILTERS = {
        'daily': "AND ua.answered_at >= datetime('now', '-1 day')",
        'weekly': "AND ua.answered_at >= datetime('now', '-7 days')",
        'monthly': "AND ua.answered_at >= datetime('now', 'start of month')",
    }

    # Shared SELECT pieces for the question listing endpoints.
    QUESTION_COLUMNS = (
        "q.id, q.question_text, q.option_a, q.option_b, q.option_c, q.option_d, "
        "q.correct_option, q.explanation, q.difficulty, "
        "qt.name, t.name, st.name, s.name"
    )
    QUESTION_JOINS = (
        " FROM questions q"
        " JOIN question_types qt ON q.question_type_id = qt.id"
        " JOIN topics t ON qt.topic_id = t.id"
        " JOIN subtopics st ON t.subtopic_id = st.id"
        " JOIN subjects s ON st.subject_id = s.id"
    )

    # ------------------------------------------------------------ helpers

    @staticmethod
    def _int_param(params, name, default=None):
        """Return query parameter ``name`` as an int, or ``default`` if absent.

        Raises:
            ValueError: the parameter is present but not an integer.
        """
        raw = params.get(name, [None])[0]
        if raw is None or raw == '':
            return default
        return int(raw)

    @staticmethod
    def _hash_password(password):
        """Return the hex digest stored in ``users.password_hash``.

        NOTE(review): this is unsalted SHA-256, kept so existing rows keep
        working; a salted KDF (e.g. ``hashlib.pbkdf2_hmac``) would be safer
        but needs a stored-hash migration.
        """
        return hashlib.sha256(password.encode()).hexdigest()

    @staticmethod
    def _question_to_dict(row):
        """Convert one row selected with QUESTION_COLUMNS to its JSON shape."""
        return {
            'id': row[0], 'question_text': row[1],
            'options': {'A': row[2], 'B': row[3], 'C': row[4], 'D': row[5]},
            'correct_option': row[6], 'explanation': row[7], 'difficulty': row[8],
            'qtype': row[9], 'topic': row[10], 'subtopic': row[11], 'subject': row[12],
        }

    def _start_session(self, user_id, username):
        """Register a new session and send the login success response."""
        session_id = str(uuid.uuid4())
        self.sessions[session_id] = user_id
        cookie = http.cookies.SimpleCookie()
        cookie['session_id'] = session_id
        cookie['session_id']['path'] = '/'
        cookie['session_id']['httponly'] = True
        self.send_response(200)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Set-Cookie', cookie['session_id'].OutputString())
        self.end_headers()
        self.wfile.write(json.dumps({'success': True, 'username': username}).encode())

    def get_user_id(self):
        """Return the user id bound to the request's session cookie, or None."""
        cookie_header = self.headers.get('Cookie')
        if not cookie_header:
            return None
        try:
            cookie = http.cookies.SimpleCookie(cookie_header)
        except http.cookies.CookieError:
            # A malformed Cookie header is treated as "not logged in".
            return None
        session_id = cookie.get('session_id')
        if session_id is None:
            return None
        return self.sessions.get(session_id.value)

    def json_response(self, data, status=200):
        """Send ``data`` serialised as UTF-8 JSON with the given HTTP status."""
        self.send_response(status)
        self.send_header('Content-Type', 'application/json; charset=utf-8')
        self.send_header('Access-Control-Allow-Origin', '*')
        self.end_headers()
        self.wfile.write(json.dumps(data, ensure_ascii=False).encode('utf-8'))

    def log_message(self, format, *args):
        pass  # Suppress access logs

    # --------------------------------------------------------------- POST

    def do_POST(self):
        """Decode the JSON body and dispatch to the matching POST handler.

        A missing, undecodable or non-object body is treated as ``{}`` so
        the handler's own "missing fields" validation answers with a 400.
        """
        path = urllib.parse.urlparse(self.path).path
        try:
            content_length = int(self.headers.get('Content-Length', 0))
        except (TypeError, ValueError):
            content_length = 0
        # read(-1) would block until the client closes the socket.
        raw = self.rfile.read(content_length) if content_length > 0 else b''
        try:
            data = json.loads(raw.decode('utf-8'))
        except (UnicodeDecodeError, ValueError):
            data = {}
        if not isinstance(data, dict):
            # Handlers call data.get(); a JSON list/number/string would crash them.
            data = {}

        routes = {
            '/api/auth/signup': self.handle_signup,
            '/api/auth/login': self.handle_login,
            '/api/auth/google': self.handle_google_login,
            '/api/user/progress': self.handle_progress,
        }
        handler = routes.get(path)
        if handler is None:
            self.send_error(404)
        else:
            handler(data)

    def handle_google_login(self, data):
        """Log in (creating the account if needed) from a Google ID token.

        The token is verified through Google's ``tokeninfo`` endpoint and
        its ``aud`` claim must equal ``GOOGLE_CLIENT_ID``.
        """
        id_token = data.get('id_token')
        if not id_token or not isinstance(id_token, str):
            return self.json_response({'error': 'Missing ID token'}, 400)

        # Verify token with Google API (zero-dependency way). The token is
        # URL-encoded so it cannot inject extra query parameters.
        url = "https://oauth2.googleapis.com/tokeninfo?" + urllib.parse.urlencode({'id_token': id_token})
        try:
            with urllib.request.urlopen(url, timeout=10) as response:
                google_data = json.loads(response.read().decode())
        except urllib.error.HTTPError:
            # Google answers a bad or expired token with a 4xx status.
            return self.json_response({'error': 'Invalid Google ID token'}, 401)
        except (urllib.error.URLError, OSError, ValueError) as e:
            print(f"Google Auth Error: {e}")
            return self.json_response({'error': 'Failed to verify Google account'}, 500)

        if not isinstance(google_data, dict):
            return self.json_response({'error': 'Failed to verify Google account'}, 500)
        # Check for error in Google response
        if 'error_description' in google_data:
            return self.json_response({'error': google_data['error_description']}, 401)
        # Security check: the audience (aud) must be our Client ID
        if google_data.get('aud') != GOOGLE_CLIENT_ID:
            return self.json_response({'error': 'Token was not issued for this application'}, 401)
        email = google_data.get('email')
        if not email:
            return self.json_response({'error': 'Email not provided by Google'}, 400)

        insert_sql = "INSERT INTO users (username, email, password_hash) VALUES (?, ?, ?)"
        conn = get_db()
        try:
            user = conn.execute("SELECT id, username FROM users WHERE email=?", (email,)).fetchone()
            if user:
                user_id, username_final = user[0], user[1]
            else:
                # New account: store the hash of a random, never-disclosed password.
                pass_hash = self._hash_password(str(uuid.uuid4()))
                # Prefer the email handle as username; fall back to the full
                # email when the handle is already taken.
                username_final = email.split('@')[0]
                try:
                    cursor = conn.execute(insert_sql, (username_final, email, pass_hash))
                except sqlite3.IntegrityError:
                    username_final = email
                    cursor = conn.execute(insert_sql, (username_final, email, pass_hash))
                user_id = cursor.lastrowid
                conn.commit()
        except sqlite3.Error as e:
            print(f"Google Auth Error: {e}")
            return self.json_response({'error': 'Failed to verify Google account'}, 500)
        finally:
            conn.close()

        self._start_session(user_id, username_final)

    def handle_signup(self, data):
        """Create a username/email/password account."""
        username = data.get('username')
        email = data.get('email')
        password = data.get('password')
        if not all(isinstance(value, str) and value for value in (username, email, password)):
            return self.json_response({'error': 'Missing fields'}, 400)
        password_hash = self._hash_password(password)
        conn = get_db()
        try:
            conn.execute("INSERT INTO users (username, email, password_hash) VALUES (?, ?, ?)",
                         (username, email, password_hash))
            conn.commit()
            self.json_response({'success': True})
        except sqlite3.IntegrityError:
            self.json_response({'error': 'Username or email already exists'}, 400)
        finally:
            conn.close()

    def handle_login(self, data):
        """Check username/password and start a session on success."""
        username = data.get('username')
        password = data.get('password')
        if not all(isinstance(value, str) and value for value in (username, password)):
            return self.json_response({'error': 'Missing fields'}, 400)
        password_hash = self._hash_password(password)
        conn = get_db()
        try:
            user = conn.execute("SELECT id FROM users WHERE username=? AND password_hash=?",
                                (username, password_hash)).fetchone()
        finally:
            conn.close()
        if user:
            self._start_session(user[0], username)
        else:
            self.json_response({'error': 'Invalid credentials'}, 401)

    def handle_progress(self, data):
        """Record one answered question for the logged-in user."""
        user_id = self.get_user_id()
        if not user_id:
            return self.json_response({'error': 'Unauthorized'}, 401)
        question_id = data.get('question_id')
        is_correct = data.get('is_correct')
        time_taken = data.get('time_taken', 0.0)
        if question_id is None or is_correct is None:
            return self.json_response({'error': 'Missing fields'}, 400)
        try:
            row = (user_id, int(question_id), bool(is_correct), float(time_taken))
        except (TypeError, ValueError):
            return self.json_response({'error': 'Invalid field types'}, 400)
        conn = get_db()
        try:
            conn.execute(
                "INSERT INTO user_answers (user_id, question_id, is_correct, time_taken) VALUES (?, ?, ?, ?)",
                row)
            conn.commit()
        except sqlite3.IntegrityError:
            # Raised if the table has a constraint the row violates (for
            # example a foreign key on question_id).
            return self.json_response({'error': 'Invalid question_id'}, 400)
        finally:
            conn.close()
        self.json_response({'success': True})

    # ---------------------------------------------------------------- GET

    def do_GET(self):
        """Dispatch GET requests to static files or API endpoints."""
        parsed = urllib.parse.urlparse(self.path)
        path = parsed.path
        params = urllib.parse.parse_qs(parsed.query)
        if path in ('/', '/index.html'):
            self.serve_html()
        elif path == '/robots.txt':
            self.serve_static('robots.txt', 'text/plain')
        elif path == '/sitemap.xml':
            self.serve_static('sitemap.xml', 'application/xml')
        elif path == '/api/syllabus':
            self.api_syllabus()
        elif path == '/api/questions':
            self.api_questions(params)
        elif path == '/api/stats':
            self.api_stats()
        elif path == '/api/mock-test':
            self.api_mock_test(params)
        elif path == '/api/user/profile':
            self.api_user_profile()
        else:
            self.send_error(404)

    def api_user_profile(self):
        """Return the logged-in user's profile, accuracy and per-topic progress.

        The optional ``timeframe`` query parameter (``daily``, ``weekly``,
        ``monthly``; anything else means overall) restricts which answers
        are counted.
        """
        user_id = self.get_user_id()
        if not user_id:
            return self.json_response({'error': 'Unauthorized'}, 401)

        params = urllib.parse.parse_qs(urllib.parse.urlparse(self.path).query)
        timeframe = params.get('timeframe', ['overall'])[0]
        time_filter = self.TIME_FILTERS.get(timeframe, "")

        conn = get_db()
        try:
            user = conn.execute("SELECT username, email, created_at FROM users WHERE id=?",
                                (user_id,)).fetchone()
            if user is None:
                # The session points at a user row that no longer exists.
                return self.json_response({'error': 'Unauthorized'}, 401)

            # Overall stats for the timeframe
            stats_row = conn.execute(
                "SELECT COUNT(*), SUM(CASE WHEN is_correct=1 THEN 1 ELSE 0 END), AVG(time_taken) "
                f"FROM user_answers ua WHERE user_id=? {time_filter}",
                (user_id,)).fetchone()

            # Topic-wise progress: only correctly answered questions join in,
            # so "answered" counts distinct questions the user got right.
            rows = conn.execute(f"""
                SELECT t.id, t.name, st.name as subtopic, s.name as subject,
                       COUNT(DISTINCT q.id) as total_questions,
                       COUNT(DISTINCT ua.question_id) as answered_questions,
                       AVG(ua.time_taken) as avg_time
                FROM topics t
                JOIN subtopics st ON t.subtopic_id = st.id
                JOIN subjects s ON st.subject_id = s.id
                LEFT JOIN question_types qt ON qt.topic_id = t.id
                LEFT JOIN questions q ON q.question_type_id = qt.id
                LEFT JOIN user_answers ua ON ua.question_id = q.id AND ua.user_id = ? AND ua.is_correct = 1 {time_filter}
                GROUP BY t.id
            """, (user_id,)).fetchall()
        finally:
            conn.close()

        total_attempts = stats_row[0] or 0
        correct_attempts = stats_row[1] or 0
        topic_progress = [
            {
                'topic_id': r[0], 'topic': r[1], 'subtopic': r[2], 'subject': r[3],
                'total': r[4], 'answered': r[5],
                'percent': round(r[5] * 100 / r[4], 1) if r[4] > 0 else 0,
                'avg_time': round(r[6] or 0, 1),
            }
            for r in rows
        ]
        self.json_response({
            'username': user[0],
            'email': user[1],
            'joined': user[2],
            'stats': {
                'total_attempts': total_attempts,
                'correct_attempts': correct_attempts,
                'accuracy': round(correct_attempts * 100 / total_attempts, 1) if total_attempts > 0 else 0,
                'avg_time': round(stats_row[2] or 0, 1),
            },
            'topic_progress': topic_progress,
        })

    def serve_html(self):
        """Serve the single-page front end (``static/index.html``)."""
        # Sent as raw bytes: no locale-dependent decode, and a missing file
        # produces a 404 instead of an unhandled exception.
        self.serve_static('index.html', 'text/html; charset=utf-8')

    def serve_static(self, filename, content_type):
        """Send ``static/<filename>`` with the given content type, or a 404."""
        file_path = os.path.join(ROOT, 'static', filename)
        try:
            with open(file_path, 'rb') as f:
                content = f.read()
        except FileNotFoundError:
            self.send_error(404)
            return
        self.send_response(200)
        self.send_header('Content-Type', content_type)
        self.end_headers()
        self.wfile.write(content)

    def api_syllabus(self):
        """Return the subject > subtopic > topic tree with question counts."""
        conn = get_db()
        try:
            subjects = []
            for s in conn.execute("SELECT id, name, tier, description, target_questions FROM subjects").fetchall():
                subject = {'id': s[0], 'name': s[1], 'tier': s[2], 'description': s[3],
                           'target': s[4], 'subtopics': []}
                for st in conn.execute("SELECT id, name, description FROM subtopics WHERE subject_id=?",
                                       (s[0],)).fetchall():
                    subtopic = {'id': st[0], 'name': st[1], 'description': st[2], 'topics': []}
                    for t in conn.execute("SELECT id, name, description FROM topics WHERE subtopic_id=?",
                                          (st[0],)).fetchall():
                        q_count = conn.execute(
                            "SELECT COUNT(*) FROM questions q JOIN question_types qt "
                            "ON q.question_type_id=qt.id WHERE qt.topic_id=?",
                            (t[0],)).fetchone()[0]
                        qtypes = [
                            {'id': qt[0], 'name': qt[1]}
                            for qt in conn.execute("SELECT id, name FROM question_types WHERE topic_id=?",
                                                   (t[0],)).fetchall()
                        ]
                        subtopic['topics'].append({
                            'id': t[0], 'name': t[1], 'description': t[2],
                            'question_count': q_count, 'question_types': qtypes,
                        })
                    subject['subtopics'].append(subtopic)
                # Count total questions for subject
                subject['question_count'] = conn.execute("""
                    SELECT COUNT(*) FROM questions q
                    JOIN question_types qt ON q.question_type_id = qt.id
                    JOIN topics t ON qt.topic_id = t.id
                    JOIN subtopics st ON t.subtopic_id = st.id
                    WHERE st.subject_id = ?
                """, (s[0],)).fetchone()[0]
                subjects.append(subject)
        finally:
            conn.close()
        self.json_response(subjects)

    def api_questions(self, params):
        """Return a random page of questions matching the optional filters.

        Query parameters (all integers): ``topic_id``, ``qtype_id``,
        ``subject_id``, ``difficulty``, ``limit`` (default 20, clamped to
        0..MAX_PAGE_SIZE) and ``offset`` (default 0, never negative).
        Non-integer values are answered with a 400.
        """
        try:
            filters = [
                ("t.id", self._int_param(params, 'topic_id')),
                ("qt.id", self._int_param(params, 'qtype_id')),
                ("s.id", self._int_param(params, 'subject_id')),
                ("q.difficulty", self._int_param(params, 'difficulty')),
            ]
            limit = self._int_param(params, 'limit', 20)
            offset = self._int_param(params, 'offset', 0)
        except ValueError:
            return self.json_response({'error': 'Query parameters must be integers'}, 400)
        # SQLite treats a negative LIMIT as "no limit", so clamp both values.
        limit = min(max(limit, 0), self.MAX_PAGE_SIZE)
        offset = max(offset, 0)

        # Column names come from the fixed list above; values are bound parameters.
        active = [(column, value) for column, value in filters if value is not None]
        where = " WHERE " + " AND ".join(f"{column} = ?" for column, _ in active) if active else ""
        args = [value for _, value in active]
        source = self.QUESTION_JOINS + where

        conn = get_db()
        try:
            # The count and page queries share one FROM/WHERE clause.
            total = conn.execute("SELECT COUNT(*)" + source, args).fetchone()[0]
            rows = conn.execute(
                "SELECT " + self.QUESTION_COLUMNS + source + " ORDER BY RANDOM() LIMIT ? OFFSET ?",
                args + [limit, offset]).fetchall()
        finally:
            conn.close()
        self.json_response({'total': total, 'questions': [self._question_to_dict(r) for r in rows]})

    def api_stats(self):
        """Return question counts per subject plus overall totals."""
        conn = get_db()
        try:
            rows = conn.execute("""
                SELECT s.name, COUNT(q.id) FROM subjects s
                LEFT JOIN subtopics st ON st.subject_id = s.id
                LEFT JOIN topics t ON t.subtopic_id = st.id
                LEFT JOIN question_types qt ON qt.topic_id = t.id
                LEFT JOIN questions q ON q.question_type_id = qt.id
                GROUP BY s.id
            """).fetchall()
            stats = {r[0]: r[1] for r in rows}
            stats['total'] = conn.execute("SELECT COUNT(*) FROM questions").fetchone()[0]
            stats['topic_count'] = conn.execute("SELECT COUNT(*) FROM topics").fetchone()[0]
            stats['subject_count'] = conn.execute("SELECT COUNT(*) FROM subjects").fetchone()[0]
        finally:
            conn.close()
        self.json_response(stats)

    def api_mock_test(self, params):
        """Return ``num`` random questions, optionally limited to one subject.

        Query parameters (integers): ``subject_id`` and ``num`` (default 25,
        clamped to 0..MAX_PAGE_SIZE). Non-integer values get a 400.
        """
        try:
            subject_id = self._int_param(params, 'subject_id')
            num = self._int_param(params, 'num', 25)
        except ValueError:
            return self.json_response({'error': 'Query parameters must be integers'}, 400)
        num = min(max(num, 0), self.MAX_PAGE_SIZE)

        query = "SELECT " + self.QUESTION_COLUMNS + self.QUESTION_JOINS
        args = []
        if subject_id is not None:
            query += " WHERE s.id = ?"
            args.append(subject_id)
        query += " ORDER BY RANDOM() LIMIT ?"
        args.append(num)

        conn = get_db()
        try:
            rows = conn.execute(query, args).fetchall()
        finally:
            conn.close()
        questions = [self._question_to_dict(r) for r in rows]
        self.json_response({'questions': questions, 'total': len(questions)})
def main():
    """Prepare the database if needed, then serve HTTP until interrupted.

    * Creates the database when the file at ``DB_PATH`` does not exist.
    * Runs the generation pipeline when the ``questions`` table is empty.
      Failures here are reported but do not stop the server from starting.
    * Serves on all interfaces at ``PORT`` until Ctrl+C.
    """
    # Initialize DB if needed
    if not os.path.exists(DB_PATH):
        print("Database not found. Running generation pipeline...")
        init_db()

    # Check if we have questions, if not, generate some
    count = None
    conn = get_db()
    try:
        count = conn.execute("SELECT COUNT(*) FROM questions").fetchone()[0]
    except Exception as e:
        print(f"Error checking question count: {e}")
    finally:
        conn.close()
    if count == 0:
        print("Database is empty. Generating question bank...")
        try:
            from generators.run_all import main as generate
            generate()
        except Exception as e:
            # Best effort: keep serving even when generation fails.
            print(f"Error generating question bank: {e}")

    server = http.server.HTTPServer(('0.0.0.0', PORT), SSCHandler)
    print(f"\n🚀 SSCTopper running at http://localhost:{PORT}")
    print(f" Database: {DB_PATH}")
    print(f" Press Ctrl+C to stop\n")
    # No custom SIGINT handler: server.shutdown() blocks until serve_forever()
    # returns, so calling it from a handler on this same thread would
    # deadlock. Python's default handler raises KeyboardInterrupt instead.
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        # Release the listening socket.
        server.server_close()


if __name__ == '__main__':
    main()

1036
static/index.html Normal file

File diff suppressed because it is too large Load Diff

4
static/robots.txt Normal file
View File

@ -0,0 +1,4 @@
User-agent: *
Allow: /
Sitemap: https://ssctopper.com/sitemap.xml

9
static/sitemap.xml Normal file
View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.1">
<url>
<loc>https://ssctopper.com/</loc>
<lastmod>2026-03-29</lastmod>
<changefreq>daily</changefreq>
<priority>1.0</priority>
</url>
</urlset>