ssctopper/generators/reasoning_generator.py

511 lines
23 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
General Intelligence & Reasoning Question Generator for SSC CGL.
Generates ~25,000 template-based reasoning questions.
"""
import random
import string
import sys
import os
# Put the parent of this file's directory at the front of sys.path; presumably
# this lets the `generators` package import below resolve when the file is run
# directly as a script.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from generators.base import make_question, get_qtid, nearby_wrong, get_db, insert_questions_batch
# Subject label passed as the second argument of every get_qtid() lookup in this module.
SUBJECT = "General Intelligence and Reasoning"
# ============ VERBAL REASONING ============
def gen_number_analogy(conn, count=700):
    """Generate "a : f(a) :: b : ?" number-analogy questions.

    Each question shows a number and its image under one hidden rule
    (square, cube, double or add 5) and asks for the image of a second,
    different number under the same rule.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of records built by ``make_question``; empty when
        ``get_qtid`` returns a falsy id for this topic.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Analogy", "Number analogy")
    if not qtid:
        return questions
    # The rules do not depend on the drawn numbers, so build them once.
    rules = [
        (lambda x: x * x, "square"),
        (lambda x: x * x * x, "cube"),
        (lambda x: x * 2, "double"),
        (lambda x: x + 5, "add 5"),
    ]
    for _ in range(count):
        while True:
            a = random.randint(2, 20)
            func, rule = random.choice(rules)
            b = random.randint(2, 20)
            if b == a:
                continue
            result_a, result_b = func(a), func(b)
            # Reject clues that a different rule explains equally well but
            # that lead to a different answer (e.g. 2 : 4 is both "square"
            # and "double"), otherwise the question has two valid answers.
            if any(other(a) == result_a and other(b) != result_b
                   for other, _name in rules):
                continue
            break
        questions.append(make_question(
            qtid,
            f"{a} : {result_a} :: {b} : ?",
            str(result_b), nearby_wrong(result_b),
            f"Rule: {rule}. {a} → {result_a}, {b} → {result_b}", 1))
    return questions
def gen_letter_analogy(conn, count=500):
    """Generate letter-pair analogy questions based on a fixed alphabet shift.

    A pair such as "AC" (shift 2) is shown together with the first letter
    of a second pair; the answer is that letter followed by the letter
    ``shift`` places later.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of records built by ``make_question``; empty when
        ``get_qtid`` returns a falsy id for this topic.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Analogy", "Letter analogy")
    if not qtid:
        return questions
    for _ in range(count):
        shift = random.randint(1, 5)
        # Index 20 plus the largest shift (5) is 25, i.e. still 'Z'.
        a = random.randint(0, 20)
        pair1 = chr(65 + a) + chr(65 + a + shift)
        b = random.randint(0, 20)
        while b == a:
            b = random.randint(0, 20)
        pair2_q = chr(65 + b)
        pair2_a = chr(65 + b + shift)
        # Distractors are the letters next to the correct second letter.
        wrongs = [chr(65 + (b + shift + i) % 26) for i in (1, 2, -1)]
        questions.append(make_question(
            qtid,
            f"{pair1} : {pair2_q}?",
            pair2_q + pair2_a, [pair2_q + w for w in wrongs],
            f"Shift by {shift}: {pair1} → {pair2_q}{pair2_a}", 1))
    return questions
def _has_parity_outlier(group, odd_one):
    """Return True when a group member is the only odd or only even option."""
    odd_count = sum(v % 2 for v in group + [odd_one])
    if odd_count == 1:
        # Exactly one odd value: a problem unless it is the intended answer.
        return odd_one % 2 == 0
    if odd_count == 3:
        # Exactly one even value: a problem unless it is the intended answer.
        return odd_one % 2 == 1
    return False


def gen_classification(conn, count=1500):
    """Generate "find the odd one out" questions for numbers and words.

    Half of ``count`` goes to number classification (even / odd / prime /
    square groups) and half to word classification (fixed categories).
    Either half is skipped when ``get_qtid`` returns a falsy id for it.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Total number of questions requested across both halves.

    Returns:
        A list of records built by ``make_question``.
    """
    questions = []
    # Number classification (odd one out)
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Classification", "Number classification")
    if qtid:
        primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]
        composites = [4, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22]
        squares = [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
        non_squares = [2, 3, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15]
        for _ in range(count // 2):
            t = random.choice(["even", "odd", "prime", "square"])
            while True:
                # random.sample keeps the three group members distinct, so
                # the wrong-answer options never repeat.
                if t == "even":
                    group = random.sample(range(2, 101, 2), 3)
                    odd_one = random.randint(1, 50) * 2 + 1
                elif t == "odd":
                    group = random.sample(range(1, 100, 2), 3)
                    odd_one = random.randint(1, 50) * 2
                elif t == "prime":
                    group = random.sample(primes, 3)
                    odd_one = random.choice(composites)
                else:
                    group = random.sample(squares, 3)
                    odd_one = random.choice(non_squares)
                # Redraw when some group member would also be a defensible
                # answer by parity alone (e.g. 2, 3, 5, 9 -> "2 is even").
                if not _has_parity_outlier(group, odd_one):
                    break
            all_opts = group + [odd_one]
            random.shuffle(all_opts)
            questions.append(make_question(
                qtid,
                f"Find the odd one out: {', '.join(map(str, all_opts))}",
                str(odd_one), [str(x) for x in group],
                f"{odd_one} is not {t}", 1))
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Classification", "Word classification")
    if qtid:
        word_groups = [
            (["Apple", "Mango", "Banana", "Orange"], "Carrot", "Fruits"),
            (["Dog", "Cat", "Lion", "Tiger"], "Eagle", "Mammals"),
            (["Red", "Blue", "Green", "Yellow"], "Square", "Colors"),
            (["Delhi", "Mumbai", "Chennai", "Kolkata"], "India", "Cities"),
            (["Pen", "Pencil", "Marker", "Crayon"], "Book", "Writing tools"),
            (["Piano", "Guitar", "Violin", "Flute"], "Painting", "Instruments"),
            (["January", "March", "May", "July"], "Monday", "Months"),
            (["Mercury", "Venus", "Mars", "Jupiter"], "Moon", "Planets"),
            (["Nile", "Amazon", "Ganges", "Thames"], "Sahara", "Rivers"),
            (["Football", "Cricket", "Tennis", "Hockey"], "Chess", "Outdoor sports"),
        ]
        for _ in range(count // 2):
            group, odd, reason = random.choice(word_groups)
            # Sample from the whole category so that its fourth member can
            # appear too (slicing to the first three never showed it).
            display = random.sample(group, 3) + [odd]
            random.shuffle(display)
            questions.append(make_question(
                qtid,
                f"Find the odd one out: {', '.join(display)}",
                odd, [x for x in display if x != odd][:3],
                f"{odd} is not in the category: {reason}", 1))
    return questions
def _build_number_series(series_type):
    """Return (first five terms, sixth term) for the named series pattern."""
    if series_type == "add":
        # Arithmetic progression.
        start = random.randint(1, 50)
        d = random.randint(2, 15)
        return [start + i * d for i in range(5)], start + 5 * d
    if series_type == "multiply":
        # Geometric progression with ratio 2 or 3.
        start = random.randint(1, 5)
        r = random.choice([2, 3])
        return [start * (r ** i) for i in range(5)], start * (r ** 5)
    if series_type == "square":
        # Consecutive perfect squares.
        start = random.randint(1, 8)
        return [(start + i) ** 2 for i in range(5)], (start + 5) ** 2
    if series_type == "alternate":
        # Steps alternate +a, +b, +a, +b; the sixth term is one more +a.
        a, b = random.randint(1, 10), random.randint(1, 10)
        series = [random.randint(1, 20)]
        for i in range(1, 5):
            series.append(series[-1] + (a if i % 2 == 1 else b))
        return series, series[-1] + a
    # "diff": the gap between consecutive terms grows by one each step.
    start = random.randint(1, 10)
    series = [start]
    d = random.randint(1, 5)
    for i in range(4):
        series.append(series[-1] + d + i)
    return series, series[-1] + d + 4


def gen_number_series(conn, count=1500):
    """Generate "find the next number" questions over five series patterns.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of records built by ``make_question``; empty when
        ``get_qtid`` returns a falsy id for this topic.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Number Series", "Find next number")
    if not qtid:
        return questions
    for _ in range(count):
        series_type = random.choice(["add", "multiply", "square", "alternate", "diff"])
        series, ans = _build_number_series(series_type)
        series_str = ", ".join(map(str, series))
        questions.append(make_question(
            qtid,
            f"Find the next number in the series: {series_str}, ?",
            str(ans), nearby_wrong(ans),
            f"Pattern: {series_type}. Next = {ans}", random.choice([1, 2])))
    return questions
def _shift_word(word, shift):
    """Return ``word`` (uppercase A-Z) with every letter moved ``shift`` places, wrapping at Z."""
    return "".join(chr((ord(c) - 65 + shift) % 26 + 65) for c in word)


def gen_coding_decoding(conn, count=2000):
    """Generate letter-shift and letter-to-digit coding questions.

    Half of ``count`` goes to letter coding (a Caesar-style shift shown on
    one word and applied to another) and half to number coding (each letter
    of a clue word stands for a digit). Either half is skipped when
    ``get_qtid`` returns a falsy id for it.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Total number of questions requested across both halves.

    Returns:
        A list of records built by ``make_question``.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Coding-Decoding", "Letter coding")
    if qtid:
        clue_words = ["COME", "GONE", "HELP", "LOVE", "MIND", "PLAY", "ROSE", "SING", "TALK", "WIND",
                      "BACK", "DEEP", "FAST", "GIRL", "HOME", "JUST", "KING", "LAMP", "NAME", "OPEN"]
        target_words = ["BALL", "CAKE", "DARK", "EASY", "FISH", "GOOD", "HAND", "IDOL", "JOKE", "KEEP"]
        for _ in range(count // 2):
            shift = random.randint(1, 5)
            word = random.choice(clue_words)
            coded = _shift_word(word, shift)
            word2 = random.choice(target_words)
            coded2 = _shift_word(word2, shift)
            # Distractors use neighbouring shifts; with shift == 1 the
            # "shift - 1" option is the uncoded word itself.
            wrongs = [_shift_word(word2, s) for s in (shift + 1, shift - 1, shift + 2)]
            questions.append(make_question(
                qtid,
                f"If {word} is coded as {coded}, then {word2} is coded as?",
                coded2, wrongs,
                f"Each letter shifted by +{shift}", 2))
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Coding-Decoding", "Number coding")
    if qtid:
        # Every word here has three distinct letters; the distractor loop
        # below relies on that to find three different rearrangements.
        code_words = ["CAT", "DOG", "SUN", "PEN", "CUP", "BOX", "HAT", "MAP", "JAR", "FAN"]
        for _ in range(count // 2):
            word = random.choice(code_words)
            # Distinct digits keep the letter -> digit mapping unambiguous.
            digits = random.sample(range(1, 10), len(word))
            mapping = {c: str(v) for c, v in zip(word, digits)}
            code_str = "".join(mapping[c] for c in word)
            # The target uses only letters present in the clue word, so the
            # answer can be derived from the clue. (Choosing an unrelated
            # word would need digits the question never reveals.)
            word2 = word
            while word2 == word:
                word2 = "".join(random.sample(list(word), len(word)))
            code2 = "".join(mapping[c] for c in word2)
            # Distractors are other orderings of the same digits.
            wrongs = []
            while len(wrongs) < 3:
                cand = "".join(random.sample(list(code2), len(code2)))
                if cand != code2 and cand not in wrongs:
                    wrongs.append(cand)
            questions.append(make_question(
                qtid,
                f"If {word} = {code_str}, then {word2} = ?",
                code2, wrongs,
                f"Letter-to-number mapping from {word}={code_str}", 2))
    return questions
# ============ LOGICAL REASONING ============
def gen_blood_relations(conn, count=1500):
    """Build blood-relation questions from fixed templates.

    Each template is written in terms of persons A, B and C; those
    placeholders are swapped for three distinct randomly chosen
    single-letter names before the question is emitted.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of records built by ``make_question``; empty when
        ``get_qtid`` returns a falsy id for this topic.
    """
    qtid = get_qtid(conn, SUBJECT, "Logical Reasoning", "Blood Relations", "Direct relation")
    if not qtid:
        return []
    # (question template, correct relation, distractors)
    templates = [
        ("A is the father of B. B is the sister of C. What is A to C?", "Father", ["Uncle", "Brother", "Grandfather"]),
        ("A is the mother of B. B is the brother of C. What is A to C?", "Mother", ["Aunt", "Sister", "Grandmother"]),
        ("A is the brother of B. B is the son of C. What is A to C?", "Son", ["Nephew", "Brother", "Father"]),
        ("A is the sister of B. B is the daughter of C. What is A to C?", "Daughter", ["Niece", "Sister", "Mother"]),
        ("A is the husband of B. B is the mother of C. What is A to C?", "Father", ["Uncle", "Brother", "Grandfather"]),
        ("A is the wife of B. B is the father of C. What is A to C?", "Mother", ["Aunt", "Sister", "Grandmother"]),
        ("A's father is B's son. What is B to A?", "Grandfather", ["Father", "Uncle", "Brother"]),
        ("A's mother is B's daughter. What is B to A?", "Grandmother", ["Mother", "Aunt", "Sister"]),
        ("A is B's brother's wife. What is A to B?", "Sister-in-law", ["Sister", "Cousin", "Aunt"]),
        ("A is B's father's brother. What is A to B?", "Uncle", ["Father", "Cousin", "Grandfather"]),
    ]
    name_pool = list("PQRSTMNXYZ")
    built = []
    for _ in range(count):
        pattern, relation, distractors = random.choice(templates)
        first, second, third = random.sample(name_pool, 3)
        # None of the pool names is A, B or C, so a one-pass character
        # translation gives the same text as substituting them in turn.
        renamed = pattern.translate(str.maketrans({"A": first, "B": second, "C": third}))
        built.append(make_question(
            qtid, renamed, relation, distractors,
            f"Following family relationships, the answer is {relation}", 2))
    return built
def gen_direction(conn, count=1200):
    """Build "which way is the person facing" questions.

    A person starts facing a random compass direction and makes one to
    three quarter turns to the left or right; the answer is the final
    heading and the distractors are the other three directions.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of records built by ``make_question``; empty when
        ``get_qtid`` returns a falsy id for this topic.
    """
    qtid = get_qtid(conn, SUBJECT, "Logical Reasoning", "Direction and Distance", "Find final direction")
    if not qtid:
        return []
    directions = ["North", "South", "East", "West"]
    # Headings in clockwise order: a right turn moves one place forward,
    # a left turn one place back.
    clockwise = ("North", "East", "South", "West")
    quarter = {"right": 1, "left": -1}
    built = []
    for _ in range(count):
        start = random.choice(directions)
        num_turns = random.randint(1, 3)
        heading = clockwise.index(start)
        narrative = [f"starts facing {start}"]
        for _step in range(num_turns):
            side = random.choice(["right", "left"])
            heading = (heading + quarter[side]) % 4
            narrative.append(f"turns {side}")
        facing = clockwise[heading]
        others = [d for d in directions if d != facing]
        built.append(make_question(
            qtid,
            f"A person {', '.join(narrative)}. Which direction is the person facing now?",
            facing, others[:3],
            f"After turns: {facing}", 1))
    return built
def _ordinal(n):
    """Return ``n`` with its English ordinal suffix, e.g. 1st, 2nd, 11th, 23rd."""
    if 10 <= n % 100 <= 20:
        # 11th, 12th and 13th are irregular.
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
    return f"{n}{suffix}"


def gen_ranking(conn, count=1200):
    """Generate rank-from-top / rank-from-bottom questions for a row of students.

    Given the row size and a student's position from one end, the answer
    is the position from the other end (total - position + 1).

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of records built by ``make_question``; empty when
        ``get_qtid`` returns a falsy id for this topic.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Logical Reasoning", "Order and Ranking", "Find rank from top/bottom")
    if not qtid:
        return questions
    for _ in range(count):
        total = random.randint(20, 60)
        from_top = random.randint(1, total)
        from_bottom = total - from_top + 1
        ask = random.choice(["top", "bottom"])
        if ask == "top":
            questions.append(make_question(
                qtid,
                f"In a row of {total} students, a student is {_ordinal(from_bottom)} from the bottom. What is the student's position from the top?",
                str(from_top), nearby_wrong(from_top),
                f"From top = Total - From bottom + 1 = {total} - {from_bottom} + 1 = {from_top}", 1))
        else:
            questions.append(make_question(
                qtid,
                f"In a row of {total} students, a student is {_ordinal(from_top)} from the top. What is the student's position from the bottom?",
                str(from_bottom), nearby_wrong(from_bottom),
                f"From bottom = Total - From top + 1 = {total} - {from_top} + 1 = {from_bottom}", 1))
    return questions
def gen_syllogism(conn, count=1000):
    """Generate two-premise syllogism questions over random categories.

    Templates use ``{A}``, ``{B}`` and ``{C}`` placeholders that are filled
    with three distinct title-cased category names.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of records built by ``make_question``; empty when
        ``get_qtid`` returns a falsy id for this topic.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Logical Reasoning", "Syllogism", "All/Some/No conclusions")
    if not qtid:
        return questions
    # Named placeholders are used instead of bare A/B/C letters: replacing
    # the letter "A" as a substring also rewrites the word "All", and
    # replacing "B"/"C" afterwards corrupts names such as "Birds" or "Cats".
    templates = [
        ("All {A} are {B}. All {B} are {C}.", "All {A} are {C}",
         ["No {A} is {C}", "Some {A} are not {C}", "All {C} are {A}"]),
        ("All {A} are {B}. Some {B} are {C}.", "Some {A} may be {C}",
         ["All {A} are {C}", "No {A} is {C}", "All {C} are {A}"]),
        ("No {A} is {B}. All {B} are {C}.", "Some {C} are not {A}",
         ["All {A} are {C}", "No {C} is {A}", "All {C} are {A}"]),
        ("Some {A} are {B}. All {B} are {C}.", "Some {A} are {C}",
         ["All {A} are {C}", "No {A} is {C}", "All {C} are {A}"]),
        ("All {A} are {B}. No {B} is {C}.", "No {A} is {C}",
         ["Some {A} are {C}", "All {A} are {C}", "All {C} are {A}"]),
    ]
    categories = ["dogs", "cats", "birds", "students", "teachers", "doctors", "players", "singers",
                  "dancers", "painters", "writers", "engineers", "lawyers", "flowers", "trees"]
    for _ in range(count):
        template, correct, wrongs = random.choice(templates)
        cats = random.sample(categories, 3)
        names = {"A": cats[0].title(), "B": cats[1].title(), "C": cats[2].title()}
        stmt = template.format(**names)
        ans = correct.format(**names)
        wrong_list = [w.format(**names) for w in wrongs]
        questions.append(make_question(
            qtid,
            f"Statements: {stmt}\nConclusion: Which follows?",
            ans, wrong_list, "Based on Venn diagram logic", 2))
    return questions
# ============ NON-VERBAL REASONING ============
def gen_mirror_image(conn, count=800):
    """Generate mirror-image questions for 3- and 4-digit numbers.

    The answer is the number's digits in reverse order. The three
    distractors are nearby numbers, all different from each other and
    from the correct answer.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of records built by ``make_question``; empty when
        ``get_qtid`` returns a falsy id for this topic.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Non-Verbal Reasoning", "Mirror and Water Image", "Mirror image of text/numbers")
    if not qtid:
        return questions
    for _ in range(count):
        num = random.randint(100, 9999)
        mirror = str(num)[::-1]
        wrongs = []
        # Independent draws can repeat or even hit the right answer
        # (1011 + 90 == 1101), so keep drawing until three are usable.
        while len(wrongs) < 3:
            candidate = str(num + random.randint(1, 100))
            if candidate != mirror and candidate not in wrongs:
                wrongs.append(candidate)
        questions.append(make_question(
            qtid,
            f"What is the mirror image of the number {num} when a mirror is placed on the right side?",
            mirror, wrongs,
            f"Mirror reverses left-right: {num} → {mirror}", 1))
    return questions
def gen_dice(conn, count=800):
    """Generate "opposite face of a standard die" questions.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of records built by ``make_question``; empty when
        ``get_qtid`` returns a falsy id for this topic.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Non-Verbal Reasoning", "Dice and Cube", "Opposite face of dice")
    if not qtid:
        return questions
    for _ in range(count):
        # Standard dice: opposite faces sum to 7
        num = random.randint(1, 6)
        opp = 7 - num
        # Four faces are neither the shown one nor its opposite; the first
        # three of them serve as distractors.
        wrongs = [str(x) for x in range(1, 7) if x != num and x != opp][:3]
        questions.append(make_question(
            qtid,
            f"On a standard die, what number is opposite to {num}?",
            str(opp), wrongs,
            f"On a standard die, opposite faces sum to 7: {num} + {opp} = 7", 1))
    return questions
def gen_cube_painting(conn, count=600):
    """Build painted-cube counting questions.

    A cube of side n (2 to 6) is painted on every face and cut into unit
    cubes; the question asks how many unit cubes show three, two, one or
    no painted faces.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of records built by ``make_question``; empty when
        ``get_qtid`` returns a falsy id for this topic.
    """
    qtid = get_qtid(conn, SUBJECT, "Non-Verbal Reasoning", "Dice and Cube", "Painted cube counting")
    if not qtid:
        return []
    built = []
    for _ in range(count):
        n = random.randint(2, 6)
        pieces = n ** 3
        # Unit cubes per edge that are not corners; zero for n == 2, which
        # makes every count except the corners zero as well.
        inner = n - 2
        tally = {
            "three": 8,               # corners
            "two": inner * 12,        # edge pieces
            "one": inner ** 2 * 6,    # face centres
            "no": inner ** 3,         # interior
        }
        ask = random.choice(["three", "two", "one", "no"])
        ans = tally[ask]
        built.append(make_question(
            qtid,
            f"A cube of side {n} is painted on all faces and then cut into {pieces} unit cubes. How many cubes have {ask} face(s) painted?",
            str(ans), nearby_wrong(ans),
            f"For {n}×{n}×{n} cube: {ask} faces painted = {ans}", 2))
    return built
# ============ MATHEMATICAL REASONING ============
def gen_math_operations(conn, count=1000):
    """Generate symbol-substitution arithmetic questions.

    Two symbols are each mapped to one of + - × ÷ and the solver must
    evaluate ``a sym1 b sym2 c`` after substitution. The expression is
    evaluated with the usual operator precedence, and operands are adjusted
    so that any division is exact.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of records built by ``make_question``; empty when
        ``get_qtid`` returns a falsy id for this topic.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Mathematical Reasoning", "Mathematical Operations", "Symbol substitution")
    if not qtid:
        return questions
    ops = {'+': lambda a, b: a + b, '-': lambda a, b: a - b,
           '×': lambda a, b: a * b, '÷': lambda a, b: a // b}
    high_precedence = ('×', '÷')
    symbols = ['@', '#', '$', '&', '*', '!']
    for _ in range(count):
        op1, op2 = random.sample(list(ops.keys()), 2)
        sym1, sym2 = random.sample(symbols, 2)
        a, b, c = random.randint(2, 20), random.randint(2, 20), random.randint(2, 20)
        # True when "b op2 c" must be evaluated before applying op1.
        right_first = op2 in high_precedence and op1 not in high_precedence
        # Make whichever division is performed come out exact, so the floor
        # division in `ops` never discards a remainder.
        if op1 == '÷':
            a = b * random.randint(2, 10)          # (a ÷ b) op2 c
        elif op2 == '÷':
            if right_first:
                b = c * random.randint(2, 10)      # a op1 (b ÷ c)
            else:
                a = c * random.randint(2, 10)      # (a × b) ÷ c
        if right_first:
            result = ops[op1](a, ops[op2](b, c))
        else:
            result = ops[op2](ops[op1](a, b), c)
        mapping_text = f"{sym1} means '{op1}' and {sym2} means '{op2}'"
        expr_text = f"{a} {sym1} {b} {sym2} {c}"
        questions.append(make_question(
            qtid,
            f"If {mapping_text}, find: {expr_text}",
            str(result), nearby_wrong(result),
            f"Replace symbols: {a} {op1} {b} {op2} {c} = {result}", 2))
    return questions
def gen_number_puzzles(conn, count=800):
    """Generate "missing number so the row sums match" questions.

    Row 1 is three random numbers; row 2 shows two numbers and a blank
    whose value makes both rows add up to the same total.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of exactly ``count`` records built by ``make_question``;
        empty when ``get_qtid`` returns a falsy id for this topic.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Mathematical Reasoning", "Number Puzzles", "Find missing number in grid")
    if not qtid:
        return questions
    for _ in range(count):
        r1 = [random.randint(1, 20) for _ in range(3)]
        target = sum(r1)  # at least 3
        # Cap the two visible entries so the missing one is always >= 1;
        # this way no draw has to be thrown away.
        r2_a = random.randint(1, min(15, target - 2))
        r2_b = random.randint(1, min(15, target - r2_a - 1))
        r2_c = target - r2_a - r2_b
        questions.append(make_question(
            qtid,
            f"In a grid, row 1 is [{r1[0]}, {r1[1]}, {r1[2]}] (sum={target}). Row 2 is [{r2_a}, {r2_b}, ?]. Find the missing number if row sums are equal.",
            str(r2_c), nearby_wrong(r2_c),
            f"? = {target} - {r2_a} - {r2_b} = {r2_c}", 1))
    return questions
def gen_venn_diagram(conn, count=800):
    """Generate two-set Venn questions asking for the "only tea" region.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of records built by ``make_question``; empty when
        ``get_qtid`` returns a falsy id for this topic.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Mathematical Reasoning", "Venn Diagram", "Count elements in region")
    if not qtid:
        return questions
    for _ in range(count):
        total_a = random.randint(20, 100)
        total_b = random.randint(20, 100)
        # The overlap cannot exceed either set, so only_a is never negative.
        both = random.randint(5, min(total_a, total_b))
        only_a = total_a - both
        questions.append(make_question(
            qtid,
            f"In a group, {total_a} like tea, {total_b} like coffee, and {both} like both. How many like only tea?",
            str(only_a), nearby_wrong(only_a),
            f"Only tea = {total_a} - {both} = {only_a}", 1))
    return questions
# ============ CRITICAL THINKING ============
def gen_statement_conclusion(conn, count=800):
    """Build statement-and-conclusion questions from a fixed template set.

    Each question repeats one of the canned statements and offers its
    paired conclusion alongside three canned distractors.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of records built by ``make_question``; empty when
        ``get_qtid`` returns a falsy id for this topic.
    """
    qtid = get_qtid(conn, SUBJECT, "Critical Thinking", "Statement and Conclusion", "Which conclusion follows")
    if not qtid:
        return []
    # (statement, conclusion that follows, conclusions that do not)
    templates = [
        ("All students who study hard pass the exam.", "Some who pass studied hard", ["No one studies hard", "Everyone fails", "Studying is not needed"]),
        ("Regular exercise improves health.", "People who exercise are healthier", ["Exercise is harmful", "Health has no relation to exercise", "Only medicine improves health"]),
        ("Reading improves vocabulary.", "People who read more have better vocabulary", ["Reading is useless", "Vocabulary cannot be improved", "TV improves vocabulary more"]),
        ("Smoking causes cancer.", "Smokers are at higher risk of cancer", ["All smokers get cancer", "Cancer has no cause", "Smoking is healthy"]),
        ("Water pollution affects marine life.", "Marine life is harmed by water pollution", ["Marine life thrives in pollution", "Pollution has no effect", "Only air pollution matters"]),
    ]
    picks = (random.choice(templates) for _ in range(count))
    return [
        make_question(
            qtid,
            f"Statement: {premise}\nWhich conclusion logically follows?",
            valid, invalid, "Direct logical inference", 2)
        for premise, valid, invalid in picks
    ]
def gen_letter_series(conn, count=1000):
    """Generate "find the next letter" questions for evenly spaced letters.

    Four letters spaced ``skip`` places apart are shown; the answer is the
    fifth letter of the same progression.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to generate.

    Returns:
        A list of exactly ``count`` records built by ``make_question``;
        empty when ``get_qtid`` returns a falsy id for this topic.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Letter Series", "Find next letters")
    if not qtid:
        return questions
    for _ in range(count):
        skip = random.randint(1, 4)
        # Bound the start so the fifth letter still lies within A-Z;
        # no draw has to be discarded for running past 'Z'.
        start = random.randint(0, min(15, 25 - 4 * skip))
        series = [chr(65 + start + i * skip) for i in range(4)]
        nxt_idx = start + 4 * skip
        ans = chr(65 + nxt_idx)
        # Distractors are the letters next to the correct one, wrapping at Z.
        wrongs = [chr(65 + (nxt_idx + i) % 26) for i in (1, 2, -1)]
        questions.append(make_question(
            qtid,
            f"Find the next letter: {', '.join(series)}, ?",
            ans, wrongs,
            f"Skip {skip}: next = {ans}", 1))
    return questions
def generate_all(conn):
    """Run every reasoning generator, store the results, and report counts.

    Each generator is called with its requested quantity, a per-topic
    count is printed, and the combined list is handed to
    ``insert_questions_batch`` in slices of 5000.

    Args:
        conn: Database handle, passed to each generator and to
            ``insert_questions_batch``.

    Returns:
        The total number of questions generated.
    """
    # (label for the progress line, generator, requested quantity)
    plan = [
        ("Number Analogy", gen_number_analogy, 700),
        ("Letter Analogy", gen_letter_analogy, 500),
        ("Classification", gen_classification, 2000),
        ("Number Series", gen_number_series, 2000),
        ("Letter Series", gen_letter_series, 1500),
        ("Coding-Decoding", gen_coding_decoding, 2500),
        ("Blood Relations", gen_blood_relations, 2000),
        ("Direction & Distance", gen_direction, 1500),
        ("Order & Ranking", gen_ranking, 1500),
        ("Syllogism", gen_syllogism, 1500),
        ("Mirror Image", gen_mirror_image, 1000),
        ("Dice", gen_dice, 1000),
        ("Cube Painting", gen_cube_painting, 800),
        ("Math Operations", gen_math_operations, 1500),
        ("Number Puzzles", gen_number_puzzles, 1200),
        ("Venn Diagram", gen_venn_diagram, 1200),
        ("Statement & Conclusion", gen_statement_conclusion, 1200),
    ]
    collected = []
    for label, builder, quantity in plan:
        produced = builder(conn, quantity)
        collected.extend(produced)
        print(f" {label}: {len(produced)} questions")
    # Insert in fixed-size slices rather than in one call.
    chunk = 5000
    offset = 0
    while offset < len(collected):
        insert_questions_batch(conn, collected[offset:offset + chunk])
        offset += chunk
    grand_total = len(collected)
    print(f" TOTAL Reasoning: {grand_total}")
    return grand_total
# Script entry point: open a connection, generate and store every question.
if __name__ == '__main__':
    conn = get_db()
    try:
        print("Generating Reasoning questions...")
        generate_all(conn)
    finally:
        # Close the connection even when generation or insertion raises.
        conn.close()