#!/usr/bin/env python3
"""
General Intelligence & Reasoning Question Generator for SSC CGL.
Generates ~25,000 template-based reasoning questions.
"""

import itertools
import random
import string
import sys
import os

sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from generators.base import make_question, get_qtid, nearby_wrong, get_db, insert_questions_batch

SUBJECT = "General Intelligence and Reasoning"


# ============ VERBAL REASONING ============

def gen_number_analogy(conn, count=700):
    """Build "a : f(a) :: b : ?" number-analogy questions.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to build.

    Returns:
        A list of ``make_question`` results; empty when ``get_qtid`` returns
        a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Analogy", "Number analogy")
    if not qtid:
        return questions

    rules = [
        (lambda x: x * x, "square"),
        (lambda x: x * x * x, "cube"),
        (lambda x: x * 2, "double"),
        (lambda x: x + 5, "add 5"),
    ]
    for _ in range(count):
        func, rule = random.choice(rules)
        # Re-draw `a` until the shown pair identifies exactly one rule;
        # e.g. 2 : 4 is both "square" and "double", 5 : 10 both "double"
        # and "add 5", which would make two answers defensible.
        while True:
            a = random.randint(2, 20)
            result_a = func(a)
            if sum(1 for f, _name in rules if f(a) == result_a) == 1:
                break
        b = random.randint(2, 20)
        while b == a:
            b = random.randint(2, 20)
        result_b = func(b)
        questions.append(make_question(
            qtid,
            f"{a} : {result_a} :: {b} : ?",
            str(result_b),
            nearby_wrong(result_b),
            f"Rule: {rule}. {a}→{result_a}, {b}→{result_b}",
            1,
        ))
    return questions


def gen_letter_analogy(conn, count=500):
    """Build letter-pair analogy questions based on a fixed alphabet shift.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to build.

    Returns:
        A list of ``make_question`` results; empty when ``get_qtid`` returns
        a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Analogy", "Letter analogy")
    if not qtid:
        return questions

    for _ in range(count):
        shift = random.randint(1, 5)
        # Start indices stop at 20 so that index + shift never passes 'Z'.
        a = random.randint(0, 20)
        b = random.randint(0, 20)
        while b == a:
            b = random.randint(0, 20)
        pair1 = chr(65 + a) + chr(65 + a + shift)
        first = chr(65 + b)
        second = chr(65 + b + shift)
        # Distractors are the letters one/two after and one before the answer.
        wrongs = [first + chr(65 + (b + shift + i) % 26) for i in (1, 2, -1)]
        questions.append(make_question(
            qtid,
            f"{pair1} : {first}?",
            first + second,
            wrongs,
            f"Shift by {shift}: {pair1} → {first}{second}",
            1,
        ))
    return questions


def gen_classification(conn, count=1500):
    """Build "odd one out" questions over numbers and over words.

    Half of ``count`` goes to each sub-type; a sub-type is skipped when
    ``get_qtid`` returns a falsy id for it.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Total number of questions requested across both sub-types.

    Returns:
        A list of ``make_question`` results.
    """
    questions = []

    # Number classification (odd one out)
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Classification", "Number classification")
    if qtid:
        primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]
        composites = [4, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22]
        squares = [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
        non_squares = [2, 3, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15]
        for _ in range(count // 2):
            t = random.choice(["even", "odd", "prime", "square"])
            # random.sample keeps the three group members distinct, so the
            # question never shows the same option twice.
            if t == "even":
                group = random.sample(range(2, 101, 2), 3)
                odd_one = random.randint(1, 50) * 2 + 1
            elif t == "odd":
                group = random.sample(range(1, 100, 2), 3)
                odd_one = random.randint(1, 50) * 2
            elif t == "prime":
                group = random.sample(primes, 3)
                odd_one = random.choice(composites)
            else:
                group = random.sample(squares, 3)
                odd_one = random.choice(non_squares)
            all_opts = group + [odd_one]
            random.shuffle(all_opts)
            questions.append(make_question(
                qtid,
                f"Find the odd one out: {', '.join(map(str, all_opts))}",
                str(odd_one),
                [str(x) for x in group],
                f"{odd_one} is not {t}",
                1,
            ))

    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Classification", "Word classification")
    if qtid:
        # (category members, outsider, category name)
        word_groups = [
            (["Apple", "Mango", "Banana", "Orange"], "Carrot", "Fruits"),
            (["Dog", "Cat", "Lion", "Tiger"], "Eagle", "Mammals"),
            (["Red", "Blue", "Green", "Yellow"], "Square", "Colors"),
            (["Delhi", "Mumbai", "Chennai", "Kolkata"], "India", "Cities"),
            (["Pen", "Pencil", "Marker", "Crayon"], "Book", "Writing tools"),
            (["Piano", "Guitar", "Violin", "Flute"], "Painting", "Instruments"),
            (["January", "March", "May", "July"], "Monday", "Months"),
            (["Mercury", "Venus", "Mars", "Jupiter"], "Moon", "Planets"),
            (["Nile", "Amazon", "Ganges", "Thames"], "Sahara", "Rivers"),
            (["Football", "Cricket", "Tennis", "Hockey"], "Chess", "Outdoor sports"),
        ]
        for _ in range(count // 2):
            group, odd, reason = random.choice(word_groups)
            # Sample from all four members (not just the first three) so
            # every word in a category can appear.
            members = random.sample(group, 3)
            display = members + [odd]
            random.shuffle(display)
            questions.append(make_question(
                qtid,
                f"Find the odd one out: {', '.join(display)}",
                odd,
                [x for x in display if x != odd][:3],
                f"{odd} is not in the category: {reason}",
                1,
            ))
    return questions


def gen_number_series(conn, count=1500):
    """Build "find the next number" questions from five series patterns.

    Patterns: constant difference ("add"), constant ratio ("multiply"),
    consecutive squares ("square"), two alternating increments
    ("alternate") and a difference that grows by one each step ("diff").

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to build.

    Returns:
        A list of ``make_question`` results; empty when ``get_qtid`` returns
        a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Number Series", "Find next number")
    if not qtid:
        return questions

    for _ in range(count):
        series_type = random.choice(["add", "multiply", "square", "alternate", "diff"])
        if series_type == "add":
            start = random.randint(1, 50)
            d = random.randint(2, 15)
            series = [start + i * d for i in range(5)]
            ans = start + 5 * d
        elif series_type == "multiply":
            start = random.randint(1, 5)
            r = random.choice([2, 3])
            series = [start * (r ** i) for i in range(5)]
            ans = start * (r ** 5)
        elif series_type == "square":
            start = random.randint(1, 8)
            series = [(start + i) ** 2 for i in range(5)]
            ans = (start + 5) ** 2
        elif series_type == "alternate":
            # Two different increments so the series is not a plain
            # arithmetic progression.
            a, b = random.sample(range(1, 11), 2)
            series = [random.randint(1, 20)]
            for i in range(4):
                series.append(series[-1] + (a if i % 2 == 0 else b))
            # Steps so far were +a, +b, +a, +b, so the next step is +a.
            ans = series[-1] + a
        else:  # increasing difference
            start = random.randint(1, 10)
            d = random.randint(1, 5)
            series = [start]
            for i in range(4):
                series.append(series[-1] + d + i)
            ans = series[-1] + d + 4
        series_str = ", ".join(map(str, series))
        questions.append(make_question(
            qtid,
            f"Find the next number in the series: {series_str}, ?",
            str(ans),
            nearby_wrong(ans),
            f"Pattern: {series_type}. Next = {ans}",
            random.choice([1, 2]),
        ))
    return questions


def _caesar_shift(word, shift):
    """Return uppercase ``word`` with each letter moved ``shift`` places, wrapping at Z."""
    return "".join(chr((ord(c) - 65 + shift) % 26 + 65) for c in word)


def gen_coding_decoding(conn, count=2000):
    """Build letter-shift and letter-to-digit coding questions.

    Half of ``count`` goes to each sub-type; a sub-type is skipped when
    ``get_qtid`` returns a falsy id for it.

    Number-coding questions ask for the code of a rearrangement of the key
    word's own letters, so every digit of the answer can be read off the
    key given in the question.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Total number of questions requested across both sub-types.

    Returns:
        A list of ``make_question`` results.
    """
    questions = []

    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Coding-Decoding", "Letter coding")
    if qtid:
        key_words = ["COME", "GONE", "HELP", "LOVE", "MIND", "PLAY", "ROSE", "SING", "TALK", "WIND",
                     "BACK", "DEEP", "FAST", "GIRL", "HOME", "JUST", "KING", "LAMP", "NAME", "OPEN"]
        target_words = ["BALL", "CAKE", "DARK", "EASY", "FISH", "GOOD", "HAND", "IDOL", "JOKE", "KEEP"]
        for _ in range(count // 2):
            shift = random.randint(1, 5)
            word = random.choice(key_words)
            coded = _caesar_shift(word, shift)
            word2 = random.choice(target_words)
            coded2 = _caesar_shift(word2, shift)
            # Distractors use neighbouring shift amounts.
            wrongs = [_caesar_shift(word2, s) for s in (shift + 1, shift - 1, shift + 2)]
            questions.append(make_question(
                qtid,
                f"If {word} is coded as {coded}, then {word2} is coded as?",
                coded2,
                wrongs,
                f"Each letter shifted by +{shift}",
                2,
            ))

    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Coding-Decoding", "Number coding")
    if qtid:
        # Every key word has three distinct letters.
        key_words = ["CAT", "DOG", "SUN", "PEN", "CUP", "BOX", "HAT", "MAP", "JAR", "FAN"]
        for _ in range(count // 2):
            word = random.choice(key_words)
            # Distinct digits, so each letter has its own code.
            digits = random.sample(range(1, 10), len(word))
            mapping = {letter: str(digit) for letter, digit in zip(word, digits)}
            code_str = "".join(mapping[c] for c in word)

            # The target is a different ordering of the same letters.
            letters = list(word)
            word2 = word
            while word2 == word:
                random.shuffle(letters)
                word2 = "".join(letters)
            code2 = "".join(mapping[c] for c in word2)

            # Distractors: other orderings of the same digits.
            others = sorted({"".join(p) for p in itertools.permutations(code2)} - {code2})
            wrongs = random.sample(others, 3)
            questions.append(make_question(
                qtid,
                f"If {word} = {code_str}, then {word2} = ?",
                code2,
                wrongs,
                f"Letter-to-number mapping from {word}={code_str}",
                2,
            ))
    return questions

# ============ LOGICAL REASONING ============

def gen_blood_relations(conn, count=1500):
    """Build family-relation questions from fixed templates.

    Templates use ``{A}``, ``{B}``, ``{C}`` placeholders that are filled with
    three distinct single-letter names.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to build.

    Returns:
        A list of ``make_question`` results; empty when ``get_qtid`` returns
        a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Logical Reasoning", "Blood Relations", "Direct relation")
    if not qtid:
        return questions

    templates = [
        ("{A} is the father of {B}. {B} is the sister of {C}. What is {A} to {C}?",
         "Father", ["Uncle", "Brother", "Grandfather"]),
        ("{A} is the mother of {B}. {B} is the brother of {C}. What is {A} to {C}?",
         "Mother", ["Aunt", "Sister", "Grandmother"]),
        ("{A} is the brother of {B}. {B} is the son of {C}. What is {A} to {C}?",
         "Son", ["Nephew", "Brother", "Father"]),
        ("{A} is the sister of {B}. {B} is the daughter of {C}. What is {A} to {C}?",
         "Daughter", ["Niece", "Sister", "Mother"]),
        ("{A} is the husband of {B}. {B} is the mother of {C}. What is {A} to {C}?",
         "Father", ["Uncle", "Brother", "Grandfather"]),
        ("{A} is the wife of {B}. {B} is the father of {C}. What is {A} to {C}?",
         "Mother", ["Aunt", "Sister", "Grandmother"]),
        ("{A}'s father is {B}'s son. What is {B} to {A}?",
         "Grandfather", ["Father", "Uncle", "Brother"]),
        ("{A}'s mother is {B}'s daughter. What is {B} to {A}?",
         "Grandmother", ["Mother", "Aunt", "Sister"]),
        ("{A} is {B}'s brother's wife. What is {A} to {B}?",
         "Sister-in-law", ["Sister", "Cousin", "Aunt"]),
        ("{A} is {B}'s father's brother. What is {A} to {B}?",
         "Uncle", ["Father", "Cousin", "Grandfather"]),
    ]
    name_pool = ["P", "Q", "R", "S", "T", "M", "N", "X", "Y", "Z"]
    for _ in range(count):
        template, correct, wrongs = random.choice(templates)
        first, second, third = random.sample(name_pool, 3)
        # Templates that never mention {C} simply ignore the third name.
        q_text = template.format(A=first, B=second, C=third)
        questions.append(make_question(
            qtid,
            q_text,
            correct,
            wrongs,
            f"Following family relationships, the answer is {correct}",
            2,
        ))
    return questions


def gen_direction(conn, count=1200):
    """Build "which way is the person facing" questions after 1-3 turns.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to build.

    Returns:
        A list of ``make_question`` results; empty when ``get_qtid`` returns
        a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Logical Reasoning", "Direction and Distance", "Find final direction")
    if not qtid:
        return questions

    directions = ["North", "South", "East", "West"]
    # Facing after a 90-degree turn from each starting direction.
    turns = {
        "North": {"right": "East", "left": "West"},
        "South": {"right": "West", "left": "East"},
        "East": {"right": "South", "left": "North"},
        "West": {"right": "North", "left": "South"},
    }
    for _ in range(count):
        start = random.choice(directions)
        num_turns = random.randint(1, 3)
        current = start
        steps_desc = [f"starts facing {start}"]
        for _step in range(num_turns):
            turn = random.choice(["right", "left"])
            current = turns[current][turn]
            steps_desc.append(f"turns {turn}")
        wrong_dirs = [d for d in directions if d != current]
        questions.append(make_question(
            qtid,
            f"A person {', '.join(steps_desc)}. Which direction is the person facing now?",
            current,
            wrong_dirs[:3],
            f"After turns: {current}",
            1,
        ))
    return questions


def _ordinal(n):
    """Return ``n`` with its English ordinal suffix, e.g. 1st, 22nd, 13th."""
    if 10 <= n % 100 <= 20:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
    return f"{n}{suffix}"


def gen_ranking(conn, count=1200):
    """Build rank-from-top / rank-from-bottom conversion questions.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to build.

    Returns:
        A list of ``make_question`` results; empty when ``get_qtid`` returns
        a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Logical Reasoning", "Order and Ranking", "Find rank from top/bottom")
    if not qtid:
        return questions

    for _ in range(count):
        total = random.randint(20, 60)
        from_top = random.randint(1, total)
        from_bottom = total - from_top + 1
        ask = random.choice(["top", "bottom"])
        if ask == "top":
            questions.append(make_question(
                qtid,
                f"In a row of {total} students, a student is {_ordinal(from_bottom)} from the bottom. "
                f"What is the student's position from the top?",
                str(from_top),
                nearby_wrong(from_top),
                f"From top = Total - From bottom + 1 = {total} - {from_bottom} + 1 = {from_top}",
                1,
            ))
        else:
            questions.append(make_question(
                qtid,
                f"In a row of {total} students, a student is {_ordinal(from_top)} from the top. "
                f"What is the student's position from the bottom?",
                str(from_bottom),
                nearby_wrong(from_bottom),
                f"From bottom = Total - From top + 1 = {total} - {from_top} + 1 = {from_bottom}",
                1,
            ))
    return questions


def gen_syllogism(conn, count=1000):
    """Build two-premise syllogism questions over random categories.

    Templates use ``{A}``, ``{B}``, ``{C}`` placeholders. Plain
    ``str.replace("A", ...)`` cannot be used here because it would also
    rewrite the "A" in "All" and capital letters inside category names that
    were already substituted.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to build.

    Returns:
        A list of ``make_question`` results; empty when ``get_qtid`` returns
        a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Logical Reasoning", "Syllogism", "All/Some/No conclusions")
    if not qtid:
        return questions

    templates = [
        ("All {A} are {B}. All {B} are {C}.", "All {A} are {C}",
         ["No {A} is {C}", "Some {A} are not {C}", "All {C} are {A}"]),
        ("All {A} are {B}. Some {B} are {C}.", "Some {A} may be {C}",
         ["All {A} are {C}", "No {A} is {C}", "All {C} are {A}"]),
        ("No {A} is {B}. All {B} are {C}.", "Some {C} are not {A}",
         ["All {A} are {C}", "No {C} is {A}", "All {C} are {A}"]),
        ("Some {A} are {B}. All {B} are {C}.", "Some {A} are {C}",
         ["All {A} are {C}", "No {A} is {C}", "All {C} are {A}"]),
        ("All {A} are {B}. No {B} is {C}.", "No {A} is {C}",
         ["Some {A} are {C}", "All {A} are {C}", "All {C} are {A}"]),
    ]
    categories = ["dogs", "cats", "birds", "students", "teachers", "doctors", "players", "singers",
                  "dancers", "painters", "writers", "engineers", "lawyers", "flowers", "trees"]
    for _ in range(count):
        template, correct, wrongs = random.choice(templates)
        picked = random.sample(categories, 3)
        terms = {key: cat.title() for key, cat in zip("ABC", picked)}
        stmt = template.format(**terms)
        ans = correct.format(**terms)
        wrong_list = [w.format(**terms) for w in wrongs]
        questions.append(make_question(
            qtid,
            f"Statements: {stmt}\nConclusion: Which follows?",
            ans,
            wrong_list,
            "Based on Venn diagram logic",
            2,
        ))
    return questions


# ============ NON-VERBAL REASONING ============

def gen_mirror_image(conn, count=800):
    """Build "mirror image of a number" questions (digit order reversed).

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to build.

    Returns:
        A list of ``make_question`` results; empty when ``get_qtid`` returns
        a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Non-Verbal Reasoning", "Mirror and Water Image", "Mirror image of text/numbers")
    if not qtid:
        return questions

    for _ in range(count):
        num = random.randint(100, 9999)
        mirror = str(num)[::-1]
        # Keep drawing until there are three distinct distractors, none of
        # which happens to equal the correct answer.
        wrongs = []
        while len(wrongs) < 3:
            candidate = str(num + random.randint(1, 100))
            if candidate != mirror and candidate not in wrongs:
                wrongs.append(candidate)
        questions.append(make_question(
            qtid,
            f"What is the mirror image of the number {num} when a mirror is placed on the right side?",
            mirror,
            wrongs,
            f"Mirror reverses left-right: {num} → {mirror}",
            1,
        ))
    return questions


def gen_dice(conn, count=800):
    """Build "opposite face of a standard die" questions.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to build.

    Returns:
        A list of ``make_question`` results; empty when ``get_qtid`` returns
        a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Non-Verbal Reasoning", "Dice and Cube", "Opposite face of dice")
    if not qtid:
        return questions

    for _ in range(count):
        # Standard dice: opposite faces sum to 7
        num = random.randint(1, 6)
        opp = 7 - num
        distractors = [str(x) for x in range(1, 7) if x not in (num, opp)]
        questions.append(make_question(
            qtid,
            f"On a standard die, what number is opposite to {num}?",
            str(opp),
            distractors[:3],
            f"On a standard die, opposite faces sum to 7: {num} + {opp} = 7",
            1,
        ))
    return questions


def gen_cube_painting(conn, count=600):
    """Build painted-cube counting questions for an n×n×n cube, n in 2..6.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to build.

    Returns:
        A list of ``make_question`` results; empty when ``get_qtid`` returns
        a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Non-Verbal Reasoning", "Dice and Cube", "Painted cube counting")
    if not qtid:
        return questions

    for _ in range(count):
        n = random.randint(2, 6)
        total = n ** 3
        inner = n - 2  # unit cubes along an edge, excluding the two corners
        ans_map = {
            "three": 8,              # corners
            "two": inner * 12,       # edge cubes, 12 edges
            "one": inner ** 2 * 6,   # face centres, 6 faces
            "no": inner ** 3,        # interior
        }
        ask = random.choice(["three", "two", "one", "no"])
        ans = ans_map[ask]
        questions.append(make_question(
            qtid,
            f"A cube of side {n} is painted on all faces and then cut into {total} unit cubes. "
            f"How many cubes have {ask} face(s) painted?",
            str(ans),
            nearby_wrong(ans),
            f"For {n}×{n}×{n} cube: {ask} faces painted = {ans}",
            2,
        ))
    return questions


# ============ MATHEMATICAL REASONING ============

_ARITHMETIC = {
    '+': lambda a, b: a + b,
    '-': lambda a, b: a - b,
    '×': lambda a, b: a * b,
    '÷': lambda a, b: a // b,  # operands are arranged so this is always exact
}
_HIGH_PRECEDENCE = ('×', '÷')


def _eval_two_ops(a, op1, b, op2, c):
    """Evaluate ``a op1 b op2 c`` with × and ÷ binding tighter than + and -."""
    if op2 in _HIGH_PRECEDENCE and op1 not in _HIGH_PRECEDENCE:
        return _ARITHMETIC[op1](a, _ARITHMETIC[op2](b, c))
    # Same precedence level, or the tighter operator comes first: left to right.
    return _ARITHMETIC[op2](_ARITHMETIC[op1](a, b), c)


def gen_math_operations(conn, count=1000):
    """Build symbol-substitution arithmetic questions with two operators.

    The answer follows normal operator precedence, and operands are adjusted
    so that any division in the expression is exact.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to build.

    Returns:
        A list of ``make_question`` results; empty when ``get_qtid`` returns
        a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Mathematical Reasoning", "Mathematical Operations", "Symbol substitution")
    if not qtid:
        return questions

    symbols = ['@', '#', '$', '&', '*', '!']
    for _ in range(count):
        op1, op2 = random.sample(list(_ARITHMETIC), 2)
        sym1, sym2 = random.sample(symbols, 2)
        a, b, c = random.randint(2, 20), random.randint(2, 20), random.randint(2, 20)

        # Make whichever division is actually performed come out whole.
        if op1 == '÷':
            a = b * random.randint(1, 10)      # a ÷ b is evaluated first
        elif op2 == '÷':
            if op1 == '×':
                a = c * random.randint(1, 10)  # (a × b) ÷ c
            else:
                b = c * random.randint(1, 10)  # a ± (b ÷ c)

        mapping_text = f"{sym1} means '{op1}' and {sym2} means '{op2}'"
        expr_text = f"{a} {sym1} {b} {sym2} {c}"
        result = _eval_two_ops(a, op1, b, op2, c)
        questions.append(make_question(
            qtid,
            f"If {mapping_text}, find: {expr_text}",
            str(result),
            nearby_wrong(result),
            f"Replace symbols: {a} {op1} {b} {op2} {c} = {result}",
            2,
        ))
    return questions


def gen_number_puzzles(conn, count=800):
    """Build "missing number so both rows have the same sum" questions.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to build.

    Returns:
        A list of ``make_question`` results; empty when ``get_qtid`` returns
        a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Mathematical Reasoning", "Number Puzzles", "Find missing number in grid")
    if not qtid:
        return questions

    for _ in range(count):
        r1 = [random.randint(1, 20) for _cell in range(3)]
        target = sum(r1)  # always >= 3
        # Bound the two known cells so the missing one is at least 1; this
        # way every iteration yields a question.
        r2_a = random.randint(1, min(15, target - 2))
        r2_b = random.randint(1, min(15, target - r2_a - 1))
        r2_c = target - r2_a - r2_b
        questions.append(make_question(
            qtid,
            f"In a grid, row 1 is [{r1[0]}, {r1[1]}, {r1[2]}] (sum={target}). "
            f"Row 2 is [{r2_a}, {r2_b}, ?]. Find the missing number if row sums are equal.",
            str(r2_c),
            nearby_wrong(r2_c),
            f"? = {target} - {r2_a} - {r2_b} = {r2_c}",
            1,
        ))
    return questions


def gen_venn_diagram(conn, count=800):
    """Build two-set "how many like only tea" questions.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to build.

    Returns:
        A list of ``make_question`` results; empty when ``get_qtid`` returns
        a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Mathematical Reasoning", "Venn Diagram", "Count elements in region")
    if not qtid:
        return questions

    for _ in range(count):
        total_a = random.randint(20, 100)
        total_b = random.randint(20, 100)
        # The overlap cannot exceed either set.
        both = random.randint(5, min(total_a, total_b))
        only_a = total_a - both
        questions.append(make_question(
            qtid,
            f"In a group, {total_a} like tea, {total_b} like coffee, and {both} like both. "
            f"How many like only tea?",
            str(only_a),
            nearby_wrong(only_a),
            f"Only tea = {total_a} - {both} = {only_a}",
            1,
        ))
    return questions


# ============ CRITICAL THINKING ============

def gen_statement_conclusion(conn, count=800):
    """Build statement/conclusion questions from fixed templates.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to build.

    Returns:
        A list of ``make_question`` results; empty when ``get_qtid`` returns
        a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Critical Thinking", "Statement and Conclusion", "Which conclusion follows")
    if not qtid:
        return questions

    templates = [
        ("All students who study hard pass the exam.", "Some who pass studied hard",
         ["No one studies hard", "Everyone fails", "Studying is not needed"]),
        ("Regular exercise improves health.", "People who exercise are healthier",
         ["Exercise is harmful", "Health has no relation to exercise", "Only medicine improves health"]),
        ("Reading improves vocabulary.", "People who read more have better vocabulary",
         ["Reading is useless", "Vocabulary cannot be improved", "TV improves vocabulary more"]),
        ("Smoking causes cancer.", "Smokers are at higher risk of cancer",
         ["All smokers get cancer", "Cancer has no cause", "Smoking is healthy"]),
        ("Water pollution affects marine life.", "Marine life is harmed by water pollution",
         ["Marine life thrives in pollution", "Pollution has no effect", "Only air pollution matters"]),
    ]
    for _ in range(count):
        stmt, correct, wrongs = random.choice(templates)
        questions.append(make_question(
            qtid,
            f"Statement: {stmt}\nWhich conclusion logically follows?",
            correct,
            wrongs,
            "Direct logical inference",
            2,
        ))
    return questions


def gen_letter_series(conn, count=1000):
    """Build "find the next letter" questions with a constant skip.

    Args:
        conn: Database handle, passed through to ``get_qtid``.
        count: Number of questions to build.

    Returns:
        A list of ``make_question`` results; empty when ``get_qtid`` returns
        a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Verbal Reasoning", "Letter Series", "Find next letters")
    if not qtid:
        return questions

    for _ in range(count):
        skip = random.randint(1, 4)
        # Cap the start so the fifth letter (the answer) is still within
        # A-Z; every iteration then yields a question.
        start = random.randint(0, min(15, 25 - 4 * skip))
        series = [chr(65 + start + i * skip) for i in range(4)]
        nxt_idx = start + 4 * skip
        ans = chr(65 + nxt_idx)
        wrongs = [chr(65 + (nxt_idx + i) % 26) for i in (1, 2, -1)]
        questions.append(make_question(
            qtid,
            f"Find the next letter: {', '.join(series)}, ?",
            ans,
            wrongs,
            f"Skip {skip}: next = {ans}",
            1,
        ))
    return questions


def generate_all(conn):
    """Generate all Reasoning questions.

    Runs every generator with its configured count, prints a per-generator
    tally, and hands the results to ``insert_questions_batch`` in slices of
    5000.

    Args:
        conn: Database handle, passed to each generator and to
            ``insert_questions_batch``.

    Returns:
        The total number of questions generated.
    """
    generators = [
        ("Number Analogy", gen_number_analogy, 700),
        ("Letter Analogy", gen_letter_analogy, 500),
        ("Classification", gen_classification, 2000),
        ("Number Series", gen_number_series, 2000),
        ("Letter Series", gen_letter_series, 1500),
        ("Coding-Decoding", gen_coding_decoding, 2500),
        ("Blood Relations", gen_blood_relations, 2000),
        ("Direction & Distance", gen_direction, 1500),
        ("Order & Ranking", gen_ranking, 1500),
        ("Syllogism", gen_syllogism, 1500),
        ("Mirror Image", gen_mirror_image, 1000),
        ("Dice", gen_dice, 1000),
        ("Cube Painting", gen_cube_painting, 800),
        ("Math Operations", gen_math_operations, 1500),
        ("Number Puzzles", gen_number_puzzles, 1200),
        ("Venn Diagram", gen_venn_diagram, 1200),
        ("Statement & Conclusion", gen_statement_conclusion, 1200),
    ]
    total = 0
    all_questions = []
    for name, gen_func, count in generators:
        questions = gen_func(conn, count)
        all_questions.extend(questions)
        print(f" {name}: {len(questions)} questions")
        total += len(questions)

    batch_size = 5000
    for i in range(0, len(all_questions), batch_size):
        insert_questions_batch(conn, all_questions[i:i + batch_size])
    print(f" TOTAL Reasoning: {total}")
    return total


if __name__ == '__main__':
    conn = get_db()
    try:
        print("Generating Reasoning questions...")
        generate_all(conn)
    finally:
        # Release the connection even if generation or insertion fails.
        conn.close()