# ssctopper/generators/english_generator.py

#!/usr/bin/env python3
"""
English Language & Comprehension Question Generator for SSC CGL.
Generates ~27,500 questions by default, covering vocabulary, grammar, sentence structure, error detection, and comprehension (cloze test).
"""
import random
import sys
import os

sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from generators.base import make_question, get_qtid, get_db, insert_questions_batch

# Subject name passed as the second argument to get_qtid() by every generator
# in this module.
SUBJECT = "English Language and Comprehension"

# ============ WORD BANKS ============

# Synonym bank. Each entry is (word, synonym, [three distractors]);
# gen_synonyms() asks for the synonym of `word`, uses `synonym` as the correct
# answer and the list as the wrong options.
SYNONYMS = [
    ("Abundant", "Plentiful", ["Scarce", "Meager", "Rare"]),
    ("Accurate", "Precise", ["Wrong", "Vague", "Incorrect"]),
    ("Admire", "Respect", ["Despise", "Hate", "Ignore"]),
    ("Affluent", "Wealthy", ["Poor", "Needy", "Destitute"]),
    ("Agile", "Nimble", ["Clumsy", "Slow", "Stiff"]),
    ("Amiable", "Friendly", ["Hostile", "Rude", "Cold"]),
    ("Ancient", "Old", ["Modern", "New", "Recent"]),
    ("Arduous", "Difficult", ["Easy", "Simple", "Effortless"]),
    ("Audacious", "Bold", ["Timid", "Meek", "Cowardly"]),
    ("Authentic", "Genuine", ["Fake", "False", "Counterfeit"]),
    ("Benevolent", "Kind", ["Cruel", "Malicious", "Harsh"]),
    ("Bizarre", "Strange", ["Normal", "Usual", "Ordinary"]),
    ("Candid", "Frank", ["Deceptive", "Dishonest", "Sly"]),
    ("Cautious", "Careful", ["Reckless", "Careless", "Rash"]),
    ("Comprehend", "Understand", ["Misunderstand", "Confuse", "Ignore"]),
    ("Conceal", "Hide", ["Reveal", "Expose", "Display"]),
    ("Contempt", "Scorn", ["Respect", "Admiration", "Regard"]),
    ("Courage", "Bravery", ["Cowardice", "Fear", "Timidity"]),
    ("Delight", "Joy", ["Sorrow", "Grief", "Misery"]),
    ("Diligent", "Hardworking", ["Lazy", "Idle", "Indolent"]),
    ("Diminish", "Reduce", ["Increase", "Enlarge", "Expand"]),
    ("Eloquent", "Expressive", ["Inarticulate", "Stammering", "Dull"]),
    ("Enormous", "Huge", ["Tiny", "Small", "Minute"]),
    ("Eternal", "Everlasting", ["Temporary", "Brief", "Fleeting"]),
    ("Exquisite", "Beautiful", ["Ugly", "Plain", "Crude"]),
    ("Feeble", "Weak", ["Strong", "Powerful", "Mighty"]),
    ("Ferocious", "Fierce", ["Gentle", "Mild", "Tame"]),
    ("Frigid", "Cold", ["Hot", "Warm", "Tropical"]),
    ("Generous", "Liberal", ["Stingy", "Miserly", "Tight"]),
    ("Gratitude", "Thankfulness", ["Ingratitude", "Ungratefulness", "Resentment"]),
    ("Halt", "Stop", ["Continue", "Proceed", "Advance"]),
    ("Hazardous", "Dangerous", ["Safe", "Secure", "Harmless"]),
    ("Hostile", "Unfriendly", ["Friendly", "Warm", "Kind"]),
    ("Immense", "Vast", ["Tiny", "Small", "Little"]),
    ("Impeccable", "Flawless", ["Faulty", "Defective", "Imperfect"]),
    ("Jubilant", "Joyful", ["Sad", "Gloomy", "Depressed"]),
    ("Keen", "Eager", ["Reluctant", "Unwilling", "Indifferent"]),
    ("Laudable", "Praiseworthy", ["Blameworthy", "Shameful", "Disgraceful"]),
    ("Lucid", "Clear", ["Confusing", "Vague", "Obscure"]),
    ("Magnificent", "Splendid", ["Ordinary", "Plain", "Dull"]),
    ("Meticulous", "Careful", ["Careless", "Sloppy", "Negligent"]),
    ("Mundane", "Ordinary", ["Extraordinary", "Unusual", "Special"]),
    ("Novice", "Beginner", ["Expert", "Veteran", "Professional"]),
    ("Obstinate", "Stubborn", ["Flexible", "Yielding", "Compliant"]),
    ("Opulent", "Luxurious", ["Poor", "Shabby", "Modest"]),
    ("Pacify", "Calm", ["Agitate", "Provoke", "Irritate"]),
    ("Prudent", "Wise", ["Foolish", "Reckless", "Imprudent"]),
    ("Replenish", "Refill", ["Drain", "Empty", "Deplete"]),
    ("Serene", "Calm", ["Turbulent", "Agitated", "Noisy"]),
    ("Tedious", "Boring", ["Interesting", "Exciting", "Engaging"]),
    ("Trivial", "Insignificant", ["Important", "Significant", "Vital"]),
    ("Ubiquitous", "Everywhere", ["Rare", "Scarce", "Uncommon"]),
    ("Valiant", "Brave", ["Cowardly", "Timid", "Fearful"]),
    ("Verbose", "Wordy", ["Concise", "Brief", "Terse"]),
    ("Wrath", "Anger", ["Calm", "Peace", "Happiness"]),
    ("Zealous", "Enthusiastic", ["Apathetic", "Indifferent", "Passive"]),
]

# Antonym bank. Each entry is a (word, antonym) pair; gen_antonyms() asks for
# the antonym of `word` and takes its wrong options from the antonym column of
# other pairs. Note that "Barren" appears as the antonym of two different words.
ANTONYMS = [
    ("Accept", "Reject"), ("Advance", "Retreat"), ("Ancient", "Modern"),
    ("Arrival", "Departure"), ("Ascend", "Descend"), ("Bold", "Timid"),
    ("Brave", "Cowardly"), ("Bright", "Dim"), ("Calm", "Agitated"),
    ("Create", "Destroy"), ("Dawn", "Dusk"), ("Defend", "Attack"),
    ("Expand", "Contract"), ("Forget", "Remember"), ("Generous", "Miserly"),
    ("Guilty", "Innocent"), ("Humble", "Proud"), ("Import", "Export"),
    ("Joy", "Sorrow"), ("Knowledge", "Ignorance"), ("Liberty", "Captivity"),
    ("Major", "Minor"), ("Natural", "Artificial"), ("Optimist", "Pessimist"),
    ("Peace", "War"), ("Rapid", "Slow"), ("Rigid", "Flexible"),
    ("Simple", "Complex"), ("Temporary", "Permanent"), ("Victory", "Defeat"),
    ("Wisdom", "Folly"), ("Zenith", "Nadir"), ("Transparent", "Opaque"),
    ("Voluntary", "Compulsory"), ("Shallow", "Deep"), ("Fertile", "Barren"),
    ("Concord", "Discord"), ("Benign", "Malignant"), ("Prolific", "Barren"),
    ("Affluent", "Destitute"),
]

# One-word-substitution bank. Each entry is (phrase, one-word answer,
# [three distractors]); gen_one_word() shows the phrase and uses the list as
# the wrong options.
#
# A distractor must never be a synonym of the answer, otherwise the question
# has two correct options. "Sleepwalker", "Aquaphobia" and "Impromptu" were
# replaced for that reason.
ONE_WORD_SUBS = [
    ("A person who loves books", "Bibliophile", ["Bibliographer", "Librarian", "Bookworm"]),
    ("Government by the people", "Democracy", ["Monarchy", "Autocracy", "Oligarchy"]),
    ("One who hates mankind", "Misanthrope", ["Philanthropist", "Misogynist", "Anthropologist"]),
    ("A person who speaks two languages", "Bilingual", ["Polyglot", "Monoglot", "Linguist"]),
    ("A person who walks in sleep", "Somnambulist", ["Insomniac", "Somniloquist", "Narcoleptic"]),
    ("Fear of water", "Hydrophobia", ["Pyrophobia", "Claustrophobia", "Acrophobia"]),
    ("Fear of heights", "Acrophobia", ["Hydrophobia", "Claustrophobia", "Agoraphobia"]),
    ("One who eats human flesh", "Cannibal", ["Carnivore", "Omnivore", "Herbivore"]),
    ("A word that is opposite in meaning", "Antonym", ["Synonym", "Homonym", "Acronym"]),
    ("Killing of a king", "Regicide", ["Homicide", "Genocide", "Fratricide"]),
    ("A place for keeping bees", "Apiary", ["Aviary", "Aquarium", "Nursery"]),
    ("One who knows everything", "Omniscient", ["Omnipresent", "Omnipotent", "Omnivore"]),
    ("Medicine that kills germs", "Antiseptic", ["Antibiotic", "Antidote", "Analgesic"]),
    ("A person who is 100 years old", "Centenarian", ["Octogenarian", "Nonagenarian", "Septuagenarian"]),
    ("Study of stars", "Astronomy", ["Astrology", "Cosmology", "Astrophysics"]),
    ("Government by a single person", "Autocracy", ["Democracy", "Monarchy", "Theocracy"]),
    ("One who does not believe in God", "Atheist", ["Theist", "Agnostic", "Pagan"]),
    ("A speech delivered without preparation", "Extempore", ["Monologue", "Rehearsed", "Deliberate"]),
    ("One who lives on vegetables", "Vegetarian", ["Vegan", "Carnivore", "Omnivore"]),
    ("A place for keeping dead bodies", "Mortuary", ["Cemetery", "Crematorium", "Mausoleum"]),
    ("That which cannot be read", "Illegible", ["Eligible", "Legible", "Indelible"]),
    ("A person who cannot be corrected", "Incorrigible", ["Incurable", "Invincible", "Inevitable"]),
    ("One who is present everywhere", "Omnipresent", ["Omniscient", "Omnipotent", "Omnivore"]),
    ("One who looks on the bright side", "Optimist", ["Pessimist", "Realist", "Fatalist"]),
    ("Study of ancient things", "Archaeology", ["Anthropology", "Paleontology", "Geology"]),
]

# Idiom bank. Each entry is (idiom, figurative meaning, [three distractors]);
# gen_idioms() asks what the idiom means, uses `meaning` as the correct answer
# and the list as the wrong options.
IDIOMS = [
    ("Break the ice", "To initiate conversation in a social setting", ["To break something", "To melt ice", "To cool down"]),
    ("Burn the midnight oil", "To work or study late into the night", ["To waste oil", "To start a fire", "To cook at night"]),
    ("Cry over spilt milk", "To regret something that cannot be undone", ["To cry while drinking milk", "To waste milk", "To be sad about dairy"]),
    ("Hit the nail on the head", "To be exactly right", ["To do carpentry", "To hurt oneself", "To break something"]),
    ("A piece of cake", "Something very easy", ["A type of dessert", "A small portion", "A bakery item"]),
    ("Bite the bullet", "To face a difficult situation bravely", ["To eat ammunition", "To hurt teeth", "To be violent"]),
    ("Cost an arm and a leg", "Very expensive", ["Physical injury", "Amputation", "Medical procedure"]),
    ("Let the cat out of the bag", "To reveal a secret", ["To free an animal", "To open a bag", "To go shopping"]),
    ("Once in a blue moon", "Very rarely", ["During full moon", "Monthly", "Nightly"]),
    ("Raining cats and dogs", "Raining very heavily", ["Animals falling", "Pet show", "Zoo visit"]),
    ("Spill the beans", "To reveal secret information", ["To cook", "To waste food", "To plant seeds"]),
    ("The ball is in your court", "It is your turn to take action", ["Playing tennis", "Court hearing", "Ball game"]),
    ("Under the weather", "Feeling unwell", ["In rain", "Below clouds", "Bad climate"]),
    ("Actions speak louder than words", "What you do matters more than what you say", ["Being noisy", "Shouting", "Speaking loudly"]),
    ("Beat around the bush", "To avoid the main topic", ["Gardening", "Playing in bush", "Walking in forest"]),
    ("Burning bridges", "Destroying relationships", ["Arson", "Building fire", "Demolition"]),
    ("Every cloud has a silver lining", "Good things come after bad", ["Weather forecast", "Cloud watching", "Silver mining"]),
    ("Keep your chin up", "Stay positive", ["Posture advice", "Exercise tip", "Looking upward"]),
    ("Back to the drawing board", "Start over", ["Art class", "Going backwards", "Drawing pictures"]),
    ("Barking up the wrong tree", "Making a wrong assumption", ["Disturbing a dog", "Climbing trees", "Forest activity"]),
]

# ============ GENERATORS ============

def gen_synonyms(conn, count=2500):
    """Build `count` "choose the synonym" questions from the SYNONYMS bank.

    Entries are drawn with replacement via random.choice, so the same word can
    appear in several questions.

    Args:
        conn: Database handle forwarded to get_qtid.
        count: Number of questions to build.

    Returns:
        A list of make_question() results, or an empty list when get_qtid
        returns a falsy id.
    """
    qtid = get_qtid(conn, SUBJECT, "Vocabulary", "Synonyms", "Choose synonym")
    if not qtid:
        return []

    def build_one():
        # One random bank entry -> one question. The trailing 1 is forwarded
        # to make_question unchanged (presumably a difficulty level -- confirm).
        word, syn, wrongs = random.choice(SYNONYMS)
        prompt = f"Choose the synonym of '{word}':"
        explanation = f"'{word}' means '{syn}'"
        return make_question(qtid, prompt, syn, wrongs, explanation, 1)

    return [build_one() for _ in range(count)]


def gen_antonyms(conn, count=2500):
    """Build `count` "choose the antonym" questions from the ANTONYMS bank.

    The correct answer is the pair's antonym. The three wrong options are
    sampled from the distinct antonyms of the other pairs, excluding any value
    equal to the correct answer. This guarantees that the options never
    contain the answer twice (e.g. "Barren" is the antonym of two words) and
    never contain duplicates.

    Args:
        conn: Database handle forwarded to get_qtid.
        count: Number of questions to build.

    Returns:
        A list of make_question() results, or an empty list when get_qtid
        returns a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Vocabulary", "Antonyms", "Choose antonym")
    if not qtid:
        return questions

    # Distinct antonym values, sorted so that sampling is reproducible under a
    # fixed random seed (set iteration order for strings is not stable).
    all_antonyms = sorted({antonym for _, antonym in ANTONYMS})
    # Placeholders used only if the bank is too small to supply 3 distractors.
    fillers = ["None", "All", "Some"]

    for _ in range(count):
        word, ant = random.choice(ANTONYMS)
        # Filter BEFORE sampling so we always get three real distractors
        # whenever the bank has enough distinct values.
        pool = [candidate for candidate in all_antonyms if candidate != ant]
        other_ants = random.sample(pool, min(3, len(pool)))
        other_ants.extend(fillers[:3 - len(other_ants)])
        questions.append(make_question(qtid,
            f"Choose the antonym of '{word}':",
            ant, other_ants, f"Opposite of '{word}' is '{ant}'", 1))
    return questions


def gen_one_word(conn, count=2000):
    """Build `count` one-word-substitution questions from ONE_WORD_SUBS.

    Entries are drawn with replacement via random.choice.

    Args:
        conn: Database handle forwarded to get_qtid.
        count: Number of questions to build.

    Returns:
        A list of make_question() results, or an empty list when get_qtid
        returns a falsy id.
    """
    qtid = get_qtid(conn, SUBJECT, "Vocabulary", "One Word Substitution", "Find one word for phrase")
    built = []
    if qtid:
        for _ in range(count):
            entry = random.choice(ONE_WORD_SUBS)
            phrase, word, wrongs = entry
            built.append(
                make_question(
                    qtid,
                    f"One word for: '{phrase}'",
                    word,
                    wrongs,
                    f"'{phrase}' = {word}",
                    1,
                )
            )
    return built


def gen_idioms(conn, count=2000):
    """Build `count` "meaning of the idiom" questions from the IDIOMS bank.

    Entries are drawn with replacement via random.choice.

    Args:
        conn: Database handle forwarded to get_qtid.
        count: Number of questions to build.

    Returns:
        A list of make_question() results, or an empty list when get_qtid
        returns a falsy id.
    """
    qtid = get_qtid(conn, SUBJECT, "Vocabulary", "Idioms and Phrases", "Meaning of idiom")
    if not qtid:
        return []

    # Lazy stream of random picks; each pick is consumed before the next draw.
    picks = (random.choice(IDIOMS) for _ in range(count))
    result = []
    for idiom, meaning, wrongs in picks:
        prompt = f"What does the idiom '{idiom}' mean?"
        explanation = f"'{idiom}' = {meaning}"
        result.append(make_question(qtid, prompt, meaning, wrongs, explanation, 1))
    return result


def gen_spelling(conn, count=1500):
    """Build `count` "choose the correctly spelled word" questions.

    For each question a word is drawn at random from the built-in list and
    three misspellings are produced by replacing one interior letter: a vowel
    becomes a random letter of 'bcdfg', any other letter becomes a random
    vowel. The first and last letters are never touched.

    The three misspellings are guaranteed to be distinct from each other and
    from the correct word, so a question never shows the same option twice.

    Args:
        conn: Database handle forwarded to get_qtid.
        count: Number of questions to build.

    Returns:
        A list of make_question() results, or an empty list when get_qtid
        returns a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Vocabulary", "Spelling Correction", "Choose correct spelling")
    if not qtid:
        return questions
    words = ["Accommodation", "Achievement", "Acknowledge", "Acquaintance", "Aggressive",
             "Apparently", "Argument", "Assassination", "Beautiful", "Beginning",
             "Believe", "Bureaucracy", "Calendar", "Changeable", "Committed",
             "Conscience", "Conscious", "Definitely", "Dilemma", "Disappear",
             "Disappoint", "Discipline", "Embarrass", "Environment", "Exaggerate",
             "Existence", "Experience", "Fascinate", "February", "Fluorescent",
             "Foreign", "Forty", "Government", "Guarantee", "Harass",
             "Hierarchy", "Humorous", "Hygiene", "Immediately", "Independent",
             "Intelligence", "Jewellery", "Judgement", "Knowledge", "Leisure",
             "License", "Maintenance", "Mediterranean", "Millennium", "Necessary",
             "Noticeable", "Occasion", "Occurrence", "Parliament", "Perseverance",
             "Pneumonia", "Possession", "Privilege", "Pronunciation", "Psychology",
             "Questionnaire", "Receive", "Recommend", "Rhythm", "Schedule",
             "Separate", "Successful", "Supersede", "Surprise", "Threshold",
             "Tomorrow", "Tyranny", "Unnecessary", "Vacuum", "Vegetable",
             "Wednesday", "Weird"]
    for _ in range(count):
        w = random.choice(words)
        misspells = []
        # Keep drawing until we hold three *different* wrong spellings. Every
        # word in the list has at least three interior letters with five
        # replacements each, so this always terminates quickly.
        while len(misspells) < 3:
            idx = random.randint(1, len(w) - 2)
            chars = list(w)
            chars[idx] = random.choice('aeiou') if chars[idx] not in 'aeiou' else random.choice('bcdfg')
            candidate = "".join(chars)
            if candidate != w and candidate not in misspells:
                misspells.append(candidate)
        questions.append(make_question(qtid,
            "Choose the correctly spelled word:",
            w, misspells, f"Correct spelling: {w}", 1))
    return questions


def gen_tenses(conn, count=1500):
    """Build `count` fill-in-the-tense questions from a fixed template set.

    Each template is (sentence with blank, correct form, [three wrong forms],
    tense name). The tense name is used as the explanation text.

    Args:
        conn: Database handle forwarded to get_qtid.
        count: Number of questions to build.

    Returns:
        A list of make_question() results, or an empty list when get_qtid
        returns a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Grammar", "Tenses", "Fill in correct tense")
    if not qtid:
        return questions
    templates = [
        ("She ___ to school every day.", "goes", ["go", "went", "going"], "Simple Present"),
        ("They ___ playing football yesterday.", "were", ["was", "are", "is"], "Past Continuous"),
        ("He ___ the work by tomorrow.", "will finish", ["finished", "finishes", "finishing"], "Simple Future"),
        ("I ___ this book already.", "have read", ["had read", "read", "reading"], "Present Perfect"),
        ("She ___ dinner when I arrived.", "was cooking", ["cooked", "cooks", "cooking"], "Past Continuous"),
        # "have been" (have + past participle of "be") is Present Perfect, not
        # Present Perfect Continuous, which would need "have been <verb>-ing".
        ("We ___ here since morning.", "have been", ["are", "were", "was"], "Present Perfect"),
        ("The train ___ before we reached.", "had left", ["left", "leaves", "leaving"], "Past Perfect"),
        ("By next year, I ___ my degree.", "will have completed", ["complete", "completed", "completing"], "Future Perfect"),
        ("He ___ a letter now.", "is writing", ["writes", "wrote", "written"], "Present Continuous"),
        ("They ___ the match last week.", "won", ["win", "wins", "winning"], "Simple Past"),
        ("She ___ the piano since childhood.", "has been playing", ["plays", "played", "play"], "Present Perfect Continuous"),
        ("I ___ you tomorrow.", "will call", ["called", "call", "calling"], "Simple Future"),
    ]
    for _ in range(count):
        q_text, correct, wrongs, tense = random.choice(templates)
        questions.append(make_question(qtid, f"Fill in the blank: {q_text}",
            correct, wrongs, f"Tense: {tense}", 1))
    return questions


def gen_articles(conn, count=1000):
    """Build `count` fill-in-the-article questions from a fixed template set.

    Each template is (sentence with blank, correct article, [three wrong
    options]); templates are drawn with replacement.

    Args:
        conn: Database handle forwarded to get_qtid.
        count: Number of questions to build.

    Returns:
        A list of make_question() results, or an empty list when get_qtid
        returns a falsy id.
    """
    qtid = get_qtid(conn, SUBJECT, "Grammar", "Articles", "Fill in correct article")
    if not qtid:
        return []

    bank = [
        ("___ apple a day keeps the doctor away.", "An", ["A", "The", "No article"]),
        ("He is ___ honest man.", "an", ["a", "the", "no article"]),
        ("___ sun rises in the east.", "The", ["A", "An", "No article"]),
        ("She is ___ doctor.", "a", ["an", "the", "no article"]),
        ("I saw ___ elephant in the zoo.", "an", ["a", "the", "no article"]),
        ("___ Ganges is a holy river.", "The", ["A", "An", "No article"]),
        ("He gave me ___ useful tip.", "a", ["an", "the", "no article"]),
        ("___ gold is a precious metal.", "No article", ["A", "An", "The"]),
        ("She is ___ European.", "a", ["an", "the", "no article"]),
        ("I need ___ umbrella.", "an", ["a", "the", "no article"]),
    ]
    # The explanation text is the same for every article question.
    explanation = "Article rule applied"

    output = []
    for _ in range(count):
        sentence, answer, distractors = random.choice(bank)
        prompt = f"Fill in the correct article: {sentence}"
        output.append(make_question(qtid, prompt, answer, distractors, explanation, 1))
    return output


def gen_prepositions(conn, count=1000):
    """Build `count` fill-in-the-preposition questions from fixed templates.

    Each template is (sentence with blank, correct preposition, [three wrong
    options]); templates are drawn with replacement. Wrong options must not
    yield a grammatical sentence, otherwise the question has two right
    answers.

    Args:
        conn: Database handle forwarded to get_qtid.
        count: Number of questions to build.

    Returns:
        A list of make_question() results, or an empty list when get_qtid
        returns a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Grammar", "Prepositions", "Fill in preposition")
    if not qtid:
        return questions
    templates = [
        ("The book is ___ the table.", "on", ["in", "at", "by"]),
        ("She arrived ___ Monday.", "on", ["in", "at", "by"]),
        ("He lives ___ Mumbai.", "in", ["on", "at", "by"]),
        ("The meeting is ___ 3 PM.", "at", ["in", "on", "by"]),
        ("I have been waiting ___ morning.", "since", ["for", "from", "by"]),
        ("She is good ___ mathematics.", "at", ["in", "on", "with"]),
        ("He is fond ___ music.", "of", ["with", "in", "at"]),
        # "on" was removed here: "jumped on the wall" is valid English.
        ("The cat jumped ___ the wall.", "over", ["of", "in", "at"]),
        ("She is interested ___ painting.", "in", ["on", "at", "by"]),
        # "in" was removed here: "walked in the park" is valid English.
        ("He walked ___ the park.", "through", ["of", "on", "at"]),
        ("They traveled ___ train.", "by", ["in", "on", "with"]),
        ("The match starts ___ 5 o'clock.", "at", ["in", "on", "by"]),
    ]
    for _ in range(count):
        q_text, correct, wrongs = random.choice(templates)
        questions.append(make_question(qtid, f"Fill in the correct preposition: {q_text}",
            correct, wrongs, f"Preposition: {correct}", 1))
    return questions


def gen_voice(conn, count=1500):
    """Build `count` active-to-passive conversion questions.

    Each template is (active sentence, correct passive form, [three incorrect
    passive forms]); templates are drawn with replacement.

    Args:
        conn: Database handle forwarded to get_qtid.
        count: Number of questions to build.

    Returns:
        A list of make_question() results, or an empty list when get_qtid
        returns a falsy id.
    """
    qtid = get_qtid(conn, SUBJECT, "Sentence Structure", "Active and Passive Voice", "Convert to passive")
    if not qtid:
        return []

    conversions = [
        ("She writes a letter.", "A letter is written by her.", ["A letter was written by her.", "A letter were written by her.", "A letter has written by her."]),
        ("He plays cricket.", "Cricket is played by him.", ["Cricket was played by him.", "Cricket were played by him.", "Cricket has played by him."]),
        ("They are building a house.", "A house is being built by them.", ["A house was being built by them.", "A house has been built by them.", "A house is built by them."]),
        ("She cooked food.", "Food was cooked by her.", ["Food is cooked by her.", "Food has been cooked by her.", "Food was being cooked by her."]),
        ("I have finished the work.", "The work has been finished by me.", ["The work was finished by me.", "The work is finished by me.", "The work had been finished by me."]),
        ("The teacher teaches the students.", "The students are taught by the teacher.", ["The students were taught by the teacher.", "The students has been taught by the teacher.", "The students is taught by the teacher."]),
        ("He will write a book.", "A book will be written by him.", ["A book would be written by him.", "A book shall be written by him.", "A book is written by him."]),
        ("Ram killed Ravana.", "Ravana was killed by Ram.", ["Ravana is killed by Ram.", "Ravana has been killed by Ram.", "Ravana were killed by Ram."]),
    ]

    def one_question():
        # Draw a template and wrap it in the question/explanation wording.
        active, passive, wrongs = random.choice(conversions)
        return make_question(
            qtid,
            f"Convert to passive voice: '{active}'",
            passive,
            wrongs,
            f"Passive: {passive}",
            1,
        )

    return [one_question() for _ in range(count)]


def gen_direct_indirect(conn, count=1500):
    """Build `count` direct-to-indirect speech conversion questions.

    Each template is (direct speech, correct indirect form, [three incorrect
    conversions]); templates are drawn with replacement.

    Args:
        conn: Database handle forwarded to get_qtid.
        count: Number of questions to build.

    Returns:
        A list of make_question() results, or an empty list when get_qtid
        returns a falsy id.
    """
    qtid = get_qtid(conn, SUBJECT, "Sentence Structure", "Direct and Indirect Speech", "Convert to indirect speech")
    if not qtid:
        return []

    conversions = [
        (
            'He said, "I am happy."',
            'He said that he was happy.',
            ['He said that he is happy.', 'He said that I am happy.', 'He told that he was happy.'],
        ),
        (
            'She said, "I will come tomorrow."',
            'She said that she would come the next day.',
            ['She said that she will come tomorrow.', 'She told she would come next day.', 'She said she will come.'],
        ),
        (
            'He asked, "Where do you live?"',
            'He asked where I lived.',
            ['He asked where do I live.', 'He asked that where I lived.', 'He asked me where I live.'],
        ),
        (
            'She said, "I have finished my work."',
            'She said that she had finished her work.',
            ['She said that she has finished her work.', 'She told she had finished work.', 'She said she finished her work.'],
        ),
        (
            'The teacher said, "The Earth revolves around the Sun."',
            'The teacher said that the Earth revolves around the Sun.',
            ['The teacher said the Earth revolved around the Sun.', 'The teacher told the Earth revolves around Sun.', 'The teacher said Earth revolving around Sun.'],
        ),
    ]

    built = []
    for _ in range(count):
        direct, indirect, wrongs = random.choice(conversions)
        prompt = f"Convert to indirect speech: {direct}"
        explanation = f"Indirect: {indirect}"
        built.append(make_question(qtid, prompt, indirect, wrongs, explanation, 1))
    return built


def gen_sentence_improvement(conn, count=2000):
    """Build `count` sentence-improvement questions from fixed templates.

    Each template is (faulty sentence, correct replacement, [three wrong
    replacements]); templates are drawn with replacement. These questions
    pass 2 as the last make_question argument, where the vocabulary
    generators pass 1 (presumably a difficulty level -- confirm).

    Args:
        conn: Database handle forwarded to get_qtid.
        count: Number of questions to build.

    Returns:
        A list of make_question() results, or an empty list when get_qtid
        returns a falsy id.
    """
    qtid = get_qtid(conn, SUBJECT, "Sentence Structure", "Sentence Improvement", "Replace underlined part")
    output = []
    if not qtid:
        return output

    faulty_sentences = [
        ("He don't know the answer.", "doesn't know", ["don't knows", "didn't knew", "not know"]),
        ("She is more taller than her sister.", "taller", ["most taller", "more tall", "tallest"]),
        ("I am going to market.", "to the market", ["in market", "for market", "at market"]),
        ("He told to me a story.", "told me", ["said to me", "tell me", "telling me"]),
        ("Each of the boys have done their work.", "has done his", ["have done his", "has did their", "have done their"]),
        ("One should do his duty.", "one's duty", ["their duty", "your duty", "our duty"]),
        ("She is knowing the answer.", "knows", ["is know", "was knowing", "has knowing"]),
        ("I am having a car.", "have", ["is having", "has", "having"]),
        ("He prevented me to go.", "from going", ["for going", "about going", "of going"]),
        ("She is elder than me.", "older than I", ["elder than I", "more elder than me", "oldest than me"]),
    ]

    made = 0
    while made < count:
        sentence, fix, bad_fixes = random.choice(faulty_sentences)
        output.append(
            make_question(qtid, f"Improve the sentence: '{sentence}'", fix, bad_fixes, f"Correct: {fix}", 2)
        )
        made += 1
    return output


def gen_error_detection(conn, count=2500):
    """Build `count` spot-the-error questions from fixed templates.

    Each template is (faulty sentence, description of the error, [three wrong
    options]); templates are drawn with replacement. The options refer to
    "Part A/B/C", but the sentence text itself is emitted without part
    markers.

    Args:
        conn: Database handle forwarded to get_qtid.
        count: Number of questions to build.

    Returns:
        A list of make_question() results, or an empty list when get_qtid
        returns a falsy id.
    """
    qtid = get_qtid(conn, SUBJECT, "Error Detection", "Spot the Error", "Identify erroneous part")
    if not qtid:
        return []

    cases = [
        ("He go to school daily.", "Part A: 'go' should be 'goes'", ["Part B: 'to' is wrong", "Part C: 'daily' is wrong", "No error"]),
        ("She don't like ice cream.", "Part A: 'don't' should be 'doesn't'", ["Part B: 'like' is wrong", "Part C is wrong", "No error"]),
        ("The news are good.", "Part A: 'are' should be 'is'", ["Part B: 'good' is wrong", "Part C is wrong", "No error"]),
        ("Mathematics are my favourite subject.", "Part A: 'are' should be 'is'", ["Part B is wrong", "Part C is wrong", "No error"]),
        ("Each of the students have passed.", "Part B: 'have' should be 'has'", ["Part A is wrong", "Part C is wrong", "No error"]),
        ("He is more stronger than me.", "Part A: 'more stronger' should be 'stronger'", ["Part B is wrong", "Part C is wrong", "No error"]),
        ("I am agree with you.", "Part A: 'am agree' should be 'agree'", ["Part B is wrong", "Part C is wrong", "No error"]),
        ("The furniture are expensive.", "Part A: 'are' should be 'is'", ["Part B is wrong", "Part C is wrong", "No error"]),
        ("He gave me a advise.", "Part B: 'advise' should be 'advice'", ["Part A is wrong", "Part C is wrong", "No error"]),
        ("She discuss about the matter.", "Part A: 'discuss about' should be 'discussed'", ["Part B is wrong", "Part C is wrong", "No error"]),
    ]

    # Lazy stream of random picks; each pick is consumed before the next draw.
    drawn = (random.choice(cases) for _ in range(count))
    return [
        make_question(qtid, f"Find the error in: '{sentence}'", error, wrongs, f"Error: {error}", 2)
        for sentence, error, wrongs in drawn
    ]


def gen_fill_blanks(conn, count=2000):
    """Build `count` single-blank sentence-completion questions.

    Each template is (sentence with blank, correct word, [three wrong words]);
    templates are drawn with replacement. A sentence must carry enough context
    that only the keyed word fits.

    Args:
        conn: Database handle forwarded to get_qtid.
        count: Number of questions to build.

    Returns:
        A list of make_question() results, or an empty list when get_qtid
        returns a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Sentence Structure", "Sentence Completion", "Fill in the blank (single)")
    if not qtid:
        return questions
    templates = [
        ("Hard work is the key to ___.", "success", ["failure", "defeat", "loss"]),
        # The bare "The weather is ___ today." accepted all four options; the
        # added clause makes "pleasant" the only sensible choice.
        ("The weather is ___ today, so we are going for a picnic.", "pleasant", ["unpleasant", "horrible", "terrible"]),
        ("She showed great ___ in the face of danger.", "courage", ["cowardice", "fear", "hesitation"]),
        ("The doctor ___ the patient carefully.", "examined", ["ignored", "neglected", "avoided"]),
        ("He has a strong ___ for justice.", "passion", ["hatred", "dislike", "disregard"]),
        ("The students were ___ to learn the new topic.", "eager", ["reluctant", "unwilling", "indifferent"]),
        ("Her ___ attitude made everyone uncomfortable.", "arrogant", ["humble", "polite", "modest"]),
        ("The company achieved remarkable ___ this year.", "growth", ["decline", "loss", "failure"]),
        ("He is ___ of solving complex problems.", "capable", ["incapable", "unable", "unfit"]),
        ("The ___ of the river was very strong after the rain.", "current", ["calm", "stillness", "silence"]),
        ("She has an ___ personality that attracts people.", "amiable", ["hostile", "rude", "cold"]),
        ("The government ___ new policies for education.", "introduced", ["removed", "cancelled", "deleted"]),
    ]
    for _ in range(count):
        sentence, correct, wrongs = random.choice(templates)
        questions.append(make_question(qtid,
            f"Fill in the blank: {sentence}",
            correct, wrongs, f"Answer: {correct}", 1))
    return questions


def gen_sentence_rearrangement(conn, count=1500):
    """Build `count` sentence-rearrangement questions from fixed templates.

    Each template is (parts in correct order, correct sentence, [three wrong
    arrangements]). The parts are shown in a random order joined by " / ".

    The template parts are never mutated: a shuffled copy is made for every
    question, and it is re-drawn if it happens to match the correct order so
    the prompt never shows the answer as given.

    Args:
        conn: Database handle forwarded to get_qtid.
        count: Number of questions to build.

    Returns:
        A list of make_question() results, or an empty list when get_qtid
        returns a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Sentence Structure", "Sentence Rearrangement", "Arrange sentence parts")
    if not qtid:
        return questions
    # Parts are tuples so the correct order stays available for comparison.
    templates = [
        (("The early bird", "catches", "the worm"), "The early bird catches the worm",
         ["catches the worm the early bird", "the worm catches the early bird", "the worm the early bird catches"]),
        (("Knowledge is", "better than", "wealth"), "Knowledge is better than wealth",
         ["better than wealth knowledge is", "wealth is better than knowledge", "is knowledge better than wealth"]),
        (("Honesty", "is the", "best policy"), "Honesty is the best policy",
         ["is the best policy honesty", "the best policy is honesty", "policy best is the honesty"]),
        (("United we stand", "divided", "we fall"), "United we stand divided we fall",
         ["divided we fall united we stand", "we fall divided united we stand", "stand united we divided fall we"]),
        (("Practice makes", "a man", "perfect"), "Practice makes a man perfect",
         ["a man perfect practice makes", "makes practice a man perfect", "perfect a man makes practice"]),
    ]
    for _ in range(count):
        parts, correct, wrongs = random.choice(templates)
        ordered = list(parts)
        shuffled = random.sample(ordered, len(ordered))
        # Only retry when a different order is actually possible; otherwise a
        # one-part (or all-identical) template would loop forever.
        if len(set(ordered)) > 1:
            while shuffled == ordered:
                shuffled = random.sample(ordered, len(ordered))
        questions.append(make_question(qtid,
            f"Arrange in correct order: {' / '.join(shuffled)}",
            correct, wrongs, f"Correct order: {correct}", 2))
    return questions


def gen_cloze_test(conn, count=1500):
    """Build `count` cloze-test questions from fixed proverb-style templates.

    Each template is (sentence with blank, correct word, [three wrong words]);
    templates are drawn with replacement.

    Args:
        conn: Database handle forwarded to get_qtid.
        count: Number of questions to build.

    Returns:
        A list of make_question() results, or an empty list when get_qtid
        returns a falsy id.
    """
    questions = []
    qtid = get_qtid(conn, SUBJECT, "Comprehension", "Cloze Test", "Fill appropriate word")
    if not qtid:
        return questions
    # Every template supplies its distractors as a list, so no per-template
    # normalisation is needed before calling make_question.
    templates = [
        ("Education is the most powerful ___ to change the world.", "weapon", ["weakness", "problem", "barrier"]),
        ("The ___ of success is hard work and dedication.", "foundation", ["destruction", "failure", "absence"]),
        ("A healthy mind lives in a healthy ___.", "body", ["house", "room", "place"]),
        ("Books are the ___ teachers of all time.", "best", ["worst", "slowest", "latest"]),
        ("Time and ___ wait for none.", "tide", ["money", "people", "luck"]),
        ("Prevention is better than ___.", "cure", ["disease", "medicine", "treatment"]),
        ("All that ___ is not gold.", "glitters", ["shines", "sparkles", "reflects"]),
        ("A rolling stone gathers no ___.", "moss", ["grass", "dust", "speed"]),
        ("Where there is a will, there is a ___.", "way", ["wall", "path", "road"]),
        ("Rome was not built in a ___.", "day", ["week", "month", "hour"]),
    ]
    for _ in range(count):
        sentence, correct, wrongs = random.choice(templates)
        questions.append(make_question(qtid,
            f"Fill in the blank: {sentence}",
            correct, wrongs, f"Answer: {correct}", 1))
    return questions


def gen_subject_verb(conn, count=1000):
    """Build `count` subject-verb agreement questions from fixed templates.

    Each template is (sentence with blank, correct verb, [three wrong verbs]);
    templates are drawn with replacement.

    Args:
        conn: Database handle forwarded to get_qtid.
        count: Number of questions to build.

    Returns:
        A list of make_question() results, or an empty list when get_qtid
        returns a falsy id.
    """
    qtid = get_qtid(conn, SUBJECT, "Grammar", "Subject-Verb Agreement", "Choose correct verb")
    if not qtid:
        return []

    agreement_bank = [
        ("The team ___ playing well this season.", "is", ["are", "were", "have been"]),
        ("Neither the teacher nor the students ___ present.", "were", ["was", "is", "has been"]),
        ("Each of the boys ___ given a prize.", "was", ["were", "are", "have been"]),
        ("Either you or I ___ going to attend.", "am", ["are", "is", "were"]),
        ("The quality of these apples ___ good.", "is", ["are", "were", "have been"]),
        ("Bread and butter ___ my favourite breakfast.", "is", ["are", "were", "have been"]),
        ("One of my friends ___ from Delhi.", "is", ["are", "were", "have been"]),
        ("The news ___ very surprising.", "was", ["were", "are", "have been"]),
        ("No news ___ good news.", "is", ["are", "were", "have been"]),
        ("Mathematics ___ my favourite subject.", "is", ["are", "were", "have been"]),
    ]

    collected = []
    for _ in range(count):
        picked = random.choice(agreement_bank)
        sentence, verb, wrong_verbs = picked
        question = make_question(
            qtid,
            f"Choose the correct verb: {sentence}",
            verb,
            wrong_verbs,
            f"Correct: {verb}",
            2,
        )
        collected.append(question)
    return collected


def generate_all(conn):
    """Run every English generator, store the results, and return the total.

    Each generator is called with `conn` and its configured count; a
    per-category line is printed as it finishes. All questions are then
    handed to insert_questions_batch in slices of 5000, and a total line is
    printed.

    Args:
        conn: Database handle forwarded to the generators and to
            insert_questions_batch.

    Returns:
        The number of questions produced across all generators.
    """
    # (label printed in the progress line, generator function, question count)
    plan = (
        ("Synonyms", gen_synonyms, 2500),
        ("Antonyms", gen_antonyms, 2500),
        ("One Word Substitution", gen_one_word, 2000),
        ("Idioms & Phrases", gen_idioms, 2000),
        ("Spelling", gen_spelling, 1500),
        ("Tenses", gen_tenses, 1500),
        ("Articles", gen_articles, 1000),
        ("Prepositions", gen_prepositions, 1000),
        ("Subject-Verb Agreement", gen_subject_verb, 1000),
        ("Active/Passive Voice", gen_voice, 1500),
        ("Direct/Indirect Speech", gen_direct_indirect, 1500),
        ("Sentence Improvement", gen_sentence_improvement, 2000),
        ("Error Detection", gen_error_detection, 2500),
        ("Fill in Blanks", gen_fill_blanks, 2000),
        ("Sentence Rearrangement", gen_sentence_rearrangement, 1500),
        ("Cloze Test", gen_cloze_test, 1500),
    )

    all_questions = []
    total = 0
    for label, builder, how_many in plan:
        produced = builder(conn, how_many)
        all_questions += produced
        print(f"  {label}: {len(produced)} questions")
        total = total + len(produced)

    # Insert in fixed-size slices rather than one call for the whole list.
    batch_size = 5000
    start = 0
    while start < len(all_questions):
        insert_questions_batch(conn, all_questions[start:start + batch_size])
        start += batch_size

    print(f"  TOTAL English: {total}")
    return total


if __name__ == '__main__':
    # Script entry point: open a connection, generate and insert everything.
    conn = get_db()
    try:
        print("Generating English Language questions...")
        generate_all(conn)
    finally:
        # Close the connection even if generation or insertion raises.
        conn.close()