from __future__ import annotations

from datetime import date, datetime
import json
import re
from pathlib import Path
from typing import Any

from openpyxl import load_workbook


WORKBOOK_NAME = "Copia de CEFyT ALUMNOS - .xlsx"
CAREER_CS = "Profesorado de Ciencias Sagradas"
CAREER_BT = "Bachillerato en Teología"


def normalize_text(value: Any) -> str:
    if value is None:
        return ""
    return re.sub(r"\s+", " ", str(value)).strip()


def career_from_text(value: str) -> str | None:
    text = normalize_text(value).lower()
    if not text:
        return None
    if "profesorado" in text or "ciencias sagradas" in text:
        return CAREER_CS
    if "bachill" in text:
        return CAREER_BT
    return None


def is_bachillerato_plan(plan_name: str | None) -> bool:
    return "quinquenio" in normalize_text(plan_name).lower()


def infer_first_year_career(subject_name: str) -> str:
    subject = normalize_text(subject_name).lower()
    if subject.startswith("pedagog"):
        return CAREER_CS
    return CAREER_BT


def year_sort(label: str) -> int:
    clean = normalize_text(label).upper()
    mapping = {
        "AÑO PROPEDÉUTICO": 0,
        "AÃ‘O PROPEDÃ‰UTICO": 0,
        "PRIMER AÑO": 1,
        "PRIMER AÃ‘O": 1,
        "1° AÑO": 1,
        "1Â° AÃ‘O": 1,
        "SEGUNDO AÑO": 2,
        "SEGUNDO AÃ‘O": 2,
        "2° AÑO": 2,
        "2Â° AÃ‘O": 2,
        "TERCER AÑO": 3,
        "TERCER AÃ‘O": 3,
        "3° AÑO": 3,
        "3Â° AÃ‘O": 3,
        "CUARTO AÑO": 4,
        "CUARTO AÃ‘O": 4,
        "4° AÑO": 4,
        "4Â° AÃ‘O": 4,
        "QUINTO AÑO": 5,
        "QUINTO AÃ‘O": 5,
        "5° AÑO": 5,
        "5Â° AÃ‘O": 5,
    }
    return mapping.get(clean, 999)


def grade_to_string(value: Any) -> str | None:
    if value is None:
        return None
    if isinstance(value, int):
        return str(value)
    if isinstance(value, float):
        if value.is_integer():
            return str(int(value))
        return str(value).replace(".", ",")
    text = normalize_text(value)
    return text or None


def date_to_string(value: Any) -> str | None:
    if value is None:
        return None
    if isinstance(value, datetime):
        return value.date().isoformat()
    if isinstance(value, date):
        return value.isoformat()
    text = normalize_text(value)
    if not text or text in {"-", "None"}:
        return None

    for fmt in ("%d/%m/%Y", "%d/%m%Y", "%d/%m/%y"):
        try:
            return datetime.strptime(text, fmt).date().isoformat()
        except ValueError:
            continue

    return None


def is_current_header(row: list[str]) -> bool:
    return row[0].lower() in {"n°", "nÂ°"} and "espacio curricular" in row[1].lower()


def is_legacy_header(row: list[str]) -> bool:
    return row[0].lower() == "fecha" and "asignaturas" in row[1].lower()


def is_year_header(value: str) -> bool:
    text = normalize_text(value).upper()
    return bool(
        re.match(r"^(PRIMER|SEGUNDO|TERCER|CUARTO|QUINTO)\s+A(N|Ñ)O$", text)
        or re.match(r"^\d[°Â°]?\s*A(N|Ñ)O$", text)
        or text in {"AÑO PROPEDÉUTICO", "AÃ‘O PROPEDÃ‰UTICO"}
    )


def parse_sheet(ws) -> dict[str, Any]:
    source_sheet_name = ws.title
    title_career = career_from_text(ws["A1"].value or "")
    plan_name = None
    resolution = None
    legajo_parts: list[str] = []
    student_name = normalize_text(ws["C6"].value) or normalize_text(source_sheet_name)

    for row_idx in range(1, min(ws.max_row, 12) + 1):
        col_b = normalize_text(ws.cell(row_idx, 2).value).lower()
        if col_b == "plan":
            plan_name = normalize_text(ws.cell(row_idx, 3).value) or None
            if plan_name and not str(plan_name).lower().startswith("plan"):
                plan_name = f"Plan {plan_name}"
        elif col_b.startswith("res. min."):
            resolution = normalize_text(ws.cell(row_idx, 3).value) or None
        elif col_b.startswith(("n° legajo", "nÂ° legajo")):
            for col in range(3, 7):
                part = normalize_text(ws.cell(row_idx, col).value)
                if part:
                    legajo_parts.append(part)

    rows = []
    current_career = CAREER_BT if is_bachillerato_plan(plan_name) else (title_career or CAREER_CS)
    current_profile: dict[str, Any] | None = None
    current_year = None
    current_year_sort = 999
    mode = None
    profile_order = 0
    observations: list[str] = []

    def ensure_profile(career_name: str) -> dict[str, Any]:
        nonlocal current_profile, profile_order
        if current_profile and current_profile["career_name"] == career_name and not current_profile["closed"]:
            return current_profile

        profile_order += 1
        current_profile = {
            "career_name": career_name,
            "plan_name": plan_name,
            "resolution_text": resolution,
            "legajo_text": " ".join(legajo_parts) or None,
            "display_order": profile_order,
            "notes": None,
            "entries": [],
            "closed": False,
        }
        rows.append(current_profile)
        return current_profile

    def maybe_switch_career_for_restarted_first_year(next_year_sort: int) -> None:
        nonlocal current_profile, current_career
        if (
            current_profile
            and current_profile["entries"]
            and current_profile["career_name"] == CAREER_CS
            and current_year_sort >= 4
            and next_year_sort == 1
        ):
            current_profile["closed"] = True
            current_profile = None
            current_career = CAREER_BT

    def maybe_adjust_profile_career(subject_name: str) -> None:
        if current_profile is None or current_year_sort != 1 or current_profile["entries"]:
            return
        if is_bachillerato_plan(plan_name):
            current_profile["career_name"] = CAREER_BT
            return
        current_profile["career_name"] = infer_first_year_career(subject_name)

    for row_idx in range(1, ws.max_row + 1):
        first_eight = [normalize_text(ws.cell(row_idx, col).value) for col in range(1, 9)]
        if not any(first_eight):
            continue

        row_text = " ".join(part for part in first_eight if part)
        row_career = None
        if not is_current_header(first_eight) and not is_legacy_header(first_eight):
            row_career = career_from_text(row_text)

        if row_career and row_idx != 1:
            current_career = row_career
            if current_profile:
                current_profile["closed"] = True
            current_profile = None
            current_year = None
            current_year_sort = 999
            mode = None
            continue

        if is_year_header(first_eight[0]):
            next_year = first_eight[0]
            next_year_sort = year_sort(next_year)
            maybe_switch_career_for_restarted_first_year(next_year_sort)
            ensure_profile(current_career)
            current_year = next_year
            current_year_sort = next_year_sort
            mode = None
            continue

        if first_eight[1].lower().startswith("observaciones"):
            note = normalize_text(ws.cell(row_idx + 1, 2).value)
            if note:
                observations.append(note)
            continue

        if is_current_header(first_eight):
            ensure_profile(current_career)
            mode = "current"
            continue

        if is_legacy_header(first_eight):
            ensure_profile(current_career)
            mode = "legacy"
            continue

        if mode == "legacy" and first_eight[3].upper() in {"EN Nº", "EN NÂº"}:
            continue

        if current_profile is None or current_year is None or mode is None:
            continue

        if mode == "current":
            subject_name = first_eight[1]
            if not subject_name:
                continue
            maybe_adjust_profile_career(subject_name)
            current_profile["entries"].append(
                {
                    "year_label": current_year,
                    "year_sort": current_year_sort,
                    "row_order": len(current_profile["entries"]) + 1,
                    "row_number": first_eight[0] or None,
                    "subject_name": subject_name,
                    "status": first_eight[2] or None,
                    "final_grade": grade_to_string(ws.cell(row_idx, 4).value),
                    "exam_date": date_to_string(ws.cell(row_idx, 5).value),
                    "establishment": None,
                    "notes": first_eight[5] or None,
                }
            )
        elif mode == "legacy":
            subject_name = first_eight[1]
            if not subject_name:
                continue
            maybe_adjust_profile_career(subject_name)
            current_profile["entries"].append(
                {
                    "year_label": current_year,
                    "year_sort": current_year_sort,
                    "row_order": len(current_profile["entries"]) + 1,
                    "row_number": None,
                    "subject_name": subject_name,
                    "status": first_eight[2] or None,
                    "final_grade": grade_to_string(ws.cell(row_idx, 4).value),
                    "exam_date": date_to_string(ws.cell(row_idx, 1).value),
                    "establishment": first_eight[5] or None,
                    "notes": None,
                }
            )

    for profile in rows:
        profile["notes"] = " ".join(observations) or None
        profile.pop("closed", None)

    return {
        "source_sheet_name": source_sheet_name,
        "source_student_name": student_name,
        "profiles": rows,
    }


def main() -> None:
    project_root = Path(__file__).resolve().parents[2]
    workbook_path = project_root / WORKBOOK_NAME
    output_path = project_root / "asistencias-cefyt" / "database" / "data" / "student_academic_histories.json"

    workbook = load_workbook(workbook_path, data_only=True)
    payload = []
    for ws in workbook.worksheets:
        if re.fullmatch(r"20\d{2}", ws.title):
            continue
        payload.append(parse_sheet(ws))

    output_path.write_text(
        json.dumps(payload, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )

    total_profiles = sum(len(student["profiles"]) for student in payload)
    total_entries = sum(len(profile["entries"]) for student in payload for profile in student["profiles"])
    print(f"Exportados {len(payload)} alumnos, {total_profiles} perfiles y {total_entries} registros a {output_path}")


if __name__ == "__main__":
    main()
