#!/usr/bin/env python3
# lighttext.py — LightText (.lt) to HTML, Markdown or EPUB converter
# Usage: python3 lighttext.py input.lt output.html
#        python3 lighttext.py input.lt output.md
#        python3 lighttext.py input.lt output.epub

import sys
import os
import re
import zipfile
import uuid
from datetime import datetime


def html_escape(text):
    """Return *text* with ``&``, ``<``, ``>`` and ``"`` replaced by HTML entities.

    Every character is mapped independently in one pass, so the ampersands
    that belong to the generated entities are never escaped a second time.
    """
    entity_table = str.maketrans({
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
    })
    return text.translate(entity_table)


# Characters that LightText allows to be backslash-escaped.
_ESCAPABLE_CHARS = "*_[]>-\\"

# Each escapable character is swapped for a NUL-delimited numeric placeholder.
# The placeholders deliberately contain none of the markup characters
# (* _ [ ] > - \), so the inline-formatting regexes cannot match inside them.
_ESCAPE_PLACEHOLDERS = {
    char: f"\x00{index}\x00" for index, char in enumerate(_ESCAPABLE_CHARS)
}
_PLACEHOLDER_CHARS = {
    placeholder: char for char, placeholder in _ESCAPE_PLACEHOLDERS.items()
}

_ESCAPE_SEQUENCE_RE = re.compile(r"\\([*_\[\]>\-\\])")
_PLACEHOLDER_RE = re.compile("\x00[0-9]+\x00")


def _protect_escapes(text):
    """Replace backslash-escaped characters in *text* with inert placeholders.

    The text is scanned once from left to right, so ``\\\\*`` is read as an
    escaped backslash followed by a live ``*`` rather than a backslash followed
    by an escaped star.

    Args:
        text: Raw LightText inline content.

    Returns:
        The text with every recognised escape sequence replaced by a
        placeholder that ``_restore_escapes`` turns back into the bare
        character.
    """
    return _ESCAPE_SEQUENCE_RE.sub(
        lambda m: _ESCAPE_PLACEHOLDERS[m.group(1)], text)


def _restore_escapes(text):
    """Turn placeholders produced by ``_protect_escapes`` back into characters.

    Args:
        text: Text that has been through ``_protect_escapes`` and any inline
            formatting.

    Returns:
        The text with each placeholder replaced by the literal character it
        stands for. NUL-delimited runs that are not known placeholders are
        left untouched.
    """
    return _PLACEHOLDER_RE.sub(
        lambda m: _PLACEHOLDER_CHARS.get(m.group(0), m.group(0)), text)


def inline_html(text):
    """Convert LightText inline markup in *text* to HTML.

    Handles ``[label > url]`` links, ``*bold*`` and ``_emphasis_``, and
    honours backslash escapes via ``_protect_escapes`` / ``_restore_escapes``.

    Links are swapped for opaque keys before the rest of the text is escaped
    and formatted, and are put back only after the emphasis regexes have run.
    That way ``_`` or ``*`` characters inside a URL are never rewritten into
    ``<em>`` / ``<strong>`` tags inside the ``href`` attribute. Emphasis is
    still applied to the link label.

    Args:
        text: One paragraph, heading or list item of LightText source.

    Returns:
        The HTML fragment for that text.
    """
    def emphasize(fragment):
        # Bold first, then emphasis, on already HTML-escaped text.
        fragment = re.sub(r'\*([^*]+)\*', r'<strong>\1</strong>', fragment)
        return re.sub(r'_([^_]+)_', r'<em>\1</em>', fragment)

    text = _protect_escapes(text)
    links = {}

    def stash(m):
        # The key avoids '_' and '*' so the emphasis regexes cannot touch it.
        key = f"\x01LTLINK{len(links)}\x01"
        label = emphasize(html_escape(m.group(1).strip()))
        url = html_escape(m.group(2).strip())
        links[key] = f'<a href="{url}">{label}</a>'
        return key

    text = re.sub(r'\[([^\]>]+?)\s*>\s*([^\]]+)\]', stash, text)
    text = emphasize(html_escape(text))
    for key, tag in links.items():
        text = text.replace(key, tag)
    return _restore_escapes(text)


def inline_md(text):
    """Convert LightText inline markup in *text* to Markdown.

    ``[label > url]`` becomes ``[label](url)`` and ``*bold*`` becomes
    ``**bold**``; ``_emphasis_`` is left as written.

    Backslash escapes (``\\*``, ``\\_``, ``\\[``, ``\\]``, ``\\>``, ``\\-``,
    ``\\\\``) are kept verbatim in the output: Markdown uses the same escape
    syntax, and emitting the bare character would let a Markdown renderer
    treat it as markup again. The escapes are set aside in a single
    left-to-right pass so the link and bold regexes cannot match them.

    Args:
        text: One paragraph, heading or list item of LightText source.

    Returns:
        The Markdown fragment for that text.
    """
    escapes = []

    def hold(m):
        escapes.append(m.group(0))
        return f"\x00{len(escapes) - 1}\x00"

    def release(m):
        index = int(m.group(1))
        # Leave unknown NUL-delimited runs alone rather than raising.
        return escapes[index] if index < len(escapes) else m.group(0)

    text = re.sub(r'\\[*_\[\]>\-\\]', hold, text)
    text = re.sub(r'\[([^\]>]+?)\s*>\s*([^\]]+)\]',
                  lambda m: f'[{m.group(1).strip()}]({m.group(2).strip()})', text)
    text = re.sub(r'\*([^*]+)\*', r'**\1**', text)
    return re.sub(r'\x00(\d+)\x00', release, text)


def is_heading(line):
    """Return True when *line* has at least one letter and every letter is uppercase.

    Non-alphabetic characters (digits, punctuation, whitespace) are ignored.
    """
    found_letter = False
    for ch in line:
        if not ch.isalpha():
            continue
        if not ch.isupper():
            return False
        found_letter = True
    return found_letter


def parse(lines, fmt):
    """Parse .lt lines and return a list of output lines in fmt (html/md/xhtml).

    Block rules, applied per line in this order:

    * a blank line ends the current paragraph and list;
    * ``- text`` is an unordered list item;
    * ``N. text`` is an ordered list item (renumbered from 1 in Markdown);
    * a line directly after a blank line (or at the start) whose letters are
      all uppercase is a heading;
    * anything else is paragraph text; consecutive lines are joined by spaces.

    List markers are tested before the heading rule so that an all-caps first
    item such as ``- MILK`` stays in its list instead of becoming a heading.

    Args:
        lines: Iterable of source lines; trailing newline characters are
            stripped from each.
        fmt: ``"html"`` or ``"xhtml"`` for HTML output; any other value
            selects Markdown.

    Returns:
        A list of output lines without a surrounding document wrapper.
    """
    is_html = fmt in ("html", "xhtml")
    inline = inline_html if is_html else inline_md

    ul_item = re.compile(r'- .+')
    ol_item = re.compile(r'(\d+\. ).+')

    out = []
    para_lines = []
    open_list = None      # None, "ul" or "ol"
    ol_counter = 0
    prev_blank = True     # start of input counts as "after a blank line"

    def flush_para():
        if para_lines:
            joined = " ".join(para_lines)
            out.append(f"<p>{inline(joined)}</p>" if is_html else inline(joined))
            para_lines.clear()

    def close_list():
        nonlocal open_list, ol_counter
        if open_list is not None and is_html:
            out.append(f"</{open_list}>")
        open_list = None
        ol_counter = 0

    def start_list(tag):
        # Switching list kind closes the previous list first.
        nonlocal open_list
        if open_list != tag:
            close_list()
            if is_html:
                out.append(f"<{tag}>")
            open_list = tag

    for raw in lines:
        line = raw.rstrip("\n").rstrip("\r")

        if not line.strip():
            flush_para()
            close_list()
            if not is_html:
                out.append("")
            prev_blank = True
            continue

        after_blank, prev_blank = prev_blank, False

        if ul_item.match(line):
            flush_para()
            start_list("ul")
            content = line[2:]
            out.append(f"<li>{inline(content)}</li>" if is_html
                       else f"- {inline(content)}")
            continue

        ol_match = ol_item.match(line)
        if ol_match:
            flush_para()
            start_list("ol")
            ol_counter += 1
            content = line[ol_match.end(1):]
            out.append(f"<li>{inline(content)}</li>" if is_html
                       else f"{ol_counter}. {inline(content)}")
            continue

        if after_blank and is_heading(line):
            flush_para()
            close_list()
            # The Markdown form carries a trailing newline so a blank line
            # follows the heading once the output lines are joined.
            out.append(f"<h1>{inline(line)}</h1>" if is_html
                       else f"# {inline(line)}\n")
            continue

        close_list()
        para_lines.append(line)

    flush_para()
    close_list()
    return out


def build_html(lines, title, xhtml=False):
    """Render .lt *lines* as a complete HTML document string.

    Args:
        lines: Source lines, passed to ``parse``.
        title: Document title; escaped before being placed in ``<title>``.
        xhtml: When true, emit an XML declaration, the XHTML namespace and
            self-closed ``<meta/>`` elements.

    Returns:
        The document text, terminated by a newline.
    """
    body_lines = parse(lines, "xhtml" if xhtml else "html")

    if xhtml:
        prologue = ['<?xml version="1.0" encoding="UTF-8"?>']
        namespace = ' xmlns="http://www.w3.org/1999/xhtml"'
        charset_meta = '<meta charset="UTF-8"/>'
        viewport_meta = '<meta name="viewport" content="width=device-width, initial-scale=1.0"/>'
    else:
        prologue = []
        namespace = ''
        charset_meta = '<meta charset="UTF-8">'
        viewport_meta = '<meta name="viewport" content="width=device-width, initial-scale=1.0">'

    document = list(prologue)
    document.append("<!DOCTYPE html>")
    document.append(f'<html lang="en"{namespace}>')
    document.append("<head>")
    document.append(charset_meta)
    document.append(viewport_meta)
    document.append(f"<title>{html_escape(title)}</title>")
    document.append("</head>")
    document.append("<body>")
    document.extend(body_lines)
    document.append("</body>")
    document.append("</html>")
    return "\n".join(document) + "\n"


def build_md(lines, title):
    """Render .lt *lines* as Markdown text, headed by ``# title``.

    The title line is followed by a blank line and then the parsed body; the
    result ends with a newline.
    """
    rendered = parse(lines, "md")
    heading = f"# {title}\n\n"
    return "".join((heading, "\n".join(rendered), "\n"))


def build_epub(lines, title, output_path):
    """Write .lt *lines* to *output_path* as a single-chapter EPUB 3 file.

    The archive contains the ``mimetype`` entry, ``META-INF/container.xml``,
    the package document, the XHTML content document and a navigation
    document. EPUB 3 requires the package metadata to carry a
    ``dcterms:modified`` timestamp and the manifest to list exactly one item
    with ``properties="nav"``, so both are generated here.

    Args:
        lines: Source lines, rendered through ``build_html(..., xhtml=True)``.
        title: Book title; used for ``dc:title`` and the navigation entry.
        output_path: Path of the ``.epub`` file to create or overwrite.
    """
    # Only `datetime` itself is imported at file level.
    from datetime import timezone

    doc_id = f"urn:uuid:{uuid.uuid4()}"
    # dcterms:modified must be UTC in the form CCYY-MM-DDThh:mm:ssZ.
    modified = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    safe_title = html_escape(title)
    content = build_html(lines, title, xhtml=True)

    container_xml = """\
<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
  </rootfiles>
</container>"""

    opf = f"""\
<?xml version="1.0" encoding="UTF-8"?>
<package version="3.0" xmlns="http://www.idpf.org/2007/opf" unique-identifier="uid">
  <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
    <dc:title>{safe_title}</dc:title>
    <dc:language>en</dc:language>
    <dc:identifier id="uid">{doc_id}</dc:identifier>
    <meta property="dcterms:modified">{modified}</meta>
  </metadata>
  <manifest>
    <item id="content" href="content.html" media-type="application/xhtml+xml"/>
    <item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
  </manifest>
  <spine>
    <itemref idref="content"/>
  </spine>
</package>"""

    nav = f"""\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="en">
<head>
<meta charset="UTF-8"/>
<title>{safe_title}</title>
</head>
<body>
<nav epub:type="toc">
<h1>Contents</h1>
<ol>
<li><a href="content.html">{safe_title}</a></li>
</ol>
</nav>
</body>
</html>
"""

    with zipfile.ZipFile(output_path, "w") as zf:
        # mimetype must be first and uncompressed
        zf.writestr(zipfile.ZipInfo("mimetype"), "application/epub+zip",
                    compress_type=zipfile.ZIP_STORED)
        zf.writestr("META-INF/container.xml", container_xml)
        zf.writestr("OEBPS/content.opf", opf)
        zf.writestr("OEBPS/nav.xhtml", nav)
        zf.writestr("OEBPS/content.html", content)


def convert(input_path, output_path):
    """Convert the .lt file at *input_path* to the format implied by *output_path*.

    The output extension (case-insensitive) selects the builder: ``.html``,
    ``.md`` or ``.epub``. Any other extension prints an error to stderr and
    exits with status 1. The document title is the input file name without
    its extension. A ``Done:`` line is printed on success.
    """
    suffix = os.path.splitext(output_path)[1].lower()
    if suffix not in (".html", ".md", ".epub"):
        print("Error: output must be .html, .md or .epub", file=sys.stderr)
        sys.exit(1)

    base_name = os.path.basename(input_path)
    title, _ = os.path.splitext(base_name)

    with open(input_path, "r", encoding="utf-8") as source:
        lines = source.readlines()

    if suffix == ".epub":
        # The EPUB builder writes the archive itself.
        build_epub(lines, title, output_path)
    else:
        render = build_html if suffix == ".html" else build_md
        document = render(lines, title)
        with open(output_path, "w", encoding="utf-8") as target:
            target.write(document)

    print(f"Done: {output_path}")


if __name__ == "__main__":
    # Command-line entry point: expects exactly an input path and an output path.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python3 lighttext.py input.lt output.{html|md|epub}", file=sys.stderr)
        sys.exit(1)
    input_path, output_path = cli_args
    # Report a missing input file before attempting any conversion.
    if not os.path.isfile(input_path):
        print(f"Error: file not found: {input_path}", file=sys.stderr)
        sys.exit(1)
    convert(input_path, output_path)
