#!/usr/bin/env sh
# lighttext.sh — Light Text (.lt) to HTML, Markdown or EPUB converter
# Usage: lighttext.sh input.lt output.html
#        lighttext.sh input.lt output.md
#        lighttext.sh input.lt output.epub

# --- Args ---
# Exactly two operands are required: the .lt source and the output path.
if [ "$#" -ne 2 ]; then
    echo "Usage: lighttext.sh input.lt output.{html|md|epub}" >&2
    exit 1
fi

INPUT="$1"
OUTPUT="$2"

# printf is used wherever user-supplied text is interpolated: echo may
# interpret backslashes (or a leading dash) in the value.
if [ ! -f "$INPUT" ]; then
    printf 'Error: file not found: %s\n' "$INPUT" >&2
    exit 1
fi

# Detect output format from extension
case "$OUTPUT" in
    *.html) FORMAT="html" ;;
    *.md)   FORMAT="md"   ;;
    *.epub) FORMAT="epub" ;;
    *)
        echo "Error: output must be .html, .md or .epub" >&2
        exit 1
        ;;
esac

# Document title = input file name without directory and without ".lt".
# "--" keeps an input name that starts with "-" from being read as an option.
TITLE=$(basename -- "$INPUT" .lt)

# ============================================================
# SHARED HELPERS
# ============================================================

# html_escape
# Filter: reads text on stdin and writes it to stdout with the characters
# & < > " replaced by their HTML entities.
# The ampersand rule is listed first so that the ampersands introduced by the
# other three rules are not escaped a second time.
html_escape() {
    sed \
        -e 's/&/\&amp;/g' \
        -e 's/</\&lt;/g' \
        -e 's/>/\&gt;/g' \
        -e 's/"/\&quot;/g'
}

# protect_escapes / restore_escapes
# A pair of stdin->stdout filters that bracket inline-markup processing.
#
# protect_escapes replaces each backslash escape (\\ \* \_ \[ \] \> \-) with an
# opaque token; restore_escapes turns every token back into the bare character.
#
# The tokens deliberately contain none of the characters that the markup rules
# or the HTML escaper react to (* _ [ ] > - & < " \). A token containing "_"
# would itself be rewritten by the _emphasis_ rule that runs between the two
# filters and could then no longer be restored.
#
# "\\" is handled first so that the second backslash of an escaped backslash is
# not mistaken for the start of another escape.
protect_escapes() {
    sed -e 's/\\\\/@@LTESCBS@@/g' \
        -e 's/\\\*/@@LTESCSTAR@@/g' \
        -e 's/\\_/@@LTESCUND@@/g' \
        -e 's/\\\[/@@LTESCLB@@/g' \
        -e 's/\\\]/@@LTESCRB@@/g' \
        -e 's/\\>/@@LTESCGT@@/g' \
        -e 's/\\-/@@LTESCDASH@@/g'
}

restore_escapes() {
    sed -e 's/@@LTESCBS@@/\\/g' \
        -e 's/@@LTESCSTAR@@/*/g' \
        -e 's/@@LTESCUND@@/_/g' \
        -e 's/@@LTESCLB@@/[/g' \
        -e 's/@@LTESCRB@@/]/g' \
        -e 's/@@LTESCGT@@/>/g' \
        -e 's/@@LTESCDASH@@/-/g'
}

# inline_html TEXT
# Convert the inline markup of one line or paragraph to HTML, on stdout:
#   [label > url]  ->  <a href="url">label</a>
#   *text*         ->  <strong>text</strong>
#   _text_         ->  <em>text</em>
# All other text is HTML-escaped.
#
# Pipeline: protect_escapes hides backslash escapes, awk escapes the text and
# builds the anchors, sed applies strong/emphasis, restore_escapes puts the
# escaped characters back.
#
# Escaping happens inside awk, exactly once per piece of text, so the generated
# <a> tags never have to be un-escaped afterwards. "*" and "_" inside a URL are
# written as numeric character references so the strong/emphasis rules that run
# afterwards cannot inject tags into the href.
#
# printf is used instead of echo: echo drops arguments such as "-n" and, in
# some shells, interprets the very backslashes this function has to see.
inline_html() {
    printf '%s\n' "$1" | protect_escapes | awk '
    # HTML-escape s. "\\&" is a literal ampersand; a bare "&" in a gsub
    # replacement would stand for the matched text.
    function esc(s) {
        gsub(/&/, "\\&amp;", s)
        gsub(/</, "\\&lt;", s)
        gsub(/>/, "\\&gt;", s)
        gsub(/"/, "\\&quot;", s)
        return s
    }
    # Escape s for use as an attribute value and hide * and _ from the
    # emphasis rules applied after awk.
    function esc_url(s) {
        s = esc(s)
        gsub(/\*/, "\\&#42;", s)
        gsub(/_/, "\\&#95;", s)
        return s
    }
    # Strip leading and trailing blanks (portable: no \s).
    function trim(s) {
        sub(/^[ \t]+/, "", s)
        sub(/[ \t]+$/, "", s)
        return s
    }
    {
        rest = $0
        out = ""
        # Consume links left to right; text between links is escaped as-is.
        while (match(rest, /\[[^]>]+>[^]]+\]/)) {
            before = substr(rest, 1, RSTART - 1)
            link = substr(rest, RSTART + 1, RLENGTH - 2)
            rest = substr(rest, RSTART + RLENGTH)
            sep = index(link, ">")          # first ">" separates label and URL
            label = trim(substr(link, 1, sep - 1))
            url = trim(substr(link, sep + 1))
            out = out esc(before) "<a href=\"" esc_url(url) "\">" esc(label) "</a>"
        }
        print out esc(rest)
    }' \
        | sed -e 's/\*\([^*][^*]*\)\*/<strong>\1<\/strong>/g' \
              -e 's/_\([^_][^_]*\)_/<em>\1<\/em>/g' \
        | restore_escapes
}

# inline_md TEXT
# Convert the inline markup of one line or paragraph to Markdown, on stdout:
#   [label > url]  ->  [label](url)
#   *text*         ->  **text**
# Backslash escapes are hidden by protect_escapes while the rules run and put
# back by restore_escapes afterwards.
#
# printf is used instead of echo: echo drops arguments such as "-n" and, in
# some shells, interprets the backslashes that protect_escapes needs to see.
inline_md() {
    printf '%s\n' "$1" \
        | protect_escapes \
        | sed -e 's/\[\([^]>]*[^]> ]\) *> *\([^]) ][^]]*\)\]/[\1](\2)/g' \
              -e 's/\*\([^*][^*]*\)\*/**\1**/g' \
        | restore_escapes
}

# is_heading LINE
# Succeeds (status 0) when LINE looks like a heading: after removing all
# whitespace, digits and punctuation something is left, and that remainder is
# unchanged by upper-casing (i.e. it contains no lower-case letters).
# Writes the global variable "stripped" as scratch space.
#
# printf is used instead of echo so that lines resembling echo options or
# containing backslashes are classified by their actual text.
is_heading() {
    stripped=$(printf '%s\n' "$1" | tr -d '[:space:][:digit:][:punct:]')
    [ -n "$stripped" ] || return 1
    [ "$stripped" = "$(printf '%s\n' "$stripped" | tr '[:lower:]' '[:upper:]')" ]
}

# ============================================================
# PARSE — emit lines to stdout, caller redirects
# ============================================================

# parse_html
# Read the Light Text file named by $INPUT and write a complete HTML document
# to stdout. Uses the globals INPUT and TITLE and the helpers html_escape,
# inline_html and is_heading.
#
# Block rules, applied per input line:
#   blank line           closes the open paragraph and list
#   heading (is_heading) <h1>, only at file start or right after a blank line,
#                        so an all-caps word inside a paragraph stays text
#                        (same rule as the Markdown output)
#   "- text"             <ul> item
#   "N. text"            <ol> item
#   anything else        joined with a space onto the current paragraph
#
# The lines "<body>" and "</body>" are each emitted alone on a line; the EPUB
# writer cuts the body out of this output using them.
parse_html() {
    in_ul=0
    in_ol=0
    para_lines=""
    para_open=0
    prev_blank=1        # start of file counts as "after a blank line"

    # Close whichever list is currently open.
    flush_list_html() {
        if [ "$in_ul" -eq 1 ]; then echo "</ul>"; in_ul=0; fi
        if [ "$in_ol" -eq 1 ]; then echo "</ol>"; in_ol=0; fi
    }
    # Emit the accumulated paragraph, if any.
    flush_para_html() {
        if [ "$para_open" -eq 1 ]; then
            printf '<p>%s</p>\n' "$(inline_html "$para_lines")"
            para_lines=""
            para_open=0
        fi
    }

    echo "<!DOCTYPE html>"
    echo '<html lang="en">'
    echo "<head>"
    echo '<meta charset="UTF-8">'
    echo '<meta name="viewport" content="width=device-width, initial-scale=1.0">'
    # The title comes from the file name, which may contain & or <.
    printf '<title>%s</title>\n' "$(printf '%s\n' "$TITLE" | html_escape)"
    echo "</head>"
    echo "<body>"

    # "|| [ -n "$line" ]" keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [ -n "$line" ]; do
        if [ -z "$line" ]; then
            flush_para_html; flush_list_html
            prev_blank=1
            continue
        fi
        if [ "$prev_blank" -eq 1 ] && is_heading "$line"; then
            flush_para_html; flush_list_html
            printf '<h1>%s</h1>\n' "$(inline_html "$line")"
            prev_blank=0; continue
        fi
        case "$line" in
            "- "?*)
                flush_para_html
                if [ "$in_ol" -eq 1 ]; then flush_list_html; fi
                if [ "$in_ul" -eq 0 ]; then echo "<ul>"; in_ul=1; fi
                printf '<li>%s</li>\n' "$(inline_html "${line#- }")"
                prev_blank=0; continue
                ;;
        esac
        if printf '%s\n' "$line" | grep -qE '^[0-9]+\. .+'; then
            flush_para_html
            if [ "$in_ul" -eq 1 ]; then flush_list_html; fi
            if [ "$in_ol" -eq 0 ]; then echo "<ol>"; in_ol=1; fi
            printf '<li>%s</li>\n' \
                "$(inline_html "$(printf '%s\n' "$line" | sed 's/^[0-9]*\. //')")"
            prev_blank=0; continue
        fi
        # Ordinary text: ends any list and extends the paragraph.
        flush_list_html
        if [ "$para_open" -eq 0 ]; then
            para_lines="$line"; para_open=1
        else
            para_lines="$para_lines $line"
        fi
        prev_blank=0
    done < "$INPUT"

    flush_para_html
    flush_list_html
    echo "</body>"
    echo "</html>"
}

# ----

# parse_md
# Read the Light Text file named by $INPUT and write Markdown to stdout.
# Uses the globals INPUT and TITLE and the helpers inline_md and is_heading.
#
# Block rules, applied per input line:
#   blank line           closes the open paragraph and list
#   heading (is_heading) "# ..." - only at file start or after a blank line
#   "- text"             bullet item
#   "N. text"            numbered item, renumbered from 1 within each list
#   anything else        joined with a space onto the current paragraph
#
# A blank line is written after every list. Without it a Markdown renderer
# treats the following paragraph as a continuation of the last list item.
parse_md() {
    in_ul=0
    in_ol=0
    para_lines=""
    para_open=0
    ol_counter=0

    # End the current list (if any) with a blank line and reset list state.
    flush_list_md() {
        if [ "$in_ul" -eq 1 ] || [ "$in_ol" -eq 1 ]; then
            echo ""
        fi
        in_ul=0; in_ol=0; ol_counter=0
    }
    # Emit the accumulated paragraph, if any, followed by a blank line.
    flush_para_md() {
        if [ "$para_open" -eq 1 ]; then
            printf '%s\n\n' "$(inline_md "$para_lines")"
            para_lines=""
            para_open=0
        fi
    }

    printf '# %s\n' "$TITLE"

    prev_blank=1        # start of file counts as "after a blank line"
    # "|| [ -n "$line" ]" keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [ -n "$line" ]; do
        if [ -z "$line" ]; then
            flush_para_md; flush_list_md; prev_blank=1
            continue
        fi
        if [ "$prev_blank" -eq 1 ] && is_heading "$line"; then
            flush_para_md; flush_list_md
            echo ""
            printf '# %s\n' "$(inline_md "$line")"
            prev_blank=0; continue
        fi
        case "$line" in
            "- "?*)
                flush_para_md
                # A bullet directly after numbered items starts a new list.
                if [ "$in_ol" -eq 1 ]; then flush_list_md; fi
                in_ul=1
                printf -- '- %s\n' "$(inline_md "${line#- }")"
                prev_blank=0; continue
                ;;
        esac
        if printf '%s\n' "$line" | grep -qE '^[0-9]+\. .+'; then
            flush_para_md
            # A numbered item directly after bullets starts a new list.
            if [ "$in_ul" -eq 1 ]; then flush_list_md; fi
            if [ "$in_ol" -eq 0 ]; then in_ol=1; ol_counter=0; fi
            ol_counter=$((ol_counter + 1))
            printf '%s. %s\n' "$ol_counter" \
                "$(inline_md "$(printf '%s\n' "$line" | sed 's/^[0-9]*\. //')")"
            prev_blank=0; continue
        fi
        # Ordinary text: ends any list and extends the paragraph.
        flush_list_md
        if [ "$para_open" -eq 0 ]; then
            para_lines="$line"; para_open=1
        else
            para_lines="$para_lines $line"
        fi
        prev_blank=0
    done < "$INPUT"

    flush_para_md
}

# ============================================================
# OUTPUT
# ============================================================

# Write the result in the format selected from the output extension.
case "$FORMAT" in

    html)
        parse_html > "$OUTPUT"
        ;;

    md)
        parse_md > "$OUTPUT"
        ;;

    epub)
        # EPUB = zip of: mimetype + META-INF/container.xml + OEBPS/content.opf + OEBPS/content.html
        if ! command -v zip > /dev/null 2>&1; then
            echo "Error: 'zip' is required for EPUB output" >&2
            exit 1
        fi

        # Scratch directory. It gets its own variable name: TMPDIR is the
        # environment variable tools consult for their temp location, and
        # overwriting it would point them at a directory we delete on exit.
        EPUB_WORK=$(mktemp -d) || {
            echo "Error: could not create temporary directory" >&2
            exit 1
        }
        trap 'rm -rf "$EPUB_WORK"' EXIT

        mkdir -p "$EPUB_WORK/META-INF" "$EPUB_WORK/OEBPS" || exit 1

        # mimetype (must be uncompressed and first in zip); no trailing newline
        printf 'application/epub+zip' > "$EPUB_WORK/mimetype"

        # META-INF/container.xml
        cat > "$EPUB_WORK/META-INF/container.xml" << 'XML'
<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
  </rootfiles>
</container>
XML

        # The title comes from the file name and may contain & or <, which
        # would make the XML below ill-formed if inserted verbatim.
        TITLE_XML=$(printf '%s\n' "$TITLE" | html_escape)

        # OEBPS/content.opf
        # Identifier: kernel UUID where available, otherwise 16 random bytes
        # formatted 8-4-4-4-12 from od's first output line.
        UUID=$(cat /proc/sys/kernel/random/uuid 2>/dev/null || od -x /dev/urandom | head -1 | awk '{print $2$3"-"$4"-"$5"-"$6"-"$7$8$9}')
        # EPUB 3 package metadata requires a last-modified timestamp (UTC).
        MODIFIED=$(date -u +%Y-%m-%dT%H:%M:%SZ)
        # NOTE(review): EPUB 3 also expects a navigation document in the
        # manifest; none is generated here - confirm whether readers need it.
        cat > "$EPUB_WORK/OEBPS/content.opf" << XML
<?xml version="1.0" encoding="UTF-8"?>
<package version="3.0" xmlns="http://www.idpf.org/2007/opf" unique-identifier="uid">
  <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
    <dc:title>$TITLE_XML</dc:title>
    <dc:language>en</dc:language>
    <dc:identifier id="uid">$UUID</dc:identifier>
    <meta property="dcterms:modified">$MODIFIED</meta>
  </metadata>
  <manifest>
    <item id="content" href="content.html" media-type="application/xhtml+xml"/>
  </manifest>
  <spine>
    <itemref idref="content"/>
  </spine>
</package>
XML

        # OEBPS/content.html — reuse HTML parser but as XHTML for EPUB
        {
            echo '<?xml version="1.0" encoding="UTF-8"?>'
            echo '<!DOCTYPE html>'
            echo '<html xmlns="http://www.w3.org/1999/xhtml" lang="en">'
            echo '<head>'
            echo '<meta charset="UTF-8"/>'
            printf '<title>%s</title>\n' "$TITLE_XML"
            echo '</head>'
            echo '<body>'

            # Keep only what parse_html prints between its "<body>" and
            # "</body>" lines; the XHTML wrapper is supplied here instead.
            parse_html | sed -e '1,/^<body>$/d' -e '/^<\/body>$/,$d'

            echo '</body>'
            echo '</html>'
        } > "$EPUB_WORK/OEBPS/content.html"

        # Build the archive inside the scratch directory, then move it into
        # place. This way:
        #  - zip always creates a fresh archive instead of updating a stale
        #    file that already exists at $OUTPUT,
        #  - relative and absolute $OUTPUT paths both work, because the move
        #    runs from the original working directory,
        #  - a failed build leaves any existing output untouched.
        # The subshell keeps the cd local. -q silences zip's per-file chatter.
        # mimetype is added first and stored (-0); the rest is compressed.
        (
            cd "$EPUB_WORK" || exit 1
            zip -q -X0 book.epub mimetype || exit 1
            zip -q -rX9 book.epub META-INF OEBPS
        ) || {
            printf 'Error: failed to build EPUB archive for %s\n' "$OUTPUT" >&2
            exit 1
        }
        mv -f -- "$EPUB_WORK/book.epub" "$OUTPUT" || {
            printf 'Error: could not write %s\n' "$OUTPUT" >&2
            exit 1
        }
        ;;

esac

printf 'Done: %s\n' "$OUTPUT"
