#! /usr/bin/awk -f
#####################################################################
#
# Name:         txt2html.awk - Convert plain text file to HTML
#
# Synopsis:     txt2html.awk file.txt > out.html
#
# Description:  txt2html.awk reads a plain text file and tries to add
#               HTML tags. It also assembles a table of contents and
#               adds it to the end of the file.
#
#               Recognized Style elements are:
#
#             * The very first line will be converted to the document
#               title.
#             * A line consisting only of repeated characters like
#               *,+,-,=,~,# is converted into a horizontal ruler line if
#               the lines before and after this line are empty.
#             * A line underlined with *,+,-,=,~ is converted into a
#               level 1 heading
#             * A line is converted into a level 2 heading if the lines
#               before and after this line are empty
#             * Lines starting with -,o,*,+,#,x are converted to
#               unnumbered lists
#             * Lines starting with a number or letter followed by .,:,)
#               are converted to numbered lists
#             * Indented lines are treated as pre-formatted
#             * Lines like '****** Some Text ******' are converted to
#               level 1 headings
#
# Author:       Peter Ernst
# Created:      Sun May 14 10:19:00 1995
# Modified:     Sun May 21 21:25:51 1995 (Peter Ernst) peer@laurin
# Language:     AWK
# Version:      1.1
#
# (C) Copyright 1995 Peter ERNST, all rights reserved.
#
###################################################################### 
BEGIN {
  # Output is delayed through a two-slot queue: buf[1] is the next line to
  # be printed, buf[2] the one after it.  Seed the queue with the document
  # prologue so that the first input line lands right after "<Title>".
  ndx       = 0               # number of headings registered so far
  base      = "ref"           # prefix of the generated anchor names
  listlevel = 0               # current list nesting depth (0 = no list)
  buf[2]    = "<Title>"
  buf[1]    = "<HTML>\n<Head>"
}

# push(x): advance the two-slot output queue by one line.
#
#   (printed) <- buf[1] <- buf[2] <- x
#
# The oldest pending line (buf[1]) is printed, buf[2] moves up into its
# place and x becomes the newest pending line.
function push(x) {
  print buf[1]
  buf[1] = buf[2]; buf[2] = x
}

# htmlescape(s): return s with the HTML-special characters &, < and >
# replaced by the character references &amp;, &lt; and &gt;.
#
# The ampersand has to be handled first; otherwise the "&" of the freshly
# inserted "&lt;" / "&gt;" would be escaped a second time.
# In the replacement strings "\\&" yields a literal "&" (a bare "&" would
# stand for the matched text).
function htmlescape(s) {
  gsub(/&/, "\\&amp;", s)
  gsub(/</, "\\&lt;", s)
  gsub(/>/, "\\&gt;", s)
  return s
}

# escape &, < and > on every input line before any other rule looks at it
{ $0 = htmlescape($0) }

# inject(line): slip a line into the queue ahead of buf[2].
#
#                       line
#   (printed) <- buf[1]  v    buf[2]
#
# The current buf[1] is printed and replaced by line; buf[2] is left
# untouched, so line ends up in the output just before buf[2].
function inject(line) {
  print buf[1]
  buf[1] = line
}

# adjustlevel(target): open or close lists until the nesting depth
# (listlevel) equals target.
#
#   target  - wanted list depth; 0 closes every open list
#   pending - local scratch variable, not an argument
#
# All tags are inserted with inject(), i.e. in front of buf[2].
function adjustlevel(target,    pending) {
  # we are about to start a new list -> zap indentation
  if (!listlevel && target)
    noindent()

  # No paragraph break right in front of a list start/end tag: take a
  # waiting <P> out of buf[1] and re-insert it behind the list tags.
  if (target != listlevel && buf[1] == "<P>") {
    pending = "<P>"
    buf[1]  = ""
  }
  else
    pending = ""

  # reduce list level
  while (target < listlevel) {
    inject(listend[listlevel])
    # Forget the marker of the level just closed.  Markers are recorded in
    # nesting[] by pushlistitem(); leaving the entry behind would make a
    # later list that starts with the same marker reopen at the old depth.
    delete nesting[marker[listlevel]]
    listlevel--
  }

  # increase list level
  while (target > listlevel) {
    listlevel++
    inject(liststart[listlevel])
  }

  if (pending != "") inject(pending)
}

# noindent(): leave pre-formatted mode, if it is active.
#
# The flag i is non-zero while a <PRE> block is open; in that case the
# closing tag is injected in front of buf[2] and the flag is cleared.
function noindent() {
  if (!i)
    return
  inject("</PRE>")
  i = 0
}

# makeHeading(level): turn the line waiting in buf[2] into a heading.
#
#   level  - heading level, used for the <Hn> tag and for the table of
#            contents
#   anchor - local scratch variable, not an argument
#
# buf[2] is wrapped into <Hn><A NAME ="refN">...</A></Hn>, the heading text
# and its level are remembered in tbl[] / leveltbl[] under the anchor name,
# and all open lists are closed.  buf[1] is overwritten: callers only get
# here when it holds a tag (<P>, <PRE>, a list start) or nothing of value.
function makeHeading(level,    anchor) {
 if (i)
 { # close indentation
   # An opening <PRE> that still sits in buf[1] has never been printed:
   # drop it instead of emitting a </PRE> without a matching start tag.
   buf[1] = (buf[1] == "<PRE>") ? "" : "</PRE>"
   i = 0
 }
 else
   buf[1] = ""

 anchor           = base ndx++   # new reference
 tbl[anchor]      = buf[2]       # remember heading for index
 leveltbl[anchor] = level        # remember level
 buf[2] = "<H" level "><A NAME =\"" anchor "\">" buf[2] "</A></H" level ">"
 head = 1                        # the previous line was a heading
 adjustlevel(0)                  # close lists
}

# pushlistitem(type): queue the current input line as a list item.
#
#   type           - "U" for an unnumbered list, "O" for a numbered one;
#                    used to build the list tags of a newly opened level
#   item, n, depth - local scratch variables, not arguments
#
# $1 is taken as the list marker, fields 2..NF form the item text.  Every
# marker is bound to one nesting depth (nesting[]); a marker that is not
# known yet opens a new level below the current one.
function pushlistitem(type,    item, n, depth) {
 # queue the list item
 head = 0
 item = "<LI>"
 for (n = 2; n <= NF; n++)
  item = item $(n) " "
 push(item)

 # get the nesting depth associated with the current marker
 depth = nesting[$1]

 if (!depth)
 { # start a new list
  depth            = listlevel + 1
  nesting[$1]      = depth        # remember marker
  marker[depth]    = $1
  # plain <UL>/<OL> tags: a bare number inside the tag (and any token
  # inside an end tag) is not valid HTML
  liststart[depth] = "<" type "L>"
  listend[depth]   = "</" type "L>"
 }

 # the list tags are injected in front of the item queued above
 adjustlevel(depth)
}

##############################################

# Before the second input line is processed the first one is already in
# the queue (directly behind "<Title>"), so the title can be closed and
# the document body opened now.
NR == 2 {
  push("</Title>\n</Head>\n<BODY>")
}

# empty line (nothing but blanks and tabs)
/^[ 	]*$/ {
  # an empty line ends a pre-formatted block
  if (i) {
    push("</PRE>")
    i = 0
  }

  # repeated empty line: a paragraph break is already waiting
  if (buf[2] == "<P>")
    next

  if (buf[1] == "<P>" && listlevel < 2) {
    # buf[1]: <P>
    # buf[2]: xxx
    # xxx has empty lines before and after it -> make a header from it
    makeHeading(2)
  }
  else if (!head) {
    # no paragraph break directly after a heading
    push("<P>")
  }
  next
}

# underlining: a line made up of the characters - * = + x X # ~ (blanks
# allowed after the first one).  Depending on what is waiting in the queue
# it underlines a heading, ends a list level or becomes a ruler line.
/^[ \t]*[-*=+xX#~][- *=+xX#~]*$/ {
  # ignore it directly after a heading or a paragraph break
  if (head || buf[2] == "<P>")
    next

  if (buf[1] == "<P>" || buf[1] == "<PRE>")
  { # buf[1]: <P>
    # buf[2]: xxx
    # underlined heading
    sub("<LI>", "", buf[2])
    makeHeading(1)
  }
  else if (listlevel)
  {
    if (match(buf[1], "^<[OU]L"))
    { # buf[1]: <UL>
      # buf[2]: <LI>xxx
      # the "item" is really an underlined heading: undo the list level
      # that was just opened for it
      if (sub("<LI>", "", buf[2]))
      {
        buf[1] = ""
        # markers are recorded in nesting[] (see pushlistitem); forget
        # the one belonging to the level that is taken back
        delete nesting[marker[listlevel]]
        listlevel--
        makeHeading(2)
      }
    }
    else
      adjustlevel(listlevel - 1)
  }
  else
  {
    push("<HR>")
    adjustlevel(0)
  }
  next
}

# big headers  ++++++ header +++++
#
# A run of decoration characters, the heading text, and a run of at least
# two decoration characters.  The decoration is stripped and the text is
# turned into a level 1 heading.

#/^[ 	]*[-*=+xX#~]+[	 a-zA-Z0-9,:]+[-*=+xX#~]{2,}[ 	]*$/ {
/^[ \t]*[-*=+xX#~]+[\t a-zA-Z0-9,:_]+[-*=+xX#~][-*=+xX#~]+[ \t]*$/ {
  # Strip exactly what the pattern treats as decoration: blanks, one run
  # of decoration characters (including ~), blanks.  Mixing blanks and
  # x/X into a single character class would also eat heading text that
  # starts or ends with an x.
  sub(/^[ \t]*[-*=+xX#~]+[ \t]*/, "")
  sub(/[ \t]*[-*=+xX#~]+[ \t]*$/, "")
  push($0)
  # flush whatever is waiting in buf[1] unless it is a paragraph break;
  # makeHeading() overwrites that slot
  if (buf[1] != "<P>") inject("")
  makeHeading(1)
  next
}

# unnumbered lists: one of the markers - o + * x # followed by a blank
# or tab, optionally indented
/^[ 	]*[-o+*x#][ 	]/ { pushlistitem("U"); next }

# detect numbered lists
#
# A number or a single letter followed by one or more of : . ) starts an
# item of a numbered list.  The marker has to be followed by white space
# (or the end of the line): without that, ordinary text such as "3.14 is
# pi" or "e.g. foo" would be taken for a list item and everything glued to
# the marker would be lost, because only fields 2..NF make up the item.
#
# The marker in $1 is normalized ("12." -> "0.", "c)" -> "a)", "C)" ->
# "A)") so that all items of one list share the same marker and therefore
# the same nesting depth in pushlistitem().

/^[ \t]*[0-9]+[:.)]+([ \t]|$)/ {
 sub(/^[0-9]+/, "0", $1)
 pushlistitem("O")
 next
}

/^[ \t]*[a-z][:.)]+([ \t]|$)/ {
 sub(/^[a-z]/, "a", $1)
 pushlistitem("O")
 next
}

/^[ \t]*[A-Z][:.)]+([ \t]|$)/ {
 sub(/^[A-Z]/, "A", $1)
 pushlistitem("O")
 next
}

# Protect indentation: a line starting with a blank or tab
/^[ 	]/ {
  head = 0

  # outside of lists the first indented line opens a pre-formatted block;
  # i flags the open block
  if (!(i || listlevel)) {
    push("<PRE>")
    i++
  }

  # the leading white space is replaced by a single blank
  sub("^[ 	]*"," ")
  push($0)
  next
}

# regular text: anything no earlier rule has claimed
{
  head = 0

  if (i) {
    if (i != 1 || buf[1] != "<PRE>")
      push("</PRE>")     # end the pre-formatted block
    else
      buf[1] = ""        # the <PRE> is still unprinted: this is actually
                         # a paragraph starting with indentation
    i = 0
  }

  push($0)
  adjustlevel(0)         # unindented text terminates lists
}

# END: flush the output queue and append the table of contents.
#
# Level 1 headings become items of the outer <OL>; deeper levels are
# nested in <UL> lists.  Every entry links to the anchor that
# makeHeading() attached to the heading.
END {
      # Input shorter than two lines never triggered the NR == 2 rule:
      # close the title and open the body here instead.
      if (NR < 2)
        push("</Title>\n</Head>\n<BODY>")

      push("<HR>\n<H1>Table Of Contents</H1>\n<OL>")

      # The last input line now sits in buf[1]; close a pre-formatted
      # block or lists that are still open right behind it.
      noindent()
      adjustlevel(0)
      print buf[1]
      print buf[2]

      listlevel = 1
      for (k = 0; k < ndx; k++)
      {
        t     = base k
        level = leveltbl[t]
        # reduce list level
        while (level < listlevel)
        {
         print "</UL>"
         listlevel--
        }
        # increase list level
        while (level > listlevel)
        {
         listlevel++
         print "<UL>"
        }
        # note the closing quote of the HREF value
        print "<LI>" "<A HREF=\"#" t "\">" tbl[t] "</A>"
      }

      # close the nested lists left open by the last entries
      while (listlevel > 1)
      {
        print "</UL>"
        listlevel--
      }
      print "</OL>\n</BODY>\n</HTML>"
    }