from xmlparser import *
from xmlpatterns import *
from MatchingInput import *
from patterns_nb import *
from StringIO import StringIO
import string
import sys
import time

# ========================================================================

# This program converts documents marked up according to the Extreme
# Markup Conference XML DTD to HTML.

# It does its processing by applying pattern matching techniques
# to a stream of SAX-like markup tokens generated by an XML parser.

def processDocument (xmlSubject, out):
   """
      Process the top-level elements, produce the outer HTML wrapping,
      and dispatch processing for the lower-level elements.

      xmlSubject -- the matching input of XML markup tokens; patterns
                    are applied to it with the "^" operator.
      out        -- destination for the generated HTML; only its
                    write method is used.

      Returns nothing.  <BODY> is opened exactly once, and always
      after any <HEAD>, so the wrapper stays well-formed whether or
      not the paper carries a front-matter title.
   """
   # Only <HTML> goes out up front: <HEAD> has to precede <BODY>, and
   # the title needed for <HEAD> is not known until "front" is read.
   out.write ("<HTML>\n")
   bodyOpened = False
   while True:
      if xmlSubject ^ xmlStartElementP ("paper"):
         while True:
            # "front", optionally some character data, then "title"
            # (presumably "[0:]" means zero or more -- confirm against
            # the pattern library).
            if xmlSubject ^ xmlStartElementP ("front") & \
                            xmlCharactersP () [0:] & \
                            xmlStartElementP ("title"):
               title = paraContent (xmlSubject)
               if not bodyOpened:
                  out.write ("<HEAD><TITLE>" + title + \
                             "</TITLE></HEAD>\n<BODY>\n")
                  bodyOpened = True
               out.write ("<H1 ALIGN=\"CENTER\">" + title + "</H1>\n")
               # The remainder of the front matter is handled like
               # section content.
               outputSections (xmlSubject, out)
            elif xmlSubject ^ xmlStartElementP ("body"):
               if not bodyOpened:
                  out.write ("<BODY>\n")
                  bodyOpened = True
               outputSections (xmlSubject, out)
            elif xmlSubject ^ xmlEndElementP ():
               break
            elif xmlSubject ^ MoveP (1):
               # Step over any token not handled above.
               pass
            else:
               break
      elif xmlSubject ^ xmlCharactersP ():
         pass
      else:
         break
   # No title and no body were seen: still open <BODY> so that the
   # closing tags below are balanced.
   if not bodyOpened:
      out.write ("<BODY>\n")
   out.write ("<P><I>" + time.asctime () + "</I></P>\n</BODY></HTML>\n")

def outputAuthor (xmlSubject, out):
   """
      Process the elements found in the "<author>" part of the document.

      xmlSubject -- the matching input of XML markup tokens; patterns
                    are applied to it with the "^" operator.
      out        -- destination for the generated HTML; only its
                    write method is used.

      Returns nothing.  The name is written in italics inside a
      centered paragraph; the italic run is closed exactly once,
      whether or not an "<address>" element is present.
   """
   out.write ("<P ALIGN=\"CENTER\"><I>")
   # Tracks whether the <I> opened above still needs its </I>.
   italicOpen = True
   while True:
      if xmlSubject ^ xmlStartElementP ("fname"):
         out.write (paraContent (xmlSubject))
      elif xmlSubject ^ xmlStartElementP ("surname"):
         out.write (" " + paraContent (xmlSubject))
      elif xmlSubject ^ xmlStartElementP ("address"):
         if italicOpen:
            out.write ("</I>")
            italicOpen = False
         out.write ("<BR>\n")
         while True:
            if xmlSubject ^ xmlStartElementP ("web"):
               url = paraContent (xmlSubject)
               out.write ("<BR>\n<A HREF=\"http://" + url + \
                          "\" TARGET=\"_new\">" + url + "</A>")
            elif xmlSubject ^ xmlStartElementP ("email"):
               url = paraContent (xmlSubject)
               out.write ("<BR>\n<A HREF=\"mailto:" + url + "\"" + \
                          ">" + url + "</A>")
            elif xmlSubject ^ xmlStartElementP ("phone"):
               out.write ("<BR>\n" + paraContent (xmlSubject))
            elif xmlSubject ^ xmlStartElementP ("affil"):
               # BR is an empty element: emit a plain line break, not
               # the invalid end tag "</BR>".
               out.write (paraContent (xmlSubject) + "<BR>\n")
            elif xmlSubject ^ xmlStartElementP ():
               # Any other address component: its text and a space.
               out.write (paraContent (xmlSubject) + " ")
            elif xmlSubject ^ xmlCharactersP ():
               pass
            # Presumably "[0:1]" makes the end element optional, so
            # this branch also ends the loop on anything unrecognized
            # -- confirm against the pattern library.
            elif xmlSubject ^ xmlEndElementP () [0:1]:
               break
      elif xmlSubject ^ xmlStartElementP ("bio"):
         if italicOpen:
            out.write ("</I>")
            italicOpen = False
         out.write ("</P>\n<P>")
         outputSections (xmlSubject, out)
      elif xmlSubject ^ xmlCharactersP ():
         pass
      elif xmlSubject ^ xmlEndElementP () [0:1]:
         break
   # Neither an address nor a bio closed the italic run.
   if italicOpen:
      out.write ("</I>")
   out.write ("</P>\n")

def outputSections (xmlSubject, out, secNum = ""):
   """
      Handle the content found inside the top-level elements:
      paragraphs, the various kinds of lists, numbered sections and
      subsections, the abstract, authors and keywords.

      xmlSubject -- the matching input of XML markup tokens.
      out        -- destination for the generated HTML.
      secNum     -- number of the enclosing section, used as the
                    prefix of subsection numbers.

      Returns when an end element has been consumed or when nothing
      recognized is next in the input.
   """
   counter = 0
   while True:
      if xmlSubject ^ xmlStartElementP ("para"):
         paragraph = paraContent (xmlSubject)
         out.write ("<P>" + paragraph + "</P>\n")
      elif xmlSubject ^ xmlStartElementP ("randlist"):
         outputRandlist (xmlSubject, out)
      elif xmlSubject ^ xmlStartElementP ("deflist"):
         _outputDefList (xmlSubject, out)
      elif xmlSubject ^ (xmlStartElementP ("section") &
                         xmlStartElementP ("title")):
         counter += 1
         label = str (counter)
         heading = paraContent (xmlSubject)
         out.write ("<H2>" + label + ". " + heading + "</H2>\n")
         outputSections (xmlSubject, out, label)
      elif xmlSubject ^ (xmlStartElementP ("subsec1") &
                         xmlStartElementP ("title")):
         counter += 1
         label = secNum + "." + str (counter)
         heading = paraContent (xmlSubject)
         out.write ("<H3>" + label + " " + heading + "</H3>\n")
         outputSections (xmlSubject, out, label)
      elif xmlSubject ^ xmlStartElementP ("abstract"):
         out.write ("<H3>Abstract</H3>\n")
         outputSections (xmlSubject, out)
      elif xmlSubject ^ xmlStartElementP ("author"):
         outputAuthor (xmlSubject, out)
      elif xmlSubject ^ xmlStartElementP ("keywords"):
         _outputKeywords (xmlSubject, out)
      elif xmlSubject ^ xmlCharactersP ():
         continue
      elif xmlSubject ^ xmlEndElementP ():
         # The end element of the enclosing element has been consumed.
         return
      else:
         return

def _outputDefList (xmlSubject, out):
   """
      Emit the items of a "<deflist>" as an HTML <DL> list.
   """
   out.write ("<DL>")
   while True:
      if xmlSubject ^ xmlStartElementP ("def.item"):
         _outputDefItem (xmlSubject, out)
      elif xmlSubject ^ xmlCharactersP ():
         continue
      elif xmlSubject ^ xmlEndElementP () [0:1]:
         break
   out.write ("</DL>\n")

def _outputDefItem (xmlSubject, out):
   """
      Emit one "<def.item>": its "<def.term>" as <DT>, and any other
      child element as a <DD> whose content is processed like section
      content.
   """
   while True:
      if xmlSubject ^ xmlStartElementP ("def.term"):
         term = paraContent (xmlSubject)
         out.write ("<DT>" + term + "</DT>")
      elif xmlSubject ^ xmlStartElementP ():
         out.write ("<DD>")
         outputSections (xmlSubject, out)
         out.write ("</DD>\n")
      elif xmlSubject ^ xmlCharactersP ():
         continue
      elif xmlSubject ^ xmlEndElementP () [0:1]:
         break

def _outputKeywords (xmlSubject, out):
   """
      Emit a "<keywords>" element as a heading followed by a bulleted
      list with one entry per "<keyword>".
   """
   out.write ("<H3>Keywords</H3>\n")
   out.write ("<P><UL>")
   while True:
      if xmlSubject ^ xmlStartElementP ("keyword"):
         keyword = paraContent (xmlSubject)
         out.write ("<LI>" + keyword + "</LI>\n")
      elif xmlSubject ^ xmlCharactersP ():
         continue
      elif xmlSubject ^ xmlEndElementP () [0:1]:
         break
   out.write ("</UL></P>\n")

def outputRandlist (xmlSubject, out):
   """
      Emit an unordered list as <UL>, with every "<li>" child becoming
      an <LI> whose content is processed like section content.  Kept
      as its own function because lists are handled from more than
      one place.

      xmlSubject -- the matching input of XML markup tokens.
      out        -- destination for the generated HTML.
   """
   out.write ("<UL>")
   while True:
      if xmlSubject ^ xmlStartElementP ("li"):
         out.write ("<LI>")
         outputSections (xmlSubject, out)
         out.write ("</LI>\n")
         continue
      if xmlSubject ^ xmlCharactersP ():
         # Character data between the items is dropped.
         continue
      if xmlSubject ^ xmlEndElementP () [0:1]:
         break
   out.write ("</UL>\n")

def paraContent (xmlSubject, keepBreaks = False):
   """
      Process paragraph content and its like, including character-level
      escaping, and return the resulting HTML text as a string.

      xmlSubject -- the matching input of XML markup tokens; patterns
                    are applied to it with the "^" operator.
      keepBreaks -- handed on to the nested call made for "<web>", but
                    not otherwise consulted here.

      The double quote is escaped along with "<", ">" and "&" because
      the returned text is also placed inside double-quoted HREF
      attribute values, where a bare quote would end the attribute.
   """
   out = StringIO ()
   while True:
      if xmlSubject ^ xmlCharactersP () << "chars":
         # Re-scan the character data with the same pattern machinery,
         # this time one character (or run of characters) at a time.
         subject = MatchingInput (xmlSubject ["chars"].characters)
         while True:
            # A run of printable characters that need no escaping is
            # copied through unchanged.
            if subject ^ (AnyOfP (string.printable) - AnyOfP ("<>&\"")) [1:]:
               out.write (subject.AllMatched)
            elif subject ^ IsP ("<"):
               out.write ("&lt;")
            elif subject ^ IsP (">"):
               out.write ("&gt;")
            elif subject ^ IsP ("&"):
               out.write ("&amp;")
            elif subject ^ IsP ("\""):
               out.write ("&quot;")
            elif subject ^ MoveP (1):
               # Anything else becomes a numeric character reference.
               out.write ("&#" + str (ord (subject.AllMatched)) + ";")
            else:
               break
      elif xmlSubject ^ xmlStartElementP ("verbatim"):
         out.write ("<PRE>" + paraContent (xmlSubject, True) + "</PRE>")
      elif xmlSubject ^ xmlStartElementP ("web"):
         url = paraContent (xmlSubject, keepBreaks)
         out.write ("<A HREF=\"http://" + url + "\" TARGET=\"_new\">" + \
                    url + "</A>")
      elif xmlSubject ^ xmlStartElementP ("randlist"):
         outputRandlist (xmlSubject, out)
      # Presumably "[0:1]" makes the end element optional, so this
      # branch also ends the loop on anything unrecognized -- confirm
      # against the pattern library.
      elif xmlSubject ^ xmlEndElementP () [0:1]:
         break
   return out.getvalue ()

# Last but not least, start the whole thing rolling: start up an XML
# parser, wrap it as a file-like producer of tokens, wrap that in a
# pattern-matching-friendly MatchingInput, and feed that, together
# with an output destination, to the top-level element processor.

# Initial clock reading; its value is discarded.  NOTE(review): on
# platforms where time.clock() counts from its first call this sets
# the baseline for the figure reported below -- confirm for the target
# platform.
time.clock()

# The input file is bound to a name so that it can be closed once the
# conversion has finished, including when the conversion raises.
documentFile = open (sys.argv [1])
try:
   processDocument (MatchingInput (xmlParserReader (anXMLParser (documentEntity = documentFile))), sys.stdout)
finally:
   documentFile.close ()

print >>sys.stderr, "Times:", int (time.clock () * 1000), "milliseconds"
