"""This is a validating XML parser with a SAX driver. It currently
handles most of the well-formedness checking and validation and the
rest will be added as soon as possible.

Version 0.40 - 16.May.98
   Lars Marius Garshol - larsga@ifi.uio.no
   http://www.stud.ifi.uio.no/~larsga/download/python/xml/xmlproc.html
"""
   
import re,string,sys,urllib,urlparse,types

from xmlutils import *
from xmlapp import *
from xmldtd import *

# Version number of this module; matches the version in the module docstring.
version="0.40"

# Standard exceptions

class OutOfDataException(Exception):
    """Raised when the current buffer has been exhausted although more
    data is expected, i.e. the construct being read is not yet complete."""

# ==============================
# The general entity parser
# ==============================

class EntityParser:
    """A generalized parser for XML entities, whether DTD, documents or even
    catalog files."""

    def __init__(self):
        """Sets up default handlers, an empty entity stack and an empty
        data buffer positioned at line 1."""
        # Entity bookkeeping
        self.ent_stack=[]
        self.open_ents=[]  # Names of open entities, to detect recursion

        # Default handlers, replaceable through the set_* methods
        self.app=Application()
        self.err=ErrorHandler(self)
        self.ent=EntityHandler(self.err)

        # The current block of data
        self.data=""
        self.final=0
        self.datasize=0
        self.start_point=-1 # -1 means that no region start is recorded

        # Where we are
        self.line=1
        self.last_break=0
        self.block_offset=0 # Offset from start of stream to start of cur block
        self.pos=0
        self.current_sysID=None
        self.last_upd_pos=0

    def set_application(self,app):
        """Registers app as the receiver of data events and passes this
        parser to it as its locator."""
        self.app=app
        self.app.set_locator(self)

    def set_error_handler(self,err):
        """Registers err as the receiver of error events, and tells the
        current entity handler to report to it as well."""
        self.err=err
        entity_handler=self.ent
        entity_handler.set_error_handler(err)

    def set_entity_handler(self,ent):
        """Registers ent as the object that resolves entity references and
        hands it the current error handler."""
        self.ent=ent
        ent.set_error_handler(self.err)
	
    def parse_resource(self,sysID):
        """Parses the XML entity found at the system identifier sysID (a
        URI, opened with urllib.urlopen). Only used for the document
        entity, not to handle subentities, which open_entity takes care of.

        Calls parseStart before and parseEnd after the data has been read.
        If the resource cannot be opened a fatal error is reported and
        nothing is parsed."""

        self.current_sysID=sysID
        try:
            infile=urllib.urlopen(sysID)
        except IOError:
            self.err.fatal("Couldn't open resource '%s'" % sysID)
            return

        self.parseStart()
        try:
            self.read_from(infile)
        finally:
            # Release the stream even if parsing raised an exception
            infile.close()
        self.flush()
        self.parseEnd()

    def open_entity(self,sysID):
        """Starts parsing a new entity, pushing the old onto the stack. This
        method must not be used to start parsing, use parse_resource for
        that.

        sysID is resolved relative to the current system identifier. The
        entity is read and parsed to its end, after which the state of the
        previous entity is restored. If the entity cannot be opened a fatal
        error is reported and the current state is left untouched."""

        sysID=urlparse.urljoin(self.get_current_sysid(),sysID)

        try:
            inf=urllib.urlopen(sysID)
        except IOError:
            self.err.fatal("Couldn't open entity '%s'" % sysID)
            return

        # The state is saved as ONE tuple; list.append takes exactly one
        # argument from Python 2.0 on.
        self.ent_stack.append((self.get_current_sysid(),self.data,self.pos,
                               self.line,self.last_break,self.datasize,
                               self.last_upd_pos,self.block_offset))

        # Start from a completely clean buffer, so that nothing of the
        # suspended entity leaks in (matters e.g. for an empty entity,
        # where feed is never called to refresh datasize).
        self.current_sysID=sysID
        self.data=""
        self.datasize=0
        self.pos=0
        self.line=1
        self.last_break=0
        self.last_upd_pos=0
        self.block_offset=0

        try:
            self.read_from(inf)
        finally:
            inf.close()

        self.flush()
        self.pop_entity()

    def push_entity(self,sysID,contents):
	"""Parse some text and consider it a new entity, making it possible
	to return to the original entity later."""
	self.ent_stack.append(self.get_current_sysid(),self.data,self.pos,\
			      self.line,self.last_break,self.datasize,\
			      self.last_upd_pos,self.block_offset)

	self.data=contents
	self.current_sysID=sysID
	self.pos=0
	self.line=1
	self.last_break=0
	self.datasize=len(contents)
	self.last_upd_pos=0

    def pop_entity(self):
	"Skips out of the current entity and back to the previous one."

	if self.ent_stack==[]:
	    self.err.fatal("Internal error: Entity stack broken")

	(self.current_sysID,self.data,self.pos,self.line,self.last_break,\
	 self.datasize,self.last_upd_pos,self.block_offset)=self.ent_stack[-1]

	del self.ent_stack[-1]
	self.final=0
	
    def read_from(self,fileobj,bufsize=16384):
	"""Reads data from a file-like object until EOF. Does not close it.
	**WARNING**: This method does not call the parseStart/parseEnd methods,
	since it does not know if it may be called several times. Use
	parse_resource if you just want to read a file."""
	while 1:
	    buf=fileobj.read(bufsize)
	    if buf=="": break
	    self.feed(buf)

    def feed(self,new_data):
        """Accepts more data from the data source and parses it.

        Carriage returns are deleted from new_data, which is then appended
        to the buffer, and do_parse is called. Text that has already been
        consumed is dropped from the buffer first, unless a start point has
        been set with set_start_point; in that case everything is kept so
        that get_region can still return the region."""
        self.update_pos() # Update line/col count before positions shift

        # Doing line end translation
        new_data=string.translate(new_data,id_trans,"\015")

        if self.start_point==-1:
            # No region to preserve: forget what has been parsed. The block
            # now starts self.pos characters further into the stream.
            self.block_offset=self.block_offset+self.pos
            self.data=self.data[self.pos:]+new_data
            self.last_break=self.last_break-self.pos  # Keep track of column
            self.pos=0
            self.last_upd_pos=0
        else:
            # start_point indexes into self.data, so nothing may be dropped
            self.data=self.data+new_data
        self.datasize=len(self.data)

        self.do_parse()

    def parseStart(self):
	"Called before the parse starts to notify subclasses."
	pass

    def parseEnd(self):
	"Called when there are no more data to notify subclasses."
	pass

    def flush(self):
	"Parses any remnants of data in the last block."
	if not self.pos+1==self.datasize:
	    self.final=1
 	    try:
		self.do_parse()
	    except OutOfDataException,e:
		self.err.fatal("Construct started, but never completed")
		    
    # --- LOW-LEVEL SCANNING METHODS

    def set_start_point(self):
	"""Stores the current position and tells the parser not to forget any
	of the data beyond this point until get_region is called."""
	self.start_point=self.pos

    def get_region(self):
	"""Returns the area from start_point to current position and remove
	start_point."""
	data=self.data[self.start_point:self.pos]
	self.start_point=-1
	return data

    def find_reg(self,regexp):
	"""Moves self.pos to the first character that matches the regexp and
	returns everything from pos and up to (but not including) that
	character."""
	oldpos=self.pos
	mo=regexp.search(self.data,self.pos)
	if mo==None:
	    raise OutOfDataException()
	self.pos=mo.start(0)
	return self.data[oldpos:self.pos]
    
    def scan_to(self,target):
	"Moves self.pos to beyond target and returns skipped text."
	new_pos=string.find(self.data,target,self.pos)
	if new_pos==-1:
	    raise OutOfDataException()
	res=self.data[self.pos:new_pos]
	self.pos=new_pos+len(target)
	return res

    def get_index(self,target):
	"Finds the position where target starts and returns it."
	new_pos=string.find(self.data,target,self.pos)
	if new_pos==-1:
	    raise OutOfDataException()
	return new_pos
    
    def test_str(self,test_str):
	"See if text at current position matches test_str, without moving."
	if self.datasize-self.pos<len(test_str) and not self.final:
	    raise OutOfDataException()
	return self.data[self.pos:self.pos+len(test_str)]==test_str
    
    def now_at(self,test_str):
	"Checks if we are at this string now, and if so skips over it."
	if self.datasize-self.pos<len(test_str) and not self.final:
	    raise OutOfDataException()
	
	if self.data[self.pos:self.pos+len(test_str)]==test_str:
	    self.pos=self.pos+len(test_str)
	    return 1
	else:
	    return 0
    
    def skip_ws(self,necessary=0):
        """Moves past the text matched by reg_ws at the current position.
        If nothing matches and necessary is true, a fatal error is
        reported."""
        found=reg_ws.match(self.data,self.pos)
        if found!=None:
            self.pos=found.end(0)
        elif necessary:
            self.err.fatal("Whitespace expected here")

    def test_reg(self,regexp):
	"Checks if we match the regexp."
	if self.pos>self.datasize-5 and not self.final:
	    raise OutOfDataException()
	
	return regexp.match(self.data,self.pos)!=None
	    
    def get_match(self,regexp):
	"Returns the result of matching the regexp and advances self.pos."
	if self.pos>self.datasize-5 and not self.final:
	    raise OutOfDataException()
	
	ent=regexp.match(self.data,self.pos)
	if ent==None:
	    self.err.fatal("Didn't match "+regexp.pattern)
	    return ""

	if ent.end(0)==self.datasize:
	    raise OutOfDataException()

	self.pos=ent.end(0)
	return ent.group(0)

    def update_pos(self):
	"Updates (line,col)-pos by checking processed blocks."
	breaks=string.count(self.data[self.last_upd_pos:self.pos],"\n")
	self.last_upd_pos=self.pos

	if breaks>0:
	    self.line=self.line+breaks
	    self.last_break=string.rfind(self.data,"\n",0,self.pos)

    def get_wrapped_match(self,wraps):
	"Returns a contained match. Useful for regexps inside quotes."
	found=0
	for (wrap,regexp) in wraps:
	    if self.test_str(wrap):
		found=1
		self.pos=self.pos+len(wrap)
		break

	if not found:
	    msg="One of "
	    for (wrap,regexp) in wraps[:-1]:
		msg="%s'%s', " % (msg,wrap)
	    self.err.fatal("%s or '%s' expected" % (msg[:-2],wraps[-1][0]))

	data=self.get_match(regexp)
	if not self.now_at(wrap):
	    self.err.fatal("'%s' expected" % (wrap))

	return data
	    
    #--- USEFUL METHODS

    def get_current_sysid(self):
	"Returns the sysid of the file we are reading now."
	return self.current_sysID

    def set_sysid(self,sysID):
	"Sets the current system identifier. Does not store the old one."
	self.current_sysID=sysID

    def get_offset(self):
	"Returns the current offset from the start of the stream."
	return self.block_offset+self.pos
	
    def get_line(self):
	"Returns the current line number."
	self.update_pos()
	return self.line

    def get_column(self):
	"Returns the current column position."
	self.update_pos()
	return self.pos-self.last_break

# ==============================
# Code-sharing class for XMLProcessor and DTDParser
# ==============================

class AbstractXML(EntityParser):
    "An abstract superclass with shared code for XMLProcessor and DTDParser."

    def parse_string(self,where):
        """Parses a string (attribute value, entity value) and resolves all
        entity references in it. where=0 means entity declaration, where=1
        means attribute string and where=2 means we are parsing the literal
        entity value of a reference to an internal entity in an attribute
        value.

        For where=0 and where=1 the string is expected to start with a
        quote character at the current position and is read up to the
        matching quote. For where=2 there are no quotes; the whole buffer
        is read. Returns the resulting string, or None if the opening quote
        is missing (a fatal error is reported in that case)."""

        val=""
        delim=None # Stays None for where==2, which has no delimiters
        if where!=2:
            if self.now_at('"'):
                delim='"'
            elif self.now_at("'"):
                delim="'"
            else:
                self.err.fatal("Expected \" or '")
                self.scan_to(">")
                return

        if where==0:
            reg_stop=re.compile("%|&|"+delim)
        elif where==1:
            reg_stop=re.compile("<|&|"+delim) # Parameter entities not recogn.
        elif where==2:
            reg_stop=re.compile("<|&")

        while 1:
            try:
                piece=self.find_reg(reg_stop)
            except OutOfDataException:
                if where!=2:
                    raise
                # A literal entity value has no closing delimiter: text
                # after the last reference simply runs to the buffer end.
                piece=self.data[self.pos:]
                self.pos=self.datasize

            if where==1 or where==2:
                val=val+string.translate(piece,ws_trans)
            else:
                val=val+piece

            if where==2:
                if self.pos==self.datasize:
                    break
            elif self.now_at(delim):
                break

            need_semi=1 # Every reference must be closed by a ';'

            if self.now_at("&#"):
                if self.now_at("x"):
                    digs=unhex(self.get_match(reg_hex_digits))
                else:
                    digs=string.atoi(self.get_match(reg_digits))

                if not (digs==9 or digs==10 or digs==13 or \
                        (digs>=32 and digs<=255)):
                    if digs>255:
                        self.err.fatal("Unsupported character in character "
                                       "reference")
                    else:
                        self.err.fatal("Illegal character in character "
                                       "reference")
                else:
                    val=val+chr(digs)

            elif self.now_at("&"):
                name=self.get_match(reg_name)

                if where==0:
                    val=val+"&"+name+";"
                else:
                    val=val+self._expand_internal_ge(name)

            elif self.now_at("%"):
                # We never get here if we are in an attribute value, since
                # % is not included in the reg_stop regexp.
                name=self.get_match(reg_name)

                if self.internal:
                    self.err.fatal("Parameter entity references not "
                                   "allowed in internal subset")
                    val=val+"%"+name+";"
                else:
                    try:
                        ent=self.dtd.resolve_pe(name)
                        if ent.is_internal():
                            val=val+ent.value
                        else:
                            self.err.fatal("External entity references not "
                                           "allowed here")
                    except KeyError:
                        self.err.fatal("Unknown parameter entity '%s'" % name)
            elif self.now_at("<") and (where==1 or where==2):
                self.err.fatal("'<' not allowed in attribute values")
                need_semi=0 # A '<' is not a reference; no ';' follows it
            else:
                self.err.fatal("Entity reference expected. (Internal error.)")

            if need_semi and not self.now_at(";"):
                self.err.fatal("';' expected")
                if delim!=None:
                    # Recover by skipping to the end of the literal
                    self.scan_to(delim)
                    break

            if where==2 and self.pos==self.datasize:
                break

        return val

    def _expand_internal_ge(self,name):
        """Returns the text that a reference to the general entity name
        stands for inside an attribute value. Reports a fatal error and
        returns "" if the entity is unknown, external, recursive or has a
        value that cannot be parsed completely."""
        try:
            ent=self.ent.resolve_ge(name)
        except KeyError:
            self.err.fatal("Unknown entity '%s'" % name)
            return ""

        if not ent.is_internal():
            self.err.fatal("External entity references not "
                           "allowed here")
            return ""

        if name in self.open_ents:
            self.err.fatal("Entity recursion detected")
            return ""

        text=""
        outer_final=self.final # pop_entity clears the flag
        self.open_ents.append(name)
        self.push_entity(self.get_current_sysid(),ent.value)
        try:
            self.final=1 # Only one block
            try:
                text=self.parse_string(2)
                complete=(self.pos==self.datasize)
            except OutOfDataException:
                complete=0

            if not complete:
                self.err.fatal("Construct started, but never "
                               "completed")
        finally:
            self.pop_entity()
            self.final=outer_final
            del self.open_ents[-1]

        return text
    
    def parse_comment(self):
        """Parses a comment, starting after the '<!--' and ending beyond
        the '-->', and passes its contents to the application."""
        contents=self.get_match(reg_comment_content)
        self.app.handle_comment(contents)

        terminated=self.now_at("-->")
        if not terminated:
            self.err.fatal("Comment incorrectly terminated")

    def parse_pi(self):
        """Parses a processing instruction, starting after the '<?' and
        ending beyond the '?>'. The target 'xml' is treated as the XML
        declaration; other instructions go to the application."""
        target=self.get_match(reg_name)

        if target=="xml":
            self.parse_xml_decl()
            if not self.now_at("?>"):
                self.err.fatal("'?>' expected")
            self.seen_xmldecl=1
            return

        self.skip_ws()
        remainder=self.scan_to("?>") # OutOfDataException if not found

        reserved=reg_res_pi.match(target)!=None
        if reserved:
            if target=="xml:namespace":
                self.err.warning("XML name spaces not yet supported")
            else:
                self.err.fatal("Processing instruction target names "
                               "beginning with 'xml' are reserved")
        self.app.handle_pi(target,remainder)
	    
# ==============================
# A full well-formedness parser
# ==============================

class XMLProcessor(AbstractXML):
    "A parser that performs a complete well-formedness check."

    def __init__(self):
        """Creates a processor with an empty element stack and a WFCDTD
        object serving both as the DTD and as the entity handler."""
        EntityParser.__init__(self)

        # What has been seen so far
        self.seen_xmldecl=0
        self.seen_doctype=0
        self.seen_root=0
        self.stack=[]

        # The DTD also resolves entity references
        wfc_dtd=WFCDTD(self.err)
        self.dtd=wfc_dtd
        self.ent=wfc_dtd
        self.dtd_listener=None
    
    def do_parse(self):
        """Parses the constructs in the buffer, one after the other. When
        the data runs out in the middle of a construct the position is
        moved back to where that construct began, unless this is the final
        block, in which case the OutOfDataException is passed on."""
        try:
            while self.pos+1<self.datasize:
                prepos=self.pos

                if self.test_reg(reg_start_tag):
                    self.parse_start_tag()
                elif self.now_at("</"):
                    self.parse_end_tag()
                elif not self.test_str("<") and not self.test_str("&"):
                    self.parse_data()
                elif self.now_at("&#"):
                    self.parse_charref()
                elif self.now_at("&"):
                    self.parse_ent_ref()
                elif self.now_at("<!--"):
                    self.parse_comment()
                elif self.now_at("<?"):
                    self.parse_pi()
                elif self.now_at("<![CDATA["):
                    self.parse_cdata()
                elif self.now_at("<!DOCTYPE"):
                    self.parse_doctype()
                else:
                    self.err.fatal("Illegal construct")
                    self.pos=self.pos+1 # Avoid endless loop

        except OutOfDataException:
            if not self.final:
                self.pos=prepos  # Didn't complete the construct
            else:
                raise # Passes the exception on with its traceback

    def set_dtd_listener(self,listener):
        "Registers an object that listens for DTD parse events."
        self.dtd_listener=listener
                
    def parseStart(self):
        """Must be called before parsing starts. Tells the application
        that the document begins."""
        application=self.app
        application.doc_start()

    def parseEnd(self):
        """Must be called when parsing is finished. Reports a fatal error
        if an element is still open or no root element was seen, and then
        tells the application that the document has ended."""
        unclosed=self.stack!=[] and self.ent_stack==[]
        if unclosed:
            self.err.fatal("Premature document end, element '%s' not "
                           "closed" % (self.stack[-1]))
        elif not self.seen_root:
            self.err.fatal("Premature document end, no root element")

        self.app.doc_end()
	    
    def parse_start_tag(self):
        """Parses a start tag or empty-element tag, beginning at its '<'.
        Attribute values are parsed with parse_string, fixed and default
        attributes declared for the element are filled in, and the
        application is told about the tag."""
        self.pos=self.pos+1 # Steps over the '<'
        name=self.get_match(reg_name)
        self.skip_ws()

        attrs={}
        while 1:
            if self.test_str("/>") or self.test_str(">"):
                break

            a_name=self.get_match(reg_name)
            self.skip_ws()
            if not self.now_at("="):
                self.err.fatal("'=' expected")
            self.skip_ws()

            a_val=self.parse_string(1) # Substitutes entities etc

            if attrs.has_key(a_name):
                self.err.fatal("Attribute '%s' occurs twice" % a_name)

            attrs[a_name]=a_val
            self.skip_ws()

        # --- Fill in fixed and default attributes; an element without
        # --- a declaration shows up as a KeyError and gets none
        try:
            element=self.dtd.get_elem(name)
            for attr in element.get_attr_list():
                decl=element.get_attr(attr)

                if decl.decl=="#FIXED":
                    if not attrs.has_key(attr):
                        attrs[attr]=decl.default
                    elif attrs[attr]!=decl.default:
                        self.err.error("Actual value of attribute '%s' "
                                       "does not match fixed value" % attr)
                elif decl.decl=="#DEFAULT" and not attrs.has_key(attr):
                    attrs[attr]=decl.default
        except KeyError:
            pass

        # --- The tag itself

        if self.stack==[] and self.seen_root:
            self.err.fatal("Elements not allowed outside root element")

        self.seen_root=1
        self.stack.append(name)

        is_empty=not self.now_at(">")
        if is_empty:
            self.now_at("/>")

        self.app.handle_start_tag(name,attrs)

        if is_empty:
            # An empty-element tag is its own end tag
            del self.stack[-1]
            self.app.handle_end_tag(name)

    def parse_end_tag(self):
        """Parses an end tag, from after the '</' to beyond the '>', checks
        it against the element stack and tells the application."""
        name=self.get_match(reg_name)
        self.skip_ws()
        if not self.now_at(">"):
            self.err.fatal("'>' expected")

        try:
            expected=self.stack[-1]
        except IndexError:
            self.err.fatal("Element '%s' not open" % name)
        else:
            if name==expected:
                del self.stack[-1]
            else:
                self.err.fatal("End tag for '%s' seen, but '%s' expected" \
                               % (name,expected))

                # A guess, in case we continue: if the tag closes the
                # parent, the innermost element was left open by mistake
                if len(self.stack)>1 and self.stack[-2]==name:
                    del self.stack[-2:]

        self.app.handle_end_tag(name)

    def parse_data(self):
        """Parses a run of character data starting at the current position
        and passes it to the application as (buffer,start,end). Raises
        OutOfDataException if the end of the run is not in the buffer and
        this is not the final block."""
        start=self.pos
        mo=reg_c_data.search(self.data,start)
        if mo!=None:
            end=mo.end(0)-1
        elif self.final:
            end=self.datasize
        else:
            raise OutOfDataException()
        self.pos=end

        bad=string.find(self.data,"]]>",start,end)
        if bad!=-1:
            self.pos=bad
            self.err.fatal("']]>' must not occur in character data")

        if self.stack==[]:
            # Outside the root element only whitespace is accepted
            res=reg_ws.match(self.data,start)
            if res==None:
                ws_only=0
            else:
                ws_end=res.end(0)
                if ws_end==self.datasize:
                    ws_end=ws_end-1
                ws_only=(ws_end==end)

            if not ws_only:
                self.err.fatal("Character data not allowed outside root "
                               "element")

        self.app.handle_data(self.data,start,end)
	
    def parse_charref(self):
        """Parses a character reference from after the '&#' and passes the
        character to the application. Only 9, 10, 13 and 32-255 are
        accepted as character numbers."""
        if self.now_at("x"):
            code=unhex(self.get_match(reg_hex_digits))
        else:
            code=string.atoi(self.get_match(reg_digits))

        if not self.now_at(";"):
            self.err.fatal("';' expected")

        acceptable=code==9 or code==10 or code==13 or \
                    (code>=32 and code<=255)
        if acceptable:
            if self.stack==[]:
                self.err.fatal("Character data not allowed outside root "
                               "element")
            self.app.handle_data(chr(code),0,1)
        elif code>255:
            self.err.fatal("Unsupported character")
        else:
            self.err.fatal("Illegal character")

    def parse_cdata(self):
        """Parses a CDATA marked section from after the '<![CDATA[' to
        beyond the ']]>' and passes its contents to the application."""
        start=self.pos
        end=self.get_index("]]>")
        if self.stack==[]:
            self.err.fatal("Character data not allowed outside root element")
        self.app.handle_data(self.data,start,end)
        self.pos=end+3 # Steps over the ']]>'

    def parse_ent_ref(self):
        """Parses a general entity reference from after the '&' and parses
        the contents of the entity in its place. Unknown and recursive
        references are reported as fatal errors and skipped."""
        name=self.get_match(reg_name)
        if not self.now_at(";"):
            self.err.fatal("';' expected")

        try:
            ent=self.ent.resolve_ge(name)
        except KeyError:
            self.err.fatal("Unknown entity '%s'" % name)
            return

        if ent.name in self.open_ents:
            self.err.fatal("Entity recursion detected")
            return

        self.open_ents.append(ent.name)

        if self.stack==[]:
            self.err.fatal("Entity references not allowed outside root "
                           "element")

        if not ent.is_internal():
            if ent.notation!="":
                self.err.fatal("Unparsed entities not allowed as general "
                               "entity references in element content")

            sysid=self.app.resolve_entity(ent.get_pubid(),ent.get_sysid())
            self.open_entity(sysid)
        else:
            self.push_entity(self.get_current_sysid(),ent.value)
            self.do_parse()
            self.flush()
            self.pop_entity()

        del self.open_ents[-1]
	
    def parse_doctype(self):
        """Parses the document type declaration from after the '<!DOCTYPE'.
        The application is told about the declaration if it has a public or
        system identifier, and an internal subset, if present, is parsed."""

        if self.seen_doctype:
            self.err.fatal("Multiple document type declarations")
        if self.seen_root:
            self.err.fatal("Document type declaration not allowed "
                           "inside root element")

        self.skip_ws(1)
        rootname=self.get_match(reg_name)
        self.skip_ws(1)

        pub_id=""
        sys_id=""
        quoted_sysid=[("\"",reg_sysid_quote),("'",reg_sysid_apo)]

        if self.now_at("SYSTEM"):
            self.skip_ws(1)
            sys_id=self.get_wrapped_match(quoted_sysid)
        elif self.now_at("PUBLIC"):
            self.skip_ws(1)
            pub_id=self.get_wrapped_match([("\"",reg_pubid_quote),\
                                           ("'",reg_pubid_apo)])
            self.skip_ws(1)
            sys_id=self.get_wrapped_match(quoted_sysid)

        self.skip_ws()
        if not (pub_id=="" and sys_id==""):
            self.app.handle_doctype(rootname,pub_id,sys_id)

        if self.now_at("["):
            self.parse_internal_dtd()
        elif not self.now_at(">"):
            self.err.fatal("'>' expected")

        self.seen_doctype=1 # Has to be at the end to avoid block trouble
    
    def parse_internal_dtd(self):
        """Finds the end of the internal DTD subset, from after the '[' to
        beyond the ']>', and hands the text of the subset to
        handle_internal_dtd together with the line information of its
        start."""

        self.set_start_point() # Record start of int_subset, preserve data
        self.update_pos()
        start_line=self.line
        start_break=self.last_break

        at_end=0
        while not at_end:
            self.find_reg(reg_int_dtd)

            # Quoted strings, PIs, comments and marked sections are
            # skipped as a whole
            if self.now_at("\""):
                self.scan_to("\"")
            elif self.now_at("'"):
                self.scan_to("'")
            elif self.now_at("<?"):
                self.scan_to("?>")
            elif self.now_at("<!--"):
                self.scan_to("-->")
            elif self.now_at("<!["):
                self.scan_to("]]>")
            elif self.now_at("]>"):
                at_end=1

        # [:-2] cuts off the "]>" at the end
        subset=self.get_region()[:-2]
        self.handle_internal_dtd(start_line,start_break,subset)

    def parse_xml_decl(self):
        """Parses the contents of the XML declaration from after the
        '<?xml', up to but not including the '?>'.

        Reads the version info and the optional encoding and standalone
        declarations, reports what is wrong with them as fatal errors and
        passes the three values to the application with set_entity_info.
        Values that are missing are passed as ""."""

        self.update_pos()
        if self.get_column()!=5 or self.get_line()!=1:
            self.err.fatal("XML declaration must appear first in "
                           "document")
        if self.seen_xmldecl: # Set in parse_pi, to avoid block problems
            self.err.fatal("Multiple XML declarations")

        ver="" # Must be defined even when the version info is missing
        enc=""
        sddecl=""
        self.skip_ws()
        if self.now_at("version"):
            self.skip_ws()
            if not self.now_at("="): self.err.fatal("'=' expected")
            self.skip_ws()
            ver=self.get_match(reg_ver)[1:-1] # Strips the quotes
            if ver!="1.0":
                self.err.fatal("Unsupported XML version")

            self.skip_ws()
        else:
            self.err.fatal("XML version info missing on XML declaration")

        try:
            tst=self.now_at("encoding")
        except OutOfDataException:
            tst=0 # It's OK

        if tst:
            self.skip_ws()
            if not self.now_at("="): self.err.fatal("'=' expected")
            self.skip_ws()
            enc=self.get_match(reg_enc_name)[1:-1]
            if enc!="UTF-8" and enc!="ISO-8859-1":
                self.err.fatal("Unsupported character encoding")
            self.skip_ws()

        try:
            tst=self.now_at("standalone")
        except OutOfDataException:
            tst=0

        if tst:
            self.skip_ws()
            if not self.now_at("="): self.err.fatal("'=' expected")
            self.skip_ws()
            sddecl=self.get_match(reg_std_alone)[1:-1]
            self.standalone= sddecl=="yes"

            self.skip_ws()

        self.skip_ws()
        self.app.set_entity_info(ver,enc,sddecl)
	
    def handle_internal_dtd(self,doctype_line,doctype_lb,int_dtd):
        """Parses int_dtd, the text of the internal DTD subset, with a
        DTDParser that reports to this processor's DTD object and error
        handler. doctype_line and doctype_lb are given to the DTD parser as
        its starting line and last line break. While the subset is parsed
        the DTD parser is the locator of the error handler."""
        dtd_parser=DTDParser()
        dtd_parser.set_error_handler(self.err)
        dtd_parser.set_dtd_consumer(self.dtd)
        if self.dtd_listener!=None:
            self.dtd.set_dtd_listener(self.dtd_listener)
        dtd_parser.set_internal(1)
        self.err.set_locator(dtd_parser)

        try:
            try:
                dtd_parser.line=doctype_line
                dtd_parser.last_break=doctype_lb

                dtd_parser.set_sysid(self.get_current_sysid())
                dtd_parser.feed(int_dtd)
            except OutOfDataException:
                self.err.fatal("Premature end of internal DTD subset")
        finally:
            # Always done, whatever happened during the parse
            self.err.set_locator(self)
            self.dtd.dtd_end()
	    
# ==============================
# A DTD parser
# ==============================
	    
class DTDParser(AbstractXML):
    "A parser for XML DTDs, both internal and external."

    def __init__(self):
        """Creates a parser for an external DTD that reports to a
        DTDConsumer of its own and is outside any conditional section."""
        EntityParser.__init__(self)

        self.dtd=DTDConsumer(self)
        self.internal=0

        self.section_stack=[] # Conditional section nesting tracker
        self.ignore=0 # Currently in a conditional section marked ignore?

    def set_dtd_consumer(self,dtd):
	"Tells the parser where to send DTD information."
	self.dtd=dtd
	
    def set_internal(self,yesno):
	"Tells the parser whether the DTD is internal or external."
	self.internal=yesno
	
    def do_parse(self):
        """Does the actual parsing: handles the declarations, parameter
        entity references, processing instructions, comments and
        conditional sections in the buffer, one after the other.

        When the data runs out in the middle of a construct the position is
        moved back to where that construct began, unless this is the final
        block, in which case the OutOfDataException is passed on."""

        prepos=self.pos # Defined even if the loop below is never entered
        try:
            self.skip_ws()
            while self.pos<self.datasize:
                prepos=self.pos

                if self.now_at("<!ELEMENT"):
                    self.parse_elem_type()
                elif self.now_at("<!ENTITY"):
                    self.parse_entity()
                elif self.now_at("<!ATTLIST"):
                    self.parse_attlist()
                elif self.now_at("<!NOTATION"):
                    self.parse_notation()
                elif self.test_reg(reg_pe_ref):
                    self.parse_pe_ref()
                elif self.now_at("<?"):
                    self.parse_pi()
                elif self.now_at("<!--"):
                    self.parse_comment()
                elif self.now_at("<!["):
                    self.parse_conditional()
                elif self.now_at("]]>") and self.section_stack!=[]:
                    # End of a conditional section: back to the ignore
                    # state that was in effect outside it
                    self.ignore=self.section_stack[-1]
                    del self.section_stack[-1]
                else:
                    self.err.fatal("Illegal construct")
                    self.pos=self.pos+1 # Avoid endless loop

                self.skip_ws()

        except OutOfDataException:
            if self.final:
                raise # Passes the exception on with its traceback
            else:
                self.pos=prepos # Didn't complete the construct

    def parse_entity(self):
        """Parses an entity declaration from after the '<!ENTITY' to
        beyond the '>' and reports the entity to the DTD object, as a
        general or parameter entity, internal or external."""

        self.skip_ws(1)
        pedecl=self.now_at("%")
        if pedecl:
            self.skip_ws(1)

        ent_name=self.get_match(reg_name)
        self.skip_ws(1)

        pub_id=""
        if self.now_at("SYSTEM"):
            internal=0
            self.skip_ws(1)
            sys_id=self.get_wrapped_match([("\"",reg_sysid_quote),\
                                           ("'",reg_sysid_apo)])
        elif self.now_at("PUBLIC"):
            internal=0
            self.skip_ws(1)
            pub_id=self.get_wrapped_match([("\"",reg_pubid_quote),\
                                           ("'",reg_pubid_apo)])

            # The system identifier that follows the public one
            self.skip_ws(1)
            sys_id=self.get_wrapped_match([("\"",reg_sysid_quote),\
                                           ("'",reg_sysid_apo)])
        else:
            internal=1
            ent_val=self.parse_string(0)

        self.skip_ws()
        ndata=""
        if not internal and self.now_at("NDATA"):
            # The optional NDataDecl
            if pedecl:
                self.err.fatal("Parameter entities cannot be unparsed")
            self.skip_ws()

            ndata=self.get_match(reg_name)

        if not self.now_at(">"):
            self.err.fatal("Entity declaration incorrectly terminated, '>' "
                           "expected")

        if pedecl and internal:
            self.dtd.new_parameter_entity(ent_name,ent_val)
        elif pedecl:
            self.dtd.new_external_pe(ent_name,pub_id,sys_id)
        elif internal:
            self.dtd.new_general_entity(ent_name,ent_val)
        else:
            self.dtd.new_external_entity(ent_name,pub_id,sys_id,ndata)

    def parse_notation(self):
        """Parses a notation declaration from after the '<!NOTATION' to
        beyond the '>' and reports the notation to the DTD object."""
        self.skip_ws(1)
        name=self.get_match(reg_name)
        self.skip_ws(1)

        pubid=""
        sysid=""
        sysid_wraps=[("\"",reg_sysid_quote),("'",reg_sysid_apo)]

        if self.now_at("PUBLIC"):
            self.skip_ws()
            pubid=self.get_wrapped_match([("\"",reg_pubid_quote),
                                          ("'",reg_pubid_apo)])
            self.skip_ws(1)
            # The system identifier is optional after a public one
            if self.test_str("\"") or self.test_str("'"):
                sysid=self.get_wrapped_match(sysid_wraps)
        elif self.now_at("SYSTEM"):
            self.skip_ws(1)
            sysid=self.get_wrapped_match(sysid_wraps)
        else:
            self.err.fatal("'PUBLIC' or 'SYSTEM' expected")
            self.scan_to(">")
            self.pos=self.pos-1  # Back to the '>'

        self.skip_ws()
        if not self.now_at(">"):
            self.err.fatal("'>' expected")

        self.dtd.new_notation(name,sysid,pubid)

    def parse_pe_ref(self):
        """Parses a reference to a parameter entity and parses the
        contents of the entity in its place.

        The reference is matched with reg_pe_ref; its first and last
        characters are stripped to get the entity name. A reference to an
        unknown entity is reported as a fatal error and skipped."""
        pe_name=self.get_match(reg_pe_ref)[1:-1]

        try:
            ent=self.dtd.resolve_pe(pe_name)
        except KeyError:
            self.err.fatal("Unknown parameter entity '%s'" % pe_name)
            return

        if ent.is_internal():
            self.push_entity(self.get_current_sysid(),ent.value)
            self.do_parse()
            self.pop_entity()
        else:
            self.open_entity(ent.sysid) # Does parsing and popping
	    
    def parse_attlist(self):
        """Parses an attribute list declaration from after the '<!ATTLIST'
        to beyond the '>' and reports each attribute to the DTD object."""

        self.skip_ws(1)
        elem=self.get_match(reg_name)
        self.skip_ws(1)

        while 1:
            if self.test_str(">"):
                break

            attr=self.get_match(reg_name)
            self.skip_ws(1)

            # The type: a keyword, a notation list or an enumeration
            if self.test_reg(reg_attr_type):
                a_type=self.get_match(reg_attr_type)
            elif self.now_at("NOTATION"):
                self.skip_ws(1)
                a_type=("NOTATION",self.__parse_list(reg_name,"|"))
            elif self.now_at("("):
                self.pos=self.pos-1 # Does not expect '(' to be skipped
                a_type=self.__parse_list(reg_nmtoken,"|")
            else:
                self.err.fatal("Expected type or alternative list")
                self.scan_to(">")
                return

            self.skip_ws(1)

            # The default declaration
            if self.test_reg(reg_attr_def):
                a_decl=self.get_match(reg_attr_def)
                a_def=""
            else:
                if self.now_at("#FIXED"):
                    self.skip_ws(1)
                    a_decl="#FIXED"
                else:
                    a_decl="#DEFAULT"
                a_def=self.parse_string(0)

            self.skip_ws()

            self.dtd.new_attribute(elem,attr,a_type,a_decl,a_def)

        self.pos=self.pos+1 # Steps over the '>'

    def parse_elem_type(self):
        """Parses an element type declaration from after the '<!ELEMENT'
        to beyond the '>' and reports the element type to the DTD object.
        The content is reported as None for EMPTY (and for an invalid
        declaration), 1 for ANY and a content model object otherwise."""

        self.skip_ws(1)
        elem_name=self.get_match(reg_name)
        self.skip_ws(1)

        # content-spec
        elem_cont=None
        if self.now_at("EMPTY"):
            pass
        elif self.now_at("ANY"):
            elem_cont=1
        elif self.now_at("("):
            elem_cont=self.parse_content_model()
        else:
            self.err.fatal("Invalid content declaration")

        self.skip_ws()
        terminated=self.now_at(">")
        if not terminated:
            self.err.fatal("Element declaration incorrectly terminated,"+\
                           "'>' expected.")

        self.dtd.new_element_type(elem_name,elem_cont)

    def parse_content_model(self,level=0):
        """Parses a content model from after its '(' to beyond its ')' and
        any modifier that follows, and returns a content model object.
        level is 0 for the top level of an element type declaration and 1
        for nested groups; mixed content is recognized on the top level
        only."""

        if self.now_at("#PCDATA") and level==0:
            return self.parse_mixed_content_model()

        # The particles as (particle,modifier) pairs, and what separates
        # them; the separator is unknown until the first one is seen
        particles=[]
        sep=""

        while 1:
            self.skip_ws()
            if self.now_at("("):
                cp=self.parse_content_model(1)
            else:
                cp=self.get_match(reg_name)

            self.skip_ws()

            mod=""
            if self.test_str("?") or self.test_str("*") or self.test_str("+"):
                mod=self.data[self.pos]
                self.pos=self.pos+1
            particles.append((cp,mod))

            self.skip_ws()

            if self.now_at(")"):
                break

            if sep!="":
                if not self.now_at(sep):
                    self.err.fatal("Mixing of choice and sequence lists!")
            elif self.test_str("|") or self.test_str(","):
                sep=self.data[self.pos]
                self.pos=self.pos+1
            else:
                self.err.fatal("Unknown separator")

        # The modifier of the group as a whole
        mod=""
        if self.test_str("+") or self.test_str("?") or self.test_str("*"):
            mod=self.data[self.pos]
            self.pos=self.pos+1

        if sep==",":
            return SeqContentModel(particles,mod)
        elif sep=="|":
            return ChoiceContentModel(particles,mod)
        elif sep=="":
            if mod=="":
                # Pick out modifier from content list, if any
                mod=particles[0][1]
            return ContentModel(particles,mod)

    def parse_conditional(self):
        """Parses the start of a conditional section, from after the '<!['
        to beyond the second '['. The previous ignore state is saved on
        the section stack; the new state is ignore if either the section
        or its surroundings are ignored."""
        if not self.internal:
            self.skip_ws()

            if self.now_at("IGNORE"):
                ignore=1
            elif self.now_at("INCLUDE"):
                ignore=0
            else:
                self.err.fatal("'IGNORE' or 'INCLUDE' expected")
                self.scan_to("[")
                ignore=1

            self.skip_ws()
            if not self.now_at("["):
                self.err.fatal("'[' expected")
        else:
            self.err.fatal("Conditional sections not allowed in internal "
                           "subset")
            ignore=1
            self.scan_to("[")

        outer_ignore=self.ignore
        self.section_stack.append(outer_ignore)
        self.ignore=ignore or outer_ignore
	
    def parse_mixed_content_model(self):
        """Parses a mixed content model (one containing #PCDATA) from
        after the '#PCDATA' to beyond the ')' and, if element names were
        listed, the '*'. Returns a ChoiceContentModel."""

        alternatives=[("#PCDATA","")]
        seen_bar=0

        while 1:
            self.skip_ws()
            if self.now_at("|"):
                seen_bar=1
            elif self.now_at(")"):
                break
            else:
                self.err.fatal("'|' expected")

            self.skip_ws()
            elem_name=self.get_match(reg_name)
            alternatives.append((elem_name,""))

        if seen_bar and not self.now_at("*"):
            self.err.fatal("'*' expected.")

        return ChoiceContentModel(alternatives,"+")
	
    def __parse_list(self, elem_regexp, separator):
        """Parses a '(' S? elem_regexp S? separator ... ')' list and
        returns the matched elements as a list of strings. (Internal.)"""

        self.skip_ws()
        if not self.now_at("("):
            self.err.fatal("'(' expected")

        items=[]
        while 1:
            self.skip_ws()
            item=self.get_match(elem_regexp)
            items.append(item)
            self.skip_ws()

            if self.now_at(")"):
                break
            if not self.now_at(separator):
                self.err.fatal("Expected ')' or '%s'" % separator)
                break

        return items
