
"""
This is a convenience XML utility library for those who want to
make XML software in Python.

It's currently not released, because it needs much more work, but it's
included in the xmlproc package because xmlproc uses it.

Version 0.01 - 12.Mar.98
   Lars Marius Garshol - larsga@ifi.uio.no
   http://www.stud.ifi.uio.no/~larsga/download/python/xml/
"""
   
import string,re

class XMLError(Exception):
    """Raised when a catalog file contains an entry with an unknown keyword."""


class XML_catalog:
    """An SGML Open catalog file handler.

    Only handles a simple syntax subset. Each entry must be on a separate
    line and all non-keywords must be quoted with double quotes. Comments
    must be on separate lines and start the line with '--'. Blank lines
    are ignored.

    Supported entries (keyword followed by its quoted arguments):
    - PUBLIC:   public identifier, system identifier
    - ENTITY:   entity name ('%name' for parameter entities), system identifier
    - DOCTYPE:  document type name, system identifier
    - NOTATION: notation name, system identifier
    - CATALOG:  system identifier of another catalog (recorded, not read)

    (The bit below is ugly due to a gendoc bug. Sorry.)

    Omissions:
    - LINKTYPE: does not apply to XML
    - SGMLDECL: does not apply to XML
    - OVERRIDE: not fully understood yet
    - SYSTEM:   not fully understood yet
    - DOCUMENT: does not seem too useful
    - BASE:     too complex for now
    - DELEGATE: too complex for now"""

    # Number of quoted arguments each supported keyword must be followed by.
    _entry_args = {"PUBLIC": 2, "ENTITY": 2, "DOCTYPE": 2, "NOTATION": 2,
                   "CATALOG": 1}

    # One double-quoted argument; the group captures the text between quotes.
    _quoted_arg = re.compile('"([^"]*)"')

    def __init__(self):
        self.fpi_hash = {}          # PUBLIC entries:   public id -> system id
        self.g_entity_hash = {}     # ENTITY entries:   general entity name -> system id
        self.p_entity_hash = {}     # ENTITY entries:   parameter entity name (no '%') -> system id
        self.doctype_hash = {}      # DOCTYPE entries:  name -> system id
        self.not_hash = {}          # NOTATION entries: name -> system id
        self.unread_catalogs = []   # CATALOG entries, in file order

    def read_catalog(self, infile):
        """Reads a catalog file from the infile parameter.

        infile only needs a readline() method that returns "" at end of
        file. Blank lines and comment lines (starting with '--') are
        skipped; every other line is parsed as one entry and stored in the
        matching table.

        Raises XMLError if a line starts with an unsupported keyword and
        ValueError if a supported entry has too few quoted arguments.
        """
        while 1:
            line = infile.readline()
            if line == "":
                break

            entry = line.strip()
            if entry == "" or entry[:2] == "--":
                continue

            # The keyword is everything up to the first whitespace.
            keyword = entry.split(None, 1)[0]
            if keyword not in self._entry_args:
                raise XMLError("Unknown catalog file entry", keyword)

            args = self._quoted_arg.findall(entry[len(keyword):])
            needed = self._entry_args[keyword]
            if len(args) < needed:
                raise ValueError("Catalog entry %s needs %d quoted "
                                 "argument(s): %s" % (keyword, needed, entry))

            name = args[0]
            if keyword == "CATALOG":
                # CATALOG only names another catalog; there is no second field.
                self.unread_catalogs.append(name)
                continue

            sysid = args[1]
            if keyword == "PUBLIC":
                self.fpi_hash[name] = sysid
            elif keyword == "ENTITY":
                # A leading '%' marks a parameter entity; it is not stored.
                if name[:1] == "%":
                    self.p_entity_hash[name[1:]] = sysid
                else:
                    self.g_entity_hash[name] = sysid
            elif keyword == "DOCTYPE":
                self.doctype_hash[name] = sysid
            else:
                self.not_hash[name] = sysid

    def _resolve(self, table, fpi):
        "Looks fpi up in table, then among the PUBLIC entries; '' if unknown."
        try:
            return table[fpi]
        except KeyError:
            return self.fpi_hash.get(fpi, "")

    def resolve_fpi(self, fpi):
        """Returns the system identifier of the PUBLIC entry for fpi.

        Unlike the other resolve methods this raises KeyError when fpi is
        not in the catalog."""
        return self.fpi_hash[fpi]

    def resolveDocumentFPI(self, fpi):
        """Returns the correct system identifier for this document FPI,
        or "" if the catalog has no PUBLIC entry for it."""
        return self.fpi_hash.get(fpi, "")

    def resolveDoctypeFPI(self, fpi):
        """Returns the correct system identifier for this document type FPI.

        DOCTYPE entries are consulted first, then PUBLIC entries; "" is
        returned if neither has a mapping."""
        return self._resolve(self.doctype_hash, fpi)

    def resolveGE_FPI(self, fpi):
        """Returns the correct system identifier for this general entity FPI.

        General ENTITY entries are consulted first, then PUBLIC entries;
        "" is returned if neither has a mapping."""
        return self._resolve(self.g_entity_hash, fpi)

    def resolvePE_FPI(self, fpi):
        """Returns the correct system identifier for this parameter entity FPI.

        Parameter ENTITY entries are consulted first, then PUBLIC entries;
        "" is returned if neither has a mapping."""
        return self._resolve(self.p_entity_hash, fpi)
	
# A SAX entity handler that uses the catalog object

class CatalogEntityHandler:
    "Uses a catalog file to resolve entity system identifiers."

    def __init__(self, catalog):
        """Stores the catalog object used for lookups.

        catalog must provide resolveDocumentFPI, resolveDoctypeFPI and
        resolveGE_FPI, each taking a public identifier and returning a
        system identifier or "" when there is no mapping."""
        self.catalog = catalog

    def resolveEntity(self, entityName, publicID, systemID):
        """Given the system identifier (URI) systemID, possibly
        accompanied by an entity name (entityName, with the special
        values '[document]' for the document entity and '[external
        dtd]' for the external DTD subset) and/or public identifier
        (publicID), return the system identifier that the parser
        should use to obtain the entity, or null, to instruct the
        parser to skip the entity (in which case the parser may report
        a validation error). In most cases, this method should return
        the suggested system identifier.

        The catalog is only consulted when a public identifier is given.
        If the catalog has no mapping for it, the suggested systemID is
        returned unchanged."""

        if not publicID:
            # Nothing to look up in the catalog without a public identifier.
            return systemID

        if entityName == "[document]":
            sysID = self.catalog.resolveDocumentFPI(publicID)
        elif entityName == "[external dtd]":
            sysID = self.catalog.resolveDoctypeFPI(publicID)
        elif entityName == "":
            sysID = systemID
        else:
            sysID = self.catalog.resolveGE_FPI(publicID)

        # The catalog answers "" for unknown identifiers; keep the parser's
        # own suggestion in that case instead of handing back an empty URI.
        return sysID or systemID

    def changeEntity(self, systemID):
        """Handle a change in the current entity URI. The systemID
        argument specifies the base URI that is now in force."""
        pass
    
# A collection of useful functions

# Utility functions

def unhex(hex_value):
    """Converts a string hex-value to an integer.

    Both upper- and lower-case digits are accepted and the empty string
    gives 0. The caller is expected to pass hex digits only: any other
    character is not reported as an error but counted as a zero digit."""

    total = 0
    for char in hex_value:
        code = ord(char)
        total = total * 16

        if 48 <= code < 58:         # '0'..'9'
            total = total + (code - 48)
        elif 97 <= code <= 102:     # 'a'..'f'
            total = total + (code - 87)
        elif 65 <= code <= 70:      # 'A'..'F'
            total = total + (code - 55)
        # else ERROR, but it can't occur here

    return total

def matches(regexp,str):
    """Returns true if the compiled regexp matches the whole of str.

    regexp is a compiled regular expression object, str the string to
    test (the parameter name shadows the builtin, but is kept because it
    is part of the call signature)."""
    # A plain match() stops at the first alternative that succeeds, so
    # "a|ab" against "ab" matches only "a"; fullmatch() backtracks until
    # the entire string is consumed.
    fullmatch = getattr(regexp, "fullmatch", None)
    if fullmatch is not None:
        return fullmatch(str) is not None

    # Interpreters without fullmatch(): accept only a prefix match that
    # happens to cover the entire string.
    mo = regexp.match(str)
    return mo is not None and mo.end() == len(str)

# Some useful regexps

# Patterns that contain regex escapes such as \- are written as raw strings
# so that Python does not see an invalid string escape; what they match is
# unchanged.

# Whitespace, and the quoted values of the XML declaration
reg_ws = re.compile("[\n\t \r]+")
reg_ver = re.compile("\"[-a-zA-Z0-9_.:]+\"|'[-a-zA-Z0-9_.:]+'")
reg_enc_name = re.compile("\"[A-Za-z][-A-Za-z0-9._]*\"|"
                          "'[A-Za-z][-A-Za-z0-9._]*'")
reg_std_alone = re.compile("\"(yes|no)\"|'(yes|no)'")
reg_comment_content = re.compile("([^-]|-[^-])*")

# Names and name tokens (ASCII subset), singly and as whitespace-separated lists
reg_name = re.compile(r"[A-Za-z_:][\-A-Za-z_:.0-9]*")
reg_names = re.compile(r"[A-Za-z_:][\-A-Za-z_:.0-9]*"
                       r"([\n\t \r]+[A-Za-z_:][\-A-Za-z_:.0-9]*)*")
reg_nmtoken = re.compile(r"[\-A-Za-z_:.0-9]+")
reg_nmtokens = re.compile(r"[\-A-Za-z_:.0-9]+([\n\t \r]+[\-A-Za-z_:.0-9]+)*")

# Contents of system and public identifier literals, per quote character
reg_sysid_quote = re.compile("[^\"]*")
reg_sysid_apo = re.compile("[^']*")
reg_pubid_quote = re.compile("[- \n\t\ra-zA-Z0-9'()+,./:=?;!*#@$_%]*")
reg_pubid_apo = re.compile("[- \n\t\ra-zA-Z0-9()+,./:=?;!*#@$_%]*")

# Tags, attribute values, character data and parameter entity references
reg_start_tag = re.compile("<[A-Za-z_:]")
reg_quoted_attr = re.compile("[^<\"]*")
reg_apo_attr = re.compile("[^<']*")
reg_c_data = re.compile("[<&]")
reg_pe_ref = re.compile(r"%[A-Za-z_:][\-A-Za-z_:.0-9]*;")

# Entity values, per quote character
reg_ent_val_quote = re.compile("[^\"]+")
reg_ent_val_apo = re.compile("[^\']+")

# Attribute declarations: longer keywords come before their prefixes
# (IDREFS, IDREF, ID) so that a plain match() picks the longest one.
reg_attr_type = re.compile(r"CDATA|IDREFS|IDREF|ID|ENTITY|ENTITIES|NMTOKENS|"
                           "NMTOKEN")
reg_attr_def = re.compile(r"#REQUIRED|#IMPLIED")

# Decimal and hexadecimal digit runs
reg_digits = re.compile("[0-9]+")
reg_hex_digits = re.compile("[0-9a-fA-F]+")

# "xml" in any letter case
reg_res_pi = re.compile("xml", re.I)

# The tokens looked for inside an internal DTD subset:
# quotes, '<?', '<!--', ']>' and '<!['
reg_int_dtd = re.compile("\"|'|<\\?|<!--|\\]>|<!\\[")

# RFC 1766 language codes

# A two-letter code, or an 'i-'/'x-' prefixed code, followed by any number
# of '-subcode' parts. A subcode is one or more letters, so that codes such
# as "en-US" are matched in full rather than only up to "en-U".
reg_lang_code = re.compile("([a-zA-Z][a-zA-Z]|[iIxX]-([a-zA-Z])+)"
                           "(-[a-zA-Z]+)*")

# Some useful variables

# The five predefined XML entities, each mapped to a numeric character
# reference for the character it stands for.
predef_ents = {
    "lt": "&#60;",
    "gt": "&#62;",
    "amp": "&#38;",
    "apos": "&#39;",
    "quot": "&#34;",
}

# Translation tables

# string.maketrans only exists on Python 2, where it builds a 256-character
# table; Python 3 offers str.maketrans, which builds an ordinal mapping.
# Either result can be passed to the translate() method of a string.
try:
    _maketrans = string.maketrans
except AttributeError:
    _maketrans = str.maketrans

ws_trans = _maketrans("\r\t\n", "   ")  # Whitespace normalization
id_trans = _maketrans("", "")           # Identity transform
