
"""These are the DTD-aware classes of xmlproc. They provide both the
DTD event consumers for the DTD parser as well as the objects that
store DTD information for retrieval by clients (including the
validating parser)."""

import types

from xmlutils import *
from xmlapp import *

# ==============================
# WFC-DTD
# ==============================

class WFCDTD(DTDConsumer):
    "DTD-representing class for the WFC parser."

    def __init__(self, err):
        """Creates empty entity and element tables, installs a default
        DTD listener and registers the predefined general entities."""
        DTDConsumer.__init__(self, err)
        self.gen_ents = {}      # general entity name -> entity object
        self.param_ents = {}    # parameter entity name -> entity object
        self.elems = {}         # element name -> element type object

        # The listener has to exist before the loop below, because
        # new_general_entity echoes every declaration to it.
        self.dtd_listener = DTDConsumer(err)

        # Adding predefined entities
        for ent_name in predef_ents.keys():
            self.new_general_entity(ent_name, predef_ents[ent_name])

    def set_dtd_listener(self, listener):
        "Registers an object that listens for DTD parse events."
        self.dtd_listener = listener

    def resolve_pe(self, name):
        """Returns the entity object associated with this parameter entity
        name. Throws KeyError if the entity is not declared."""
        return self.param_ents[name]

    def resolve_ge(self, name):
        """Returns the entity object associated with this general entity
        name. Throws KeyError if the entity is not declared."""
        return self.gen_ents[name]

    def get_elem(self, name):
        """Returns the declaration of this element. Throws KeyError if the
        element does not exist."""
        return self.elems[name]

    def new_attribute(self, elem, attr, a_type, a_decl, a_def):
        """Receives the declaration of a new attribute. The listener is
        notified first, then the attribute is stored on the element."""
        self.dtd_listener.new_attribute(elem, attr, a_type, a_decl, a_def)

        # Element declarations are not recorded by this class, so the
        # first attribute seen for an element creates a dummy ANY entry.
        if elem not in self.elems:
            self.elems[elem] = ElementTypeAny(elem)

        self.elems[elem].add_attr(attr, a_type, a_decl, a_def, self.err)

    # --- Echoing DTD parse events

    def new_general_entity(self, name, val):
        "Stores an internal general entity and echoes the event."
        entity = InternalEntity(name, val)
        self.gen_ents[entity.name] = entity
        self.dtd_listener.new_general_entity(name, val)

    def new_parameter_entity(self, name, val):
        "Stores an internal parameter entity and echoes the event."
        entity = InternalEntity(name, val)
        self.param_ents[entity.name] = entity
        self.dtd_listener.new_parameter_entity(name, val)

    def new_external_entity(self, ent_name, sys_id, pub_id, ndata):
        "Stores an external general entity and echoes the event."
        # NOTE(review): ExternalEntity.__init__ is declared as
        # (name, pubid, sysid, notation) but receives sys_id before pub_id
        # here -- confirm which order the DTD parser actually supplies.
        entity = ExternalEntity(ent_name, sys_id, pub_id, ndata)
        self.gen_ents[entity.name] = entity
        self.dtd_listener.new_external_entity(ent_name, sys_id, pub_id, ndata)

    def new_external_pe(self, name, sysid, pubid):
        """Stores an external parameter entity (created with an empty
        notation) and echoes the event."""
        # NOTE(review): same argument-order question as in
        # new_external_entity -- sysid is passed in the pubid position.
        entity = ExternalEntity(name, sysid, pubid, "")
        self.param_ents[entity.name] = entity
        self.dtd_listener.new_external_pe(name, sysid, pubid)

    def dtd_end(self):
        "Echoes the end-of-DTD event."
        self.dtd_listener.dtd_end()

    def new_comment(self, contents):
        "Echoes a comment event."
        self.dtd_listener.new_comment(contents)

    def new_pi(self, target, rem):
        "Echoes a processing instruction event."
        self.dtd_listener.new_pi(target, rem)

    def new_notation(self, name, sysid, pubid):
        "Echoes a notation declaration; nothing is stored here."
        self.dtd_listener.new_notation(name, sysid, pubid)

    def new_element_type(self, elem_name, elem_cont):
        "Echoes an element type declaration; nothing is stored here."
        self.dtd_listener.new_element_type(elem_name, elem_cont)
    
# ==============================
# DTD consumer for the validating parser
# ==============================
    
class CompleteDTD(WFCDTD):
    "Complete DTD handler for the validating parser."

    def __init__(self, err):
        "Adds notation and pending-attribute tables to the WFC tables."
        WFCDTD.__init__(self, err)
        self.notations = {}   # notation name -> (sysid, pubid)
        self.attlists = {}    # Attribute lists of elements not yet declared
        self.root_elem = None

    def get_root_elem(self):
        "Returns the name of the declared root element."
        return self.root_elem

    def get_notation(self, name):
        """Returns the declaration of the notation. Throws KeyError if the
        notation does not exist."""
        return self.notations[name]

    # --- DTD parse events

    def dtd_end(self):
        """Warns about every element that received attribute declarations
        but was never declared, then echoes the end-of-DTD event."""
        for elem in self.attlists.keys():
            self.err.warning("Element '%s' has attribute list, but no element "
                             "declaration" % elem)

        # Release the pending lists, but keep the attribute itself so that
        # a late event does not fail with AttributeError.
        self.attlists = {}
        self.dtd_listener.dtd_end()

    def new_notation(self, name, sysid, pubid):
        "Stores the notation as a (sysid, pubid) pair and echoes the event."
        self.notations[name] = (sysid, pubid)
        self.dtd_listener.new_notation(name, sysid, pubid)

    def new_element_type(self, elem_name, elem_cont):
        """Stores a new element type and echoes the event.

        'elem_cont' is 1 for an element stored as ElementTypeAny, None for
        an element with the empty model, and otherwise an object whose
        get_states() result is converted with fnda2fda. Attributes that
        were declared before the element are attached to it here."""
        if elem_name in self.elems:
            self.err.fatal("Element '%s' declared twice" % elem_name)

        if elem_cont == 1:
            elem_type = ElementTypeAny(elem_name)
        else:
            if elem_cont is None:
                model = make_empty_model()
            else:
                model = fnda2fda(elem_cont.get_states(), self.err)
            elem_type = ElementType(elem_name, model)

        self.elems[elem_name] = elem_type

        if elem_name in self.attlists:
            for (attr, a_type, a_decl, a_def) in self.attlists[elem_name]:
                elem_type.add_attr(attr, a_type, a_decl, a_def, self.err)
            del self.attlists[elem_name]

        self.dtd_listener.new_element_type(elem_name, elem_cont)

    def new_attribute(self, elem, attr, a_type, a_decl, a_def):
        """Receives the declaration of a new attribute. If the element is
        already declared the attribute is added to it; otherwise it is
        queued until the element declaration arrives."""
        self.dtd_listener.new_attribute(elem, attr, a_type, a_decl, a_def)

        # Look the element up explicitly instead of catching KeyError
        # around add_attr, so that a KeyError raised inside add_attr (or
        # the error handler) is not mistaken for "element not declared".
        elem_type = self.elems.get(elem)
        if elem_type is not None:
            elem_type.add_attr(attr, a_type, a_decl, a_def, self.err)
        else:
            pending = self.attlists.setdefault(elem, [])
            pending.append((attr, a_type, a_decl, a_def))
                
# ==============================
# Represents an XML element type
# ==============================
    
class ElementType:
    "Represents an element type."

    def __init__(self, name, cont):
        """Creates the element type 'name'. 'cont' is the content model: a
        mapping with 'start' and 'final' entries plus, for each state, a
        list of (next state, element name) transitions."""
        self.name = name
        self.attrhash = {}          # attribute name -> Attribute
        self.content_model = cont

    def get_name(self):
        "Returns the name of the element type."
        return self.name

    def get_attr_list(self):
        "Returns a list of the declared attribute names."
        return list(self.attrhash.keys())

    def get_attr(self, name):
        "Returns the attribute or throws a KeyError if it's not declared."
        return self.attrhash[name]

    def add_attr(self, attr, a_type, a_decl, a_def, err):
        """Adds a new attribute to the element.

        Problems are reported through err.warning / err.error. When the
        attribute is already declared a warning is issued and the new
        declaration is ignored, since the XML specification makes the
        first declaration of an attribute binding."""
        if attr in self.attrhash:
            err.warning("Attribute '%s' defined more than once" % attr)
            return

        if a_type == "ID":
            for declared in self.attrhash.values():
                if declared.type == "ID":
                    err.error("Only one ID attribute allowed on each element "
                              "type")

            if a_decl not in ("#REQUIRED", "#IMPLIED"):
                err.error("ID attributes cannot be #FIXED")

        # The error handler is handed on so that Attribute can report
        # problems with the special xml:space attribute.
        self.attrhash[attr] = Attribute(attr, a_type, a_decl, a_def, err)

    def get_start_state(self):
        "Return the start state of this content model."
        return self.content_model["start"]

    def final_state(self, state):
        """True if 'state' is a final state, i.e. if it is not smaller than
        the model's 'final' value."""
        return self.content_model["final"] <= state

    def next_state(self, state, elem_name):
        """Returns the next state of the content model from the given one
        when elem_name is encountered. Character data is represented as
        '#PCDATA'. If 0 is returned the element is not allowed here."""
        for (to, trans_name) in self.content_model[state]:
            if trans_name == elem_name:
                return to

        return 0

# --- Element types with ANY content

class ElementTypeAny(ElementType):
    "Element type without a content model: every state query returns 1."

    def __init__(self, name):
        ElementType.__init__(self, name, None)

    def get_start_state(self):
        "Returns 1, the only state used for this kind of element."
        return 1

    def final_state(self, state):
        "Always true."
        return 1

    def next_state(self, state, elem_name):
        "Always returns 1, whatever the element name."
        return 1

# ==============================
# Attribute
# ==============================

class Attribute:
    "Represents a declared attribute."

    def __init__(self, name, type, decl, default, err=None):
        """Stores the declaration of the attribute 'name'.

        'type' is either a string naming the attribute type or a sequence
        of allowed values (an enumeration). A declaration of xml:space is
        checked: it must be an enumeration of exactly 'default' and
        'preserve'. A violation is reported through err.error, or raised
        as ValueError when no error handler was supplied."""
        # 'type' shadows the builtin inside this method, so the builtin
        # must not be called here; isinstance is used for the check.
        self.name = name
        self.type = type
        self.decl = decl
        self.default = default

        # Handling code for special attribute xml:space
        if name == "xml:space":
            problem = self._xml_space_problem()
            if problem is not None:
                if err is None:
                    raise ValueError(problem)
                err.error(problem)

    def _xml_space_problem(self):
        "Returns a message if the xml:space declaration is bad, else None."
        if isinstance(self.type, str):
            return "xml:space must be declared an enumeration type"

        allowed = (("default", "preserve"), ("preserve", "default"))
        if tuple(self.type) in allowed:
            return None

        return ("xml:space must have exactly the values 'default'"
                " and 'preserve'")

    def validate(self, value, err):
        """Validates given value for correctness. Violations are reported
        through err.error; nothing is returned. Only enumerations and the
        types CDATA, ID, IDREF, NMTOKEN and NMTOKENS are checked."""
        if not isinstance(self.type, str):
            # Enumeration: the value must be one of the listed ones
            if value not in self.type:
                err.error("'%s' is not an allowed value for the '%s' attribute"
                          % (value, self.name))
            return

        if self.type == "CDATA":
            return

        if self.type in ("ID", "IDREF"):
            if not matches(reg_name, value):
                err.error("Value of '%s' attribute must be a valid name" %
                          self.name)
        elif self.type == "NMTOKEN":
            if not matches(reg_nmtoken, value):
                err.error("Value of '%s' attribute not a valid name token" %
                          self.name)
        elif self.type == "NMTOKENS":
            if not matches(reg_nmtokens, value):
                err.error("Value of '%s' attribute not a valid name token"
                          " sequence" % self.name)

    def get_name(self):
        "Returns the attribute name."
        return self.name

    def get_type(self):
        "Returns the type of the attribute. (ID, CDATA etc)"
        return self.type

    def get_decl(self):
        "Returns the declaration (#IMPLIED, #REQUIRED, #FIXED or #DEFAULT)."
        return self.decl

    def get_default(self):
        """Returns the default value of the attribute, or None if none has
        been declared."""
        return self.default
                
# ==============================
# Entities
# ==============================

class InternalEntity:
    "An entity whose replacement text is given directly in the DTD."

    def __init__(self, name, value):
        "Stores the entity name and its replacement value."
        self.name, self.value = name, value

    def is_internal(self):
        "Always true (1) for this class."
        return 1

class ExternalEntity:
    "An entity declared by reference to public and system identifiers."

    def __init__(self, name, pubid, sysid, notation):
        """Stores the name, the two identifiers and the notation. An empty
        notation string marks the entity as parsed."""
        self.name = name
        self.pubid, self.sysid = pubid, sysid
        self.notation = notation

    def is_parsed(self):
        "True if this is a parsed entity."
        return "" == self.notation

    def is_internal(self):
        "Always false (0) for this class."
        return 0

    def get_pubid(self):
        "Returns the public identifier of the entity."
        return self.pubid

    def get_sysid(self):
        "Returns the system identifier of the entity."
        return self.sysid
    

# ==============================
# Internal classes
# ==============================
    
# Content model class

class ContentModel:
    "Represents a content model. (Internal.)"

    def __init__(self, contents, modifier):
        """'contents' is an indexable collection of content particle
        entries; 'modifier' is '?', '+', '*' or anything else for none."""
        self.contents = contents
        self.modifier = modifier

    def __getitem__(self, itemno):
        "Returns the requested content particle from the content model."
        return self.contents[itemno]

    def get_states(self):
        """Returns the transition table for this model: a list with one
        entry per state, each a list of (target state, label) pairs, where
        the label '' marks an epsilon transition.

        Only the first entry of the model is used, and of that entry only
        its first component (the particle itself). A particle that is
        itself a content model contributes its own table; anything else
        becomes a single transition from state 0 to state 1."""
        particle = self[0][0]

        # isinstance works for nested models regardless of whether the
        # classes are old-style or new-style.
        if isinstance(particle, ContentModel):
            transitions = particle.get_states()
        else:
            transitions = [[(1, particle)], []]

        first = 0
        last = len(transitions) - 1
        if self.modifier == "?":
            transitions[first].append((last, ""))   # may be skipped
        elif self.modifier == "+":
            transitions[last].append((first, ""))   # may be repeated
        elif self.modifier == "*":
            transitions[first].append((last, ""))
            transitions[last].append((first, ""))

        return transitions

# Sequential content model
    
class SeqContentModel(ContentModel):
    "Represents a sequential content model. (Internal.)"

    def get_states(self):
        """Sketch of a function that creates a state model from the content
        model.

        Returns a list with one entry per state, each a list of
        (target state, label) pairs; the label '' marks an epsilon
        transition. The (particle, modifier) entries are chained one after
        the other, and the model's own modifier is applied between the
        first and the last state at the end."""
        # NOTE(review): the epsilon back-edges added for '+' and '*' share
        # their states with the neighbouring particles, so a model such as
        # (a*, b*) appears to allow an 'a' again after a 'b' -- confirm and
        # consider isolating loops with extra states.
        transitions = [[]]
        cur_state = 0

        for (cp, mod) in self:
            orig_state = cur_state
            if isinstance(cp, ContentModel):
                # Splice in the nested model's table, shifting its state
                # numbers so that its state 0 lands on orig_state.
                for newstate in cp.get_states():
                    for (to, what) in newstate:
                        transitions[cur_state].append((to + orig_state, what))
                    cur_state = cur_state + 1
                    transitions.append([])
                # The last spliced state must be merged with the old one:
                # drop the extra empty state appended by the final pass.
                transitions = transitions[:-1]
                cur_state = cur_state - 1
            else:
                # Plain particle: one new state reached over its label
                cur_state = cur_state + 1
                transitions.append([])
                transitions[orig_state].append((cur_state, cp))

            if mod == "?":
                transitions[orig_state].append((cur_state, ""))
            elif mod == "+":
                transitions[cur_state].append((orig_state, ""))
            elif mod == "*":
                transitions[orig_state].append((cur_state, ""))
                transitions[cur_state].append((orig_state, ""))

        first = 0
        last = len(transitions) - 1
        if self.modifier == "?":
            transitions[first].append((last, ""))
        elif self.modifier == "+":
            transitions[last].append((first, ""))
        elif self.modifier == "*":
            transitions[first].append((last, ""))
            transitions[last].append((first, ""))

        return transitions

# Choice content model

class ChoiceContentModel(ContentModel):
    "Represents a choice content model. (Internal.)"

    def get_states(self):
        """Sketch of a function that creates a state model from the content
        model.

        The table always has two states: state 0 gets one transition to
        state 1 per (particle, modifier) entry, labelled with the particle
        as it is. The per-entry modifier is not used. The model's own
        modifier then adds epsilon ('') transitions between the states."""
        alternatives = [(1, cp) for (cp, mod) in self]
        transitions = [alternatives, []]

        modifier = self.modifier
        if modifier in ("?", "*"):
            transitions[0].append((1, ""))    # the choice may be skipped
        if modifier in ("+", "*"):
            transitions[1].append((0, ""))    # the choice may be repeated

        return transitions
    
# ==============================
# Conversion of FDAs
# ==============================

def hash(included):
    """Creates a hash number from the included array: entry number i
    contributes 2**i to the result when it is true."""
    return sum([1 << pos for (pos, flag) in enumerate(included) if flag])

def fnda2fda(transitions, err):
    """Converts a finite-state non-deterministic automaton into a deterministic
    one.

    'transitions' holds one list of (target state, label) pairs per
    state, with '' as the epsilon label. The result maps each new state
    key (as computed by this module's hash function from a set of old
    states) to its list of (target key, label) pairs, and additionally
    has a 'start' entry holding the key of the start state and a 'final'
    entry. Ambiguities are reported through err.warning by
    add_transitions."""
    # Work on a copy with one extra empty state, so that the caller's
    # list is left untouched.
    transitions = list(transitions) + [[]]
    new_states = {}

    # Compute the e-closure of the start state
    included = [0] * len(transitions)
    compute_closure(0, included, transitions)
    start_key = hash(included)

    # Add transitions and the other states
    add_transitions(0, transitions, new_states, included, start_key, err)

    # The start state is the closure computed above. Other new states can
    # contain old state 0 as well, so searching for "any odd key" could
    # pick the wrong one depending on dictionary order.
    new_states["start"] = start_key

    new_states["final"] = pow(2, len(transitions) - 2)  # 2 because we add []

    return new_states
    
def add_transitions(ix, transitions, new_states, included, state_key, err):
    """Set up transitions and create new states.

    'included' flags the old states that make up the new state
    'state_key'. For every labelled (non-epsilon) transition leaving one
    of them, the closure of the target becomes a new state, the pair
    (new state key, label) is recorded in new_states[state_key], and the
    new state is processed recursively. A second transition over a label
    that is already present is reported with err.warning. 'ix' is not
    used."""
    if state_key in new_states:
        return                     # We've been here before

    outgoing = new_states[state_key] = []   # OK, a new one, create it

    for (no, old_state) in enumerate(included):
        if not old_state:
            continue

        for (to, what) in transitions[no]:
            if what == "":
                continue           # epsilon moves are already in the closure

            new_inc = [0] * len(transitions)
            compute_closure(to, new_inc, transitions)
            new_state = hash(new_inc)

            for (seen_target, over) in outgoing:
                if over == what:
                    err.warning("Ambiguous content model")

            # list.append takes exactly one argument, so the pair has to
            # be passed as a tuple.
            outgoing.append((new_state, what))
            add_transitions(to, transitions, new_states, new_inc,
                            new_state, err)

    
def compute_closure(ix, included, transitions):
    """Computes the e-closure of this state: sets included[s] to 1 for
    'ix' and for every state reachable from it over '' transitions only.
    Nothing is done if 'ix' is already marked."""
    pending = [ix]
    while pending:
        state = pending.pop()
        if included[state]:
            continue

        included[state] = 1
        for (to, what) in transitions[state]:
            if what == "":
                pending.append(to)

def print_states(states):
    """Debugging aid: prints every state of the table with its
    transitions, then waits for a line of input. The table must not
    contain the 'start' and 'final' entries."""
    assert "start" not in states and "final" not in states

    for (trans_key, trans) in states.items():
        print("State: " + repr(trans_key))
        for (to, what) in trans:
            try:
                line = "  To: " + repr(to) + " over: " + what
            except TypeError:
                # The label was not a string and could not be concatenated
                line = "ERROR: " + repr(what)
            print(line)

    raw_input()

def make_empty_model():
    """Constructs a state model for empty content models: a single state
    1 without transitions that is both the start and the final state."""
    model = {}
    model[1] = []
    model["start"] = 1
    model["final"] = 1
    return model
