;;    
;;
;;      Title : D-Parse
;;
;;      Function : An active chart parser with dictionary call
;;
;;      Author :   Alan W Black   November 1984
;;                 Dept of A.I.  University of Edinburgh
;;
;;      Copyright Graeme Ritchie, Alan Black,
;;                Steve Pulman and Graham Russell  1987
;;
;;         ---------------------------------------------
;;         |    Not to be used for military purposes   |
;;         ---------------------------------------------
;;
;;            >>>  TERM UNIFICATION VERSION <<<
;;
;;      Description :
;;        This forms the parser (morphology part) of the analyser.
;;
;;        The algorithm is based mainly on that at the end of Chapter 3 
;;        in Winograd  "Language as a cognitive process".  Also
;;        Thompson and Ritchie in O'Shea and Eisenstadt
;;        The unification stuff is along the lines of the HUG system
;;        (Karttunen SRI paper 1985)
;;
;;        The grammar formalism is based on GPSG (Gazdar and Pullum 1982)
;;        and is held in the global variable D-GRAMMAR.
;;
;;        In addition to the normal chart parser these functions
;;        call the morphographemic analyser (D-Recog)  which 
;;        segments words into morphemes using spelling rules.
;;
;;      1.2   13th February 1985
;;        Made changes to try and cut down the search time in GetRules
;;        Now rules are 'typed' into noun verb preposition adjective and other
;;        each typelist is held as a property of D-GRAMMAR.
;;      1.3   28th February 1985
;;        Change parser to run bottom up, and at the same time made
;;        changes to structure of code (although weakly equivalent)
;;      1.5   29th March 1985
;;        No longer expand feature value variables at compile time but
;;        have them instantiated on the fly during parsing. 
;;      1.13  16th December 1985
;;        Try to tidy this up and increase the speed.
;;      1.15  24th February 1986
;;        Try to correct bugs (restrictions) in the unification
;;        algorithm so that derivations have all variables
;;        instantiated where possible.  Also had a go at speed ups
;;        There were so over generalisations that I have now removed.
;;      1.17  3rd April 1986
;;         Made this part only deal with the chart parsing.  The analysis
;;         of the chart is now done by functions in the file analyse.
;;      2.2  14th October
;;         Changed daughter convention to act on each feature
;;         rather than whole group.
;;      2.4  3rd February 1987
;;         Made feature passing conventions work only on binary branching
;;         rules.
;;      2.7  1st June 1987
;;         Two versions Term unficiation adn Bay Area Unfication versions
;;
;;      Returns:
;;        The initial vertex of the chart
;;
;;      External references :
;;
;;        included by analyse
;;
;;        D-GRAMMAR       This should contain the GPSG(-ish) for the parse
;;        D-Recog
;;          Function with 1 arguments, status of surface form (as
;;          held in a vertex property STATUS)
;;        D-Tokenize
;;          This function is called by the parser to set up the initial
;;          configurations of the string.
;;
;;      System functions used :
;;        Name     type      Args #  type                 Comment
;;
;;      Restrictions :
;;        Variables may only appear in entries if they are unique
;;        within that entry - sorry.
;;

(declare                       ;; declarations for the compiler
   (special
      D-GRAMMAR     ;; the fully expanded grammar
      D-UNARYRULES  ;; grammar rules with only one daughter
      D-VARIABLES   ;; the names of the variables in the grammar
      D-ENDVERTEX   ;; end of chart
      D-WHEAD       ;; a list of Head feature names
      D-WDAUGHTER   ;; a list of Daughter feature names 
      D-FSD         ;; feature specification defaults
      D-DISTINGUISHEDCATEGORY ;; top category for grammar
      D-BASICVARIABLES
      D-AGENDA
      D-ALLEDGES
      D-ALLVERTICES
      D-LOOKUPFORMAT
      D-INITVERTEX
      D-Recog       ;; the dictionary call routine (in file autorun)
      D-Tokenize)   ;; make the word into a string to be parsed
   (localf
      D-Parse 
      D-InitGlobalsChart 
      D-InitChart 
      D-BuildWholeWFST 
      D-AddVertexLinks 
      D-CheckConventions 
      D-ApplyDefaults 
      D-Propose 
      D-CreateEdge
      D-CreateVertex 
      D-Combine 
      D-CombineEdge 
      D-ExtendChart 
      D-NextMorpheme 
      D-BuildEdge 
      D-MakeEmptyEdge 
      D-FindParseTree 
      D-ApplySubstitutions              
      D-ApplySubsts1
      D-FindVarValue
      D-CheckWordProp
   )
)

(include "keywords")
(include "unify")

(defun D-Parse (string)
;;
;;  This is the main calling routine.  The chart runs with a bottom up 
;;  policy.
;;
   (let ( (chart nil)
	  (newedge nil)
	  (tokenisedstring (D-Tokenize string)) )
      (D-InitGlobalsChart)
      (D-InitChart D-INITVERTEX tokenisedstring)
      (D-while D-AGENDA
	 (setq newedge (D-CheckConventions (car D-AGENDA)))
         (cond
	    (newedge        ;; if conventions holds then continue
	       (setq D-AGENDA (cdr D-AGENDA))
	       (D-Combine newedge)   ;; combine newedge with rest of chart
	       (setq chart (cons newedge chart))
	       (D-AddVertexLinks newedge)  ;; add pointers to/from vertices
            )
	    (t
	       (setq D-AGENDA (cdr D-AGENDA)) ; drop edge as it failed
            )
         )
      )
      D-INITVERTEX  ;; return the initial vertex
   )
)

(defun D-InitGlobalsChart ()
;;
;;  Initial ise the global variables for the chart
;;
   (setq D-AGENDA nil)
   (setq D-ALLEDGES nil)
   (setq D-ALLVERTICES nil)
   (setq D-INITVERTEX (D-CreateVertex))
   (setq D-ENDVERTEX (D-NewVertex (D-CreateVertex) 'END))
   ;(setq D-VARIABLES (copy D-BASICVARIABLES))
)

(defun D-InitChart (initvertex string)
;;
;;  Sets up the basic chart.  This sets global variables as required
;;  and also adds the basic edges to the agenda.
;;  If a STRINGSEGMENT option for LOOKUPFORMAT is selected the 
;;  whole string is segmented and added to the agenda
;;  in the simple word analysis case only the first morpheme
;;  is added to the chart and the basic chart is extended as required
;;
;;  The chart can be looked at as a search space where ENDVERTEX is
;;  the goal state.  When STRINGSEGMENT type formats are selected this
;;  search space is eagerly evaluated, while in the simple cases it
;;  is lazily evaluated.
;; 
   (let ()
      (cond
	 ((or (eq D-LOOKUPFORMAT 'D-CATEGORYFORM)
	      (eq D-LOOKUPFORMAT 'D-WORDSTRUCTURE))
            (D-NextMorpheme     ;; get the first morpheme(s) on the agenda
               (D-NewVertex     ;; create first vertex in chart
	           initvertex
	           string       ;; the words to be parsed
	        )
	        string
            )  
            (D-putvertexCLASSES initvertex 't) ;; mark this node as checked
         )
	 (t    ;; have to find whole segmentations 
	    (D-BuildWholeWFST 
	       (D-NewVertex initvertex string))
         )
      )
   )
)

(defun D-BuildWholeWFST (initvertex)
;;
;;  Builds the whole well formed substring table.  This is called
;;  when STRINGSEGMENT options have been selected for the look
;;  up format.  This effectively builds the whole search space
;;  rather than on on demand as it does in the non-STRINGSEGMENT cases.
;;
   (let ( (agenda (ncons initvertex)) )
      (D-repeat
      (
	 (cond
	    ((not (or (D-getvertexCLASSES (car agenda))
	              (eq (D-getvertexSTATUS (car agenda)) 'END)))
             (D-putvertexCLASSES (car agenda) 't)
             (setq agenda   ;; deal with first vertex on agenda
		(append
                   (mapcar      ;; create new edge for each new morpheme
                      #'(lambda (word)
                          (D-BuildEdge word (car agenda))
			  (cond
			     ((eq (cadr word) 'END) D-ENDVERTEX)
			     (t (car D-ALLVERTICES)) ;; the last made vertex
			  ))
                      (D-Recog (D-getvertexSTATUS (car agenda))))
                   (cdr agenda)))
            )
	    (t     ;; the vertex has been checked or is at END
	       (setq agenda (cdr agenda))
            )
        )
      )
      until (null agenda))
   )
)
 
(defun D-AddVertexLinks (edge)
;;
;;   Add the edge to the list held on each vertex for complete and
;;   incomplete edges going in and out
;;
   (cond
      ((null (D-getedgeREMAINDER edge))      ;; is this edge complete
	 (D-putvertexEDGEOUTC   ;; add to list of complete edges starting
	    (D-getedgeSTART edge)
	    (cons
	       edge
	       (D-getvertexEDGEOUTC
		  (D-getedgeSTART edge))))
      )
      (t                ;; incomplete edges
	 (D-putvertexEDGEINI ;; add to list of complete edges ending
	    (D-getedgeEND edge)
	    (cons
	       edge
	       (D-getvertexEDGEINI 
		  (D-getedgeEND edge))))
      )
   )
)

(defun D-CheckConventions (edge)
;;
;;    Adds defaults to complete edges
;;
   (cond
      ((or (D-getedgeREMAINDER edge)     ;; is it incomplete
	   (D-LexicalEdgeP edge))    ;; or a lexical edge
	 edge                    ;; no adding of defaults
      )
      (t                        ;; other lengthed rule
	 (D-ApplyDefaults edge)
	 (D-putedgeLABEL;; simplify label removing var chains
	    edge
	    (D-DereferenceVariables
	       (D-getedgeLABEL edge)
	       (D-getedgeBIND edge)
	    )
	 )
	 edge)
   )
)

(defun D-ApplyDefaults (edge)
;;
;;  Returns the edge with defaults added to the label
;;
   (cond
      ((null D-FSD)
	 edge
      )
      (t
	 (let ( (label (D-getedgeLABEL edge))
	        (bindings (D-getedgeBIND edge)) )
	    (mapc
	       #'(lambda (fsd)
		  (setq bindings
		     (D-AddDefault fsd label bindings)))
               D-FSD)
            (D-putedgeLABEL edge label)
	    (D-putedgeBIND edge bindings))
      )
   )
)

(defun D-Propose (cattype vertex)
;;
;;  This function proposes the given cattype to the grammar.  It 
;;  creates an empty edge for each grammar rule that could 
;;  potentially use this symbol using the D-CANMAKE index.
;;
   (mapc
      #'(lambda (rule)
	 (cond
	    ((memq (D-GetGRuleName rule) (D-getvertexRULES vertex))
	       nil   ;;  rule already in chart
            )
	    (t 
	       (D-MakeEmptyEdge
		  (D-UniquifyVariables rule)
		  vertex))))
      (mapcar 
	 #'(lambda (name)
	    (D-RNametoRule name D-GRAMMAR))
	 (cdr (assq cattype D-CANMAKE))))
)

(defun D-CreateEdge (Plabel Pstart Pend Premainder Precog 
				   Prulenum bindings)
;;
;;  This returns a new edge with the properties set to the
;;  given parmeters
;;
;;  Automatically puts it on the agenda.
;;
;;  19th December 1985:  change to be a structure rather than 
;;  property lists
;;
   (let ((newedge
            (D-MakeEdge Plabel Pstart Pend Premainder 
		  Precog Prulenum bindings nil)) )
      (setq D-AGENDA (cons newedge D-AGENDA))
      (setq D-ALLEDGES (cons newedge D-ALLEDGES))
      newedge
   )
)

(defun D-CreateVertex ()
;;
;;   returns an empty vertex with a new number at its end
;;   vertices are access via macros declared in subtout
;;
   (let ((newvertex
	       (D-MakeVertex
		  nil         ;; classes
		  nil         ;; incomple edges in
		  nil         ;; complete edges out
		  nil         ;; status
		  nil         ;; list of edges out
		  (gensym 'D) ;; name field possibly used in debugging
               )))
      (setq D-ALLVERTICES (cons newvertex D-ALLVERTICES))
      newvertex
   )
)

(defun D-Combine (newedge)
;;
;;  This combines the new edge with the edges in the chart
;;  returns a list of combined edges to add to the agenda
;;
   (cond
      ((null (D-getedgeREMAINDER newedge))   ;; is edge complete (inactive)
	                    ;; then find all edges in the chart this can
         (mapc
            #'(lambda (incompletedge)  
               (D-CombineEdge newedge incompletedge))
            (D-getvertexEDGEINI             ;; appropriate edges in chart
               (D-getedgeSTART newedge))
         )
      )
      (t                      ;; newedge is incomplete
	 (D-ExtendChart              ;; continue Outgoing edges and extend
	    newedge                ;; chart if need be
	    (D-getedgeEND newedge)    ;; vertex to extend from 
         )
      )
   )
)

(defun D-CombineEdge (inactiveedge activeedge)
;;
;;  This tries to combine the edges, this returns nil if the 
;;  edges do not combine, if they do this returns the new edge    
;;
;;  if this activeedge cannot be extended by inactiveedge 
;;  then nil is returned
;;
   (cond
      ;; don't check a complete ending edge against an active one that 
      ;; cannot complete with this inactive edge
      ((or (neq 'END (D-getvertexSTATUS (D-getedgeEND inactiveedge)))
	   (null (cdr (D-getedgeREMAINDER activeedge))))
	 (let ( (match
		  (D-Unify
		     (car (D-getedgeREMAINDER activeedge));; required category
		     (D-getedgeBIND activeedge)
		     (D-getedgeLABEL inactiveedge) 
		  )) )
	    (cond
	       ((eq match 'FAILED) nil) ;; failed match
	       (t
		  (D-CreateEdge
		     (D-getedgeLABEL activeedge)    
		     (D-getedgeSTART activeedge)
		     (D-getedgeEND  inactiveedge)
		     (cdr (D-getedgeREMAINDER activeedge))
		     (append                     ;; this is the parse tree
			(D-getedgeRECOG activeedge);; recognised from old edge
			(ncons inactiveedge);;add name of newly recognised edge
		     )
		     (D-getedgeRULENUM activeedge)  ;; rule number of this edge
		     match   ;; the bindings
		  )
	       )
	    )
	 )
      )
      (t nil)     ;; not a suitable edge
   )
)

(defun D-ExtendChart (newedge vertex)
;;
;;   When dealing with incomplete edges and looking for things
;;   to extend newedge, if the dictionary has never been checked
;;   from this point then the next morpheme function is called and the
;;   the new morpheme(s) edges are added to the agenda for returning
;;
;;   At present we only have one morpheme dictionary so
;;   the marker in CLASSES on the vertex is just a flag
;;
;;
   (mapc
      #'(lambda (completedge)   ;; new edge in can combine
           (D-CombineEdge completedge newedge))
      (D-getvertexEDGEOUTC (D-getedgeEND newedge))
   )
   (D-if (null (D-getvertexCLASSES vertex)) ;; check dictionary ?
   then
      (D-putvertexCLASSES vertex 't)
      (D-NextMorpheme    ;; check for new morphemes
	 vertex
         (D-getvertexSTATUS vertex) ;;  remainder of surface string at the 
      )                       ;;  vertex
   )
)

(defun D-NextMorpheme (vertex word)
;;
;;  This function returns a list of new edges, one for each
;;  morpheme found in the morpheme dictionary
;;
   (cond
      ((eq 'END word)   ;; no more to look up
	 nil
      )
      (t             ;; find next morphemes and 
          (mapc      ;; create new edge for each new morpheme
             #'(lambda (word)
                 (D-BuildEdge word vertex))
             (D-Recog word))   ;; call morpheme segmenter
      )
   )
)

(defun D-BuildEdge (word vertex)
;;
;;  The thing returned from the Recog function has a list of
;;  entries that have the same citation form.  An edge needs to be
;;  built for each entry from the given vertex to a new one
;;  returns the new vertex
;;
   (cond
      ((eq (cadr word) 'END)     ;; if end of chart
	 (mapc
	    #'(lambda (entry)   ;; no proposing in this case
		 (D-CreateEdge
		    (D-Syntax-Field entry) ;; label: feature list
		    vertex                 ;; Start Vertex
		    D-ENDVERTEX            ;; End vertex
		    nil                    ;; Remainder - inactive edge
		    nil                    ;; no sub edges as terminal
		    entry                  ;; lexical entry
		    '((t t))))              ;; no bindings
	    (car word)))    ;; the list of entries with same citation form
      (t                         ;; not end of chart
	 (mapc
	    #'(lambda (entry)
		 (D-Propose
		 (D-GetCategoryType 
		 (D-getedgeLABEL
		 (D-CreateEdge
		    (D-Syntax-Field entry) ;; label
		    vertex                 ;; Start Vertex
		    (D-NewVertex (D-CreateVertex) (cadr word))
		    nil                    ;; Remainder - inactive edge
		    nil                    ;; no sub edges as terminal
		    entry                  ;; lexical entry
		    '((t t)))              ;; no bindings
		 ))
		 vertex)
	      )
	    (car word)    ;; the list of entries with same citation form
	 )
      )
   )
)

(defun D-MakeEmptyEdge (rule vertex)
;;
;;   This creates an empty edge for the given rule
;;
;;   This marks the vertex with the rule name to stop the rule
;;   being added again at this vertex.  This is for the
;;   left recursion check
;;   Note that this means that the RULES field in vertex holds 
;;   both chart and agenda edges.  This is what is required.
;;
   (D-putvertexRULES vertex  
      (cons (car rule) (D-getvertexRULES vertex)))
   (D-CreateEdge
      (cadr rule) ;; label name
      vertex      ;; start vertex
      vertex      ;; end vertex
      (cddr rule) ;; remainder  (whole RHS)
      nil         ;; currently recognised
      (car rule)  ;; rule number
      '((t t))    ;; no bindings
   )
)

(defun D-FindParseTree (cat edge bindings)
;;
;;   This finds the parse tree of the given edge by recursing down
;;   edges within the recog part of this edge
;;
   (cond
      ((D-LexicalEdgeP edge)  ;; lexical entry (rather than rule)
	 (list
	    (D-ApplySubstitutions
	       (D-Syntax-Field (D-getedgeRULENUM edge))
	       bindings)
	    ;cat
	    'ENTRY
	    (list
	       (D-CitationForm (D-getedgeRULENUM edge))
	       (D-PhonologicalForm (D-getedgeRULENUM edge))
	       (D-MakePCategory (D-Syntax-Field (D-getedgeRULENUM edge)))
	       (D-Semantic-Field (D-getedgeRULENUM edge))
	       (D-User-Field (D-getedgeRULENUM edge)))
	 ) 
      )
      (t
	 (cons
	    (D-ApplySubstitutions
	       cat                      ;; the current category
	       bindings)
	    (cons
	       (D-getedgeRULENUM edge)  ;; rule used
	       (mapcar
		  #'(lambda (daughter)
		       (D-FindParseTree
			  (D-getedgeLABEL daughter) ;; the category
			  daughter ;; the sub edge
			  (cons (D-getedgeBIND daughter) bindings)))
		  (D-getedgeRECOG edge))))
      )
   )
)

(defun D-ApplySubstitutions (category bindings)
;;
;;  This substitutes all variables in the given category for
;;  values by looking (hierarchically) through the given list
;;  of bindings.  Note that the binds are a list of bindings
;;  from each ancestor in the parse tree
;;
;;
   (D-MakePCategory
      (D-ApplySubsts1
	 category bindings))
)

(defun D-ApplySubsts1 (category bindings)
;;  applies substitutions without changing the category into a printable one
   (cons
      (D-GetCategoryType category)
      (mapcar
	 #'(lambda (fval)
	 (let ( (realitem (D-FindVarValue fval bindings)) )
	    (cond
	       ((and (not (D-VariableP realitem))
		     (listp realitem))  ;; category valued
		  (D-ApplySubsts1 realitem bindings))
	       (t realitem))))
	 (D-GetCategoryValues category)
      )
   )
)

(defun D-FindVarValue (term bindings)
;;
;;  checks the bindings for a value of term
;;  the bindings is a list of a list of bindings and they are searched
;;  upwards until a literal value is found
;;
;;  if it is an unbound variable return <UNBOUND-VARIABLE> and the 
;;  variable range
;;
   (cond
      ((null bindings)
	 (cond
	    ((D-VariableP term) 
	       (cons
		  (car term)
	          (cons '<UNBOUND-VARIABLE>
		  (D-GetVarRange term))))
	    (t term))   ;; no more to search
      )
      (t
	 (let ( (bind (D-FindType term (car bindings))) )
	    (cond
	       ((eq (car bind) 'LITERAL)
		  (cadr bind)   ;; return the literal value
               )
	       (t       ;; it is a variable at this level so search
		  (D-FindVarValue   ;; search further in the bindings
		     (cadr bind) 
		     (cdr bindings)))))
      )
   )
)


(defun D-CheckWordProp (edge)
;;
;;  checks if the category type of the label on the edge is a 
;;  distinguished category type
;;
;;  returns (edge) if it is nil otherwise
;;  
   (cond
      ((memq (D-GetCategoryType (D-getedgeLABEL edge)) 
	     D-DISTINGUISHEDCATEGORY)
         (ncons edge)
      )
      (t nil)
   )
)

