001    // This file is part of the Attempto Java Packages.
002    // Copyright 2008-2009, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
003    //
004    // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
005    // terms of the GNU Lesser General Public License as published by the Free Software Foundation,
006    // either version 3 of the License, or (at your option) any later version.
007    //
008    // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
009    // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
010    // PURPOSE. See the GNU Lesser General Public License for more details.
011    //
012    // You should have received a copy of the GNU Lesser General Public License along with the Attempto
013    // Java Packages. If not, see http://www.gnu.org/licenses/.
014    
015    package ch.uzh.ifi.attempto.chartparser;
016    
017    import java.util.ArrayList;
018    import java.util.List;
019    
020    /**
021     * This is a chart parser (concretely an Earley parser) fully implemented in Java. However, there is a
022     * Prolog-based format called "Codeco" that can be transformed into Java
023     * (at compile time).
024     * 
025     * @author Tobias Kuhn
026     * @see Grammar
027     */
028    public class ChartParser {
029            
030            private final Grammar grammar;
031            private final Chart chart = new Chart();
032            private final ArrayList<Terminal[]> tokens = new ArrayList<Terminal[]>();
033            private boolean debug;
034            
035            /**
036             * Creates a new chart parser for the given grammar. The grammar must not be changed afterwards.
037             * 
038             * @param grammar The grammar to be used by the chart parser.
039             */
040            public ChartParser(Grammar grammar) {
041                    this.grammar = grammar;
042                    init();
043                    completeAndPredict();
044            }
045            
046            /**
047             * This method can be used to switch on/off debug mode (default is off). In debug mode, messages about the actions
048             * of the chart parser are printed onto the standard error device.
049             * 
050             * @param debug true to switch on debug mode or false to switch it off.
051             */
052            public void debug(boolean debug) {
053                    this.debug = debug;
054            }
055            
056            /**
057             * Adds the token to the token sequence and makes one more parsing step to process it. If several tokens
058             * are given then these tokens are treated in a parallel way representing a token that can have multiple
059             * forms.
060             * 
061             * @param token The token to be added to the token sequence.
062             */
063            public void addToken(Terminal... token) {
064                    tokens.add(token);
065                    for (Terminal t : token) {
066                            t.setFeature("id", tokens.size() + "");
067                            Edge edge = new Edge(tokens.size()-1, tokens.size(), t, true);
068                            chart.addEdge(edge);
069                            if (debug) log("SCANNER: " + edge + "\n");
070                    }
071                    completeAndPredict();
072                    //if (debug) log("CHART:");
073                    //if (debug) log(chart);
074            }
075            
076            /**
077             * Adds the tokens to the token sequence and processes them.
078             * 
079             * @param tokens The tokens to be added to the token sequence.
080             */
081            public void addTokens(List<Terminal> tokens) {
082                    for (Terminal t : tokens) {
083                            addToken(t);
084                    }
085            }
086            
087            /**
088             * Removes the last token and reverts the last parsing step.
089             */
090            public void removeToken() {
091                    chart.removeEdgesWithEndPos(tokens.size());
092                    tokens.remove(tokens.size()-1);
093            }
094            
095            /**
096             * Removes all tokens in the current token sequence and resets the chart.
097             */
098            public void removeAllTokens() {
099                    tokens.clear();
100                    chart.clear();
101                    init();
102                    completeAndPredict();
103            }
104            
105            /**
106             * Returns the current token sequence.
107             * 
108             * @return The current token sequence.
109             */
110            public List<Terminal[]> getTokens() {
111                    return new ArrayList<Terminal[]>(tokens);
112            }
113    
114            /**
115             * This method looks ahead and returns the restrictions at least one of which the next token
116             * has to fulfill.
117             * 
118             * @return The restrictions.
119             */
120            public List<Restriction> getNextTokenRestrictions() {
121                    if (debug) log("LOOKING FORWARD:");
122                    
123                    ArrayList<Edge> edges = new ArrayList<Edge>();
124                    for (Edge e : chart.getEdgesByEndPos(tokens.size())) {
125                            if (!e.isActive()) continue;
126                            if (!(e.getNextActive() instanceof Terminal)) continue;
127                            
128                            if (e.getApplicableBackref() != null) {
129                                    // For edges with backwards references, the possible bindings have to be performed:
130                                    for (int i = 0 ; i < e.getExternalAnteList().length ; i++) {
131                                            Edge eC = e.deepCopy();
132                                            try {
133                                                    eC.getExternalAnteList()[i].unify(eC.getApplicableBackref());
134                                                    edges.add(eC);
135                                            } catch (UnificationFailedException ex) {}
136                                    }
137                            } else {
138                                    edges.add(e);
139                            }
140                    }
141    
142                    ArrayList<Restriction> restrictions = new ArrayList<Restriction>();
143                    for (Edge e : edges) {
144                            Terminal t = (Terminal) e.getNextActive();
145                            List<Terminal> exceptions = new ArrayList<Terminal>();
146                            if (e.getApplicableNegbackref() != null) {
147                                    // Edges with negative backwards references lead to exceptions:
148                                    for (int i = 0 ; i < e.getExternalAnteList().length ; i++) {
149                                            Edge eC = e.deepCopy();
150                                            try {
151                                                    eC.getExternalAnteList()[i].unify(eC.getApplicableNegbackref());
152                                                    exceptions.add((Terminal) eC.getNextActive());
153                                            } catch (UnificationFailedException ex) {}
154                                    }
155                            }
156                            restrictions.add(new Restriction((Terminal) t.deepCopy(), exceptions));
157                            if (debug) log(" " + t);
158                    }
159                    if (debug) log("\n");
160                    
161                    return restrictions;
162            }
163            
164            private void init() {
165                    Category startCat = grammar.getStartCategory();
166                    for (Rule rule : grammar.getRulesByHeadName(startCat.getName())) {
167                            try {
168                                    if (!startCat.isSimilar(rule.getHead())) continue;
169                                    Nonterminal categoryC = (Nonterminal) startCat.deepCopy();
170                                    Rule ruleC = rule.deepCopy();
171                                    categoryC.unify(ruleC.getHead());
172                                    Edge edge = new Edge(0, 0, ruleC.getHead(), ruleC.getBody(), ruleC.isAccessible());
173                                    chart.addEdge(edge);
174                                    if (debug) log("INIT: " + rule + "  --->  " + edge + "\n");
175                            } catch (UnificationFailedException ex) {
176                                    continue;
177                            }
178                    }
179            }
180            
181            private void completeAndPredict() {
182                    // Process the epsilon rules:
183                    for (Rule rule : grammar.getEpsilonRules()) {
184                            // Epsilon edges get no antecedent lists (they cannot have references):
185                            Edge edge = new Edge(tokens.size(), tokens.size(), rule.getHead(), rule.isAccessible());
186                            boolean isNewEdge = chart.addEdge(edge);
187                            if (debug) log("EDGE FOR EPSILON RULE (" + (isNewEdge ? "NEW" : "KNOWN") + "): " + edge + "\n");
188                    }
189                    
190                    // Run completion/predition until neither of them generates a new edge:
191                    int s = 0;
192                    while (chart.getSize() > s) {
193                            s = chart.getSize();
194                            complete();
195                            resolve();
196                            predict();
197                    }
198            }
199            
200            private void predict() {
201                    ArrayList<Edge> l = chart.getEdgesByEndPos(tokens.size());
202                    for (int i = 0 ; i < l.size() ; i++) {
203                            // During this loop, elements might be added to the end of the list l.
204                            
205                            Edge existingEdge = l.get(i);
206                            Category category = existingEdge.getNextActive();
207                            if (category == null) continue;
208                            if (debug) log("PREDICTION FOR CATEGORY: " + category + "\n");
209                            
210                            for (Rule rule : grammar.getRulesByHeadName(category.getName())) {
211                                    if (rule.hasEmptyBody()) continue;
212                                    
213                                    try {
214                                            if (!category.isSimilar(rule.getHead())) continue;
215                                            Category categoryC = category.deepCopy();
216                                            Rule ruleC = rule.deepCopy();
217                                            categoryC.unify(ruleC.getHead());
218                                            int p = tokens.size();
219                                            Edge edge = new Edge(p, ruleC, existingEdge.getCombinedAnteList());
220                                            boolean isNewEdge = chart.addEdge(edge);
221                                            if (debug) log("PREDICT (" + (isNewEdge ? "NEW" : "KNOWN") + "): " + rule + "  --->  " + edge + "\n");
222                                    } catch (UnificationFailedException ex) {
223                                            continue;
224                                    }
225                            }
226                    }
227            }
228            
229            private void complete() {
230                    ArrayList<Edge> l1 = chart.getEdgesByEndPosAndActCat(tokens.size(), null);
231                    for (int i1 = 0 ; i1 < l1.size() ; i1++) {
232                            // During this loop, elements might be added to the end of the list l1.
233                            
234                            Edge passiveEdge = l1.get(i1);
235                            if (debug) log("COMPLETION FOR EDGE: " + passiveEdge + "\n");
236                            
237                            ArrayList<Edge> l2 = chart.getEdgesByEndPosAndActCat(passiveEdge.getStartPos(), passiveEdge.getHead().getName());
238                            for (int i2 = 0 ; i2 < l2.size() ; i2++) {
239                                    // During this loop, elements might be added to the end of the list l2.
240                                    
241                                    Edge edge = l2.get(i2);
242                                    if (!edge.isActive()) continue;
243                                    
244                                    try {
245                                            if (!passiveEdge.getHead().isSimilar(edge.getNextActive())) continue;
246                                            Edge passiveEdgeC = passiveEdge.deepCopy();
247                                            Edge edgeC = edge.deepCopy();
248                                            passiveEdgeC.getHead().unify(edgeC.getNextActive());
249                                            if (!passiveEdge.hasEmptyBody()) {
250                                                    // Antecedent lists have to match (except for the case of epsilon edges):
251                                                    FeatureMap[] al1 = edgeC.getCombinedAnteList();
252                                                    FeatureMap[] al2 = passiveEdgeC.getExternalAnteList();
253                                                    if (al1.length != al2.length) throw new UnificationFailedException();
254                                                    for (int i = 0 ; i < al1.length ; i++) {
255                                                            al1[i].unify(al2[i]);
256                                                    }
257                                            }
258                                            edgeC.step(tokens.size(), passiveEdgeC);
259                                            boolean isNewEdge = chart.addEdge(edgeC);
260                                            if (debug) log("COMPLETE (" + (isNewEdge ? "NEW" : "KNOWN") + "): " + edge + "  --->  " + edgeC + "\n");
261                                    } catch (UnificationFailedException ex) {
262                                            continue;
263                                    }
264                            }
265                    }
266            }
267            
268            private void resolve() {
269                    ArrayList<Edge> l1 = chart.getEdgesByEndPos(tokens.size());
270                    for (int i1 = 0 ; i1 < l1.size() ; i1++) {
271                            // During this loop, elements might be added to the end of the list l1.
272                            
273                            Edge edge = l1.get(i1);
274                            if (!edge.isActive()) continue;
275                            
276                            String n = edge.getNextActive().getName();
277                            Edge edgeC = null;
278                            if (n.equals(">>>")) {
279                                    edgeC = edge.deepCopy();
280                                    edgeC.addAntecedents(edgeC.getNextActive().getFeatureMap());
281                            } else if (n.equals("<<<")) {
282                                    FeatureMap[] ante = edge.getCombinedAnteList();
283                                    for (int i = ante.length-1 ; i >= 0 ; i--) {
284                                            if (ante[i].canUnify(edge.getNextActive().getFeatureMap())) {
285                                                    try {
286                                                            edgeC = edge.deepCopy();
287                                                            edgeC.getExternalAnteList()[i].unify(edgeC.getNextActive().getFeatureMap());
288                                                            break;
289                                                    } catch (UnificationFailedException ex) {
290                                                            edgeC = null;
291                                                    }
292                                            }
293                                    }
294                            } else if (n.equals("</<")) {
295                                    edgeC = edge.deepCopy();
296                                    for (FeatureMap f : edge.getCombinedAnteList()) {
297                                            // TODO: This check is probably not sufficient:
298                                            if (f.canUnify(edge.getNextActive().getFeatureMap())) {
299                                                    edgeC = null;
300                                                    break;
301                                            }
302                                    }
303                            } else {
304                                    continue;
305                            }
306                            
307                            if (edgeC == null) {
308                                    if (debug) log("CANNOT RESOLVE: " + edge + "\n");
309                            } else {
310                                    edgeC.step();
311                                    boolean isNewEdge = chart.addEdge(edgeC);
312                                    if (debug) log("RESOLVE (" + (isNewEdge ? "NEW" : "KNOWN") + "): " + edge + "  --->  " + edgeC + "\n");
313                            }
314                    }
315            }
316            
317            private void log(String text) {
318                    System.err.print(text);
319            }
320    
321    }