001    // This file is part of the Attempto Java Packages.
002    // Copyright 2008, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
003    //
004    // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
005    // terms of the GNU Lesser General Public License as published by the Free Software Foundation,
006    // either version 3 of the License, or (at your option) any later version.
007    //
008    // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
009    // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
010    // PURPOSE. See the GNU Lesser General Public License for more details.
011    //
012    // You should have received a copy of the GNU Lesser General Public License along with the Attempto
013    // Java Packages. If not, see http://www.gnu.org/licenses/.
014    
015    package ch.uzh.ifi.attempto.chartparser;
016    
017    import java.util.ArrayList;
018    import java.util.HashMap;
019    import java.util.List;
020    
021    /**
022     * This is a chart parser (concretely an Earley parser) fully implemented in Java. However, there is a
023     * Prolog format ("Attempto Chartparser Grammar Notation" or "ACGN") that can be transformed into Java
024     * (at compile time).
025     * 
026     * @author Tobias Kuhn
027     * @see Grammar
028     */
029    public class ChartParser {
030            
031            private final Grammar grammar;
032            private final Chart chart = new Chart();
033            private final ArrayList<Terminal> tokens = new ArrayList<Terminal>();
034            private boolean debug;
035            
036            /**
037             * Creates a new chart parser for the given grammar. The grammar must not be changed afterwards.
038             * 
039             * @param grammar The grammar to be used by the chart parser.
040             */
041            public ChartParser(Grammar grammar) {
042                    this.grammar = grammar;
043                    init(grammar.getStartCategory());
044                    completeAndPredict();
045            }
046            
047            /**
048             * This method can be used to switch on/off debug mode (default is off). In debug mode, messages about the actions
049             * of the chart parser are printed onto the standard error device.
050             * 
051             * @param debug true to switch on debug mode or false to switch it off.
052             */
053            public void debug(boolean debug) {
054                    this.debug = debug;
055            }
056            
057            /**
058             * Adds the token to the token sequence and makes one more parsing step to process it.
059             * 
060             * @param token The token to be added to the token sequence.
061             */
062            public void addToken(String token) {
063                    Terminal t = new Terminal(token);
064                    tokens.add(t);
065                    Edge edge = new Edge(tokens.size()-1, tokens.size(), t, true);
066                    chart.addEdge(edge);
067                    if (debug) System.err.println("SCANNER: " + edge);
068                    completeAndPredict();
069            }
070            
071            /**
072             * Adds the tokens to the token sequence and processes them.
073             * 
074             * @param tokens The tokens to be added to the token sequence.
075             */
076            public void addTokens(List<String> tokens) {
077                    for (String s : tokens) {
078                            addToken(s);
079                    }
080            }
081            
082            /**
083             * Removes the last token and reverts the last parsing step.
084             */
085            public void removeToken() {
086                    chart.removeEdgesWithEndPos(tokens.size());
087                    tokens.remove(tokens.size()-1);
088            }
089            
090            /**
091             * Removes all tokens in the current token sequence and resets the chart.
092             */
093            public void removeAllTokens() {
094                    tokens.clear();
095                    chart.clear();
096                    init(grammar.getStartCategory());
097                    completeAndPredict();
098            }
099            
100            /**
101             * Returns the current token sequence.
102             * 
103             * @return The current token sequence.
104             */
105            public List<Terminal> getTokens() {
106                    return new ArrayList<Terminal>(tokens);
107            }
108            
109            /**
110             * Returns all tokens that are allowed to follow the current token sequence according to the grammar.
111             * 
112             * @return The possible next tokens.
113             */
114            public List<Terminal> nextTokens() {
115                    ArrayList<Terminal> terminals = new ArrayList<Terminal>();
116                    if (debug) System.err.print("LOOKING FORWARD:");
117                    for (Edge e : chart.getEdgesByEndPos(tokens.size(), true)) {
118                            if (!e.isActive()) continue;
119                            if (!(e.getNextActive() instanceof Terminal)) continue;
120                            
121                            Terminal t = (Terminal) e.getNextActive();
122                            if (!terminals.contains(t)) {
123                                    if (debug) System.err.print(" " + t);
124                                    terminals.add(t);
125                            }
126                    }
127                    if (debug) System.err.println();
128                    
129                    return terminals;
130            }
131            
132            /**
133             * Returns a boolean array that describes which of the tokens of the current token sequence are
134             * accessible (true) and which are not (false) for the given next token.
135             * 
136             * @param nextToken The next token for which the tokens should be checked for accessibility.
137             * @return A boolean array where each element stands for one token of the token sequence.
138             */
139            public boolean[] getAccessiblePositions(String nextToken) {
140                    boolean[] pos = new boolean[tokens.size()];
141                    ArrayList<ArrayList<Edge>> paths = new ArrayList<ArrayList<Edge>>();
142                    
143                    for (Edge e : chart.getEdgesByEndPosAndActCat(tokens.size(), nextToken, false)) {
144                            if (!e.isActive()) continue;
145                            
146                            ArrayList<Edge> partialPath = new ArrayList<Edge>();
147                            partialPath.add(e.deepCopy());
148                            collectActivePaths(0, partialPath, paths);
149                    }
150                    
151                    for (ArrayList<Edge> path : paths) {
152                            for (Edge edge : path) {
153                                    scanForAccessiblePositions(edge, pos, new ArrayList<Edge>());
154                            }
155                    }
156                    
157                    return pos;
158            }
159            
160            private void scanForAccessiblePositions(Edge edge, boolean[] pos, ArrayList<Edge> visitedEdges) {
161                    if (edge.getStartPos() == edge.getEndPos()) return;
162                    if (edge.getBody().length == 0 && edge.getHead() instanceof Terminal) {
163                            pos[edge.getStartPos()] = true;
164                            return;
165                    }
166                    
167                    for (Edge e : edge.getLinks()) {
168                            if (!e.isAccessible()) continue;
169                            if (visitedEdges.contains(e)) continue;
170                            visitedEdges.add(e);
171                            scanForAccessiblePositions(e, pos, visitedEdges);
172                    }
173            }
174            
175            private void collectActivePaths(int startPos, ArrayList<Edge> partialPath, ArrayList<ArrayList<Edge>> paths) {
176                    Edge edge = partialPath.get(partialPath.size()-1);
177                    if (edge.getStartPos() == startPos) {
178                            paths.add(partialPath);
179                            return;
180                    }
181                    if (edge.getStartPos() < startPos) {
182                            return;
183                    }
184                    ArrayList<Edge> edgesToCheck = new ArrayList<Edge>();
185                    for (Edge e : chart.getEdgesByEndPosAndActCat(edge.getStartPos(), edge.getHead().getName(), false)) {
186                            Category[] newBody = new Category[e.getProgress()+1];
187                            System.arraycopy(e.getBody(), 0, newBody, 0, e.getProgress()+1);
188                            Edge ec = new Edge(e.getStartPos(), e.getEndPos(), e.getHead(), newBody, e.getProgress(), true);
189                            ec.addLinksFrom(e);
190                            boolean isNew = true;
191                            for (Edge ee : edgesToCheck) {
192                                    if (ee.subsumes(ec)) {
193                                            isNew = false;
194                                            break;
195                                    }
196                            }
197                            if (isNew) {
198                                    for (Edge ee : edgesToCheck) {
199                                            if (ec.subsumes(ee)) {
200                                                    edgesToCheck.remove(ee);
201                                            }
202                                    }
203                                    edgesToCheck.add(ec);
204                            }
205                    }
206                    for (Edge e : edgesToCheck) {
207                            try {
208                                    Edge eC = e.deepCopy();
209                                    ArrayList<Edge> newPartialPath = copyEdgeList(partialPath);
210                                    Edge newEdge = newPartialPath.get(partialPath.size()-1);
211                                    newEdge.getHead().unify(eC.getNextActive());
212                                    newPartialPath.add(eC);
213                                    collectActivePaths(startPos, newPartialPath, paths);
214                            } catch (UnificationFailedException ex) {
215                                    continue;
216                            }
217                    }
218            }
219            
220            private ArrayList<Edge> copyEdgeList(ArrayList<Edge> edgeList) {
221                    ArrayList<Edge> edgeListCopy = new ArrayList<Edge>();
222                    HashMap<Integer, StringEntity> entities = new HashMap<Integer, StringEntity>();
223                    for (Edge edge : edgeList) {
224                            edgeListCopy.add(edge.deepCopy(entities));
225                    }
226                    return edgeListCopy;
227            }
228            
229            private void init(Nonterminal category) {
230                    for (Rule rule : grammar.getRulesByHeadName(category.getName())) {
231                            try {
232                                    Nonterminal categoryC = (Nonterminal) category.deepCopy();
233                                    Rule ruleC = rule.deepCopy();
234                                    categoryC.unify(ruleC.getHead());
235                                    Edge edge = new Edge(0, 0, ruleC.getHead(), ruleC.getBody(), ruleC.isAccessible());
236                                    chart.addEdge(edge);
237                                    if (debug) System.err.println("INIT: " + rule + "  >>>  " + edge);
238                            } catch (UnificationFailedException ex) {
239                                    continue;
240                            }
241                    }
242            }
243            
244            private void completeAndPredict() {
245                    int c = 0;
246                    do {
247                            complete();
248                            c = predict();
249                    } while (c > 0);
250            }
251            
252            private int predict() {
253                    int count = 0;
254                    for (Edge e : chart.getEdgesByEndPos(tokens.size(), true)) {
255                            Category cat = e.getNextActive();
256                            if (cat == null) continue;
257                            
258                            if (debug) System.err.println("PREDICT FOR EDGE: " + e);
259                            predict(cat);
260                    }
261                    for (Rule rule : grammar.getEpsilonRules()) {
262                            Edge edge = new Edge(tokens.size(), tokens.size(), rule.getHead(), rule.isAccessible());
263                            boolean newEdge = chart.addEdge(edge);
264                            if (newEdge) count++;
265                            if (debug) System.err.println("EDGE FOR EPSILON RULE: " + edge);
266                    }
267                    return count;
268            }
269            
270            private void predict(Category category) {
271                    for (Rule rule : grammar.getRulesByHeadName(category.getName())) {
272                            if (rule.hasEmptyBody()) continue;
273                            
274                            try {
275                                    Category categoryC = category.deepCopy();
276                                    Rule ruleC = rule.deepCopy();
277                                    categoryC.unify(ruleC.getHead());
278                                    int p = tokens.size();
279                                    Edge edge = new Edge(p, p, ruleC.getHead(), ruleC.getBody(), ruleC.isAccessible());
280                                    if (debug) System.err.println("PREDICTOR: " + rule + "  >>>  " + edge);
281                                    boolean newEdge = chart.addEdge(edge);
282                                    if (newEdge) {
283                                            if (debug) System.err.println("PREDICT FOR EDGE: " + edge);
284                                            predict(edge.getNextActive());
285                                    }
286                            } catch (UnificationFailedException ex) {
287                                    continue;
288                            }
289                    }
290            }
291            
292            private void complete() {
293                    for (Edge e : chart.getEdgesByEndPosAndActCat(tokens.size(), null, true)) {
294                            if (debug) System.err.println("COMPLETE FOR EDGE: " + e);
295                            complete(e.getStartPos(), e);
296                    }
297            }
298            
299            private void complete(int pos, Edge passiveEdge) {
300                    Category category = passiveEdge.getHead();
301                    
302                    for (Edge edge : chart.getEdgesByEndPosAndActCat(pos, category.getName(), pos == tokens.size())) {
303                            if (!edge.isActive()) continue;
304                            
305                            try {
306                                    Category categoryC = category.deepCopy();
307                                    Edge edgeC = edge.deepCopy();
308                                    categoryC.unify(edgeC.getNextActive());
309                                    edgeC.step(tokens.size(), passiveEdge);
310                                    if (debug) System.err.println("COMPLETOR: " + edge + "  >>>  " + edgeC);
311                                    chart.addEdge(edgeC);
312                                    if (!edgeC.isActive()) {
313                                            complete(edgeC.getStartPos(), edgeC);
314                                    }
315                            } catch (UnificationFailedException ex) {
316                                    continue;
317                            }
318                    }
319            }
320    
321    }