001    // This file is part of the Attempto Java Packages.
002    // Copyright 2008-2009, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
003    //
004    // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
005    // terms of the GNU Lesser General Public License as published by the Free Software Foundation,
006    // either version 3 of the License, or (at your option) any later version.
007    //
008    // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
009    // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
010    // PURPOSE. See the GNU Lesser General Public License for more details.
011    //
012    // You should have received a copy of the GNU Lesser General Public License along with the Attempto
013    // Java Packages. If not, see http://www.gnu.org/licenses/.
014    
015    package ch.uzh.ifi.attempto.chartparser;
016    
017    import java.util.ArrayList;
018    import java.util.HashMap;
019    import java.util.List;
020    
021    /**
022     * This is a chart parser (concretely an Earley parser) fully implemented in Java. However, there is a
023     * Prolog format ("Attempto Chartparser Grammar Notation" or "ACGN") that can be transformed into Java
024     * (at compile time).
025     * 
026     * @author Tobias Kuhn
027     * @see Grammar
028     */
029    public class ChartParser {
030            
031            private final Grammar grammar;
032            private final Chart chart = new Chart();
033            private final ArrayList<Object> tokens = new ArrayList<Object>();
034            private boolean debug;
035            
036            /**
037             * Creates a new chart parser for the given grammar. The grammar must not be changed afterwards.
038             * 
039             * @param grammar The grammar to be used by the chart parser.
040             */
041            public ChartParser(Grammar grammar) {
042                    this.grammar = grammar;
043                    init(grammar.getStartCategory());
044                    completeAndPredict();
045            }
046            
047            /**
048             * This method can be used to switch on/off debug mode (default is off). In debug mode, messages about the actions
049             * of the chart parser are printed onto the standard error device.
050             * 
051             * @param debug true to switch on debug mode or false to switch it off.
052             */
053            public void debug(boolean debug) {
054                    this.debug = debug;
055            }
056            
057            /**
058             * Adds the token to the token sequence and makes one more parsing step to process it. If several tokens
059             * are given then these tokens are treated in a parallel way representing a token that can have multiple
060             * forms.
061             * 
062             * @param token The token to be added to the token sequence.
063             */
064            public void addToken(Terminal... token) {
065                    if (token.length == 1) {
066                            tokens.add(token[0]);
067                    } else {
068                            tokens.add(token);
069                    }
070                    for (Terminal t : token) {
071                            Edge edge = new Edge(tokens.size()-1, tokens.size(), t, true);
072                            chart.addEdge(edge);
073                            if (debug) System.err.println("SCANNER: " + edge);
074                    }
075                    completeAndPredict();
076                    //if (debug) System.err.println("CHART:");
077                    //if (debug) System.err.println(chart);
078            }
079            
080            /**
081             * Adds the tokens to the token sequence and processes them.
082             * 
083             * @param tokens The tokens to be added to the token sequence.
084             */
085            public void addTokens(List<Terminal> tokens) {
086                    for (Terminal t : tokens) {
087                            addToken(t);
088                    }
089            }
090            
091            /**
092             * Removes the last token and reverts the last parsing step.
093             */
094            public void removeToken() {
095                    chart.removeEdgesWithEndPos(tokens.size());
096                    tokens.remove(tokens.size()-1);
097            }
098            
099            /**
100             * Removes all tokens in the current token sequence and resets the chart.
101             */
102            public void removeAllTokens() {
103                    tokens.clear();
104                    chart.clear();
105                    init(grammar.getStartCategory());
106                    completeAndPredict();
107            }
108            
109            /**
110             * Returns the current token sequence. The elements of the list are strings (in the normal case)
111             * or arrays of strings (in the case of parallel tokens).
112             * 
113             * @return The current token sequence.
114             */
115            public List<Object> getTokens() {
116                    return new ArrayList<Object>(tokens);
117            }
118            
119            /**
120             * Returns all tokens that are allowed to follow the current token sequence according to the grammar.
121             * 
122             * @return The possible next tokens.
123             */
124            public List<Terminal> nextTokens() {
125                    ArrayList<Terminal> terminals = new ArrayList<Terminal>();
126                    if (debug) System.err.print("LOOKING FORWARD:");
127                    for (Edge e : chart.getEdgesByEndPos(tokens.size(), true)) {
128                            if (!e.isActive()) continue;
129                            if (!(e.getNextActive() instanceof Terminal)) continue;
130                            
131                            Terminal t = (Terminal) e.getNextActive();
132                            boolean isNew = true;
133                            for (Terminal knownTerminal : new ArrayList<Terminal>(terminals)) {
134                                    if (t.subsumes(knownTerminal)) {
135                                            terminals.remove(knownTerminal);
136                                    } else if (knownTerminal.subsumes(t)) {
137                                            isNew = false;
138                                            break;
139                                    }
140                            }
141                            if (isNew) {
142                                    terminals.add(t);
143                            }
144                    }
145                    ArrayList<Terminal> copiedTerminals = new ArrayList<Terminal>();
146                    for (Terminal t : terminals) {
147                            copiedTerminals.add((Terminal) t.deepCopy());
148                            if (debug) System.err.print(" " + t);
149                    }
150                    if (debug) System.err.println();
151                    
152                    return copiedTerminals;
153            }
154            
155            /**
156             * Returns a boolean array that describes which of the tokens of the current token sequence are
157             * accessible (true) and which are not (false) for the given next token.
158             * 
159             * @param nextToken The next token for which the tokens should be checked for accessibility.
160             * @return A boolean array where each element stands for one token of the token sequence.
161             */
162            public boolean[] getAccessiblePositions(String nextToken) {
163                    boolean[] pos = new boolean[tokens.size()];
164                    ArrayList<ArrayList<Edge>> paths = new ArrayList<ArrayList<Edge>>();
165                    
166                    for (Edge e : chart.getEdgesByEndPosAndActCat(tokens.size(), nextToken, false)) {
167                            if (!e.isActive()) continue;
168                            
169                            ArrayList<Edge> partialPath = new ArrayList<Edge>();
170                            partialPath.add(e.deepCopy());
171                            collectActivePaths(0, partialPath, paths);
172                    }
173                    
174                    for (ArrayList<Edge> path : paths) {
175                            for (Edge edge : path) {
176                                    scanForAccessiblePositions(edge, pos, new ArrayList<Edge>());
177                            }
178                    }
179                    
180                    return pos;
181            }
182            
183            private void scanForAccessiblePositions(Edge edge, boolean[] pos, ArrayList<Edge> visitedEdges) {
184                    if (edge.getStartPos() == edge.getEndPos()) return;
185                    if (edge.getBody().length == 0 && edge.getHead() instanceof Terminal) {
186                            pos[edge.getStartPos()] = true;
187                            return;
188                    }
189                    
190                    for (Edge e : edge.getLinks()) {
191                            if (!e.isAccessible()) continue;
192                            if (visitedEdges.contains(e)) continue;
193                            visitedEdges.add(e);
194                            scanForAccessiblePositions(e, pos, visitedEdges);
195                    }
196            }
197            
198            private void collectActivePaths(int startPos, ArrayList<Edge> partialPath, ArrayList<ArrayList<Edge>> paths) {
199                    Edge edge = partialPath.get(partialPath.size()-1);
200                    if (edge.getStartPos() == startPos) {
201                            paths.add(partialPath);
202                            return;
203                    }
204                    if (edge.getStartPos() < startPos) {
205                            return;
206                    }
207                    ArrayList<Edge> edgesToCheck = new ArrayList<Edge>();
208                    for (Edge e : chart.getEdgesByEndPosAndActCat(edge.getStartPos(), edge.getHead().getName(), false)) {
209                            Category[] newBody = new Category[e.getProgress()+1];
210                            System.arraycopy(e.getBody(), 0, newBody, 0, e.getProgress()+1);
211                            Edge ec = new Edge(e.getStartPos(), e.getEndPos(), e.getHead(), newBody, e.getProgress(), true);
212                            ec.addLinksFrom(e);
213                            boolean isNew = true;
214                            for (Edge ee : edgesToCheck) {
215                                    if (ee.subsumes(ec)) {
216                                            isNew = false;
217                                            break;
218                                    }
219                            }
220                            if (isNew) {
221                                    for (Edge ee : new ArrayList<Edge>(edgesToCheck)) {
222                                            if (ec.subsumes(ee)) {
223                                                    edgesToCheck.remove(ee);
224                                            }
225                                    }
226                                    edgesToCheck.add(ec);
227                            }
228                    }
229                    for (Edge e : edgesToCheck) {
230                            try {
231                                    Edge eC = e.deepCopy();
232                                    ArrayList<Edge> newPartialPath = copyEdgeList(partialPath);
233                                    Edge newEdge = newPartialPath.get(partialPath.size()-1);
234                                    newEdge.getHead().unify(eC.getNextActive());
235                                    newPartialPath.add(eC);
236                                    collectActivePaths(startPos, newPartialPath, paths);
237                            } catch (UnificationFailedException ex) {
238                                    continue;
239                            }
240                    }
241            }
242            
243            private ArrayList<Edge> copyEdgeList(ArrayList<Edge> edgeList) {
244                    ArrayList<Edge> edgeListCopy = new ArrayList<Edge>();
245                    HashMap<Integer, StringEntity> entities = new HashMap<Integer, StringEntity>();
246                    for (Edge edge : edgeList) {
247                            edgeListCopy.add(edge.deepCopy(entities));
248                    }
249                    return edgeListCopy;
250            }
251            
252            private void init(Nonterminal category) {
253                    for (Rule rule : grammar.getRulesByHeadName(category.getName())) {
254                            try {
255                                    Nonterminal categoryC = (Nonterminal) category.deepCopy();
256                                    Rule ruleC = rule.deepCopy();
257                                    categoryC.unify(ruleC.getHead());
258                                    Edge edge = new Edge(0, 0, ruleC.getHead(), ruleC.getBody(), ruleC.isAccessible());
259                                    chart.addEdge(edge);
260                                    if (debug) System.err.println("INIT: " + rule + "  >>>  " + edge);
261                            } catch (UnificationFailedException ex) {
262                                    continue;
263                            }
264                    }
265            }
266            
267            private void completeAndPredict() {
268                    int c = 0;
269                    do {
270                            complete();
271                            c = predict();
272                    } while (c > 0);
273            }
274            
275            private int predict() {
276                    int count = 0;
277                    for (Edge e : chart.getEdgesByEndPos(tokens.size(), true)) {
278                            Category cat = e.getNextActive();
279                            if (cat == null) continue;
280                            
281                            if (debug) System.err.println("PREDICT FOR EDGE: " + e);
282                            int subcount = predict(cat);
283                            count += subcount;
284                    }
285                    for (Rule rule : grammar.getEpsilonRules()) {
286                            Edge edge = new Edge(tokens.size(), tokens.size(), rule.getHead(), rule.isAccessible());
287                            boolean isNewEdge = chart.addEdge(edge);
288                            if (isNewEdge) {
289                                    count++;
290                                    if (debug) System.err.println("EDGE FOR EPSILON RULE: " + edge);
291                            }
292                    }
293                    return count;
294            }
295            
296            private int predict(Category category) {
297                    int count = 0;
298                    for (Rule rule : grammar.getRulesByHeadName(category.getName())) {
299                            if (rule.hasEmptyBody()) continue;
300                            
301                            try {
302                                    Category categoryC = category.deepCopy();
303                                    Rule ruleC = rule.deepCopy();
304                                    categoryC.unify(ruleC.getHead());
305                                    int p = tokens.size();
306                                    Edge edge = new Edge(p, p, ruleC.getHead(), ruleC.getBody(), ruleC.isAccessible());
307                                    if (debug) System.err.println("PREDICTOR: " + rule + "  >>>  " + edge);
308                                    boolean isNewEdge = chart.addEdge(edge);
309                                    if (isNewEdge) {
310                                            if (debug) System.err.println("PREDICT FOR EDGE: " + edge);
311                                            int subcount = predict(edge.getNextActive());
312                                            count += subcount + 1;
313                                    }
314                            } catch (UnificationFailedException ex) {
315                                    continue;
316                            }
317                    }
318                    return count;
319            }
320            
321            private void complete() {
322                    for (Edge e : chart.getEdgesByEndPosAndActCat(tokens.size(), null, true)) {
323                            if (debug) System.err.println("COMPLETE FOR EDGE: " + e);
324                            complete(e.getStartPos(), e);
325                    }
326            }
327            
328            private void complete(int pos, Edge passiveEdge) {
329                    Category category = passiveEdge.getHead();
330                    
331                    for (Edge edge : chart.getEdgesByEndPosAndActCat(pos, category.getName(), pos == tokens.size())) {
332                            if (!edge.isActive()) continue;
333                            
334                            try {
335                                    Category categoryC = category.deepCopy();
336                                    Edge edgeC = edge.deepCopy();
337                                    categoryC.unify(edgeC.getNextActive());
338                                    edgeC.step(tokens.size(), passiveEdge);
339                                    if (debug) System.err.println("COMPLETOR: " + edge + "  >>>  " + edgeC);
340                                    boolean isNewEdge = chart.addEdge(edgeC);
341                                    if (isNewEdge && !edgeC.isActive()) {
342                                            complete(edgeC.getStartPos(), edgeC);
343                                    }
344                            } catch (UnificationFailedException ex) {
345                                    continue;
346                            }
347                    }
348            }
349    
350    }