001    // This file is part of the Attempto Java Packages.
002    // Copyright 2008, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
003    //
004    // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
005    // terms of the GNU Lesser General Public License as published by the Free Software Foundation,
006    // either version 3 of the License, or (at your option) any later version.
007    //
008    // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
009    // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
010    // PURPOSE. See the GNU Lesser General Public License for more details.
011    //
012    // You should have received a copy of the GNU Lesser General Public License along with the Attempto
013    // Java Packages. If not, see http://www.gnu.org/licenses/.
014    
015    package ch.uzh.ifi.attempto.chartparser;
016    
017    import java.util.ArrayList;
018    import java.util.HashMap;
019    import java.util.List;
020    
021    /**
022     * This is a chart parser (concretely an Earley parser) fully implemented in Java. However, there is a
023     * Prolog format ("Attempto Chartparser Grammar Notation" or "ACGN") that can be transformed into Java
024     * (at compile time).
025     * 
026     * @author Tobias Kuhn
027     * @see Grammar
028     */
029    public class ChartParser {
030            
031            private final Grammar grammar;
032            private final Chart chart = new Chart();
033            private final ArrayList<Terminal> tokens = new ArrayList<Terminal>();
034            private boolean debug;
035            
036            /**
037             * Creates a new chart parser for the given grammar. The grammar must not be changed afterwards.
038             * 
039             * @param grammar The grammar to be used by the chart parser.
040             */
041            public ChartParser(Grammar grammar) {
042                    this.grammar = grammar;
043                    init(grammar.getStartCategory());
044                    completeAndPredict();
045            }
046            
047            /**
048             * This method can be used to switch on/off debug mode (default is off). In debug mode, messages about the actions
049             * of the chart parser are printed onto the standard error device.
050             * 
051             * @param debug true to switch on debug mode or false to switch it off.
052             */
053            public void debug(boolean debug) {
054                    this.debug = debug;
055            }
056            
057            /**
058             * Adds the token to the token sequence and makes one more parsing step to process it.
059             * 
060             * @param token The token to be added to the token sequence.
061             */
062            public void addToken(String token) {
063                    Terminal t = new Terminal(token);
064                    tokens.add(t);
065                    Edge edge = new Edge(tokens.size()-1, tokens.size(), t, true);
066                    chart.addEdge(edge);
067                    if (debug) System.err.println("SCANNER: " + edge);
068                    completeAndPredict();
069                    //if (debug) System.err.println("CHART:");
070                    //if (debug) System.err.println(chart);
071            }
072            
073            /**
074             * Adds the tokens to the token sequence and processes them.
075             * 
076             * @param tokens The tokens to be added to the token sequence.
077             */
078            public void addTokens(List<String> tokens) {
079                    for (String s : tokens) {
080                            addToken(s);
081                    }
082            }
083            
084            /**
085             * Removes the last token and reverts the last parsing step.
086             */
087            public void removeToken() {
088                    chart.removeEdgesWithEndPos(tokens.size());
089                    tokens.remove(tokens.size()-1);
090            }
091            
092            /**
093             * Removes all tokens in the current token sequence and resets the chart.
094             */
095            public void removeAllTokens() {
096                    tokens.clear();
097                    chart.clear();
098                    init(grammar.getStartCategory());
099                    completeAndPredict();
100            }
101            
102            /**
103             * Returns the current token sequence.
104             * 
105             * @return The current token sequence.
106             */
107            public List<Terminal> getTokens() {
108                    return new ArrayList<Terminal>(tokens);
109            }
110            
111            /**
112             * Returns all tokens that are allowed to follow the current token sequence according to the grammar.
113             * 
114             * @return The possible next tokens.
115             */
116            public List<Terminal> nextTokens() {
117                    ArrayList<Terminal> terminals = new ArrayList<Terminal>();
118                    if (debug) System.err.print("LOOKING FORWARD:");
119                    for (Edge e : chart.getEdgesByEndPos(tokens.size(), true)) {
120                            if (!e.isActive()) continue;
121                            if (!(e.getNextActive() instanceof Terminal)) continue;
122                            
123                            Terminal t = (Terminal) e.getNextActive();
124                            if (!terminals.contains(t)) {
125                                    if (debug) System.err.print(" " + t);
126                                    terminals.add(t);
127                            }
128                    }
129                    if (debug) System.err.println();
130                    
131                    return terminals;
132            }
133            
134            /**
135             * Returns a boolean array that describes which of the tokens of the current token sequence are
136             * accessible (true) and which are not (false) for the given next token.
137             * 
138             * @param nextToken The next token for which the tokens should be checked for accessibility.
139             * @return A boolean array where each element stands for one token of the token sequence.
140             */
141            public boolean[] getAccessiblePositions(String nextToken) {
142                    boolean[] pos = new boolean[tokens.size()];
143                    ArrayList<ArrayList<Edge>> paths = new ArrayList<ArrayList<Edge>>();
144                    
145                    for (Edge e : chart.getEdgesByEndPosAndActCat(tokens.size(), nextToken, false)) {
146                            if (!e.isActive()) continue;
147                            
148                            ArrayList<Edge> partialPath = new ArrayList<Edge>();
149                            partialPath.add(e.deepCopy());
150                            collectActivePaths(0, partialPath, paths);
151                    }
152                    
153                    for (ArrayList<Edge> path : paths) {
154                            for (Edge edge : path) {
155                                    scanForAccessiblePositions(edge, pos, new ArrayList<Edge>());
156                            }
157                    }
158                    
159                    return pos;
160            }
161            
162            private void scanForAccessiblePositions(Edge edge, boolean[] pos, ArrayList<Edge> visitedEdges) {
163                    if (edge.getStartPos() == edge.getEndPos()) return;
164                    if (edge.getBody().length == 0 && edge.getHead() instanceof Terminal) {
165                            pos[edge.getStartPos()] = true;
166                            return;
167                    }
168                    
169                    for (Edge e : edge.getLinks()) {
170                            if (!e.isAccessible()) continue;
171                            if (visitedEdges.contains(e)) continue;
172                            visitedEdges.add(e);
173                            scanForAccessiblePositions(e, pos, visitedEdges);
174                    }
175            }
176            
177            private void collectActivePaths(int startPos, ArrayList<Edge> partialPath, ArrayList<ArrayList<Edge>> paths) {
178                    Edge edge = partialPath.get(partialPath.size()-1);
179                    if (edge.getStartPos() == startPos) {
180                            paths.add(partialPath);
181                            return;
182                    }
183                    if (edge.getStartPos() < startPos) {
184                            return;
185                    }
186                    ArrayList<Edge> edgesToCheck = new ArrayList<Edge>();
187                    for (Edge e : chart.getEdgesByEndPosAndActCat(edge.getStartPos(), edge.getHead().getName(), false)) {
188                            Category[] newBody = new Category[e.getProgress()+1];
189                            System.arraycopy(e.getBody(), 0, newBody, 0, e.getProgress()+1);
190                            Edge ec = new Edge(e.getStartPos(), e.getEndPos(), e.getHead(), newBody, e.getProgress(), true);
191                            ec.addLinksFrom(e);
192                            boolean isNew = true;
193                            for (Edge ee : edgesToCheck) {
194                                    if (ee.subsumes(ec)) {
195                                            isNew = false;
196                                            break;
197                                    }
198                            }
199                            if (isNew) {
200                                    for (Edge ee : edgesToCheck) {
201                                            if (ec.subsumes(ee)) {
202                                                    edgesToCheck.remove(ee);
203                                            }
204                                    }
205                                    edgesToCheck.add(ec);
206                            }
207                    }
208                    for (Edge e : edgesToCheck) {
209                            try {
210                                    Edge eC = e.deepCopy();
211                                    ArrayList<Edge> newPartialPath = copyEdgeList(partialPath);
212                                    Edge newEdge = newPartialPath.get(partialPath.size()-1);
213                                    newEdge.getHead().unify(eC.getNextActive());
214                                    newPartialPath.add(eC);
215                                    collectActivePaths(startPos, newPartialPath, paths);
216                            } catch (UnificationFailedException ex) {
217                                    continue;
218                            }
219                    }
220            }
221            
222            private ArrayList<Edge> copyEdgeList(ArrayList<Edge> edgeList) {
223                    ArrayList<Edge> edgeListCopy = new ArrayList<Edge>();
224                    HashMap<Integer, StringEntity> entities = new HashMap<Integer, StringEntity>();
225                    for (Edge edge : edgeList) {
226                            edgeListCopy.add(edge.deepCopy(entities));
227                    }
228                    return edgeListCopy;
229            }
230            
231            private void init(Nonterminal category) {
232                    for (Rule rule : grammar.getRulesByHeadName(category.getName())) {
233                            try {
234                                    Nonterminal categoryC = (Nonterminal) category.deepCopy();
235                                    Rule ruleC = rule.deepCopy();
236                                    categoryC.unify(ruleC.getHead());
237                                    Edge edge = new Edge(0, 0, ruleC.getHead(), ruleC.getBody(), ruleC.isAccessible());
238                                    chart.addEdge(edge);
239                                    if (debug) System.err.println("INIT: " + rule + "  >>>  " + edge);
240                            } catch (UnificationFailedException ex) {
241                                    continue;
242                            }
243                    }
244            }
245            
246            private void completeAndPredict() {
247                    int c = 0;
248                    do {
249                            complete();
250                            c = predict();
251                    } while (c > 0);
252            }
253            
254            private int predict() {
255                    int count = 0;
256                    for (Edge e : chart.getEdgesByEndPos(tokens.size(), true)) {
257                            Category cat = e.getNextActive();
258                            if (cat == null) continue;
259                            
260                            if (debug) System.err.println("PREDICT FOR EDGE: " + e);
261                            predict(cat);
262                    }
263                    for (Rule rule : grammar.getEpsilonRules()) {
264                            Edge edge = new Edge(tokens.size(), tokens.size(), rule.getHead(), rule.isAccessible());
265                            boolean isNewEdge = chart.addEdge(edge);
266                            if (isNewEdge) count++;
267                            if (debug) System.err.println("EDGE FOR EPSILON RULE: " + edge);
268                    }
269                    return count;
270            }
271            
272            private void predict(Category category) {
273                    for (Rule rule : grammar.getRulesByHeadName(category.getName())) {
274                            if (rule.hasEmptyBody()) continue;
275                            
276                            try {
277                                    Category categoryC = category.deepCopy();
278                                    Rule ruleC = rule.deepCopy();
279                                    categoryC.unify(ruleC.getHead());
280                                    int p = tokens.size();
281                                    Edge edge = new Edge(p, p, ruleC.getHead(), ruleC.getBody(), ruleC.isAccessible());
282                                    if (debug) System.err.println("PREDICTOR: " + rule + "  >>>  " + edge);
283                                    boolean isNewEdge = chart.addEdge(edge);
284                                    if (isNewEdge) {
285                                            if (debug) System.err.println("PREDICT FOR EDGE: " + edge);
286                                            predict(edge.getNextActive());
287                                    }
288                            } catch (UnificationFailedException ex) {
289                                    continue;
290                            }
291                    }
292            }
293            
294            private void complete() {
295                    for (Edge e : chart.getEdgesByEndPosAndActCat(tokens.size(), null, true)) {
296                            if (debug) System.err.println("COMPLETE FOR EDGE: " + e);
297                            complete(e.getStartPos(), e);
298                    }
299            }
300            
301            private void complete(int pos, Edge passiveEdge) {
302                    Category category = passiveEdge.getHead();
303                    
304                    for (Edge edge : chart.getEdgesByEndPosAndActCat(pos, category.getName(), pos == tokens.size())) {
305                            if (!edge.isActive()) continue;
306                            
307                            try {
308                                    Category categoryC = category.deepCopy();
309                                    Edge edgeC = edge.deepCopy();
310                                    categoryC.unify(edgeC.getNextActive());
311                                    edgeC.step(tokens.size(), passiveEdge);
312                                    if (debug) System.err.println("COMPLETOR: " + edge + "  >>>  " + edgeC);
313                                    boolean isNewEdge = chart.addEdge(edgeC);
314                                    if (isNewEdge && !edgeC.isActive()) {
315                                            complete(edgeC.getStartPos(), edgeC);
316                                    }
317                            } catch (UnificationFailedException ex) {
318                                    continue;
319                            }
320                    }
321            }
322    
323    }