001 // This file is part of the Attempto Java Packages.
002 // Copyright 2008, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
003 //
004 // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
005 // terms of the GNU Lesser General Public License as published by the Free Software Foundation,
006 // either version 3 of the License, or (at your option) any later version.
007 //
008 // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
009 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
010 // PURPOSE. See the GNU Lesser General Public License for more details.
011 //
012 // You should have received a copy of the GNU Lesser General Public License along with the Attempto
013 // Java Packages. If not, see http://www.gnu.org/licenses/.
014
015 package ch.uzh.ifi.attempto.chartparser;
016
017 import java.util.ArrayList;
018 import java.util.HashMap;
019 import java.util.List;
020
021 /**
022 * This is a chart parser (concretely an Earley parser) fully implemented in Java. However, there is a
023 * Prolog format ("Attempto Chartparser Grammar Notation" or "ACGN") that can be transformed into Java
024 * (at compile time).
025 *
026 * @author Tobias Kuhn
027 * @see Grammar
028 */
029 public class ChartParser {
030
031 private final Grammar grammar;
032 private final Chart chart = new Chart();
033 private final ArrayList<Terminal> tokens = new ArrayList<Terminal>();
034 private boolean debug;
035
036 /**
037 * Creates a new chart parser for the given grammar. The grammar must not be changed afterwards.
038 *
039 * @param grammar The grammar to be used by the chart parser.
040 */
041 public ChartParser(Grammar grammar) {
042 this.grammar = grammar;
043 init(grammar.getStartCategory());
044 completeAndPredict();
045 }
046
047 /**
048 * This method can be used to switch on/off debug mode (default is off). In debug mode, messages about the actions
049 * of the chart parser are printed onto the standard error device.
050 *
051 * @param debug true to switch on debug mode or false to switch it off.
052 */
053 public void debug(boolean debug) {
054 this.debug = debug;
055 }
056
057 /**
058 * Adds the token to the token sequence and makes one more parsing step to process it.
059 *
060 * @param token The token to be added to the token sequence.
061 */
062 public void addToken(String token) {
063 Terminal t = new Terminal(token);
064 tokens.add(t);
065 Edge edge = new Edge(tokens.size()-1, tokens.size(), t, true);
066 chart.addEdge(edge);
067 if (debug) System.err.println("SCANNER: " + edge);
068 completeAndPredict();
069 //if (debug) System.err.println("CHART:");
070 //if (debug) System.err.println(chart);
071 }
072
073 /**
074 * Adds the tokens to the token sequence and processes them.
075 *
076 * @param tokens The tokens to be added to the token sequence.
077 */
078 public void addTokens(List<String> tokens) {
079 for (String s : tokens) {
080 addToken(s);
081 }
082 }
083
084 /**
085 * Removes the last token and reverts the last parsing step.
086 */
087 public void removeToken() {
088 chart.removeEdgesWithEndPos(tokens.size());
089 tokens.remove(tokens.size()-1);
090 }
091
092 /**
093 * Removes all tokens in the current token sequence and resets the chart.
094 */
095 public void removeAllTokens() {
096 tokens.clear();
097 chart.clear();
098 init(grammar.getStartCategory());
099 completeAndPredict();
100 }
101
102 /**
103 * Returns the current token sequence.
104 *
105 * @return The current token sequence.
106 */
107 public List<Terminal> getTokens() {
108 return new ArrayList<Terminal>(tokens);
109 }
110
111 /**
112 * Returns all tokens that are allowed to follow the current token sequence according to the grammar.
113 *
114 * @return The possible next tokens.
115 */
116 public List<Terminal> nextTokens() {
117 ArrayList<Terminal> terminals = new ArrayList<Terminal>();
118 if (debug) System.err.print("LOOKING FORWARD:");
119 for (Edge e : chart.getEdgesByEndPos(tokens.size(), true)) {
120 if (!e.isActive()) continue;
121 if (!(e.getNextActive() instanceof Terminal)) continue;
122
123 Terminal t = (Terminal) e.getNextActive();
124 if (!terminals.contains(t)) {
125 if (debug) System.err.print(" " + t);
126 terminals.add(t);
127 }
128 }
129 if (debug) System.err.println();
130
131 return terminals;
132 }
133
134 /**
135 * Returns a boolean array that describes which of the tokens of the current token sequence are
136 * accessible (true) and which are not (false) for the given next token.
137 *
138 * @param nextToken The next token for which the tokens should be checked for accessibility.
139 * @return A boolean array where each element stands for one token of the token sequence.
140 */
141 public boolean[] getAccessiblePositions(String nextToken) {
142 boolean[] pos = new boolean[tokens.size()];
143 ArrayList<ArrayList<Edge>> paths = new ArrayList<ArrayList<Edge>>();
144
145 for (Edge e : chart.getEdgesByEndPosAndActCat(tokens.size(), nextToken, false)) {
146 if (!e.isActive()) continue;
147
148 ArrayList<Edge> partialPath = new ArrayList<Edge>();
149 partialPath.add(e.deepCopy());
150 collectActivePaths(0, partialPath, paths);
151 }
152
153 for (ArrayList<Edge> path : paths) {
154 for (Edge edge : path) {
155 scanForAccessiblePositions(edge, pos, new ArrayList<Edge>());
156 }
157 }
158
159 return pos;
160 }
161
162 private void scanForAccessiblePositions(Edge edge, boolean[] pos, ArrayList<Edge> visitedEdges) {
163 if (edge.getStartPos() == edge.getEndPos()) return;
164 if (edge.getBody().length == 0 && edge.getHead() instanceof Terminal) {
165 pos[edge.getStartPos()] = true;
166 return;
167 }
168
169 for (Edge e : edge.getLinks()) {
170 if (!e.isAccessible()) continue;
171 if (visitedEdges.contains(e)) continue;
172 visitedEdges.add(e);
173 scanForAccessiblePositions(e, pos, visitedEdges);
174 }
175 }
176
177 private void collectActivePaths(int startPos, ArrayList<Edge> partialPath, ArrayList<ArrayList<Edge>> paths) {
178 Edge edge = partialPath.get(partialPath.size()-1);
179 if (edge.getStartPos() == startPos) {
180 paths.add(partialPath);
181 return;
182 }
183 if (edge.getStartPos() < startPos) {
184 return;
185 }
186 ArrayList<Edge> edgesToCheck = new ArrayList<Edge>();
187 for (Edge e : chart.getEdgesByEndPosAndActCat(edge.getStartPos(), edge.getHead().getName(), false)) {
188 Category[] newBody = new Category[e.getProgress()+1];
189 System.arraycopy(e.getBody(), 0, newBody, 0, e.getProgress()+1);
190 Edge ec = new Edge(e.getStartPos(), e.getEndPos(), e.getHead(), newBody, e.getProgress(), true);
191 ec.addLinksFrom(e);
192 boolean isNew = true;
193 for (Edge ee : edgesToCheck) {
194 if (ee.subsumes(ec)) {
195 isNew = false;
196 break;
197 }
198 }
199 if (isNew) {
200 for (Edge ee : edgesToCheck) {
201 if (ec.subsumes(ee)) {
202 edgesToCheck.remove(ee);
203 }
204 }
205 edgesToCheck.add(ec);
206 }
207 }
208 for (Edge e : edgesToCheck) {
209 try {
210 Edge eC = e.deepCopy();
211 ArrayList<Edge> newPartialPath = copyEdgeList(partialPath);
212 Edge newEdge = newPartialPath.get(partialPath.size()-1);
213 newEdge.getHead().unify(eC.getNextActive());
214 newPartialPath.add(eC);
215 collectActivePaths(startPos, newPartialPath, paths);
216 } catch (UnificationFailedException ex) {
217 continue;
218 }
219 }
220 }
221
222 private ArrayList<Edge> copyEdgeList(ArrayList<Edge> edgeList) {
223 ArrayList<Edge> edgeListCopy = new ArrayList<Edge>();
224 HashMap<Integer, StringEntity> entities = new HashMap<Integer, StringEntity>();
225 for (Edge edge : edgeList) {
226 edgeListCopy.add(edge.deepCopy(entities));
227 }
228 return edgeListCopy;
229 }
230
231 private void init(Nonterminal category) {
232 for (Rule rule : grammar.getRulesByHeadName(category.getName())) {
233 try {
234 Nonterminal categoryC = (Nonterminal) category.deepCopy();
235 Rule ruleC = rule.deepCopy();
236 categoryC.unify(ruleC.getHead());
237 Edge edge = new Edge(0, 0, ruleC.getHead(), ruleC.getBody(), ruleC.isAccessible());
238 chart.addEdge(edge);
239 if (debug) System.err.println("INIT: " + rule + " >>> " + edge);
240 } catch (UnificationFailedException ex) {
241 continue;
242 }
243 }
244 }
245
246 private void completeAndPredict() {
247 int c = 0;
248 do {
249 complete();
250 c = predict();
251 } while (c > 0);
252 }
253
254 private int predict() {
255 int count = 0;
256 for (Edge e : chart.getEdgesByEndPos(tokens.size(), true)) {
257 Category cat = e.getNextActive();
258 if (cat == null) continue;
259
260 if (debug) System.err.println("PREDICT FOR EDGE: " + e);
261 predict(cat);
262 }
263 for (Rule rule : grammar.getEpsilonRules()) {
264 Edge edge = new Edge(tokens.size(), tokens.size(), rule.getHead(), rule.isAccessible());
265 boolean isNewEdge = chart.addEdge(edge);
266 if (isNewEdge) count++;
267 if (debug) System.err.println("EDGE FOR EPSILON RULE: " + edge);
268 }
269 return count;
270 }
271
272 private void predict(Category category) {
273 for (Rule rule : grammar.getRulesByHeadName(category.getName())) {
274 if (rule.hasEmptyBody()) continue;
275
276 try {
277 Category categoryC = category.deepCopy();
278 Rule ruleC = rule.deepCopy();
279 categoryC.unify(ruleC.getHead());
280 int p = tokens.size();
281 Edge edge = new Edge(p, p, ruleC.getHead(), ruleC.getBody(), ruleC.isAccessible());
282 if (debug) System.err.println("PREDICTOR: " + rule + " >>> " + edge);
283 boolean isNewEdge = chart.addEdge(edge);
284 if (isNewEdge) {
285 if (debug) System.err.println("PREDICT FOR EDGE: " + edge);
286 predict(edge.getNextActive());
287 }
288 } catch (UnificationFailedException ex) {
289 continue;
290 }
291 }
292 }
293
294 private void complete() {
295 for (Edge e : chart.getEdgesByEndPosAndActCat(tokens.size(), null, true)) {
296 if (debug) System.err.println("COMPLETE FOR EDGE: " + e);
297 complete(e.getStartPos(), e);
298 }
299 }
300
301 private void complete(int pos, Edge passiveEdge) {
302 Category category = passiveEdge.getHead();
303
304 for (Edge edge : chart.getEdgesByEndPosAndActCat(pos, category.getName(), pos == tokens.size())) {
305 if (!edge.isActive()) continue;
306
307 try {
308 Category categoryC = category.deepCopy();
309 Edge edgeC = edge.deepCopy();
310 categoryC.unify(edgeC.getNextActive());
311 edgeC.step(tokens.size(), passiveEdge);
312 if (debug) System.err.println("COMPLETOR: " + edge + " >>> " + edgeC);
313 boolean isNewEdge = chart.addEdge(edgeC);
314 if (isNewEdge && !edgeC.isActive()) {
315 complete(edgeC.getStartPos(), edgeC);
316 }
317 } catch (UnificationFailedException ex) {
318 continue;
319 }
320 }
321 }
322
323 }