001 // This file is part of the Attempto Java Packages.
002 // Copyright 2008, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
003 //
004 // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
005 // terms of the GNU Lesser General Public License as published by the Free Software Foundation,
006 // either version 3 of the License, or (at your option) any later version.
007 //
008 // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
009 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
010 // PURPOSE. See the GNU Lesser General Public License for more details.
011 //
012 // You should have received a copy of the GNU Lesser General Public License along with the Attempto
013 // Java Packages. If not, see http://www.gnu.org/licenses/.
014
015 package ch.uzh.ifi.attempto.chartparser;
016
017 import java.util.ArrayList;
018 import java.util.HashMap;
019 import java.util.List;
020
021 /**
022 * This is a chart parser (concretely an Earley parser) fully implemented in Java. However, there is a
023 * Prolog format ("Attempto Chartparser Grammar Notation" or "ACGN") that can be transformed into Java
024 * (at compile time).
025 *
026 * @author Tobias Kuhn
027 * @see Grammar
028 */
029 public class ChartParser {
030
031 private final Grammar grammar;
032 private final Chart chart = new Chart();
033 private final ArrayList<Terminal> tokens = new ArrayList<Terminal>();
034 private boolean debug;
035
036 /**
037 * Creates a new chart parser for the given grammar. The grammar must not be changed afterwards.
038 *
039 * @param grammar The grammar to be used by the chart parser.
040 */
041 public ChartParser(Grammar grammar) {
042 this.grammar = grammar;
043 init(grammar.getStartCategory());
044 completeAndPredict();
045 }
046
047 /**
048 * This method can be used to switch on/off debug mode (default is off). In debug mode, messages about the actions
049 * of the chart parser are printed onto the standard error device.
050 *
051 * @param debug true to switch on debug mode or false to switch it off.
052 */
053 public void debug(boolean debug) {
054 this.debug = debug;
055 }
056
057 /**
058 * Adds the token to the token sequence and makes one more parsing step to process it.
059 *
060 * @param token The token to be added to the token sequence.
061 */
062 public void addToken(String token) {
063 Terminal t = new Terminal(token);
064 tokens.add(t);
065 Edge edge = new Edge(tokens.size()-1, tokens.size(), t, true);
066 chart.addEdge(edge);
067 if (debug) System.err.println("SCANNER: " + edge);
068 completeAndPredict();
069 }
070
071 /**
072 * Adds the tokens to the token sequence and processes them.
073 *
074 * @param tokens The tokens to be added to the token sequence.
075 */
076 public void addTokens(List<String> tokens) {
077 for (String s : tokens) {
078 addToken(s);
079 }
080 }
081
082 /**
083 * Removes the last token and reverts the last parsing step.
084 */
085 public void removeToken() {
086 chart.removeEdgesWithEndPos(tokens.size());
087 tokens.remove(tokens.size()-1);
088 }
089
090 /**
091 * Removes all tokens in the current token sequence and resets the chart.
092 */
093 public void removeAllTokens() {
094 tokens.clear();
095 chart.clear();
096 init(grammar.getStartCategory());
097 completeAndPredict();
098 }
099
100 /**
101 * Returns the current token sequence.
102 *
103 * @return The current token sequence.
104 */
105 public List<Terminal> getTokens() {
106 return new ArrayList<Terminal>(tokens);
107 }
108
109 /**
110 * Returns all tokens that are allowed to follow the current token sequence according to the grammar.
111 *
112 * @return The possible next tokens.
113 */
114 public List<Terminal> nextTokens() {
115 ArrayList<Terminal> terminals = new ArrayList<Terminal>();
116 if (debug) System.err.print("LOOKING FORWARD:");
117 for (Edge e : chart.getEdgesByEndPos(tokens.size(), true)) {
118 if (!e.isActive()) continue;
119 if (!(e.getNextActive() instanceof Terminal)) continue;
120
121 Terminal t = (Terminal) e.getNextActive();
122 if (!terminals.contains(t)) {
123 if (debug) System.err.print(" " + t);
124 terminals.add(t);
125 }
126 }
127 if (debug) System.err.println();
128
129 return terminals;
130 }
131
132 /**
133 * Returns a boolean array that describes which of the tokens of the current token sequence are
134 * accessible (true) and which are not (false) for the given next token.
135 *
136 * @param nextToken The next token for which the tokens should be checked for accessibility.
137 * @return A boolean array where each element stands for one token of the token sequence.
138 */
139 public boolean[] getAccessiblePositions(String nextToken) {
140 boolean[] pos = new boolean[tokens.size()];
141 ArrayList<ArrayList<Edge>> paths = new ArrayList<ArrayList<Edge>>();
142
143 for (Edge e : chart.getEdgesByEndPosAndActCat(tokens.size(), nextToken, false)) {
144 if (!e.isActive()) continue;
145
146 ArrayList<Edge> partialPath = new ArrayList<Edge>();
147 partialPath.add(e.deepCopy());
148 collectActivePaths(0, partialPath, paths);
149 }
150
151 for (ArrayList<Edge> path : paths) {
152 for (Edge edge : path) {
153 scanForAccessiblePositions(edge, pos, new ArrayList<Edge>());
154 }
155 }
156
157 return pos;
158 }
159
160 private void scanForAccessiblePositions(Edge edge, boolean[] pos, ArrayList<Edge> visitedEdges) {
161 if (edge.getStartPos() == edge.getEndPos()) return;
162 if (edge.getBody().length == 0 && edge.getHead() instanceof Terminal) {
163 pos[edge.getStartPos()] = true;
164 return;
165 }
166
167 for (Edge e : edge.getLinks()) {
168 if (!e.isAccessible()) continue;
169 if (visitedEdges.contains(e)) continue;
170 visitedEdges.add(e);
171 scanForAccessiblePositions(e, pos, visitedEdges);
172 }
173 }
174
175 private void collectActivePaths(int startPos, ArrayList<Edge> partialPath, ArrayList<ArrayList<Edge>> paths) {
176 Edge edge = partialPath.get(partialPath.size()-1);
177 if (edge.getStartPos() == startPos) {
178 paths.add(partialPath);
179 return;
180 }
181 if (edge.getStartPos() < startPos) {
182 return;
183 }
184 ArrayList<Edge> edgesToCheck = new ArrayList<Edge>();
185 for (Edge e : chart.getEdgesByEndPosAndActCat(edge.getStartPos(), edge.getHead().getName(), false)) {
186 Category[] newBody = new Category[e.getProgress()+1];
187 System.arraycopy(e.getBody(), 0, newBody, 0, e.getProgress()+1);
188 Edge ec = new Edge(e.getStartPos(), e.getEndPos(), e.getHead(), newBody, e.getProgress(), true);
189 ec.addLinksFrom(e);
190 boolean isNew = true;
191 for (Edge ee : edgesToCheck) {
192 if (ee.subsumes(ec)) {
193 isNew = false;
194 break;
195 }
196 }
197 if (isNew) {
198 for (Edge ee : edgesToCheck) {
199 if (ec.subsumes(ee)) {
200 edgesToCheck.remove(ee);
201 }
202 }
203 edgesToCheck.add(ec);
204 }
205 }
206 for (Edge e : edgesToCheck) {
207 try {
208 Edge eC = e.deepCopy();
209 ArrayList<Edge> newPartialPath = copyEdgeList(partialPath);
210 Edge newEdge = newPartialPath.get(partialPath.size()-1);
211 newEdge.getHead().unify(eC.getNextActive());
212 newPartialPath.add(eC);
213 collectActivePaths(startPos, newPartialPath, paths);
214 } catch (UnificationFailedException ex) {
215 continue;
216 }
217 }
218 }
219
220 private ArrayList<Edge> copyEdgeList(ArrayList<Edge> edgeList) {
221 ArrayList<Edge> edgeListCopy = new ArrayList<Edge>();
222 HashMap<Integer, StringEntity> entities = new HashMap<Integer, StringEntity>();
223 for (Edge edge : edgeList) {
224 edgeListCopy.add(edge.deepCopy(entities));
225 }
226 return edgeListCopy;
227 }
228
229 private void init(Nonterminal category) {
230 for (Rule rule : grammar.getRulesByHeadName(category.getName())) {
231 try {
232 Nonterminal categoryC = (Nonterminal) category.deepCopy();
233 Rule ruleC = rule.deepCopy();
234 categoryC.unify(ruleC.getHead());
235 Edge edge = new Edge(0, 0, ruleC.getHead(), ruleC.getBody(), ruleC.isAccessible());
236 chart.addEdge(edge);
237 if (debug) System.err.println("INIT: " + rule + " >>> " + edge);
238 } catch (UnificationFailedException ex) {
239 continue;
240 }
241 }
242 }
243
244 private void completeAndPredict() {
245 int c = 0;
246 do {
247 complete();
248 c = predict();
249 } while (c > 0);
250 }
251
252 private int predict() {
253 int count = 0;
254 for (Edge e : chart.getEdgesByEndPos(tokens.size(), true)) {
255 Category cat = e.getNextActive();
256 if (cat == null) continue;
257
258 if (debug) System.err.println("PREDICT FOR EDGE: " + e);
259 predict(cat);
260 }
261 for (Rule rule : grammar.getEpsilonRules()) {
262 Edge edge = new Edge(tokens.size(), tokens.size(), rule.getHead(), rule.isAccessible());
263 boolean newEdge = chart.addEdge(edge);
264 if (newEdge) count++;
265 if (debug) System.err.println("EDGE FOR EPSILON RULE: " + edge);
266 }
267 return count;
268 }
269
270 private void predict(Category category) {
271 for (Rule rule : grammar.getRulesByHeadName(category.getName())) {
272 if (rule.hasEmptyBody()) continue;
273
274 try {
275 Category categoryC = category.deepCopy();
276 Rule ruleC = rule.deepCopy();
277 categoryC.unify(ruleC.getHead());
278 int p = tokens.size();
279 Edge edge = new Edge(p, p, ruleC.getHead(), ruleC.getBody(), ruleC.isAccessible());
280 if (debug) System.err.println("PREDICTOR: " + rule + " >>> " + edge);
281 boolean newEdge = chart.addEdge(edge);
282 if (newEdge) {
283 if (debug) System.err.println("PREDICT FOR EDGE: " + edge);
284 predict(edge.getNextActive());
285 }
286 } catch (UnificationFailedException ex) {
287 continue;
288 }
289 }
290 }
291
292 private void complete() {
293 for (Edge e : chart.getEdgesByEndPosAndActCat(tokens.size(), null, true)) {
294 if (debug) System.err.println("COMPLETE FOR EDGE: " + e);
295 complete(e.getStartPos(), e);
296 }
297 }
298
299 private void complete(int pos, Edge passiveEdge) {
300 Category category = passiveEdge.getHead();
301
302 for (Edge edge : chart.getEdgesByEndPosAndActCat(pos, category.getName(), pos == tokens.size())) {
303 if (!edge.isActive()) continue;
304
305 try {
306 Category categoryC = category.deepCopy();
307 Edge edgeC = edge.deepCopy();
308 categoryC.unify(edgeC.getNextActive());
309 edgeC.step(tokens.size(), passiveEdge);
310 if (debug) System.err.println("COMPLETOR: " + edge + " >>> " + edgeC);
311 chart.addEdge(edgeC);
312 if (!edgeC.isActive()) {
313 complete(edgeC.getStartPos(), edgeC);
314 }
315 } catch (UnificationFailedException ex) {
316 continue;
317 }
318 }
319 }
320
321 }