001 // This file is part of the Attempto Java Packages. 002 // Copyright 2008, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch). 003 // 004 // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the 005 // terms of the GNU Lesser General Public License as published by the Free Software Foundation, 006 // either version 3 of the License, or (at your option) any later version. 007 // 008 // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY 009 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 010 // PURPOSE. See the GNU Lesser General Public License for more details. 011 // 012 // You should have received a copy of the GNU Lesser General Public License along with the Attempto 013 // Java Packages. If not, see http://www.gnu.org/licenses/. 014 015 package ch.uzh.ifi.attempto.chartparser; 016 017 import java.util.ArrayList; 018 import java.util.HashMap; 019 import java.util.List; 020 021 /** 022 * This is a chart parser (concretely an Earley parser) fully implemented in Java. However, there is a 023 * Prolog format ("Attempto Chartparser Grammar Notation" or "ACGN") that can be transformed into Java 024 * (at compile time). 025 * 026 * @author Tobias Kuhn 027 * @see Grammar 028 */ 029 public class ChartParser { 030 031 private final Grammar grammar; 032 private final Chart chart = new Chart(); 033 private final ArrayList<Terminal> tokens = new ArrayList<Terminal>(); 034 private boolean debug; 035 036 /** 037 * Creates a new chart parser for the given grammar. The grammar must not be changed afterwards. 038 * 039 * @param grammar The grammar to be used by the chart parser. 040 */ 041 public ChartParser(Grammar grammar) { 042 this.grammar = grammar; 043 init(grammar.getStartCategory()); 044 completeAndPredict(); 045 } 046 047 /** 048 * This method can be used to switch on/off debug mode (default is off). In debug mode, messages about the actions 049 * of the chart parser are printed onto the standard error device. 050 * 051 * @param debug true to switch on debug mode or false to switch it off. 052 */ 053 public void debug(boolean debug) { 054 this.debug = debug; 055 } 056 057 /** 058 * Adds the token to the token sequence and makes one more parsing step to process it. 059 * 060 * @param token The token to be added to the token sequence. 061 */ 062 public void addToken(String token) { 063 Terminal t = new Terminal(token); 064 tokens.add(t); 065 Edge edge = new Edge(tokens.size()-1, tokens.size(), t, true); 066 chart.addEdge(edge); 067 if (debug) System.err.println("SCANNER: " + edge); 068 completeAndPredict(); 069 } 070 071 /** 072 * Adds the tokens to the token sequence and processes them. 073 * 074 * @param tokens The tokens to be added to the token sequence. 075 */ 076 public void addTokens(List<String> tokens) { 077 for (String s : tokens) { 078 addToken(s); 079 } 080 } 081 082 /** 083 * Removes the last token and reverts the last parsing step. 084 */ 085 public void removeToken() { 086 chart.removeEdgesWithEndPos(tokens.size()); 087 tokens.remove(tokens.size()-1); 088 } 089 090 /** 091 * Removes all tokens in the current token sequence and resets the chart. 092 */ 093 public void removeAllTokens() { 094 tokens.clear(); 095 chart.clear(); 096 init(grammar.getStartCategory()); 097 completeAndPredict(); 098 } 099 100 /** 101 * Returns the current token sequence. 102 * 103 * @return The current token sequence. 104 */ 105 public List<Terminal> getTokens() { 106 return new ArrayList<Terminal>(tokens); 107 } 108 109 /** 110 * Returns all tokens that are allowed to follow the current token sequence according to the grammar. 111 * 112 * @return The possible next tokens. 113 */ 114 public List<Terminal> nextTokens() { 115 ArrayList<Terminal> terminals = new ArrayList<Terminal>(); 116 if (debug) System.err.print("LOOKING FORWARD:"); 117 for (Edge e : chart.getEdgesByEndPos(tokens.size(), true)) { 118 if (!e.isActive()) continue; 119 if (!(e.getNextActive() instanceof Terminal)) continue; 120 121 Terminal t = (Terminal) e.getNextActive(); 122 if (!terminals.contains(t)) { 123 if (debug) System.err.print(" " + t); 124 terminals.add(t); 125 } 126 } 127 if (debug) System.err.println(); 128 129 return terminals; 130 } 131 132 /** 133 * Returns a boolean array that describes which of the tokens of the current token sequence are 134 * accessible (true) and which are not (false) for the given next token. 135 * 136 * @param nextToken The next token for which the tokens should be checked for accessibility. 137 * @return A boolean array where each element stands for one token of the token sequence. 138 */ 139 public boolean[] getAccessiblePositions(String nextToken) { 140 boolean[] pos = new boolean[tokens.size()]; 141 ArrayList<ArrayList<Edge>> paths = new ArrayList<ArrayList<Edge>>(); 142 143 for (Edge e : chart.getEdgesByEndPosAndActCat(tokens.size(), nextToken, false)) { 144 if (!e.isActive()) continue; 145 146 ArrayList<Edge> partialPath = new ArrayList<Edge>(); 147 partialPath.add(e.deepCopy()); 148 collectActivePaths(0, partialPath, paths); 149 } 150 151 for (ArrayList<Edge> path : paths) { 152 for (Edge edge : path) { 153 scanForAccessiblePositions(edge, pos, new ArrayList<Edge>()); 154 } 155 } 156 157 return pos; 158 } 159 160 private void scanForAccessiblePositions(Edge edge, boolean[] pos, ArrayList<Edge> visitedEdges) { 161 if (edge.getStartPos() == edge.getEndPos()) return; 162 if (edge.getBody().length == 0 && edge.getHead() instanceof Terminal) { 163 pos[edge.getStartPos()] = true; 164 return; 165 } 166 167 for (Edge e : edge.getLinks()) { 168 if (!e.isAccessible()) continue; 169 if (visitedEdges.contains(e)) continue; 170 visitedEdges.add(e); 171 scanForAccessiblePositions(e, pos, visitedEdges); 172 } 173 } 174 175 private void collectActivePaths(int startPos, ArrayList<Edge> partialPath, ArrayList<ArrayList<Edge>> paths) { 176 Edge edge = partialPath.get(partialPath.size()-1); 177 if (edge.getStartPos() == startPos) { 178 paths.add(partialPath); 179 return; 180 } 181 if (edge.getStartPos() < startPos) { 182 return; 183 } 184 ArrayList<Edge> edgesToCheck = new ArrayList<Edge>(); 185 for (Edge e : chart.getEdgesByEndPosAndActCat(edge.getStartPos(), edge.getHead().getName(), false)) { 186 Category[] newBody = new Category[e.getProgress()+1]; 187 System.arraycopy(e.getBody(), 0, newBody, 0, e.getProgress()+1); 188 Edge ec = new Edge(e.getStartPos(), e.getEndPos(), e.getHead(), newBody, e.getProgress(), true); 189 ec.addLinksFrom(e); 190 boolean isNew = true; 191 for (Edge ee : edgesToCheck) { 192 if (ee.subsumes(ec)) { 193 isNew = false; 194 break; 195 } 196 } 197 if (isNew) { 198 for (Edge ee : edgesToCheck) { 199 if (ec.subsumes(ee)) { 200 edgesToCheck.remove(ee); 201 } 202 } 203 edgesToCheck.add(ec); 204 } 205 } 206 for (Edge e : edgesToCheck) { 207 try { 208 Edge eC = e.deepCopy(); 209 ArrayList<Edge> newPartialPath = copyEdgeList(partialPath); 210 Edge newEdge = newPartialPath.get(partialPath.size()-1); 211 newEdge.getHead().unify(eC.getNextActive()); 212 newPartialPath.add(eC); 213 collectActivePaths(startPos, newPartialPath, paths); 214 } catch (UnificationFailedException ex) { 215 continue; 216 } 217 } 218 } 219 220 private ArrayList<Edge> copyEdgeList(ArrayList<Edge> edgeList) { 221 ArrayList<Edge> edgeListCopy = new ArrayList<Edge>(); 222 HashMap<Integer, StringEntity> entities = new HashMap<Integer, StringEntity>(); 223 for (Edge edge : edgeList) { 224 edgeListCopy.add(edge.deepCopy(entities)); 225 } 226 return edgeListCopy; 227 } 228 229 private void init(Nonterminal category) { 230 for (Rule rule : grammar.getRulesByHeadName(category.getName())) { 231 try { 232 Nonterminal categoryC = (Nonterminal) category.deepCopy(); 233 Rule ruleC = rule.deepCopy(); 234 categoryC.unify(ruleC.getHead()); 235 Edge edge = new Edge(0, 0, ruleC.getHead(), ruleC.getBody(), ruleC.isAccessible()); 236 chart.addEdge(edge); 237 if (debug) System.err.println("INIT: " + rule + " >>> " + edge); 238 } catch (UnificationFailedException ex) { 239 continue; 240 } 241 } 242 } 243 244 private void completeAndPredict() { 245 int c = 0; 246 do { 247 complete(); 248 c = predict(); 249 } while (c > 0); 250 } 251 252 private int predict() { 253 int count = 0; 254 for (Edge e : chart.getEdgesByEndPos(tokens.size(), true)) { 255 Category cat = e.getNextActive(); 256 if (cat == null) continue; 257 258 if (debug) System.err.println("PREDICT FOR EDGE: " + e); 259 predict(cat); 260 } 261 for (Rule rule : grammar.getEpsilonRules()) { 262 Edge edge = new Edge(tokens.size(), tokens.size(), rule.getHead(), rule.isAccessible()); 263 boolean newEdge = chart.addEdge(edge); 264 if (newEdge) count++; 265 if (debug) System.err.println("EDGE FOR EPSILON RULE: " + edge); 266 } 267 return count; 268 } 269 270 private void predict(Category category) { 271 for (Rule rule : grammar.getRulesByHeadName(category.getName())) { 272 if (rule.hasEmptyBody()) continue; 273 274 try { 275 Category categoryC = category.deepCopy(); 276 Rule ruleC = rule.deepCopy(); 277 categoryC.unify(ruleC.getHead()); 278 int p = tokens.size(); 279 Edge edge = new Edge(p, p, ruleC.getHead(), ruleC.getBody(), ruleC.isAccessible()); 280 if (debug) System.err.println("PREDICTOR: " + rule + " >>> " + edge); 281 boolean newEdge = chart.addEdge(edge); 282 if (newEdge) { 283 if (debug) System.err.println("PREDICT FOR EDGE: " + edge); 284 predict(edge.getNextActive()); 285 } 286 } catch (UnificationFailedException ex) { 287 continue; 288 } 289 } 290 } 291 292 private void complete() { 293 for (Edge e : chart.getEdgesByEndPosAndActCat(tokens.size(), null, true)) { 294 if (debug) System.err.println("COMPLETE FOR EDGE: " + e); 295 complete(e.getStartPos(), e); 296 } 297 } 298 299 private void complete(int pos, Edge passiveEdge) { 300 Category category = passiveEdge.getHead(); 301 302 for (Edge edge : chart.getEdgesByEndPosAndActCat(pos, category.getName(), pos == tokens.size())) { 303 if (!edge.isActive()) continue; 304 305 try { 306 Category categoryC = category.deepCopy(); 307 Edge edgeC = edge.deepCopy(); 308 categoryC.unify(edgeC.getNextActive()); 309 edgeC.step(tokens.size(), passiveEdge); 310 if (debug) System.err.println("COMPLETOR: " + edge + " >>> " + edgeC); 311 chart.addEdge(edgeC); 312 if (!edgeC.isActive()) { 313 complete(edgeC.getStartPos(), edgeC); 314 } 315 } catch (UnificationFailedException ex) { 316 continue; 317 } 318 } 319 } 320 321 }