001 // This file is part of the Attempto Java Packages. 002 // Copyright 2008-2009, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch). 003 // 004 // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the 005 // terms of the GNU Lesser General Public License as published by the Free Software Foundation, 006 // either version 3 of the License, or (at your option) any later version. 007 // 008 // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY 009 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 010 // PURPOSE. See the GNU Lesser General Public License for more details. 011 // 012 // You should have received a copy of the GNU Lesser General Public License along with the Attempto 013 // Java Packages. If not, see http://www.gnu.org/licenses/. 014 015 package ch.uzh.ifi.attempto.chartparser; 016 017 import java.util.ArrayList; 018 import java.util.List; 019 020 /** 021 * This is a chart parser (concretely an Earley parser) fully implemented in Java. However, there is a 022 * Prolog-based format called "Codeco" that can be transformed into Java 023 * (at compile time). 024 * 025 * @author Tobias Kuhn 026 * @see Grammar 027 */ 028 public class ChartParser { 029 030 private final Grammar grammar; 031 private final Chart chart = new Chart(); 032 private final ArrayList<Terminal[]> tokens = new ArrayList<Terminal[]>(); 033 private boolean debug; 034 035 /** 036 * Creates a new chart parser for the given grammar. The grammar must not be changed afterwards. 037 * 038 * @param grammar The grammar to be used by the chart parser. 039 */ 040 public ChartParser(Grammar grammar) { 041 this.grammar = grammar; 042 init(); 043 completeAndPredict(); 044 } 045 046 /** 047 * This method can be used to switch on/off debug mode (default is off). In debug mode, messages about the actions 048 * of the chart parser are printed onto the standard error device. 049 * 050 * @param debug true to switch on debug mode or false to switch it off. 051 */ 052 public void debug(boolean debug) { 053 this.debug = debug; 054 } 055 056 /** 057 * Adds the token to the token sequence and makes one more parsing step to process it. If several tokens 058 * are given then these tokens are treated in a parallel way representing a token that can have multiple 059 * forms. 060 * 061 * @param token The token to be added to the token sequence. 062 */ 063 public void addToken(Terminal... token) { 064 tokens.add(token); 065 for (Terminal t : token) { 066 t.setFeature("id", tokens.size() + ""); 067 Edge edge = new Edge(tokens.size()-1, tokens.size(), t, true); 068 chart.addEdge(edge); 069 if (debug) log("SCANNER: " + edge + "\n"); 070 } 071 completeAndPredict(); 072 //if (debug) log("CHART:"); 073 //if (debug) log(chart); 074 } 075 076 /** 077 * Adds the tokens to the token sequence and processes them. 078 * 079 * @param tokens The tokens to be added to the token sequence. 080 */ 081 public void addTokens(List<Terminal> tokens) { 082 for (Terminal t : tokens) { 083 addToken(t); 084 } 085 } 086 087 /** 088 * Removes the last token and reverts the last parsing step. 089 */ 090 public void removeToken() { 091 chart.removeEdgesWithEndPos(tokens.size()); 092 tokens.remove(tokens.size()-1); 093 } 094 095 /** 096 * Removes all tokens in the current token sequence and resets the chart. 097 */ 098 public void removeAllTokens() { 099 tokens.clear(); 100 chart.clear(); 101 init(); 102 completeAndPredict(); 103 } 104 105 /** 106 * Returns the current token sequence. 107 * 108 * @return The current token sequence. 109 */ 110 public List<Terminal[]> getTokens() { 111 return new ArrayList<Terminal[]>(tokens); 112 } 113 114 /** 115 * This method looks ahead and returns the restrictions at least one of which the next token 116 * has to fulfill. 117 * 118 * @return The restrictions. 119 */ 120 public List<Restriction> getNextTokenRestrictions() { 121 if (debug) log("LOOKING FORWARD:"); 122 123 ArrayList<Edge> edges = new ArrayList<Edge>(); 124 for (Edge e : chart.getEdgesByEndPos(tokens.size())) { 125 if (!e.isActive()) continue; 126 if (!(e.getNextActive() instanceof Terminal)) continue; 127 128 if (e.getApplicableBackref() != null) { 129 // For edges with backwards references, the possible bindings have to be performed: 130 for (int i = 0 ; i < e.getExternalAnteList().length ; i++) { 131 Edge eC = e.deepCopy(); 132 try { 133 eC.getExternalAnteList()[i].unify(eC.getApplicableBackref()); 134 edges.add(eC); 135 } catch (UnificationFailedException ex) {} 136 } 137 } else { 138 edges.add(e); 139 } 140 } 141 142 ArrayList<Restriction> restrictions = new ArrayList<Restriction>(); 143 for (Edge e : edges) { 144 Terminal t = (Terminal) e.getNextActive(); 145 List<Terminal> exceptions = new ArrayList<Terminal>(); 146 if (e.getApplicableNegbackref() != null) { 147 // Edges with negative backwards references lead to exceptions: 148 for (int i = 0 ; i < e.getExternalAnteList().length ; i++) { 149 Edge eC = e.deepCopy(); 150 try { 151 eC.getExternalAnteList()[i].unify(eC.getApplicableNegbackref()); 152 exceptions.add((Terminal) eC.getNextActive()); 153 } catch (UnificationFailedException ex) {} 154 } 155 } 156 restrictions.add(new Restriction((Terminal) t.deepCopy(), exceptions)); 157 if (debug) log(" " + t); 158 } 159 if (debug) log("\n"); 160 161 return restrictions; 162 } 163 164 private void init() { 165 Category startCat = grammar.getStartCategory(); 166 for (Rule rule : grammar.getRulesByHeadName(startCat.getName())) { 167 try { 168 if (!startCat.isSimilar(rule.getHead())) continue; 169 Nonterminal categoryC = (Nonterminal) startCat.deepCopy(); 170 Rule ruleC = rule.deepCopy(); 171 categoryC.unify(ruleC.getHead()); 172 Edge edge = new Edge(0, 0, ruleC.getHead(), ruleC.getBody(), ruleC.isAccessible()); 173 chart.addEdge(edge); 174 if (debug) log("INIT: " + rule + " ---> " + edge + "\n"); 175 } catch (UnificationFailedException ex) { 176 continue; 177 } 178 } 179 } 180 181 private void completeAndPredict() { 182 // Process the epsilon rules: 183 for (Rule rule : grammar.getEpsilonRules()) { 184 // Epsilon edges get no antecedent lists (they cannot have references): 185 Edge edge = new Edge(tokens.size(), tokens.size(), rule.getHead(), rule.isAccessible()); 186 boolean isNewEdge = chart.addEdge(edge); 187 if (debug) log("EDGE FOR EPSILON RULE (" + (isNewEdge ? "NEW" : "KNOWN") + "): " + edge + "\n"); 188 } 189 190 // Run completion/predition until neither of them generates a new edge: 191 int s = 0; 192 while (chart.getSize() > s) { 193 s = chart.getSize(); 194 complete(); 195 resolve(); 196 predict(); 197 } 198 } 199 200 private void predict() { 201 ArrayList<Edge> l = chart.getEdgesByEndPos(tokens.size()); 202 for (int i = 0 ; i < l.size() ; i++) { 203 // During this loop, elements might be added to the end of the list l. 204 205 Edge existingEdge = l.get(i); 206 Category category = existingEdge.getNextActive(); 207 if (category == null) continue; 208 if (debug) log("PREDICTION FOR CATEGORY: " + category + "\n"); 209 210 for (Rule rule : grammar.getRulesByHeadName(category.getName())) { 211 if (rule.hasEmptyBody()) continue; 212 213 try { 214 if (!category.isSimilar(rule.getHead())) continue; 215 Category categoryC = category.deepCopy(); 216 Rule ruleC = rule.deepCopy(); 217 categoryC.unify(ruleC.getHead()); 218 int p = tokens.size(); 219 Edge edge = new Edge(p, ruleC, existingEdge.getCombinedAnteList()); 220 boolean isNewEdge = chart.addEdge(edge); 221 if (debug) log("PREDICT (" + (isNewEdge ? "NEW" : "KNOWN") + "): " + rule + " ---> " + edge + "\n"); 222 } catch (UnificationFailedException ex) { 223 continue; 224 } 225 } 226 } 227 } 228 229 private void complete() { 230 ArrayList<Edge> l1 = chart.getEdgesByEndPosAndActCat(tokens.size(), null); 231 for (int i1 = 0 ; i1 < l1.size() ; i1++) { 232 // During this loop, elements might be added to the end of the list l1. 233 234 Edge passiveEdge = l1.get(i1); 235 if (debug) log("COMPLETION FOR EDGE: " + passiveEdge + "\n"); 236 237 ArrayList<Edge> l2 = chart.getEdgesByEndPosAndActCat(passiveEdge.getStartPos(), passiveEdge.getHead().getName()); 238 for (int i2 = 0 ; i2 < l2.size() ; i2++) { 239 // During this loop, elements might be added to the end of the list l2. 240 241 Edge edge = l2.get(i2); 242 if (!edge.isActive()) continue; 243 244 try { 245 if (!passiveEdge.getHead().isSimilar(edge.getNextActive())) continue; 246 Edge passiveEdgeC = passiveEdge.deepCopy(); 247 Edge edgeC = edge.deepCopy(); 248 passiveEdgeC.getHead().unify(edgeC.getNextActive()); 249 if (!passiveEdge.hasEmptyBody()) { 250 // Antecedent lists have to match (except for the case of epsilon edges): 251 FeatureMap[] al1 = edgeC.getCombinedAnteList(); 252 FeatureMap[] al2 = passiveEdgeC.getExternalAnteList(); 253 if (al1.length != al2.length) throw new UnificationFailedException(); 254 for (int i = 0 ; i < al1.length ; i++) { 255 al1[i].unify(al2[i]); 256 } 257 } 258 edgeC.step(tokens.size(), passiveEdgeC); 259 boolean isNewEdge = chart.addEdge(edgeC); 260 if (debug) log("COMPLETE (" + (isNewEdge ? "NEW" : "KNOWN") + "): " + edge + " ---> " + edgeC + "\n"); 261 } catch (UnificationFailedException ex) { 262 continue; 263 } 264 } 265 } 266 } 267 268 private void resolve() { 269 ArrayList<Edge> l1 = chart.getEdgesByEndPos(tokens.size()); 270 for (int i1 = 0 ; i1 < l1.size() ; i1++) { 271 // During this loop, elements might be added to the end of the list l1. 272 273 Edge edge = l1.get(i1); 274 if (!edge.isActive()) continue; 275 276 String n = edge.getNextActive().getName(); 277 Edge edgeC = null; 278 if (n.equals(">>>")) { 279 edgeC = edge.deepCopy(); 280 edgeC.addAntecedents(edgeC.getNextActive().getFeatureMap()); 281 } else if (n.equals("<<<")) { 282 FeatureMap[] ante = edge.getCombinedAnteList(); 283 for (int i = ante.length-1 ; i >= 0 ; i--) { 284 if (ante[i].canUnify(edge.getNextActive().getFeatureMap())) { 285 try { 286 edgeC = edge.deepCopy(); 287 edgeC.getExternalAnteList()[i].unify(edgeC.getNextActive().getFeatureMap()); 288 break; 289 } catch (UnificationFailedException ex) { 290 edgeC = null; 291 } 292 } 293 } 294 } else if (n.equals("</<")) { 295 edgeC = edge.deepCopy(); 296 for (FeatureMap f : edge.getCombinedAnteList()) { 297 // TODO: This check is probably not sufficient: 298 if (f.canUnify(edge.getNextActive().getFeatureMap())) { 299 edgeC = null; 300 break; 301 } 302 } 303 } else { 304 continue; 305 } 306 307 if (edgeC == null) { 308 if (debug) log("CANNOT RESOLVE: " + edge + "\n"); 309 } else { 310 edgeC.step(); 311 boolean isNewEdge = chart.addEdge(edgeC); 312 if (debug) log("RESOLVE (" + (isNewEdge ? "NEW" : "KNOWN") + "): " + edge + " ---> " + edgeC + "\n"); 313 } 314 } 315 } 316 317 private void log(String text) { 318 System.err.print(text); 319 } 320 321 }