001 // This file is part of AceWiki. 002 // Copyright 2008-2012, AceWiki developers. 003 // 004 // AceWiki is free software: you can redistribute it and/or modify it under the terms of the GNU 005 // Lesser General Public License as published by the Free Software Foundation, either version 3 of 006 // the License, or (at your option) any later version. 007 // 008 // AceWiki is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without 009 // even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 010 // Lesser General Public License for more details. 011 // 012 // You should have received a copy of the GNU Lesser General Public License along with AceWiki. If 013 // not, see http://www.gnu.org/licenses/. 014 015 package ch.uzh.ifi.attempto.chartparser; 016 017 import java.util.ArrayList; 018 import java.util.HashMap; 019 import java.util.List; 020 import java.util.Map; 021 022 /** 023 * This class represents the parse tree of a successfully parsed text. 024 * 025 * @author Tobias Kuhn 026 */ 027 public class ParseTree { 028 029 private ParseTreeNode topNode; 030 private String lamFunctor = "lam"; 031 private String appFunctor = "app"; 032 private String concatFunctor = "+"; 033 private String semLabel = "sem"; 034 035 /** 036 * Creates a new parse tree object. 037 * 038 * @param topNode The top node. 039 */ 040 ParseTree(Edge edge) { 041 this.topNode = new ParseTreeNode(edge.deepCopy(true)); 042 } 043 044 private ParseTree(ParseTreeNode topNode) { 045 this.topNode = topNode; 046 } 047 048 private ParseTree createParseTree(ParseTreeNode topNode) { 049 ParseTree newParseTree = new ParseTree(topNode); 050 newParseTree.lamFunctor = lamFunctor; 051 newParseTree.appFunctor = appFunctor; 052 newParseTree.concatFunctor = concatFunctor; 053 newParseTree.semLabel = semLabel; 054 return newParseTree; 055 } 056 057 /** 058 * Returns the start position of this tree. This can be relevant for subtrees. 059 * 060 * @return The start position. 061 */ 062 public int getStartPos() { 063 return topNode.getStartPos(); 064 } 065 066 /** 067 * Returns the end position of this tree. This can be relevant for subtrees. 068 * 069 * @return The end position. 070 */ 071 public int getEndPos() { 072 return topNode.getEndPos(); 073 } 074 075 /** 076 * Sets the name of the annotation item that contains the semantics information. The default is 077 * "sem". 078 * 079 * @param semLabel The name of the annotation item containing the semantics. 080 */ 081 public void setSemanticsLabel(String semLabel) { 082 this.semLabel = semLabel; 083 } 084 085 /** 086 * Sets the functor of the lambda function for the calculation of lambda semantics. The default 087 * is "lam". 088 * 089 * @param lamFunctor The lambda functor. 090 */ 091 public void setLambdaFunctor(String lamFunctor) { 092 this.lamFunctor = lamFunctor; 093 } 094 095 /** 096 * Sets the functor of the application function for the calculation of lambda semantics. The 097 * default is "app". 098 * 099 * @param appFunctor The application functor. 100 */ 101 public void setApplicationFunctor(String appFunctor) { 102 this.appFunctor = appFunctor; 103 } 104 105 /** 106 * Sets the functor of the concatenation function for the calculation of semantics. The default 107 * is "+". Terms using the concatenation functor are flattened. The functor can be set to null 108 * to avoid flattening. 109 * 110 * @param concatFunctor The concatentation functor. 111 */ 112 public void setConcatFunctor(String concatFunctor) { 113 this.concatFunctor = concatFunctor; 114 } 115 116 /** 117 * Returns the top node of the parse tree. 118 * 119 * @return The top node of the parse tree. 120 */ 121 public ParseTreeNode getTopNode() { 122 return topNode; 123 } 124 125 /** 126 * Returns the syntax tree. The leaves of the tree are objects of Category. All other nodes are 127 * arrays of Object containing the child nodes. 128 * 129 * @return The syntax tree. 130 */ 131 public Object getSynTree() { 132 return getSynTree(topNode); 133 } 134 135 private Object getSynTree(ParseTreeNode n) { 136 Category h = n.getCategory(); 137 List<ParseTreeNode> c = n.getChildren(); 138 Object[] o = new Object[c.size()+1]; 139 o[0] = h; 140 for (int i = 0 ; i < c.size() ; i++) { 141 o[i+1] = getSynTree(c.get(i)); 142 } 143 return o; 144 } 145 146 /** 147 * Returns a serialization of the syntax tree. 148 * 149 * @return A serialization of the syntax tree. 150 */ 151 public String getSerializedSynTree() { 152 return serializeStructure(getSynTree()); 153 } 154 155 /** 156 * Returns an ASCII representation of the syntax tree. 157 * 158 * @return An ASCII representation of the syntax tree. 159 */ 160 public String getAsciiSynTree() { 161 return structureToAsciiTree(getSynTree(), 0); 162 } 163 164 /** 165 * Returns the semantics tree. The semantics are retrieved from the annotations of the grammar 166 * rules. The leaves of the tree are objects of String or StringRef. All other nodes are arrays 167 * of Object containing the child nodes. 168 * 169 * @return The semantics tree. 170 */ 171 public Object getSemTree() { 172 Object o = getSemTree(topNode); 173 if (concatFunctor != null) { 174 o = applyConcatenation(o); 175 } 176 return o; 177 } 178 179 private Object getSemTree(ParseTreeNode n) { 180 Object structure = n.getAnnotation().getItem(semLabel); 181 if (structure == null) return null; 182 for (ParseTreeNode c : n.getChildren()) { 183 Object o = getSemTree(c); 184 if (o != null) { 185 structure = new Object[] {appFunctor, structure, o}; 186 } 187 } 188 return structure; 189 } 190 191 /** 192 * Returns a serialization of the semantics tree. 193 * 194 * @return A serialization of the semantics tree. 195 */ 196 public String getSerializedSemTree() { 197 return serializeStructure(getSemTree()); 198 } 199 200 /** 201 * Returns an ASCII representation of the semantics tree. 202 * 203 * @return An ASCII representation of the semantics tree. 204 */ 205 public String getAsciiSemTree() { 206 return structureToAsciiTree(getSemTree(), 0); 207 } 208 209 /** 210 * Returns the semantics tree, interpreted as a lambda expression. The returned tree is beta- 211 * reduced. The semantics are retrieved from the annotations of the grammar rules. The leaves 212 * of the tree are objects of String or StringRef. All other nodes are arrays of Object 213 * containing the child nodes. 214 * 215 * @return The beta-reduced semantics tree. 216 */ 217 public Object getLambdaSemTree() { 218 Object o = getSemTree(topNode); 219 Map<Integer, Object> replace = new HashMap<Integer, Object>(); 220 replace.put(-1, ""); 221 while (replace.containsKey(-1)) { 222 replace.remove(-1); 223 o = applyBetaReduction(o, replace); 224 } 225 if (concatFunctor != null) { 226 o = applyConcatenation(o); 227 } 228 return o; 229 } 230 231 /** 232 * Returns a serialization of the semantics tree under lambda interpretation. 233 * 234 * @return A serialization of the lambda semantics tree. 235 */ 236 public String getSerializedLambdaSemTree() { 237 return serializeStructure(getLambdaSemTree()); 238 } 239 240 /** 241 * Returns an ASCII representation of the semantics tree under lambda interpretation. 242 * 243 * @return An ASCII representation of the lambda semantics tree. 244 */ 245 public String getAsciiLambdaSemTree() { 246 return structureToAsciiTree(getLambdaSemTree(), 0); 247 } 248 249 /** 250 * Returns all subtrees that have the given category name as their top node. In the case of 251 * nested nodes with the given category, only the top-most subtree (containing all other 252 * potential subtrees) is returned. 253 * 254 * @param categoryName The category name. 255 * @return A list of all matching subtrees. 256 */ 257 public List<ParseTree> getSubTrees(String categoryName) { 258 List<ParseTreeNode> topNodeList = new ArrayList<ParseTreeNode>(); 259 topNodeList.add(topNode); 260 List<ParseTree> subTrees = new ArrayList<ParseTree>(); 261 collectSubTrees(categoryName, topNodeList, subTrees); 262 return subTrees; 263 } 264 265 private void collectSubTrees(String categoryName, List<ParseTreeNode> nodes, 266 List<ParseTree> subTrees) { 267 for (ParseTreeNode n : nodes) { 268 if (n.getCategory().getName().equals(categoryName)) { 269 subTrees.add(createParseTree(n)); 270 } else { 271 collectSubTrees(categoryName, n.getChildren(), subTrees); 272 } 273 } 274 } 275 276 /** 277 * Returns the list of terminals of this tree. 278 * 279 * @return The list of terminals. 280 */ 281 public List<Terminal> getTerminals() { 282 return topNode.getTerminals(); 283 } 284 285 private Object applyBetaReduction(Object obj, Map<Integer, Object> replace) { 286 // TODO improve this (just one run, see Blackburn & Bos) 287 if (obj == null) { 288 return null; 289 } else if (obj instanceof String) { 290 return obj; 291 } else if (obj instanceof StringRef) { 292 StringRef sr = (StringRef) obj; 293 if (replace.containsKey(sr.getID())) { 294 replace.put(-1, ""); 295 return applyBetaReduction(replace.get(sr.getID()), replace); 296 } else { 297 return obj; 298 } 299 } else if (obj instanceof Object[]) { 300 Object[] a = (Object[]) obj; 301 if (a.length == 0) { 302 return obj; 303 } 304 Object[] c = new Object[a.length]; 305 for (int i = 0 ; i < a.length ; i++) { 306 c[i] = applyBetaReduction(a[i], replace); 307 } 308 if (a.length == 3 && appFunctor.equals(a[0]) && a[1] instanceof Object[]) { 309 Object[] l = (Object[]) a[1]; 310 if (l.length == 3 && lamFunctor.equals(l[0]) && l[1] instanceof StringRef) { 311 replace.put(((StringRef) l[1]).getID(), a[2]); 312 replace.put(-1, ""); 313 return applyBetaReduction(l[2], replace); 314 } 315 } 316 return c; 317 } 318 return obj; 319 } 320 321 private String serializeStructure(Object obj) { 322 if (obj instanceof Object[]) { 323 Object[] a = (Object[]) obj; 324 if (a.length == 0) { 325 return "*empty*"; 326 } else if (a.length == 1) { 327 return elementToString(a[0]); 328 } else { 329 String s = elementToString(a[0]) + "("; 330 for (int i = 1 ; i < a.length ; i++) { 331 s += serializeStructure(a[i]) + ", "; 332 } 333 if (s.endsWith(", ")) s = s.substring(0, s.length()-2); 334 return s + ")"; 335 } 336 } else { 337 return elementToString(obj); 338 } 339 } 340 341 private String structureToAsciiTree(Object obj, int tab) { 342 String t = ""; 343 for (int i=0 ; i < tab ; i++) t += " "; 344 if (obj instanceof Object[]) { 345 Object[] a = (Object[]) obj; 346 if (a.length == 0) { 347 t += "*empty*\n"; 348 } else { 349 t += elementToString(a[0]) + "\n"; 350 for (int i = 1 ; i < a.length ; i++) { 351 t += structureToAsciiTree(a[i], tab+1); 352 } 353 } 354 } else { 355 return t + elementToString(obj) + "\n"; 356 } 357 return t; 358 } 359 360 private String elementToString(Object obj) { 361 if (obj == null) { 362 return "*null*"; 363 } else if (obj instanceof String) { 364 String s = (String) obj; 365 if (s.matches("[a-zA-Z0-9_]+")) { 366 return s; 367 } else { 368 return "'" + s + "'"; 369 } 370 } else if (obj instanceof StringRef) { 371 StringRef sr = (StringRef) obj; 372 if (sr.getString() == null) { 373 return "?" + sr.getID(); 374 } else { 375 return elementToString(sr.getString()); 376 } 377 } else if (obj instanceof Terminal) { 378 return obj.toString(); 379 } else if (obj instanceof Preterminal) { 380 return "$" + ((Preterminal) obj).getName(); 381 } else if (obj instanceof Category) { 382 return ((Category) obj).getName(); 383 } 384 return "*invalid*"; 385 } 386 387 private Object applyConcatenation(Object obj) { 388 if (isConcatFunction(obj)) { 389 Object[] a = (Object[]) obj; 390 List<Object> cl = new ArrayList<Object>(); 391 for (int i = 1 ; i < a.length ; i++) { 392 Object ci = applyConcatenation(a[i]); 393 if (isConcatFunction(ci)) { 394 Object[] ai = (Object[]) ci; 395 for (int j = 1 ; j < ai.length ; j++) { 396 addFlat(cl, ai[j]); 397 } 398 } else if (!"".equals(ci)) { 399 addFlat(cl, ci); 400 } 401 } 402 if (cl.size() == 0) { 403 return ""; 404 } else if (cl.size() == 1) { 405 return cl.get(0); 406 } else { 407 cl.add(0, concatFunctor); 408 return cl.toArray(); 409 } 410 } else if (obj instanceof Object[]) { 411 Object[] a = (Object[]) obj; 412 Object[] c = new Object[a.length]; 413 for (int i = 0 ; i < a.length ; i++) { 414 c[i] = applyConcatenation(a[i]); 415 } 416 return c; 417 } else { 418 return obj; 419 } 420 } 421 422 private boolean isConcatFunction(Object obj) { 423 if (!(obj instanceof Object[])) return false; 424 Object[] a = (Object[]) obj; 425 if (a.length == 0) return false; 426 if (!(a[0] instanceof String)) return false; 427 if (!a[0].toString().equals(concatFunctor)) return false; 428 return true; 429 } 430 431 private void addFlat(List<Object> list, Object obj) { 432 if (obj instanceof StringRef && ((StringRef) obj).getString() != null) { 433 obj = ((StringRef) obj).getString(); 434 } 435 if (list.isEmpty() || !(obj instanceof String)) { 436 list.add(obj); 437 } else { 438 if (list.get(list.size()-1) instanceof String) { 439 String s = (String) list.remove(list.size()-1); 440 list.add(s + obj); 441 } else { 442 list.add(obj); 443 } 444 } 445 } 446 447 }