001 // This file is part of AceWiki.
002 // Copyright 2008-2012, AceWiki developers.
003 //
004 // AceWiki is free software: you can redistribute it and/or modify it under the terms of the GNU
005 // Lesser General Public License as published by the Free Software Foundation, either version 3 of
006 // the License, or (at your option) any later version.
007 //
008 // AceWiki is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
009 // even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
010 // Lesser General Public License for more details.
011 //
012 // You should have received a copy of the GNU Lesser General Public License along with AceWiki. If
013 // not, see http://www.gnu.org/licenses/.
014
015 package ch.uzh.ifi.attempto.chartparser;
016
017 import java.util.ArrayList;
018 import java.util.HashMap;
019 import java.util.List;
020 import java.util.Map;
021
022 /**
023 * This class represents the parse tree of a successfully parsed text.
024 *
025 * @author Tobias Kuhn
026 */
027 public class ParseTree {
028
029 private ParseTreeNode topNode;
030 private String lamFunctor = "lam";
031 private String appFunctor = "app";
032 private String concatFunctor = "+";
033 private String semLabel = "sem";
034
035 /**
036 * Creates a new parse tree object.
037 *
038 * @param topNode The top node.
039 */
040 ParseTree(Edge edge) {
041 this.topNode = new ParseTreeNode(edge.deepCopy(true));
042 }
043
044 private ParseTree(ParseTreeNode topNode) {
045 this.topNode = topNode;
046 }
047
048 private ParseTree createParseTree(ParseTreeNode topNode) {
049 ParseTree newParseTree = new ParseTree(topNode);
050 newParseTree.lamFunctor = lamFunctor;
051 newParseTree.appFunctor = appFunctor;
052 newParseTree.concatFunctor = concatFunctor;
053 newParseTree.semLabel = semLabel;
054 return newParseTree;
055 }
056
057 /**
058 * Returns the start position of this tree. This can be relevant for subtrees.
059 *
060 * @return The start position.
061 */
062 public int getStartPos() {
063 return topNode.getStartPos();
064 }
065
066 /**
067 * Returns the end position of this tree. This can be relevant for subtrees.
068 *
069 * @return The end position.
070 */
071 public int getEndPos() {
072 return topNode.getEndPos();
073 }
074
075 /**
076 * Sets the name of the annotation item that contains the semantics information. The default is
077 * "sem".
078 *
079 * @param semLabel The name of the annotation item containing the semantics.
080 */
081 public void setSemanticsLabel(String semLabel) {
082 this.semLabel = semLabel;
083 }
084
085 /**
086 * Sets the functor of the lambda function for the calculation of lambda semantics. The default
087 * is "lam".
088 *
089 * @param lamFunctor The lambda functor.
090 */
091 public void setLambdaFunctor(String lamFunctor) {
092 this.lamFunctor = lamFunctor;
093 }
094
095 /**
096 * Sets the functor of the application function for the calculation of lambda semantics. The
097 * default is "app".
098 *
099 * @param appFunctor The application functor.
100 */
101 public void setApplicationFunctor(String appFunctor) {
102 this.appFunctor = appFunctor;
103 }
104
105 /**
106 * Sets the functor of the concatenation function for the calculation of semantics. The default
107 * is "+". Terms using the concatenation functor are flattened. The functor can be set to null
108 * to avoid flattening.
109 *
110 * @param concatFunctor The concatentation functor.
111 */
112 public void setConcatFunctor(String concatFunctor) {
113 this.concatFunctor = concatFunctor;
114 }
115
116 /**
117 * Returns the top node of the parse tree.
118 *
119 * @return The top node of the parse tree.
120 */
121 public ParseTreeNode getTopNode() {
122 return topNode;
123 }
124
125 /**
126 * Returns the syntax tree. The leaves of the tree are objects of Category. All other nodes are
127 * arrays of Object containing the child nodes.
128 *
129 * @return The syntax tree.
130 */
131 public Object getSynTree() {
132 return getSynTree(topNode);
133 }
134
135 private Object getSynTree(ParseTreeNode n) {
136 Category h = n.getCategory();
137 List<ParseTreeNode> c = n.getChildren();
138 Object[] o = new Object[c.size()+1];
139 o[0] = h;
140 for (int i = 0 ; i < c.size() ; i++) {
141 o[i+1] = getSynTree(c.get(i));
142 }
143 return o;
144 }
145
146 /**
147 * Returns a serialization of the syntax tree.
148 *
149 * @return A serialization of the syntax tree.
150 */
151 public String getSerializedSynTree() {
152 return serializeStructure(getSynTree());
153 }
154
155 /**
156 * Returns an ASCII representation of the syntax tree.
157 *
158 * @return An ASCII representation of the syntax tree.
159 */
160 public String getAsciiSynTree() {
161 return structureToAsciiTree(getSynTree(), 0);
162 }
163
164 /**
165 * Returns the semantics tree. The semantics are retrieved from the annotations of the grammar
166 * rules. The leaves of the tree are objects of String or StringRef. All other nodes are arrays
167 * of Object containing the child nodes.
168 *
169 * @return The semantics tree.
170 */
171 public Object getSemTree() {
172 Object o = getSemTree(topNode);
173 if (concatFunctor != null) {
174 o = applyConcatenation(o);
175 }
176 return o;
177 }
178
179 private Object getSemTree(ParseTreeNode n) {
180 Object structure = n.getAnnotation().getItem(semLabel);
181 if (structure == null) return null;
182 for (ParseTreeNode c : n.getChildren()) {
183 Object o = getSemTree(c);
184 if (o != null) {
185 structure = new Object[] {appFunctor, structure, o};
186 }
187 }
188 return structure;
189 }
190
191 /**
192 * Returns a serialization of the semantics tree.
193 *
194 * @return A serialization of the semantics tree.
195 */
196 public String getSerializedSemTree() {
197 return serializeStructure(getSemTree());
198 }
199
200 /**
201 * Returns an ASCII representation of the semantics tree.
202 *
203 * @return An ASCII representation of the semantics tree.
204 */
205 public String getAsciiSemTree() {
206 return structureToAsciiTree(getSemTree(), 0);
207 }
208
209 /**
210 * Returns the semantics tree, interpreted as a lambda expression. The returned tree is beta-
211 * reduced. The semantics are retrieved from the annotations of the grammar rules. The leaves
212 * of the tree are objects of String or StringRef. All other nodes are arrays of Object
213 * containing the child nodes.
214 *
215 * @return The beta-reduced semantics tree.
216 */
217 public Object getLambdaSemTree() {
218 Object o = getSemTree(topNode);
219 Map<Integer, Object> replace = new HashMap<Integer, Object>();
220 replace.put(-1, "");
221 while (replace.containsKey(-1)) {
222 replace.remove(-1);
223 o = applyBetaReduction(o, replace);
224 }
225 if (concatFunctor != null) {
226 o = applyConcatenation(o);
227 }
228 return o;
229 }
230
231 /**
232 * Returns a serialization of the semantics tree under lambda interpretation.
233 *
234 * @return A serialization of the lambda semantics tree.
235 */
236 public String getSerializedLambdaSemTree() {
237 return serializeStructure(getLambdaSemTree());
238 }
239
240 /**
241 * Returns an ASCII representation of the semantics tree under lambda interpretation.
242 *
243 * @return An ASCII representation of the lambda semantics tree.
244 */
245 public String getAsciiLambdaSemTree() {
246 return structureToAsciiTree(getLambdaSemTree(), 0);
247 }
248
249 /**
250 * Returns all subtrees that have the given category name as their top node. In the case of
251 * nested nodes with the given category, only the top-most subtree (containing all other
252 * potential subtrees) is returned.
253 *
254 * @param categoryName The category name.
255 * @return A list of all matching subtrees.
256 */
257 public List<ParseTree> getSubTrees(String categoryName) {
258 List<ParseTreeNode> topNodeList = new ArrayList<ParseTreeNode>();
259 topNodeList.add(topNode);
260 List<ParseTree> subTrees = new ArrayList<ParseTree>();
261 collectSubTrees(categoryName, topNodeList, subTrees);
262 return subTrees;
263 }
264
265 private void collectSubTrees(String categoryName, List<ParseTreeNode> nodes,
266 List<ParseTree> subTrees) {
267 for (ParseTreeNode n : nodes) {
268 if (n.getCategory().getName().equals(categoryName)) {
269 subTrees.add(createParseTree(n));
270 } else {
271 collectSubTrees(categoryName, n.getChildren(), subTrees);
272 }
273 }
274 }
275
276 /**
277 * Returns the list of terminals of this tree.
278 *
279 * @return The list of terminals.
280 */
281 public List<Terminal> getTerminals() {
282 return topNode.getTerminals();
283 }
284
285 private Object applyBetaReduction(Object obj, Map<Integer, Object> replace) {
286 // TODO improve this (just one run, see Blackburn & Bos)
287 if (obj == null) {
288 return null;
289 } else if (obj instanceof String) {
290 return obj;
291 } else if (obj instanceof StringRef) {
292 StringRef sr = (StringRef) obj;
293 if (replace.containsKey(sr.getID())) {
294 replace.put(-1, "");
295 return applyBetaReduction(replace.get(sr.getID()), replace);
296 } else {
297 return obj;
298 }
299 } else if (obj instanceof Object[]) {
300 Object[] a = (Object[]) obj;
301 if (a.length == 0) {
302 return obj;
303 }
304 Object[] c = new Object[a.length];
305 for (int i = 0 ; i < a.length ; i++) {
306 c[i] = applyBetaReduction(a[i], replace);
307 }
308 if (a.length == 3 && appFunctor.equals(a[0]) && a[1] instanceof Object[]) {
309 Object[] l = (Object[]) a[1];
310 if (l.length == 3 && lamFunctor.equals(l[0]) && l[1] instanceof StringRef) {
311 replace.put(((StringRef) l[1]).getID(), a[2]);
312 replace.put(-1, "");
313 return applyBetaReduction(l[2], replace);
314 }
315 }
316 return c;
317 }
318 return obj;
319 }
320
321 private String serializeStructure(Object obj) {
322 if (obj instanceof Object[]) {
323 Object[] a = (Object[]) obj;
324 if (a.length == 0) {
325 return "*empty*";
326 } else if (a.length == 1) {
327 return elementToString(a[0]);
328 } else {
329 String s = elementToString(a[0]) + "(";
330 for (int i = 1 ; i < a.length ; i++) {
331 s += serializeStructure(a[i]) + ", ";
332 }
333 if (s.endsWith(", ")) s = s.substring(0, s.length()-2);
334 return s + ")";
335 }
336 } else {
337 return elementToString(obj);
338 }
339 }
340
341 private String structureToAsciiTree(Object obj, int tab) {
342 String t = "";
343 for (int i=0 ; i < tab ; i++) t += " ";
344 if (obj instanceof Object[]) {
345 Object[] a = (Object[]) obj;
346 if (a.length == 0) {
347 t += "*empty*\n";
348 } else {
349 t += elementToString(a[0]) + "\n";
350 for (int i = 1 ; i < a.length ; i++) {
351 t += structureToAsciiTree(a[i], tab+1);
352 }
353 }
354 } else {
355 return t + elementToString(obj) + "\n";
356 }
357 return t;
358 }
359
360 private String elementToString(Object obj) {
361 if (obj == null) {
362 return "*null*";
363 } else if (obj instanceof String) {
364 String s = (String) obj;
365 if (s.matches("[a-zA-Z0-9_]+")) {
366 return s;
367 } else {
368 return "'" + s + "'";
369 }
370 } else if (obj instanceof StringRef) {
371 StringRef sr = (StringRef) obj;
372 if (sr.getString() == null) {
373 return "?" + sr.getID();
374 } else {
375 return elementToString(sr.getString());
376 }
377 } else if (obj instanceof Terminal) {
378 return obj.toString();
379 } else if (obj instanceof Preterminal) {
380 return "$" + ((Preterminal) obj).getName();
381 } else if (obj instanceof Category) {
382 return ((Category) obj).getName();
383 }
384 return "*invalid*";
385 }
386
387 private Object applyConcatenation(Object obj) {
388 if (isConcatFunction(obj)) {
389 Object[] a = (Object[]) obj;
390 List<Object> cl = new ArrayList<Object>();
391 for (int i = 1 ; i < a.length ; i++) {
392 Object ci = applyConcatenation(a[i]);
393 if (isConcatFunction(ci)) {
394 Object[] ai = (Object[]) ci;
395 for (int j = 1 ; j < ai.length ; j++) {
396 addFlat(cl, ai[j]);
397 }
398 } else if (!"".equals(ci)) {
399 addFlat(cl, ci);
400 }
401 }
402 if (cl.size() == 0) {
403 return "";
404 } else if (cl.size() == 1) {
405 return cl.get(0);
406 } else {
407 cl.add(0, concatFunctor);
408 return cl.toArray();
409 }
410 } else if (obj instanceof Object[]) {
411 Object[] a = (Object[]) obj;
412 Object[] c = new Object[a.length];
413 for (int i = 0 ; i < a.length ; i++) {
414 c[i] = applyConcatenation(a[i]);
415 }
416 return c;
417 } else {
418 return obj;
419 }
420 }
421
422 private boolean isConcatFunction(Object obj) {
423 if (!(obj instanceof Object[])) return false;
424 Object[] a = (Object[]) obj;
425 if (a.length == 0) return false;
426 if (!(a[0] instanceof String)) return false;
427 if (!a[0].toString().equals(concatFunctor)) return false;
428 return true;
429 }
430
431 private void addFlat(List<Object> list, Object obj) {
432 if (obj instanceof StringRef && ((StringRef) obj).getString() != null) {
433 obj = ((StringRef) obj).getString();
434 }
435 if (list.isEmpty() || !(obj instanceof String)) {
436 list.add(obj);
437 } else {
438 if (list.get(list.size()-1) instanceof String) {
439 String s = (String) list.remove(list.size()-1);
440 list.add(s + obj);
441 } else {
442 list.add(obj);
443 }
444 }
445 }
446
447 }