001 // This file is part of the Attempto Java Packages. 002 // Copyright 2008, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch). 003 // 004 // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the 005 // terms of the GNU Lesser General Public License as published by the Free Software Foundation, 006 // either version 3 of the License, or (at your option) any later version. 007 // 008 // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY 009 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 010 // PURPOSE. See the GNU Lesser General Public License for more details. 011 // 012 // You should have received a copy of the GNU Lesser General Public License along with the Attempto 013 // Java Packages. If not, see http://www.gnu.org/licenses/. 014 015 package ch.uzh.ifi.attempto.chartparser; 016 017 import java.util.ArrayList; 018 import java.util.HashMap; 019 020 /** 021 * This class represents a grammar that is needed to run the chart parser. A grammar can be created 022 * either directly in Java or on the basis of a file in the ACGN format. 023 *<p> 024 * ACGN stands for "Attempto Chartparser Grammar Notation" and uses Prolog notation to provide a nicer 025 * grammar representation. Simple grammar rules in ACGN look almost the same as common Prolog DCG rules. 026 * Just replace the operator 027 * "<code>--></code>" by "<code>=></code>": 028 *<blockquote><pre> 029 * vp => v, np. 030 * v => [does, not], verb. 031 *</pre></blockquote> 032 * Complex grammar rules in ACGN are different from common Prolog DCG rules in the sense that they are using 033 * features rather than arguments with fixed positions. Arguments are not recognized by their position but by their 034 * name: 035 *<blockquote><pre> 036 * vp(num:Num,neg:Neg) => v(num:Num,neg:Neg,type:tr), np(case:acc). 037 * v(neg:plus,type:Type) => [does, not], verb(type:Type). 038 *</pre></blockquote> 039 * Every feature has the form <code>Name:Value</code> where <code>Name</code> has to be an atom and <code>Value</code> 040 * can be a variable, an atom, or a number (but not a compound term). 041 *<p> 042 * ACGN supports a special type of rules that are called "inaccessible rules". The operator "<code>~></code>" 043 * instead of "<code>=></code>" is used for inaccessible rules: 044 *<blockquote><pre> 045 * vp(num:Num,neg:plus) ~> v(num:Num,neg:plus,type:tr), np(case:acc). 046 *</pre></blockquote> 047 * In the case of an inaccessible rule, the potential antecedents that are covered by this rule are not accessible 048 * for anaphoric references from outside. In this way, the information about which tokens are accessible for anaphoric 049 * references can be retrieved by the method {@link ChartParser#getAccessiblePositions(String) getAccessiblePositions} 050 * of the {@link ChartParser} class. 051 *<p> 052 * ACGN grammars can be translated automatically into a Java class or into a Prolog DCG using the SWI Prolog programs 053 * "generate_java.pl" or "generate_dcg.pl", respectively. Those programs can be found in the directory 054 * "src/ch/uzh/ifi/attempto/utils" of the source code of this package. The Java class can be generated like this: 055 *<blockquote><pre> 056 * swipl -s generate_java.pl -g "generate_java('my_acgn_grammar.pl', 'my.package', 'MyJavaGrammar', 'my_start_category')" -t halt 057 *</pre></blockquote> 058 * Note that the SWI Prolog command might be different on your machine (e.g. "<code>plcon</code>" or "<code>pl</code>"). 059 * The Prolog DCG file can be generated like this: 060 *<blockquote><pre> 061 * swipl -s generate_dcg.pl -g "generate_dcg('my_acgn_grammar.pl', 'my_dcg_grammar.pl')" -t halt 062 *</pre></blockquote> 063 * Note that the information about accessible and inaccessible rules gets lost in the Prolog DCG file. 064 * 065 * @author Tobias Kuhn 066 */ 067 public class Grammar { 068 069 private final Nonterminal startCategory; 070 private ArrayList<Rule> rules = new ArrayList<Rule>(); 071 private HashMap<String, ArrayList<Rule>> rulesByHeadName = new HashMap<String, ArrayList<Rule>>(); 072 private ArrayList<Rule> epsilonRules = new ArrayList<Rule>(); 073 074 /** 075 * Creates a empty grammar with the given start category. 076 * 077 * @param startCategory The start category for the grammar. 078 */ 079 public Grammar(Nonterminal startCategory) { 080 this.startCategory = startCategory; 081 } 082 083 /** 084 * Creates a empty grammar with a start category of the given name. 085 * 086 * @param startCategoryName The name of the start category for the grammar. 087 */ 088 public Grammar(String startCategoryName) { 089 this.startCategory = new Nonterminal(startCategoryName); 090 } 091 092 /** 093 * Returns the start category. 094 * 095 * @return The start category. 096 */ 097 public Nonterminal getStartCategory() { 098 return startCategory; 099 } 100 101 /** 102 * Adds the rule to the grammar. 103 * 104 * @param rule The rule to be added. 105 */ 106 public void addRule(Rule rule) { 107 rules.add(rule); 108 ArrayList<Rule> l = rulesByHeadName.get(rule.getHead().getName()); 109 if (l == null) { 110 l = new ArrayList<Rule>(); 111 l.add(rule); 112 rulesByHeadName.put(rule.getHead().getName(), l); 113 } else { 114 l.add(rule); 115 } 116 rulesByHeadName.get(rule.getHead().getName()).add(rule); 117 if (rule.hasEmptyBody()) { 118 epsilonRules.add(rule); 119 } 120 } 121 122 /** 123 * Returns the rules whose head category has the given name. 124 * 125 * @param name The name of the head category. 126 * @return A list of rules. 127 */ 128 public ArrayList<Rule> getRulesByHeadName(String name) { 129 ArrayList<Rule> l = rulesByHeadName.get(name); 130 if (l != null) { 131 return l; 132 } 133 return new ArrayList<Rule>(); 134 } 135 136 /** 137 * Returns all the rules that have no body categories. 138 * 139 * @return A list of rules. 140 */ 141 public ArrayList<Rule> getEpsilonRules() { 142 return epsilonRules; 143 } 144 145 public String toString() { 146 String s = ""; 147 for (Rule r : rules) { 148 s += r + "\n"; 149 } 150 return s; 151 } 152 153 }