001 // This file is part of AceWiki.
002 // Copyright 2008-2012, AceWiki developers.
003 //
004 // AceWiki is free software: you can redistribute it and/or modify it under the terms of the GNU
005 // Lesser General Public License as published by the Free Software Foundation, either version 3 of
006 // the License, or (at your option) any later version.
007 //
008 // AceWiki is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
009 // even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
010 // Lesser General Public License for more details.
011 //
012 // You should have received a copy of the GNU Lesser General Public License along with AceWiki. If
013 // not, see http://www.gnu.org/licenses/.
014
015 package ch.uzh.ifi.attempto.chartparser;
016
017 import java.util.ArrayList;
018 import java.util.HashMap;
019 import java.util.List;
020 import java.util.Map;
021 import java.util.Set;
022 import java.util.TreeSet;
023
024 /**
025 * This class represents a grammar that is needed to run the chart parser. A grammar can be created
026 * either directly in Java or on the basis of a file in the Codeco notation. See the package
027 * description of {@link ch.uzh.ifi.attempto.codeco} for more information about the Codeco notation.
028 *
029 * @author Tobias Kuhn
030 */
031 public class Grammar {
032
033 private List<GrammarRule> rules = new ArrayList<GrammarRule>();
034 private List<LexicalRule> lexRules = new ArrayList<LexicalRule>();
035 private Map<String, List<GrammarRule>> rulesByHeadName = new HashMap<String, List<GrammarRule>>();
036 private Map<String, List<LexicalRule>> lexRulesByCat = new HashMap<String, List<LexicalRule>>();
037 private Map<String, List<LexicalRule>> lexRulesByWord = new HashMap<String, List<LexicalRule>>();
038 private Set<String> terminalSymbols = new TreeSet<String>();
039 private Set<String> preterminalSymbols = new TreeSet<String>();
040 private Set<String> nonterminalSymbols = new TreeSet<String>();
041 private Set<String> featureNames = new TreeSet<String>();
042
043 /**
044 * Creates an empty grammar.
045 */
046 public Grammar() {
047 }
048
049 /**
050 * Adds a grammar rule.
051 *
052 * @param rule The grammar rule to be added.
053 */
054 public void addGrammarRule(GrammarRule rule) {
055 rules.add(rule);
056 rulesByHeadName(rule.getHead().getName()).add(rule);
057 processCategory(rule.getHead());
058 for (Category c : rule.getBody()) {
059 processCategory(c);
060 }
061 }
062
063 /**
064 * Adds a lexical rule. Lexical rules could also be called "lexicon entries".
065 *
066 * @param lexRule The lexical rule to be added.
067 */
068 public void addLexicalRule(LexicalRule lexRule) {
069 lexRules.add(lexRule);
070 lexRulesByCat(lexRule.getCategory().getName()).add(lexRule);
071 lexRulesByWord(lexRule.getWord().getName()).add(lexRule);
072 processCategory(lexRule.getWord());
073 processCategory(lexRule.getCategory());
074 }
075
076 /**
077 * Returns the internal list of grammar rules with a head category of the specified name.
078 *
079 * @param name The name of the head category.
080 * @return The internal list of grammar rules.
081 */
082 List<GrammarRule> rulesByHeadName(String name) {
083 List<GrammarRule> l = rulesByHeadName.get(name);
084 if (l == null) {
085 l = new ArrayList<GrammarRule>();
086 rulesByHeadName.put(name, l);
087 }
088 return l;
089 }
090
091 /**
092 * Returns the grammar rules with a head category of the specified name.
093 *
094 * @param name The name of the head category.
095 * @return A list of grammar rules.
096 */
097 public List<GrammarRule> getRulesByHeadName(String name) {
098 List<GrammarRule> l = rulesByHeadName.get(name);
099 if (l == null) {
100 l = new ArrayList<GrammarRule>();
101 rulesByHeadName.put(name, l);
102 }
103 return l;
104 }
105
106 /**
107 * Returns the internal list of lexical rules with a pre-terminal category of the specified
108 * name.
109 *
110 * @param categoryName The name of the pre-terminal category.
111 * @return The internal list of lexical rules.
112 */
113 List<LexicalRule> lexRulesByCat(String categoryName) {
114 List<LexicalRule> l = lexRulesByCat.get(categoryName);
115 if (l == null) {
116 l = new ArrayList<LexicalRule>();
117 lexRulesByCat.put(categoryName, l);
118 }
119 return l;
120 }
121
122 /**
123 * Returns the list of lexical rules with a pre-terminal category of the specified name.
124 *
125 * @param categoryName The name of the pre-terminal category.
126 * @return A list of lexical rules.
127 */
128 public List<LexicalRule> getLexicalRulesByCategory(String categoryName) {
129 return new ArrayList<LexicalRule>(lexRulesByCat(categoryName));
130 }
131
132 /**
133 * Returns the internal list of lexical rules for the specified word. The word corresponds to a
134 * terminal category.
135 *
136 * @param word The word.
137 * @return The internal list of lexical rules.
138 */
139 List<LexicalRule> lexRulesByWord(String word) {
140 List<LexicalRule> l = lexRulesByWord.get(word);
141 if (l == null) {
142 l = new ArrayList<LexicalRule>();
143 lexRulesByWord.put(word, l);
144 }
145 return l;
146 }
147
148 /**
149 * Returns the list of lexical rules for the specified word. The word corresponds to a terminal
150 * category.
151 *
152 * @param word The word.
153 * @return A list of lexical rules.
154 */
155 public List<LexicalRule> getLexicalRulesByWord(String word) {
156 return new ArrayList<LexicalRule>(lexRulesByWord(word));
157 }
158
159 /**
160 * Returns an array of all names of features used in feature structures of categories contained
161 * in this grammar. The list contains no duplicates and the elements are sorted alphabetically.
162 *
163 * @return An array of all used feature names in alphabetical order.
164 */
165 String[] getFeatureNamesArray() {
166 return featureNames.toArray(new String[]{});
167 }
168
169 /**
170 * Returns a set of all names of features used in feature structures of categories contained in
171 * this grammar.
172 *
173 * @return A set of all used feature names.
174 */
175 public Set<String> getFeatureNames() {
176 return new TreeSet<String>(featureNames);
177 }
178
179 /**
180 * Returns whether the given feature name is used in this grammar.
181 *
182 * @param featureName The feature name.
183 * @return true if the feature name is used.
184 */
185 public boolean containsFeatureName(String featureName) {
186 return featureNames.contains(featureName);
187 }
188
189 /**
190 * Returns a set of all terminal symbols used in this grammar.
191 *
192 * @return A set of all terminal symbols.
193 */
194 public Set<String> getTerminalSymbols() {
195 return new TreeSet<String>(terminalSymbols);
196 }
197
198 /**
199 * Returns whether the given terminal symbol is used in this grammar.
200 *
201 * @param terminalSymbol The terminal symbol.
202 * @return true if the symbol is used.
203 */
204 public boolean containsTerminalSymbol(String terminalSymbol) {
205 return terminalSymbols.contains(terminalSymbol);
206 }
207
208 /**
209 * Returns a set of all preterminal symbols used in this grammar.
210 *
211 * @return A set of all preterminal symbols.
212 */
213 public Set<String> getPreterminalSymbols() {
214 return new TreeSet<String>(preterminalSymbols);
215 }
216
217 /**
218 * Returns whether the given preterminal symbol is used in this grammar.
219 *
220 * @param preterminalSymbol The preterminal symbol.
221 * @return true if the symbol is used.
222 */
223 public boolean containsPreterminalSymbol(String preterminalSymbol) {
224 return preterminalSymbols.contains(preterminalSymbol);
225 }
226
227 /**
228 * Returns a set of all nonterminal symbols used in this grammar.
229 *
230 * @return A set of all nonterminal symbols.
231 */
232 public Set<String> getNonterminalSymbols() {
233 return new TreeSet<String>(nonterminalSymbols);
234 }
235
236 /**
237 * Returns whether the given nonterminal symbol is used in this grammar.
238 *
239 * @param nonterminalSymbol The nonterminal symbol.
240 * @return true if the symbol is used.
241 */
242 public boolean containsNonterminalSymbol(String nonterminalSymbol) {
243 return nonterminalSymbols.contains(nonterminalSymbol);
244 }
245
246 /**
247 * This is an auxiliary method for grammar classes that are automatically generated out of a
248 * Codeco representation. It sets a feature of a feature map to a certain unbound variable.
249 *
250 * @param fm The feature map for which a feature should be set.
251 * @param featureName The name of the feature to be set.
252 * @param varID The identifier of the unbound variable to which the feature should be set.
253 * @param featureHash A hash map with variable identiers as keys and the string reference
254 * objects that represent the respective variables as values.
255 */
256 protected static void setFeature(FeatureMap fm, String featureName, int varID,
257 HashMap<Integer, StringRef> featureHash) {
258 if (featureHash.get(varID) == null) {
259 StringRef stringRef = new StringRef();
260 fm.setFeature(featureName, stringRef);
261 featureHash.put(varID, stringRef);
262 } else {
263 fm.setFeature(featureName, featureHash.get(varID));
264 }
265 }
266
267 /**
268 * This is an auxiliary method for grammar classes that are automatically generated out of a
269 * Codeco representation. It returns a string reference object that represents a certain
270 * unbound variable.
271 *
272 * @param varID The identifier of the unbound variable for which a string reference object
273 * should be returned.
274 * @param featureHash A hash map with variable identiers as keys and the string reference
275 * objects that represent the respective variables as values.
276 * @return A string reference object.
277 */
278 protected static StringRef getStringRef(int varID, HashMap<Integer, StringRef> featureHash) {
279 StringRef stringRef = featureHash.get(varID);
280 if (stringRef == null) {
281 stringRef = new StringRef();
282 featureHash.put(varID, stringRef);
283 }
284 return stringRef;
285 }
286
287 private void processCategory(Category c) {
288 if (c instanceof Terminal) {
289 terminalSymbols.add(c.getName());
290 } else if (c instanceof Preterminal) {
291 preterminalSymbols.add(c.getName());
292 } else if (c instanceof Nonterminal) {
293 nonterminalSymbols.add(c.getName());
294 }
295 Set<String> fnames = c.getFeatureNames();
296 if (fnames != null) featureNames.addAll(fnames);
297 }
298
299 public String toString() {
300 String s = "";
301 for (GrammarRule r : rules) {
302 s += r + "\n";
303 }
304 s += "\n";
305 for (LexicalRule le : lexRules) {
306 s += le + "\n";
307 }
308 return s;
309 }
310
311 }