001 // This file is part of AceWiki. 002 // Copyright 2008-2012, AceWiki developers. 003 // 004 // AceWiki is free software: you can redistribute it and/or modify it under the terms of the GNU 005 // Lesser General Public License as published by the Free Software Foundation, either version 3 of 006 // the License, or (at your option) any later version. 007 // 008 // AceWiki is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without 009 // even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 010 // Lesser General Public License for more details. 011 // 012 // You should have received a copy of the GNU Lesser General Public License along with AceWiki. If 013 // not, see http://www.gnu.org/licenses/. 014 015 package ch.uzh.ifi.attempto.chartparser; 016 017 import java.util.ArrayList; 018 import java.util.HashMap; 019 import java.util.List; 020 import java.util.Map; 021 import java.util.Set; 022 import java.util.TreeSet; 023 024 /** 025 * This class represents a grammar that is needed to run the chart parser. A grammar can be created 026 * either directly in Java or on the basis of a file in the Codeco notation. See the package 027 * description of {@link ch.uzh.ifi.attempto.codeco} for more information about the Codeco notation. 028 * 029 * @author Tobias Kuhn 030 */ 031 public class Grammar { 032 033 private List<GrammarRule> rules = new ArrayList<GrammarRule>(); 034 private List<LexicalRule> lexRules = new ArrayList<LexicalRule>(); 035 private Map<String, List<GrammarRule>> rulesByHeadName = new HashMap<String, List<GrammarRule>>(); 036 private Map<String, List<LexicalRule>> lexRulesByCat = new HashMap<String, List<LexicalRule>>(); 037 private Map<String, List<LexicalRule>> lexRulesByWord = new HashMap<String, List<LexicalRule>>(); 038 private Set<String> terminalSymbols = new TreeSet<String>(); 039 private Set<String> preterminalSymbols = new TreeSet<String>(); 040 private Set<String> nonterminalSymbols = new TreeSet<String>(); 041 private Set<String> featureNames = new TreeSet<String>(); 042 043 /** 044 * Creates an empty grammar. 045 */ 046 public Grammar() { 047 } 048 049 /** 050 * Adds a grammar rule. 051 * 052 * @param rule The grammar rule to be added. 053 */ 054 public void addGrammarRule(GrammarRule rule) { 055 rules.add(rule); 056 rulesByHeadName(rule.getHead().getName()).add(rule); 057 processCategory(rule.getHead()); 058 for (Category c : rule.getBody()) { 059 processCategory(c); 060 } 061 } 062 063 /** 064 * Adds a lexical rule. Lexical rules could also be called "lexicon entries". 065 * 066 * @param lexRule The lexical rule to be added. 067 */ 068 public void addLexicalRule(LexicalRule lexRule) { 069 lexRules.add(lexRule); 070 lexRulesByCat(lexRule.getCategory().getName()).add(lexRule); 071 lexRulesByWord(lexRule.getWord().getName()).add(lexRule); 072 processCategory(lexRule.getWord()); 073 processCategory(lexRule.getCategory()); 074 } 075 076 /** 077 * Returns the internal list of grammar rules with a head category of the specified name. 078 * 079 * @param name The name of the head category. 080 * @return The internal list of grammar rules. 081 */ 082 List<GrammarRule> rulesByHeadName(String name) { 083 List<GrammarRule> l = rulesByHeadName.get(name); 084 if (l == null) { 085 l = new ArrayList<GrammarRule>(); 086 rulesByHeadName.put(name, l); 087 } 088 return l; 089 } 090 091 /** 092 * Returns the grammar rules with a head category of the specified name. 093 * 094 * @param name The name of the head category. 095 * @return A list of grammar rules. 096 */ 097 public List<GrammarRule> getRulesByHeadName(String name) { 098 List<GrammarRule> l = rulesByHeadName.get(name); 099 if (l == null) { 100 l = new ArrayList<GrammarRule>(); 101 rulesByHeadName.put(name, l); 102 } 103 return l; 104 } 105 106 /** 107 * Returns the internal list of lexical rules with a pre-terminal category of the specified 108 * name. 109 * 110 * @param categoryName The name of the pre-terminal category. 111 * @return The internal list of lexical rules. 112 */ 113 List<LexicalRule> lexRulesByCat(String categoryName) { 114 List<LexicalRule> l = lexRulesByCat.get(categoryName); 115 if (l == null) { 116 l = new ArrayList<LexicalRule>(); 117 lexRulesByCat.put(categoryName, l); 118 } 119 return l; 120 } 121 122 /** 123 * Returns the list of lexical rules with a pre-terminal category of the specified name. 124 * 125 * @param categoryName The name of the pre-terminal category. 126 * @return A list of lexical rules. 127 */ 128 public List<LexicalRule> getLexicalRulesByCategory(String categoryName) { 129 return new ArrayList<LexicalRule>(lexRulesByCat(categoryName)); 130 } 131 132 /** 133 * Returns the internal list of lexical rules for the specified word. The word corresponds to a 134 * terminal category. 135 * 136 * @param word The word. 137 * @return The internal list of lexical rules. 138 */ 139 List<LexicalRule> lexRulesByWord(String word) { 140 List<LexicalRule> l = lexRulesByWord.get(word); 141 if (l == null) { 142 l = new ArrayList<LexicalRule>(); 143 lexRulesByWord.put(word, l); 144 } 145 return l; 146 } 147 148 /** 149 * Returns the list of lexical rules for the specified word. The word corresponds to a terminal 150 * category. 151 * 152 * @param word The word. 153 * @return A list of lexical rules. 154 */ 155 public List<LexicalRule> getLexicalRulesByWord(String word) { 156 return new ArrayList<LexicalRule>(lexRulesByWord(word)); 157 } 158 159 /** 160 * Returns an array of all names of features used in feature structures of categories contained 161 * in this grammar. The list contains no duplicates and the elements are sorted alphabetically. 162 * 163 * @return An array of all used feature names in alphabetical order. 164 */ 165 String[] getFeatureNamesArray() { 166 return featureNames.toArray(new String[]{}); 167 } 168 169 /** 170 * Returns a set of all names of features used in feature structures of categories contained in 171 * this grammar. 172 * 173 * @return A set of all used feature names. 174 */ 175 public Set<String> getFeatureNames() { 176 return new TreeSet<String>(featureNames); 177 } 178 179 /** 180 * Returns whether the given feature name is used in this grammar. 181 * 182 * @param featureName The feature name. 183 * @return true if the feature name is used. 184 */ 185 public boolean containsFeatureName(String featureName) { 186 return featureNames.contains(featureName); 187 } 188 189 /** 190 * Returns a set of all terminal symbols used in this grammar. 191 * 192 * @return A set of all terminal symbols. 193 */ 194 public Set<String> getTerminalSymbols() { 195 return new TreeSet<String>(terminalSymbols); 196 } 197 198 /** 199 * Returns whether the given terminal symbol is used in this grammar. 200 * 201 * @param terminalSymbol The terminal symbol. 202 * @return true if the symbol is used. 203 */ 204 public boolean containsTerminalSymbol(String terminalSymbol) { 205 return terminalSymbols.contains(terminalSymbol); 206 } 207 208 /** 209 * Returns a set of all preterminal symbols used in this grammar. 210 * 211 * @return A set of all preterminal symbols. 212 */ 213 public Set<String> getPreterminalSymbols() { 214 return new TreeSet<String>(preterminalSymbols); 215 } 216 217 /** 218 * Returns whether the given preterminal symbol is used in this grammar. 219 * 220 * @param preterminalSymbol The preterminal symbol. 221 * @return true if the symbol is used. 222 */ 223 public boolean containsPreterminalSymbol(String preterminalSymbol) { 224 return preterminalSymbols.contains(preterminalSymbol); 225 } 226 227 /** 228 * Returns a set of all nonterminal symbols used in this grammar. 229 * 230 * @return A set of all nonterminal symbols. 231 */ 232 public Set<String> getNonterminalSymbols() { 233 return new TreeSet<String>(nonterminalSymbols); 234 } 235 236 /** 237 * Returns whether the given nonterminal symbol is used in this grammar. 238 * 239 * @param nonterminalSymbol The nonterminal symbol. 240 * @return true if the symbol is used. 241 */ 242 public boolean containsNonterminalSymbol(String nonterminalSymbol) { 243 return nonterminalSymbols.contains(nonterminalSymbol); 244 } 245 246 /** 247 * This is an auxiliary method for grammar classes that are automatically generated out of a 248 * Codeco representation. It sets a feature of a feature map to a certain unbound variable. 249 * 250 * @param fm The feature map for which a feature should be set. 251 * @param featureName The name of the feature to be set. 252 * @param varID The identifier of the unbound variable to which the feature should be set. 253 * @param featureHash A hash map with variable identiers as keys and the string reference 254 * objects that represent the respective variables as values. 255 */ 256 protected static void setFeature(FeatureMap fm, String featureName, int varID, 257 HashMap<Integer, StringRef> featureHash) { 258 if (featureHash.get(varID) == null) { 259 StringRef stringRef = new StringRef(); 260 fm.setFeature(featureName, stringRef); 261 featureHash.put(varID, stringRef); 262 } else { 263 fm.setFeature(featureName, featureHash.get(varID)); 264 } 265 } 266 267 /** 268 * This is an auxiliary method for grammar classes that are automatically generated out of a 269 * Codeco representation. It returns a string reference object that represents a certain 270 * unbound variable. 271 * 272 * @param varID The identifier of the unbound variable for which a string reference object 273 * should be returned. 274 * @param featureHash A hash map with variable identiers as keys and the string reference 275 * objects that represent the respective variables as values. 276 * @return A string reference object. 277 */ 278 protected static StringRef getStringRef(int varID, HashMap<Integer, StringRef> featureHash) { 279 StringRef stringRef = featureHash.get(varID); 280 if (stringRef == null) { 281 stringRef = new StringRef(); 282 featureHash.put(varID, stringRef); 283 } 284 return stringRef; 285 } 286 287 private void processCategory(Category c) { 288 if (c instanceof Terminal) { 289 terminalSymbols.add(c.getName()); 290 } else if (c instanceof Preterminal) { 291 preterminalSymbols.add(c.getName()); 292 } else if (c instanceof Nonterminal) { 293 nonterminalSymbols.add(c.getName()); 294 } 295 Set<String> fnames = c.getFeatureNames(); 296 if (fnames != null) featureNames.addAll(fnames); 297 } 298 299 public String toString() { 300 String s = ""; 301 for (GrammarRule r : rules) { 302 s += r + "\n"; 303 } 304 s += "\n"; 305 for (LexicalRule le : lexRules) { 306 s += le + "\n"; 307 } 308 return s; 309 } 310 311 }