001    // This file is part of AceWiki.
002    // Copyright 2008-2012, AceWiki developers.
003    // 
004    // AceWiki is free software: you can redistribute it and/or modify it under the terms of the GNU
005    // Lesser General Public License as published by the Free Software Foundation, either version 3 of
006    // the License, or (at your option) any later version.
007    // 
008    // AceWiki is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
009    // even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
010    // Lesser General Public License for more details.
011    // 
012    // You should have received a copy of the GNU Lesser General Public License along with AceWiki. If
013    // not, see http://www.gnu.org/licenses/.
014    
015    package ch.uzh.ifi.attempto.chartparser;
016    
017    import java.util.ArrayList;
018    import java.util.HashMap;
019    import java.util.List;
020    import java.util.Map;
021    import java.util.Set;
022    import java.util.TreeSet;
023    
024    /**
025     * This class represents a grammar that is needed to run the chart parser. A grammar can be created
026     * either directly in Java or on the basis of a file in the Codeco notation. See the package
027     * description of {@link ch.uzh.ifi.attempto.codeco} for more information about the Codeco notation.
028     * 
029     * @author Tobias Kuhn
030     */
031    public class Grammar {
032            
033            private List<GrammarRule> rules = new ArrayList<GrammarRule>();
034            private List<LexicalRule> lexRules = new ArrayList<LexicalRule>();
035            private Map<String, List<GrammarRule>> rulesByHeadName = new HashMap<String, List<GrammarRule>>();
036            private Map<String, List<LexicalRule>> lexRulesByCat = new HashMap<String, List<LexicalRule>>();
037            private Map<String, List<LexicalRule>> lexRulesByWord = new HashMap<String, List<LexicalRule>>();
038            private Set<String> terminalSymbols = new TreeSet<String>();
039            private Set<String> preterminalSymbols = new TreeSet<String>();
040            private Set<String> nonterminalSymbols = new TreeSet<String>();
041            private Set<String> featureNames = new TreeSet<String>();
042            
043            /**
044             * Creates an empty grammar.
045             */
046            public Grammar() {
047            }
048            
049            /**
050             * Adds a grammar rule.
051             * 
052             * @param rule The grammar rule to be added.
053             */
054            public void addGrammarRule(GrammarRule rule) {
055                    rules.add(rule);
056                    rulesByHeadName(rule.getHead().getName()).add(rule);
057                    processCategory(rule.getHead());
058                    for (Category c : rule.getBody()) {
059                            processCategory(c);
060                    }
061            }
062            
063            /**
064             * Adds a lexical rule. Lexical rules could also be called "lexicon entries".
065             * 
066             * @param lexRule The lexical rule to be added.
067             */
068            public void addLexicalRule(LexicalRule lexRule) {
069                    lexRules.add(lexRule);
070                    lexRulesByCat(lexRule.getCategory().getName()).add(lexRule);
071                    lexRulesByWord(lexRule.getWord().getName()).add(lexRule);
072                    processCategory(lexRule.getWord());
073                    processCategory(lexRule.getCategory());
074            }
075            
076            /**
077             * Returns the internal list of grammar rules with a head category of the specified name.
078             * 
079             * @param name The name of the head category.
080             * @return The internal list of grammar rules.
081             */
082            List<GrammarRule> rulesByHeadName(String name) {
083                    List<GrammarRule> l = rulesByHeadName.get(name);
084                    if (l == null) {
085                            l = new ArrayList<GrammarRule>();
086                            rulesByHeadName.put(name, l);
087                    }
088                    return l;
089            }
090            
091            /**
092             * Returns the grammar rules with a head category of the specified name.
093             * 
094             * @param name The name of the head category.
095             * @return A list of grammar rules.
096             */
097            public List<GrammarRule> getRulesByHeadName(String name) {
098                    List<GrammarRule> l = rulesByHeadName.get(name);
099                    if (l == null) {
100                            l = new ArrayList<GrammarRule>();
101                            rulesByHeadName.put(name, l);
102                    }
103                    return l;
104            }
105            
106            /**
107             * Returns the internal list of lexical rules with a pre-terminal category of the specified
108             * name.
109             * 
110             * @param categoryName The name of the pre-terminal category.
111             * @return The internal list of lexical rules.
112             */
113            List<LexicalRule> lexRulesByCat(String categoryName) {
114                    List<LexicalRule> l = lexRulesByCat.get(categoryName);
115                    if (l == null) {
116                            l = new ArrayList<LexicalRule>();
117                            lexRulesByCat.put(categoryName, l);
118                    }
119                    return l;
120            }
121    
122            /**
123             * Returns the list of lexical rules with a pre-terminal category of the specified name.
124             * 
125             * @param categoryName The name of the pre-terminal category.
126             * @return A list of lexical rules.
127             */
128            public List<LexicalRule> getLexicalRulesByCategory(String categoryName) {
129                    return new ArrayList<LexicalRule>(lexRulesByCat(categoryName));
130            }
131            
132            /**
133             * Returns the internal list of lexical rules for the specified word. The word corresponds to a
134             * terminal category.
135             * 
136             * @param word The word.
137             * @return The internal list of lexical rules.
138             */
139            List<LexicalRule> lexRulesByWord(String word) {
140                    List<LexicalRule> l = lexRulesByWord.get(word);
141                    if (l == null) {
142                            l = new ArrayList<LexicalRule>();
143                            lexRulesByWord.put(word, l);
144                    }
145                    return l;
146            }
147    
148            /**
149             * Returns the list of lexical rules for the specified word. The word corresponds to a terminal
150             * category.
151             * 
152             * @param word The word.
153             * @return A list of lexical rules.
154             */
155            public List<LexicalRule> getLexicalRulesByWord(String word) {
156                    return new ArrayList<LexicalRule>(lexRulesByWord(word));
157            }
158            
159            /**
160             * Returns an array of all names of features used in feature structures of categories contained
161             * in this grammar. The list contains no duplicates and the elements are sorted alphabetically.
162             * 
163             * @return An array of all used feature names in alphabetical order.
164             */
165            String[] getFeatureNamesArray() {
166                    return featureNames.toArray(new String[]{});
167            }
168            
169            /**
170             * Returns a set of all names of features used in feature structures of categories contained in
171             * this grammar.
172             * 
173             * @return A set of all used feature names.
174             */
175            public Set<String> getFeatureNames() {
176                    return new TreeSet<String>(featureNames);
177            }
178            
179            /**
180             * Returns whether the given feature name is used in this grammar.
181             * 
182             * @param featureName The feature name.
183             * @return true if the feature name is used.
184             */
185            public boolean containsFeatureName(String featureName) {
186                    return featureNames.contains(featureName);
187            }
188            
189            /**
190             * Returns a set of all terminal symbols used in this grammar.
191             * 
192             * @return A set of all terminal symbols.
193             */
194            public Set<String> getTerminalSymbols() {
195                    return new TreeSet<String>(terminalSymbols);
196            }
197            
198            /**
199             * Returns whether the given terminal symbol is used in this grammar.
200             * 
201             * @param terminalSymbol The terminal symbol.
202             * @return true if the symbol is used.
203             */
204            public boolean containsTerminalSymbol(String terminalSymbol) {
205                    return terminalSymbols.contains(terminalSymbol);
206            }
207    
208            /**
209             * Returns a set of all preterminal symbols used in this grammar.
210             * 
211             * @return A set of all preterminal symbols.
212             */
213            public Set<String> getPreterminalSymbols() {
214                    return new TreeSet<String>(preterminalSymbols);
215            }
216    
217            /**
218             * Returns whether the given preterminal symbol is used in this grammar.
219             * 
220             * @param preterminalSymbol The preterminal symbol.
221             * @return true if the symbol is used.
222             */
223            public boolean containsPreterminalSymbol(String preterminalSymbol) {
224                    return preterminalSymbols.contains(preterminalSymbol);
225            }
226    
227            /**
228             * Returns a set of all nonterminal symbols used in this grammar.
229             * 
230             * @return A set of all nonterminal symbols.
231             */
232            public Set<String> getNonterminalSymbols() {
233                    return new TreeSet<String>(nonterminalSymbols);
234            }
235            
236            /**
237             * Returns whether the given nonterminal symbol is used in this grammar.
238             * 
239             * @param nonterminalSymbol The nonterminal symbol.
240             * @return true if the symbol is used.
241             */
242            public boolean containsNonterminalSymbol(String nonterminalSymbol) {
243                    return nonterminalSymbols.contains(nonterminalSymbol);
244            }
245            
246            /**
247             * This is an auxiliary method for grammar classes that are automatically generated out of a
248             * Codeco representation. It sets a feature of a feature map to a certain unbound variable.
249             * 
250             * @param fm The feature map for which a feature should be set.
251             * @param featureName The name of the feature to be set.
252             * @param varID The identifier of the unbound variable to which the feature should be set.
253             * @param featureHash A hash map with variable identiers as keys and the string reference
254             *   objects that represent the respective variables as values.
255             */
256            protected static void setFeature(FeatureMap fm, String featureName, int varID,
257                            HashMap<Integer, StringRef> featureHash) {
258                    if (featureHash.get(varID) == null) {
259                            StringRef stringRef = new StringRef();
260                            fm.setFeature(featureName, stringRef);
261                            featureHash.put(varID, stringRef);
262                    } else {
263                            fm.setFeature(featureName, featureHash.get(varID));
264                    }
265            }
266            
267            /**
268             * This is an auxiliary method for grammar classes that are automatically generated out of a
269             * Codeco representation. It returns a string reference object that represents a certain
270             * unbound variable.
271             * 
272             * @param varID The identifier of the unbound variable for which a string reference object
273             *   should be returned.
274             * @param featureHash A hash map with variable identiers as keys and the string reference
275             *   objects that represent the respective variables as values.
276             * @return A string reference object.
277             */
278            protected static StringRef getStringRef(int varID, HashMap<Integer, StringRef> featureHash) {
279                    StringRef stringRef = featureHash.get(varID);
280                    if (stringRef == null) {
281                            stringRef = new StringRef();
282                            featureHash.put(varID, stringRef);
283                    }
284                    return stringRef;
285            }
286            
287            private void processCategory(Category c) {
288                    if (c instanceof Terminal) {
289                            terminalSymbols.add(c.getName());
290                    } else if (c instanceof Preterminal) {
291                            preterminalSymbols.add(c.getName());
292                    } else if (c instanceof Nonterminal) {
293                            nonterminalSymbols.add(c.getName());
294                    }
295                    Set<String> fnames = c.getFeatureNames();
296                    if (fnames != null) featureNames.addAll(fnames);
297            }
298            
299            public String toString() {
300                    String s = "";
301                    for (GrammarRule r : rules) {
302                            s += r + "\n";
303                    }
304                    s += "\n";
305                    for (LexicalRule le : lexRules) {
306                            s += le + "\n";
307                    }
308                    return s;
309            }
310    
311    }