001    // This file is part of the Attempto Java Packages.
002    // Copyright 2008, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
003    //
004    // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
005    // terms of the GNU Lesser General Public License as published by the Free Software Foundation,
006    // either version 3 of the License, or (at your option) any later version.
007    //
008    // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
009    // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
010    // PURPOSE. See the GNU Lesser General Public License for more details.
011    //
012    // You should have received a copy of the GNU Lesser General Public License along with the Attempto
013    // Java Packages. If not, see http://www.gnu.org/licenses/.
014    
015    package ch.uzh.ifi.attempto.chartparser;
016    
017    import java.util.ArrayList;
018    import java.util.HashMap;
019    
020    /**
021     * This class represents a grammar that is needed to run the chart parser. A grammar can be created
022     * either directly in Java or on the basis of a file in the ACGN format.
023     *<p>
024     * ACGN stands for "Attempto Chartparser Grammar Notation" and uses Prolog notation to provide a nicer
025     * grammar representation. Simple grammar rules in ACGN look almost the same as common Prolog DCG rules.
026     * Just replace the operator
027     * "<code>--></code>" by "<code>=></code>":
028     *<blockquote><pre>
029     * vp => v, np.
030     * v => [does, not], verb.
031     *</pre></blockquote>
032     * Complex grammar rules in ACGN are different from common Prolog DCG rules in the sense that they are using
033     * features rather than arguments with fixed positions. Arguments are not recognized by their position but by their
034     * name:
035     *<blockquote><pre>
036     * vp(num:Num,neg:Neg) => v(num:Num,neg:Neg,type:tr), np(case:acc).
037     * v(neg:plus,type:Type) => [does, not], verb(type:Type).
038     *</pre></blockquote>
039     * Every feature has the form <code>Name:Value</code> where <code>Name</code> has to be an atom and <code>Value</code>
040     * can be a variable, an atom, or a number (but not a compound term).
041     *<p>
042     * ACGN supports a special type of rules that are called "inaccessible rules". The operator "<code>~></code>"
043     * instead of "<code>=></code>" is used for inaccessible rules:
044     *<blockquote><pre>
045     * vp(num:Num,neg:plus) ~> v(num:Num,neg:plus,type:tr), np(case:acc).
046     *</pre></blockquote>
047     * In the case of an inaccessible rule, the potential antecedents that are covered by this rule are not accessible
048     * for anaphoric references from outside. In this way, the information about which tokens are accessible for anaphoric
049     * references can be retrieved by the method {@link ChartParser#getAccessiblePositions(String) getAccessiblePositions}
050     * of the {@link ChartParser} class.
051     *<p>
052     * ACGN grammars can be translated automatically into a Java class or into a Prolog DCG using the SWI Prolog programs
053     * "generate_java.pl" or "generate_dcg.pl", respectively. Those programs can be found in the directory
054     * "src/ch/uzh/ifi/attempto/utils" of the source code of this package. The Java class can be generated like this:
055     *<blockquote><pre>
056     * swipl -s generate_java.pl -g "generate_java('my_acgn_grammar.pl', 'my.package', 'MyJavaGrammar', 'my_start_category')" -t halt
057     *</pre></blockquote>
058     * Note that the SWI Prolog command might be different on your machine (e.g. "<code>plcon</code>" or "<code>pl</code>").
059     * The Prolog DCG file can be generated like this:
060     *<blockquote><pre>
061     * swipl -s generate_dcg.pl -g "generate_dcg('my_acgn_grammar.pl', 'my_dcg_grammar.pl')" -t halt
062     *</pre></blockquote>
063     * Note that the information about accessible and inaccessible rules gets lost in the Prolog DCG file.
064     * 
065     * @author Tobias Kuhn
066     */
067    public class Grammar {
068            
069            private final Nonterminal startCategory;
070            private ArrayList<Rule> rules = new ArrayList<Rule>();
071            private HashMap<String, ArrayList<Rule>> rulesByHeadName = new HashMap<String, ArrayList<Rule>>();
072            private ArrayList<Rule> epsilonRules = new ArrayList<Rule>();
073            
074            /**
075             * Creates a empty grammar with the given start category.
076             * 
077             * @param startCategory The start category for the grammar.
078             */
079            public Grammar(Nonterminal startCategory) {
080                    this.startCategory = startCategory;
081            }
082            
083            /**
084             * Creates a empty grammar with a start category of the given name.
085             * 
086             * @param startCategoryName The name of the start category for the grammar.
087             */
088            public Grammar(String startCategoryName) {
089                    this.startCategory = new Nonterminal(startCategoryName);
090            }
091            
092            /**
093             * Returns the start category.
094             * 
095             * @return The start category.
096             */
097            public Nonterminal getStartCategory() {
098                    return startCategory;
099            }
100            
101            /**
102             * Adds the rule to the grammar.
103             * 
104             * @param rule The rule to be added.
105             */
106            public void addRule(Rule rule) {
107                    rules.add(rule);
108                    ArrayList<Rule> l = rulesByHeadName.get(rule.getHead().getName());
109                    if (l == null) {
110                            l = new ArrayList<Rule>();
111                            l.add(rule);
112                            rulesByHeadName.put(rule.getHead().getName(), l);
113                    } else {
114                            l.add(rule);
115                    }
116                    rulesByHeadName.get(rule.getHead().getName()).add(rule);
117                    if (rule.hasEmptyBody()) {
118                            epsilonRules.add(rule);
119                    }
120            }
121            
122            /**
123             * Returns the rules whose head category has the given name.
124             * 
125             * @param name The name of the head category.
126             * @return A list of rules.
127             */
128            public ArrayList<Rule> getRulesByHeadName(String name) {
129                    ArrayList<Rule> l = rulesByHeadName.get(name);
130                    if (l != null) {
131                            return l;
132                    }
133                    return new ArrayList<Rule>();
134            }
135            
136            /**
137             * Returns all the rules that have no body categories.
138             * 
139             * @return A list of rules.
140             */
141            public ArrayList<Rule> getEpsilonRules() {
142                    return epsilonRules;
143            }
144            
145            public String toString() {
146                    String s = "";
147                    for (Rule r : rules) {
148                            s += r + "\n";
149                    }
150                    return s;
151            }
152    
153    }