// This file is part of the Attempto Java Packages.
// Copyright 2008, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
//
// The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
// terms of the GNU Lesser General Public License as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License along with the Attempto
// Java Packages. If not, see http://www.gnu.org/licenses/.

package ch.uzh.ifi.attempto.chartparser;

import java.util.ArrayList;
import java.util.HashMap;

/**
 * This class represents a grammar that is needed to run the chart parser. A grammar can be created
 * either directly in Java or on the basis of a file in the ACGN format.
 *
 * <h4>ACGN Format</h4>
 * 
 * ACGN stands for "Attempto Chartparser Grammar Notation" and uses Prolog notation to provide a nice
 * grammar representation. Simple grammar rules in ACGN look almost the same as common Prolog DCG rules.
 * Just replace the operator
 * "<code>--></code>" by "<code>=></code>":
 *<blockquote><pre>
 * vp => v, np.
 * v => [does, not], verb.
 *</pre></blockquote>
 * Complex grammar rules in ACGN are different from common Prolog DCG rules in the sense that they are using
 * features rather than arguments with fixed positions. Arguments are not recognized by their position but by their
 * name:
 *<blockquote><pre>
 * vp(num:Num,neg:Neg) => v(num:Num,neg:Neg,type:tr), np(case:acc).
 * v(neg:plus,type:Type) => [does, not], verb(type:Type).
 *</pre></blockquote>
 * Every feature has the form <code>Name:Value</code> where <code>Name</code> has to be an atom and <code>Value</code>
 * can be a variable or an atom (but not a compound term).
 *<p>
 * ACGN provides special support for anaphoric references which are used in (controlled) natural languages to refer
 * to objects earlier in the sentence. For example, in the sentence
 *<blockquote><i>
 * A country contains an area that is not controlled by the country.
 *</i></blockquote>
 * the anaphoric reference "the country" refers to the antecedent "a country". Anaphoric references should be
 * introduced only if the previous text contains a matching antecedent that is accessible. For example, in the case
 * of the partial sentence
 *<blockquote><i>
 * A country does not contain a river and borders ...
 *</i></blockquote>
 * one can refer to "a country", but not to "a river" because being in the scope of a negation makes it inaccessible.
 *<p>
 * In order to define the accessibility information needed for anaphoric references in a declarative way, we
 * distinguish two types of grammar rules: accessible rules "<code>=></code>" and inaccessible rules "<code>~></code>".
 * The following example shows an inaccessible rule:
 *<blockquote><pre>
 * vp(num:Num,neg:plus) ~> v(num:Num,neg:plus,type:tr), np(case:acc).
 *</pre></blockquote>
 * Inaccessible rules are handled in the same way as accessible rules with the only exception that the components
 * that are in the scope of the rule are not accessible for subsequent anaphoric references.
 *<p>
 * This can be visualized by the introduction of a special node "~" in the syntax tree whenever an
 * inaccessible rule is used. For the partial sentence introduced before, the syntax tree could look as follows:
 *<p><center>
 * <img src="doc-files/tree.jpg" width="350" alt="example syntax tree">
 *</center><p>
 * In this case, several accessible rules and exactly one inaccessible rule (indicated by the "~"-node)
 * have been used. All preceding components that can be reached through the syntax tree without traversing a
 * "~"-node in the top-down direction are accessible. Thus, "a country" is accessible from the position
 * "*", but "a river" is not. Furthermore, "a country" would be accessible from the position of "a river" because
 * the "~"-node is in this case traversed only in the bottom-up direction.
 *<p>
 * The described procedure allows us to determine all possible anaphoric references that can be used to continue a
 * partial sentence. In our example, one can refer only to "a country".
 * The concept of accessible and inaccessible rules is a simple but powerful instrument to define in a declarative
 * way the accessibility constraints for anaphoric references.
 *<p>
 * The information about which tokens are accessible for anaphoric
 * references can be retrieved by the method {@link ChartParser#getAccessiblePositions(String) getAccessiblePositions}
 * of the {@link ChartParser} class.
 *
 * <h4>Transformations</h4>
 * 
 * ACGN grammars can be translated automatically into a Java class or into a Prolog DCG using the SWI Prolog programs
 * "generate_java.pl" or "generate_dcg.pl", respectively. Those programs can be found in the directory
 * "src/ch/uzh/ifi/attempto/utils" of the source code of this package. The Java class can be generated like this:
 *<blockquote><pre>
 * swipl -s generate_java.pl -g "generate_java('my_acgn_grammar.pl', 'my.package', 'MyJavaGrammar', 'my_start_category')" -t halt
 *</pre></blockquote>
 * Note that the SWI Prolog command might be different on your machine (e.g. "<code>plcon</code>" or "<code>pl</code>").
 * The Prolog DCG file can be generated like this:
 *<blockquote><pre>
 * swipl -s generate_dcg.pl -g "generate_dcg('my_acgn_grammar.pl', 'my_dcg_grammar.pl')" -t halt
 *</pre></blockquote>
 * Note that the information about accessible and inaccessible rules gets lost in the Prolog DCG file.
 * 
 * @author Tobias Kuhn
 */
public class Grammar {
	
	private final Nonterminal startCategory;
	// All rules in insertion order; used by toString().
	private final ArrayList<Rule> rules = new ArrayList<Rule>();
	// Index from the name of a rule's head category to the rules with that head, in insertion order.
	private final HashMap<String, ArrayList<Rule>> rulesByHeadName = new HashMap<String, ArrayList<Rule>>();
	// The subset of rules for which hasEmptyBody() returned true when they were added.
	private final ArrayList<Rule> epsilonRules = new ArrayList<Rule>();
	
	/**
	 * Creates an empty grammar with the given start category.
	 * 
	 * @param startCategory The start category for the grammar.
	 */
	public Grammar(Nonterminal startCategory) {
		this.startCategory = startCategory;
	}
	
	/**
	 * Creates an empty grammar with a start category of the given name.
	 * 
	 * @param startCategoryName The name of the start category for the grammar.
	 */
	public Grammar(String startCategoryName) {
		this.startCategory = new Nonterminal(startCategoryName);
	}
	
	/**
	 * Returns the start category.
	 * 
	 * @return The start category.
	 */
	public Nonterminal getStartCategory() {
		return startCategory;
	}
	
	/**
	 * Adds the rule to the grammar. The rule is registered exactly once in the list of all rules,
	 * exactly once under the name of its head category, and (if it has an empty body) exactly once
	 * in the list of epsilon rules.
	 * 
	 * @param rule The rule to be added.
	 */
	public void addRule(Rule rule) {
		// Resolve the head name before touching any collection, so that a failure here
		// (e.g. a null rule) does not leave the grammar in a partially updated state.
		String headName = rule.getHead().getName();
		rules.add(rule);
		ArrayList<Rule> l = rulesByHeadName.get(headName);
		if (l == null) {
			l = new ArrayList<Rule>();
			rulesByHeadName.put(headName, l);
		}
		// Single insertion into the head-name index; adding here and again afterwards would
		// make every rule show up twice in getRulesByHeadName().
		l.add(rule);
		if (rule.hasEmptyBody()) {
			epsilonRules.add(rule);
		}
	}
	
	/**
	 * Returns the rules whose head category has the given name. If at least one such rule exists,
	 * the returned list is the internal list of this grammar (not a copy); otherwise a new empty
	 * list is returned.
	 * 
	 * @param name The name of the head category.
	 * @return A list of rules.
	 */
	public ArrayList<Rule> getRulesByHeadName(String name) {
		ArrayList<Rule> l = rulesByHeadName.get(name);
		if (l != null) {
			return l;
		}
		return new ArrayList<Rule>();
	}
	
	/**
	 * Returns all the rules that have no body categories. The returned list is the internal list
	 * of this grammar (not a copy).
	 * 
	 * @return A list of rules.
	 */
	public ArrayList<Rule> getEpsilonRules() {
		return epsilonRules;
	}
	
	/**
	 * Returns a textual representation of the grammar: the string form of each rule, in the order
	 * in which the rules were added, each followed by a newline character.
	 * 
	 * @return The string representation of all rules.
	 */
	@Override
	public String toString() {
		StringBuilder sb = new StringBuilder();
		for (Rule r : rules) {
			sb.append(r).append("\n");
		}
		return sb.toString();
	}

}
