001    // This file is part of the Attempto Java Packages.
002    // Copyright 2008-2009, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
003    //
004    // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
005    // terms of the GNU Lesser General Public License as published by the Free Software Foundation,
006    // either version 3 of the License, or (at your option) any later version.
007    //
008    // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
009    // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
010    // PURPOSE. See the GNU Lesser General Public License for more details.
011    //
012    // You should have received a copy of the GNU Lesser General Public License along with the Attempto
013    // Java Packages. If not, see http://www.gnu.org/licenses/.
014    
015    package ch.uzh.ifi.attempto.ape;
016    
017    import java.io.StringReader;
018    
019    import org.jdom.Element;
020    import org.jdom.input.SAXBuilder;
021    
022    /**
023     * This is an interface to the Attempto Parsing Engine (APE), that translates sentences in
024     * Attempto Controlled English (ACE) into logic.
025     * 
026     * @author Kaarel Kaljurand
027     * @author Tobias Kuhn
028     */
029    public abstract class ACEParser {
030    
031            private boolean guessingEnabled = false;
032            private boolean clexEnabled = true;
033            private String uri;
034    
035            /**
036             * Creates a new ACEParser object.
037             */
038            protected ACEParser() {}
039    
040            /**
041             * Returns one single output for the given ACE text using the given lexicon. See the enumeration
042             * OutputType for further information about the possible outputs. If parsing succeeds then a string
043             * containing the result of the parser is returned. In the case of failure, an ACEParserException
044             * is thrown containing the error messages.
045             * 
046             * @param text The ACE text to be parsed.
047             * @param lexicon The lexicon to be loaded.
048             * @param outputType The kind of output that should be returned.
049             * @return The result as a string.
050             * @throws ACEParserException Contains the error messages if an error occurred.
051             * @see #getMultiOutput(String, Lexicon, OutputType...)
052             */
053            public abstract String getSoloOutput(String text, Lexicon lexicon, OutputType outputType) throws ACEParserException;
054    
055            /**
056             * Returns one single output for the given ACE text using no lexicon.
057             * 
058             * @param text The ACE text to be parsed.
059             * @param outputType The kind of output that should be returned.
060             * @return The result as a string.
061             * @throws ACEParserException Contains the error messages if an error occurred.
062             * @see #getSoloOutput(String, Lexicon, OutputType)
063             */
064            public final String getSoloOutput(String text, OutputType outputType) throws ACEParserException {
065                    return getSoloOutput(text, null, outputType);
066            }
067    
068            /**
069             * Returns multiple outputs for the given ACE text using the given lexicon. See the enumeration
070             * OutputType for further information about the possible outputs. The result is returned as an
071             * ACEParserResult object.
072             * 
073             * @param text The ACE text to be parsed.
074             * @param lexicon The lexicon to be loaded.
075             * @param outputTypes The kind of outputs that should be returned.
076             * @return A ParserResult object containing the outputs.
077             * @see #getSoloOutput(String, Lexicon, OutputType)
078             */
079            public abstract ACEParserResult getMultiOutput(String text, Lexicon lexicon, OutputType... outputTypes);
080    
081            /**
082             * Returns multiple outputs for the given ACE text using no lexicon.
083             * 
084             * @param text The ACE text to be parsed.
085             * @param outputTypes The kind of outputs that should be returned.
086             * @return A ParserResult object containing the outputs.
087             * @see #getMultiOutput(String, Lexicon, OutputType...)
088             */
089            public final ACEParserResult getMultiOutput(String text, OutputType... outputTypes) {
090                    return getMultiOutput(text, null, outputTypes);
091            }
092    
093            /**
094             * Determines whether unknown words should be guessed. If false, unknown words lead to an error
095             * message. Note that unknown word guessing is not always perfect. The default value is false.
096             * 
097             * @param guessingEnabled true if unknown words should be guessed. false otherwise.
098             */
099            public void setGuessingEnabled(boolean guessingEnabled) {
100                    this.guessingEnabled = guessingEnabled;
101            }
102    
103            /**
104             * Returns whether unknown words should be guessed.
105             * 
106             * @return true if guessing is enabled.
107             */
108            public boolean isGuessingEnabled() {
109                    return guessingEnabled;
110            }
111    
112            /**
113             * Sets the namespace URI to be used for outputs like OWL.
114             * 
115             * @param uri The namespace URI.
116             */
117            public void setURI(String uri) {
118                    // TODO: BUG: should we use a URI instead of String?
119                    this.uri = uri;
120            }
121    
122            /**
123             * Returns the namespace URI to be used for outputs like OWL.
124             * 
125             * @return The namespace URI.
126             */
127            public String getURI() {
128                    return uri;
129            }
130    
131            /**
132             * Determines whether the built-in lexicon should be used. The default value is true.
133             * 
134             * @param clexEnabled true if the built-in lexicon should be used. false otherwise.
135             */
136            public void setClexEnabled(boolean clexEnabled) {
137                    this.clexEnabled  = clexEnabled;
138            }
139    
140            /**
141             * Returns whether the built-in lexicon is used.
142             * 
143             * @return true if the built-in lexicon is used.
144             */
145            public boolean isClexEnabled() {
146                    return clexEnabled;
147            }
148    
149            String getOptions() {
150                    String guessString = "";
151                    if (isGuessingEnabled()) guessString = ",guess=on";
152                    String clexString = ",noclex=on";
153                    if (isClexEnabled()) clexString = ",noclex=off";
154                    String uriString = "";
155                    if (getURI() != null) {
156                            uriString = ",uri=" + PrologUtils.escape(getURI());
157                    }
158                    return guessString + clexString + uriString;
159            }
160    
161            static String checkForErrors(String s) throws ACEParserException {
162                    Element xmlElement = null;
163                    try {
164                            SAXBuilder sb = new SAXBuilder();
165                            sb.setValidation(false);
166                            xmlElement = sb.build(new StringReader(s)).getRootElement();
167                    } catch (Exception ex) {
168                            // String s does not represent an XML document 
169                            return s;
170                    }
171    
172                    if (xmlElement != null && xmlElement.getName().equals("messages")) {
173                            throw new ACEParserException(xmlElement);
174                    }
175                    return s;
176            }
177    }