001 // This file is part of the Attempto Java Packages.
002 // Copyright 2008, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
003 //
004 // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
005 // terms of the GNU Lesser General Public License as published by the Free Software Foundation,
006 // either version 3 of the License, or (at your option) any later version.
007 //
008 // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
009 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
010 // PURPOSE. See the GNU Lesser General Public License for more details.
011 //
012 // You should have received a copy of the GNU Lesser General Public License along with the Attempto
013 // Java Packages. If not, see http://www.gnu.org/licenses/.
014
015 package ch.uzh.ifi.attempto.ape;
016
017 import java.io.StringReader;
018
019 import org.jdom.Element;
020 import org.jdom.input.SAXBuilder;
021
022 /**
023 * This is an interface to the Attempto Parsing Engine (APE), that translates sentences in
024 * Attempto Controlled English (ACE) into logic.
025 *
026 * @author Kaarel Kaljurand
027 * @author Tobias Kuhn
028 */
029 public abstract class ACEParser {
030
031 private boolean guessingEnabled = false;
032 private boolean clexEnabled = true;
033 private String uri;
034
035 /**
036 * Creates a new ACEParser object.
037 */
038 protected ACEParser() {}
039
040 /**
041 * Returns one single output for the given ACE text using the given lexicon. See the enumeration
042 * OutputType for further information about the possible outputs. If parsing succeeds then a string
043 * containing the result of the parser is returned. In the case of failure, an ACEParserException
044 * is thrown containing the error messages.
045 *
046 * @param text The ACE text to be parsed.
047 * @param lexicon The lexicon to be loaded.
048 * @param outputType The kind of output that should be returned.
049 * @return The result as a string.
050 * @throws ACEParserException Contains the error messages if an error occurred.
051 * @see #getMultiOutput(String, Lexicon, OutputType...)
052 */
053 public abstract String getSoloOutput(String text, Lexicon lexicon, OutputType outputType) throws ACEParserException;
054
055 /**
056 * Returns one single output for the given ACE text using no lexicon.
057 *
058 * @param text The ACE text to be parsed.
059 * @param outputType The kind of output that should be returned.
060 * @return The result as a string.
061 * @throws ACEParserException Contains the error messages if an error occurred.
062 * @see #getSoloOutput(String, Lexicon, OutputType)
063 */
064 public final String getSoloOutput(String text, OutputType outputType) throws ACEParserException {
065 return getSoloOutput(text, null, outputType);
066 }
067
068 /**
069 * Returns multiple outputs for the given ACE text using the given lexicon. See the enumeration
070 * OutputType for further information about the possible outputs. The result is returned as an
071 * ACEParserResult object.
072 *
073 * @param text The ACE text to be parsed.
074 * @param lexicon The lexicon to be loaded.
075 * @param outputTypes The kind of outputs that should be returned.
076 * @return A ParserResult object containing the outputs.
077 * @see #getSoloOutput(String, Lexicon, OutputType)
078 */
079 public abstract ACEParserResult getMultiOutput(String text, Lexicon lexicon, OutputType... outputTypes);
080
081 /**
082 * Returns multiple outputs for the given ACE text using no lexicon.
083 *
084 * @param text The ACE text to be parsed.
085 * @param outputTypes The kind of outputs that should be returned.
086 * @return A ParserResult object containing the outputs.
087 * @see #getMultiOutput(String, Lexicon, OutputType...)
088 */
089 public final ACEParserResult getMultiOutput(String text, OutputType... outputTypes) {
090 return getMultiOutput(text, null, outputTypes);
091 }
092
093 /**
094 * Determines whether unknown words should be guessed. If false, unknown words lead to an error
095 * message. Note that unknown word guessing is not always perfect. The default value is false.
096 *
097 * @param guessingEnabled true if unknown words should be guessed. false otherwise.
098 */
099 public void setGuessingEnabled(boolean guessingEnabled) {
100 this.guessingEnabled = guessingEnabled;
101 }
102
103 /**
104 * Returns whether unknown words should be guessed.
105 */
106 public boolean isGuessingEnabled() {
107 return guessingEnabled;
108 }
109
110 /**
111 * Sets the namespace URI to be used for outputs like OWL.
112 *
113 * @param uri The namespace URI.
114 */
115 public void setURI(String uri) {
116 // TODO: BUG: should we use a URI instead of String?
117 this.uri = uri;
118 }
119
120 /**
121 * Returns the namespace URI to be used for outputs like OWL.
122 *
123 * @return The namespace URI.
124 */
125 public String getURI() {
126 return uri;
127 }
128
129 /**
130 * Determines whether the built-in lexicon should be used. The default value is true.
131 *
132 * @param clexEnabled true if the built-in lexicon should be used. false otherwise.
133 */
134 public void setClexEnabled(boolean clexEnabled) {
135 this.clexEnabled = clexEnabled;
136 }
137
138 /**
139 * Returns whether the built-in lexicon should be used.
140 */
141 public boolean isClexEnabled() {
142 return clexEnabled;
143 }
144
145 String getOptions() {
146 String guessString = "";
147 if (isGuessingEnabled()) guessString = ",guess=on";
148 String clexString = ",noclex=on";
149 if (isClexEnabled()) clexString = ",noclex=off";
150 String uriString = "";
151 if (getURI() != null) {
152 uriString = ",uri=" + PrologUtils.escape(getURI());
153 }
154 return guessString + clexString + uriString;
155 }
156
157 static String checkForErrors(String s) throws ACEParserException {
158 Element xmlElement = null;
159 try {
160 SAXBuilder sb = new SAXBuilder();
161 sb.setValidation(false);
162 xmlElement = sb.build(new StringReader(s)).getRootElement();
163 } catch (Exception ex) {
164 // String s does not represent an XML document
165 return s;
166 }
167
168 if (xmlElement != null && xmlElement.getName().equals("messages")) {
169 throw new ACEParserException(xmlElement);
170 }
171 return s;
172 }
173 }