001 // This file is part of the Attempto Java Packages.
002 // Copyright 2008-2009, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
003 //
004 // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
005 // terms of the GNU Lesser General Public License as published by the Free Software Foundation,
006 // either version 3 of the License, or (at your option) any later version.
007 //
008 // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
009 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
010 // PURPOSE. See the GNU Lesser General Public License for more details.
011 //
012 // You should have received a copy of the GNU Lesser General Public License along with the Attempto
013 // Java Packages. If not, see http://www.gnu.org/licenses/.
014
015 package ch.uzh.ifi.attempto.ape;
016
017 import java.io.StringReader;
018
019 import org.jdom.Element;
020 import org.jdom.input.SAXBuilder;
021
022 /**
023 * This is an interface to the Attempto Parsing Engine (APE), that translates sentences in
024 * Attempto Controlled English (ACE) into logic.
025 *
026 * @author Kaarel Kaljurand
027 * @author Tobias Kuhn
028 */
029 public abstract class ACEParser {
030
031 private boolean guessingEnabled = false;
032 private boolean clexEnabled = true;
033 private String uri;
034
035 /**
036 * Creates a new ACEParser object.
037 */
038 protected ACEParser() {}
039
040 /**
041 * Returns one single output for the given ACE text using the given lexicon. See the enumeration
042 * OutputType for further information about the possible outputs. If parsing succeeds then a string
043 * containing the result of the parser is returned. In the case of failure, an ACEParserException
044 * is thrown containing the error messages.
045 *
046 * @param text The ACE text to be parsed.
047 * @param lexicon The lexicon to be loaded.
048 * @param outputType The kind of output that should be returned.
049 * @return The result as a string.
050 * @throws ACEParserException Contains the error messages if an error occurred.
051 * @see #getMultiOutput(String, Lexicon, OutputType...)
052 */
053 public abstract String getSoloOutput(String text, Lexicon lexicon, OutputType outputType) throws ACEParserException;
054
055 /**
056 * Returns one single output for the given ACE text using no lexicon.
057 *
058 * @param text The ACE text to be parsed.
059 * @param outputType The kind of output that should be returned.
060 * @return The result as a string.
061 * @throws ACEParserException Contains the error messages if an error occurred.
062 * @see #getSoloOutput(String, Lexicon, OutputType)
063 */
064 public final String getSoloOutput(String text, OutputType outputType) throws ACEParserException {
065 return getSoloOutput(text, null, outputType);
066 }
067
068 /**
069 * Returns multiple outputs for the given ACE text using the given lexicon. See the enumeration
070 * OutputType for further information about the possible outputs. The result is returned as an
071 * ACEParserResult object.
072 *
073 * @param text The ACE text to be parsed.
074 * @param lexicon The lexicon to be loaded.
075 * @param outputTypes The kind of outputs that should be returned.
076 * @return A ParserResult object containing the outputs.
077 * @see #getSoloOutput(String, Lexicon, OutputType)
078 */
079 public abstract ACEParserResult getMultiOutput(String text, Lexicon lexicon, OutputType... outputTypes);
080
081 /**
082 * Returns multiple outputs for the given ACE text using no lexicon.
083 *
084 * @param text The ACE text to be parsed.
085 * @param outputTypes The kind of outputs that should be returned.
086 * @return A ParserResult object containing the outputs.
087 * @see #getMultiOutput(String, Lexicon, OutputType...)
088 */
089 public final ACEParserResult getMultiOutput(String text, OutputType... outputTypes) {
090 return getMultiOutput(text, null, outputTypes);
091 }
092
093 /**
094 * Determines whether unknown words should be guessed. If false, unknown words lead to an error
095 * message. Note that unknown word guessing is not always perfect. The default value is false.
096 *
097 * @param guessingEnabled true if unknown words should be guessed. false otherwise.
098 */
099 public void setGuessingEnabled(boolean guessingEnabled) {
100 this.guessingEnabled = guessingEnabled;
101 }
102
103 /**
104 * Returns whether unknown words should be guessed.
105 *
106 * @return true if guessing is enabled.
107 */
108 public boolean isGuessingEnabled() {
109 return guessingEnabled;
110 }
111
112 /**
113 * Sets the namespace URI to be used for outputs like OWL.
114 *
115 * @param uri The namespace URI.
116 */
117 public void setURI(String uri) {
118 // TODO: BUG: should we use a URI instead of String?
119 this.uri = uri;
120 }
121
122 /**
123 * Returns the namespace URI to be used for outputs like OWL.
124 *
125 * @return The namespace URI.
126 */
127 public String getURI() {
128 return uri;
129 }
130
131 /**
132 * Determines whether the built-in lexicon should be used. The default value is true.
133 *
134 * @param clexEnabled true if the built-in lexicon should be used. false otherwise.
135 */
136 public void setClexEnabled(boolean clexEnabled) {
137 this.clexEnabled = clexEnabled;
138 }
139
140 /**
141 * Returns whether the built-in lexicon is used.
142 *
143 * @return true if the built-in lexicon is used.
144 */
145 public boolean isClexEnabled() {
146 return clexEnabled;
147 }
148
149 String getOptions() {
150 String guessString = "";
151 if (isGuessingEnabled()) guessString = ",guess=on";
152 String clexString = ",noclex=on";
153 if (isClexEnabled()) clexString = ",noclex=off";
154 String uriString = "";
155 if (getURI() != null) {
156 uriString = ",uri=" + PrologUtils.escape(getURI());
157 }
158 return guessString + clexString + uriString;
159 }
160
161 static String checkForErrors(String s) throws ACEParserException {
162 Element xmlElement = null;
163 try {
164 SAXBuilder sb = new SAXBuilder();
165 sb.setValidation(false);
166 xmlElement = sb.build(new StringReader(s)).getRootElement();
167 } catch (Exception ex) {
168 // String s does not represent an XML document
169 return s;
170 }
171
172 if (xmlElement != null && xmlElement.getName().equals("messages")) {
173 throw new ACEParserException(xmlElement);
174 }
175 return s;
176 }
177 }