// This file is part of the Attempto Java Packages.
// Copyright 2008-2009, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
//
// The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
// terms of the GNU Lesser General Public License as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License along with the Attempto
// Java Packages. If not, see http://www.gnu.org/licenses/.

package ch.uzh.ifi.attempto.ape;

import java.io.StringReader;

import org.jdom.Element;
import org.jdom.input.SAXBuilder;

/**
 * Abstract front end to the Attempto Parsing Engine (APE), which translates sentences written in
 * Attempto Controlled English (ACE) into logic. Concrete subclasses supply the actual parsing
 * by implementing the two abstract output methods; this class holds the shared settings
 * (unknown-word guessing, built-in lexicon, namespace URI).
 *
 * @author Kaarel Kaljurand
 * @author Tobias Kuhn
 */
public abstract class ACEParser {

    private boolean guessingEnabled = false;
    private boolean clexEnabled = true;
    private String uri;

    /**
     * Creates a new ACEParser object with guessing disabled, the built-in lexicon enabled and
     * no namespace URI set.
     */
    protected ACEParser() {
        // Nothing to initialise beyond the field defaults.
    }

    /**
     * Parses the given ACE text with the given lexicon and returns exactly one output. The
     * enumeration OutputType describes the available outputs. On success the parser result is
     * returned as a string; on failure an ACEParserException carrying the error messages is
     * thrown.
     *
     * @param text The ACE text to be parsed.
     * @param lexicon The lexicon to be loaded.
     * @param outputType The kind of output that should be returned.
     * @return The result as a string.
     * @throws ACEParserException Contains the error messages if an error occurred.
     * @see #getMultiOutput(String, Lexicon, OutputType...)
     */
    public abstract String getSoloOutput(String text, Lexicon lexicon, OutputType outputType) throws ACEParserException;

    /**
     * Parses the given ACE text without a lexicon and returns exactly one output. This simply
     * delegates to {@link #getSoloOutput(String, Lexicon, OutputType)} with a null lexicon.
     *
     * @param text The ACE text to be parsed.
     * @param outputType The kind of output that should be returned.
     * @return The result as a string.
     * @throws ACEParserException Contains the error messages if an error occurred.
     * @see #getSoloOutput(String, Lexicon, OutputType)
     */
    public final String getSoloOutput(String text, OutputType outputType) throws ACEParserException {
        return getSoloOutput(text, (Lexicon) null, outputType);
    }

    /**
     * Parses the given ACE text with the given lexicon and returns several outputs at once,
     * bundled in an ACEParserResult object. The enumeration OutputType describes the available
     * outputs.
     *
     * @param text The ACE text to be parsed.
     * @param lexicon The lexicon to be loaded.
     * @param outputTypes The kind of outputs that should be returned.
     * @return An ACEParserResult object containing the outputs.
     * @see #getSoloOutput(String, Lexicon, OutputType)
     */
    public abstract ACEParserResult getMultiOutput(String text, Lexicon lexicon, OutputType... outputTypes);

    /**
     * Parses the given ACE text without a lexicon and returns several outputs at once. This
     * simply delegates to {@link #getMultiOutput(String, Lexicon, OutputType...)} with a null
     * lexicon.
     *
     * @param text The ACE text to be parsed.
     * @param outputTypes The kind of outputs that should be returned.
     * @return An ACEParserResult object containing the outputs.
     * @see #getMultiOutput(String, Lexicon, OutputType...)
     */
    public final ACEParserResult getMultiOutput(String text, OutputType... outputTypes) {
        return getMultiOutput(text, (Lexicon) null, outputTypes);
    }

    /**
     * Switches the guessing of unknown words on or off. When switched off, unknown words lead
     * to an error message. Note that unknown word guessing is not always perfect. Guessing is
     * off by default.
     *
     * @param guessingEnabled true if unknown words should be guessed. false otherwise.
     */
    public void setGuessingEnabled(boolean guessingEnabled) {
        this.guessingEnabled = guessingEnabled;
    }

    /**
     * Tells whether unknown words should be guessed.
     *
     * @return true if unknown words should be guessed. false otherwise.
     */
    public boolean isGuessingEnabled() {
        return guessingEnabled;
    }

    /**
     * Sets the namespace URI to be used for outputs like OWL.
     *
     * @param uri The namespace URI.
     */
    public void setURI(String uri) {
        // TODO: BUG: should this take a URI object instead of a String?
        this.uri = uri;
    }

    /**
     * Returns the namespace URI to be used for outputs like OWL.
     *
     * @return The namespace URI, or null if none has been set.
     */
    public String getURI() {
        return uri;
    }

    /**
     * Switches the use of the built-in lexicon on or off. The built-in lexicon is used by
     * default.
     *
     * @param clexEnabled true if the built-in lexicon should be used. false otherwise.
     */
    public void setClexEnabled(boolean clexEnabled) {
        this.clexEnabled = clexEnabled;
    }

    /**
     * Tells whether the built-in lexicon should be used.
     *
     * @return true if the built-in lexicon should be used. false otherwise.
     */
    public boolean isClexEnabled() {
        return clexEnabled;
    }

    /**
     * Assembles the option fragment that reflects the current settings. The fragment consists
     * of, in this order: ",guess=on" if guessing is enabled; ",noclex=off" if the built-in
     * lexicon is enabled and ",noclex=on" otherwise; and ",uri=" followed by the
     * Prolog-escaped namespace URI if a URI has been set.
     *
     * @return The concatenated option fragment.
     */
    String getOptions() {
        StringBuilder options = new StringBuilder();
        if (isGuessingEnabled()) {
            options.append(",guess=on");
        }
        options.append(isClexEnabled() ? ",noclex=off" : ",noclex=on");
        if (getURI() != null) {
            options.append(",uri=" + PrologUtils.escape(getURI()));
        }
        return options.toString();
    }

    /**
     * Inspects a parser response for an error report. The string is parsed as XML without
     * validation; if its root element is named "messages", an ACEParserException built from
     * that element is thrown. In every other case, including when the string cannot be parsed
     * as XML at all, the string is handed back unchanged.
     *
     * @param s The parser response to inspect.
     * @return The unchanged input string if it does not represent an error report.
     * @throws ACEParserException If the root element of the XML document is "messages".
     */
    static String checkForErrors(String s) throws ACEParserException {
        Element root;
        try {
            SAXBuilder builder = new SAXBuilder();
            builder.setValidation(false);
            root = builder.build(new StringReader(s)).getRootElement();
        } catch (Exception ex) {
            // The string does not represent an XML document, so it cannot be an error report.
            return s;
        }

        if (root == null || !"messages".equals(root.getName())) {
            return s;
        }
        throw new ACEParserException(root);
    }
}