001 // This file is part of the Attempto Java Packages.
002 // Copyright 2008, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
003 //
004 // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
005 // terms of the GNU Lesser General Public License as published by the Free Software Foundation,
006 // either version 3 of the License, or (at your option) any later version.
007 //
008 // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
009 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
010 // PURPOSE. See the GNU Lesser General Public License for more details.
011 //
012 // You should have received a copy of the GNU Lesser General Public License along with the Attempto
013 // Java Packages. If not, see http://www.gnu.org/licenses/.
014
015 package ch.uzh.ifi.attempto.acewiki.core.ontology;
016
017 import static ch.uzh.ifi.attempto.ape.OutputType.DRSPP;
018 import static ch.uzh.ifi.attempto.ape.OutputType.OWLRDF;
019 import static ch.uzh.ifi.attempto.ape.OutputType.OWLXML;
020 import static ch.uzh.ifi.attempto.ape.OutputType.PARAPHRASE1;
021 import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAX;
022 import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAXPP;
023
024 import java.util.ArrayList;
025 import java.util.Arrays;
026 import java.util.List;
027
028 import org.semanticweb.owl.model.OWLOntology;
029 import org.semanticweb.owl.model.OWLOntologyCreationException;
030
031 import ch.uzh.ifi.attempto.acewiki.core.text.OntologyTextElement;
032 import ch.uzh.ifi.attempto.acewiki.core.text.TextElemFactory;
033 import ch.uzh.ifi.attempto.ape.ACEParserResult;
034 import ch.uzh.ifi.attempto.ape.APELocal;
035 import ch.uzh.ifi.attempto.ape.Lexicon;
036 import ch.uzh.ifi.attempto.ape.LexiconEntry;
037 import ch.uzh.ifi.attempto.ape.MessageContainer;
038 import ch.uzh.ifi.attempto.preditor.text.BasicTextElement;
039 import ch.uzh.ifi.attempto.preditor.text.TextContainer;
040 import ch.uzh.ifi.attempto.preditor.text.TextElement;
041
042 /**
043 * This class represents an ACE sentence which is either a declarative statement or a question.
044 * Some declarative sentences can be translated into OWL and can participate in reasoning. Other
045 * sentences have no OWL representation and do not participate in reasoning.
046 *<p>
047 * Each sentence belongs to exactly one article of an ontology element (the owner).
048 *<p>
049 * Parsing of the sentence is done lasily, i.e. at the first time when a parsing result is required.
050 * Parsing fails silently. No exceptions are thrown if a sentence is not ACE compliant.
051 *
052 * @author Tobias Kuhn
053 */
054 public class Sentence {
055
056 private String text;
057 private Ontology ontology;
058 private OntologyElement owner;
059 private boolean integrated = false;
060
061 // These fields are evaluated lazily:
062 private TextContainer textContainer;
063 private ACEParserResult parserResult;
064 private String owlxml;
065 private Boolean reasonerParticipant;
066 private Boolean isOWL;
067 private Boolean isOWLSWRL;
068 private OWLOntology owlOntology;
069
070 private List<Individual> answerCache;
071 private long answerCacheStateID = -1;
072
073 /**
074 * Creates a new asserted sentence. Asserted sentences must have an owner.
075 *
076 * @param text The sentence text.
077 * @param owner The owner ontology element.
078 */
079 public Sentence(String text, OntologyElement owner) {
080 this.text = text;
081 this.ontology = null;
082 this.owner = owner;
083 }
084
085 /**
086 * Creates a new inferred sentence. Inferred sentence have no owner.
087 *
088 * @param text The sentence text.
089 * @param ontology The ontology.
090 */
091 public Sentence(String text, Ontology ontology) {
092 this.text = text;
093 this.ontology = ontology;
094 this.owner = null;
095 }
096
097 /**
098 * Generates sentence objects out of a text container.
099 *
100 * @param textContainer The text container.
101 * @param owner The owner ontology element of the sentences.
102 * @return A list of sentences.
103 */
104 public static List<Sentence> generateSentences(TextContainer textContainer, OntologyElement owner) {
105 ArrayList<Sentence> l = new ArrayList<Sentence>();
106 TextContainer c = new TextContainer();
107 for (TextElement e : textContainer.getTextElements()) {
108 c.addElement(e);
109 if (e.getText().matches("[.?]")) {
110 l.add(new Sentence(getUnderscoredText(c), owner));
111 c = new TextContainer();
112 }
113 }
114 return l;
115 }
116
117 /**
118 * Loads a sentence from a serialized form.
119 *
120 * @param serializedSentence The serialized sentence as a string.
121 * @param owner The owner ontology element of the sentence.
122 * @return A sentence object.
123 */
124 static Sentence loadSentence(String serializedSentence, OntologyElement owner) {
125 Sentence sentence = new Sentence(serializedSentence.substring(2), owner);
126 sentence.setIntegrated(serializedSentence.charAt(0) == '|');
127 return sentence;
128 }
129
130 private Ontology getOntology() {
131 if (ontology == null) {
132 ontology = owner.getOntology();
133 }
134 return ontology;
135 }
136
137 /**
138 * Returns a list of text elements that represent the tokens of this sentence.
139 *
140 * @return A token list.
141 */
142 public List<TextElement> getTextElements() {
143 if (textContainer == null) {
144 tokenize();
145 }
146 return textContainer.getTextElements();
147 }
148
149 /**
150 * Returns the owner ontology element of this sentence.
151 *
152 * @return The owner ontology element.
153 */
154 public OntologyElement getOwner() {
155 return owner;
156 }
157
158 /**
159 * Returns the sentence text as a string. Underscores are used for compound words,
160 * e.g. "credit_card".
161 *
162 * @return The sentence text as a string.
163 */
164 public String getText() {
165 if (textContainer == null) {
166 tokenize();
167 }
168 return getUnderscoredText(textContainer);
169 }
170
171 /**
172 * Returns the sentence text as a string with underscores displayed as blanks. Compound
173 * words containing underscores like "credit_cards" are pretty-printed with blank characters:
174 * "credit card".
175 *
176 * @return The sentence text as a pretty-printed string.
177 */
178 public String getPrettyText() {
179 return textContainer.getText();
180 }
181
182 /**
183 * Returns the parser result object.
184 *
185 * @return The parser result object.
186 */
187 public ACEParserResult getParserResult() {
188 if (parserResult == null) {
189 parse();
190 }
191 return parserResult;
192 }
193
194 /**
195 * Returns the OWL/XML representation of this sentence as a string.
196 *
197 * @return The OWL/XML representation.
198 */
199 public String getOWLXML() {
200 if (owlxml == null) {
201 parse();
202 }
203 return owlxml;
204 }
205
206 /**
207 * Returns true if this sentence participates in reasoning.
208 *
209 * @return true if this sentence participates in reasoning.
210 */
211 public boolean isReasonerParticipant() {
212 if (reasonerParticipant == null) {
213 parse();
214 }
215 return reasonerParticipant;
216 }
217
218 /**
219 * Returns true if this sentence has an OWL representation.
220 *
221 * @return true if this sentence has an OWL representation.
222 */
223 public boolean isOWL() {
224 if (isOWL == null) {
225 parse();
226 }
227 return isOWL;
228 }
229
230 /**
231 * Returns true if this sentence has an OWL or SWRL representation.
232 *
233 * @return true if this sentence has an OWL or SWRL representation.
234 */
235 public boolean isOWLSWRL() {
236 if (isOWLSWRL == null) {
237 parse();
238 }
239 return isOWLSWRL;
240 }
241
242 /**
243 * Returns the OWL ontology object that contains the OWL representation of this
244 * sentence.
245 *
246 * @return The OWL ontology object.
247 * @throws OWLOntologyCreationException If the OWL ontology object creation failed.
248 */
249 public OWLOntology getOWLOntology() throws OWLOntologyCreationException {
250 if (owlOntology == null) {
251 owlOntology = getOntology().readOWLOntology(getOWLXML());
252 }
253 return owlOntology;
254 }
255
256 /**
257 * Tokenizes the sentence text. A text container object is created.
258 */
259 private void tokenize() {
260 textContainer = new TextContainer();
261
262 String t = text;
263 t = t.replaceAll(" ", "&");
264 t = t.replaceAll("\\.", "&.&");
265 t = t.replaceAll("\\?", "&?&");
266 t = t.replaceAll("&of", " of");
267 t = t.replaceAll("&by", " by");
268
269 ArrayList<String> tokens = new ArrayList<String>(Arrays.asList(t.split("&")));
270
271 while (tokens.contains("")) {
272 tokens.remove("");
273 }
274
275 toString();
276
277 for (String s : tokens) {
278 if (s.startsWith("<")) {
279 try {
280 long oeId = new Long(s.substring(1, s.indexOf(",")));
281 int wordNumber = new Integer(s.substring(s.indexOf(",")+1, s.indexOf(">")));
282 OntologyElement oe = getOntology().get(oeId);
283 textContainer.addElement(TextElemFactory.createTextElement(oe, wordNumber));
284 } catch (Exception ex) {
285 throw new RuntimeException("Could not resolve link: " + s);
286 }
287 } else {
288 OntologyElement oe = getOntology().get(s);
289 if (oe == null) {
290 textContainer.addElement(new BasicTextElement(s));
291 } else if (oe instanceof Individual && ((Individual) oe).hasDefiniteArticle()) {
292 textContainer.removeLastElement();
293 textContainer.addElement(TextElemFactory.createTextElement(oe, 0));
294 } else {
295 // not 100% clean solution (several word forms of the same word can be identical):
296 int wordId = Arrays.asList(oe.getWords()).indexOf(s);
297 textContainer.addElement(TextElemFactory.createTextElement(oe, wordId));
298 }
299 }
300 }
301 }
302
303 /**
304 * Parses the sentence text. The OWL and SWRL representations are calculated if possible.
305 * This method is called automatically the first time a parsing result is needed.
306 * Furthermore, it needs to be called each time a word form of an ontology element
307 * (that occurs in the sentence) has changed.
308 */
309 synchronized void parse() {
310 APELocal.getInstance().setURI(getOntology().getURI());
311 APELocal.getInstance().setClexEnabled(false);
312 Lexicon lexicon = new Lexicon();
313 for (TextElement te : getTextElements()) {
314 if (te instanceof OntologyTextElement) {
315 OntologyElement oe = ((OntologyTextElement) te).getOntologyElement();
316 for (LexiconEntry le : oe.getLexiconEntries()) {
317 lexicon.addEntry(le);
318 }
319 }
320 }
321 parserResult = APELocal.getInstance().getMultiOutput(getText(), lexicon, PARAPHRASE1, SYNTAX, SYNTAXPP, OWLXML, OWLRDF, DRSPP);
322 MessageContainer mc = parserResult.getMessageContainer();
323 owlxml = parserResult.get(OWLXML);
324 if (owlxml != null) {
325 // Every OWL ontology object needs its own URI:
326 long hashCode = (long) getText().hashCode() - Integer.MIN_VALUE;
327 owlxml = owlxml.replace("URI=\"" + ontology.getURI() + "\">", "URI=\"" + ontology.getURI() + "/" + hashCode + "\">");
328 }
329 reasonerParticipant =
330 (mc.getMessages("owl").size() == 0) &&
331 (owlxml.indexOf("<swrl:Imp>") < 0) &&
332 (owlxml.indexOf("<ObjectExistsSelf>") < 0) &&
333 (owlxml.indexOf("<TransitiveObjectProperty>") < 0) &&
334 (owlxml.length() > 0);
335 if (isQuestion()) {
336 reasonerParticipant = false;
337 }
338 isOWL =
339 (mc.getMessages("owl").size() == 0) &&
340 (owlxml.indexOf("<swrl:Imp>") < 0) &&
341 (owlxml.length() > 0);
342 isOWLSWRL =
343 (mc.getMessages("owl").size() == 0) &&
344 (owlxml.length() > 0);
345 String messages = mc.toString();
346 if (messages.length() > 0) {
347 System.err.println("Parser messages: " + messages);
348 }
349 owlOntology = null;
350 }
351
352 /**
353 * This method tries to reassert a sentence that is not yet integrated. This is
354 * used for sentences that have an OWL representation but the integration failed
355 * because it introduced an inconsistency. Later, when the ontology has changed,
356 * the integration might succeed.
357 *
358 * @return An integer value denoting the success/failure of the operation.
359 * @see Ontology#commitSentence(Sentence)
360 */
361 public int reassert() {
362 return getOntology().commitSentence(this);
363 }
364
365 /**
366 * Returns true if the sentence is integrated into the ontology.
367 *
368 * @return true if the sentence is integrated into the ontology.
369 */
370 public boolean isIntegrated() {
371 return integrated;
372 }
373
374 void setIntegrated(boolean integrated) {
375 this.integrated = integrated;
376 }
377
378 /**
379 * Returns true if the sentence is a question.
380 *
381 * @return true if the sentence is a question.
382 */
383 public boolean isQuestion() {
384 return text.substring(text.length()-1).equals("?");
385 }
386
387 /**
388 * Checks if the sentence is inferred or asserted.
389 *
390 * @return true if the sentence is inferred, false if it is asserted.
391 */
392 public boolean isInferred() {
393 return owner == null;
394 }
395
396 /**
397 * Checks whether the sentence contains the given word form (by word number) of the
398 * given ontology element.
399 *
400 * @param e The ontology element.
401 * @param wordNumber The word number.
402 * @return true if the word form occurs in this sentence.
403 */
404 public boolean contains(OntologyElement e, int wordNumber) {
405 if (textContainer == null) {
406 tokenize();
407 }
408 for (TextElement t : textContainer.getTextElements()) {
409 if (t instanceof OntologyTextElement) {
410 OntologyTextElement ot = (OntologyTextElement) t;
411 if (e == ot.getOntologyElement() && wordNumber == -1) return true;
412 if (e == ot.getOntologyElement() && wordNumber == ot.getWordNumber()) return true;
413 }
414 }
415 return false;
416 }
417
418 /**
419 * Checks whether the sentence contains the given ontology element (no matter which
420 * word form).
421 *
422 * @param e The ontology element.
423 * @return true if the ontology element occurs in this sentence.
424 */
425 public boolean contains(OntologyElement e) {
426 return contains(e, -1);
427 }
428
429 /**
430 * Returns all individuals that answer this question. Questions in AceWiki are "DL Queries".
431 * They describe a concept and the answer consists of all individuals that belong to this concept.
432 * The null value is returned if the sentence is not a question.
433 *
434 * @return A list of individuals that are the answer for the question.
435 * @see Ontology#getAnswer(Sentence)
436 */
437 public List<Individual> getAnswer() {
438 if (!isQuestion()) return null;
439
440 Ontology o = getOntology();
441 if (answerCacheStateID != o.getStateID()) {
442 answerCache = o.getAnswer(this);
443 answerCacheStateID = o.getStateID();
444 }
445 return new ArrayList<Individual>(answerCache);
446 }
447
448 /**
449 * Returns true if the sentence is a question and the answer to the question is cached and does
450 * not have to be recalculated.
451 *
452 * @return true if the answer is cached.
453 */
454 public boolean isAnswerCached() {
455 if (!isQuestion()) return false;
456 return answerCacheStateID == getOntology().getStateID();
457 }
458
459 private static String getUnderscoredText(TextContainer textContainer) {
460 String t = "";
461 for (TextElement te : textContainer.getTextElements()) {
462 if (te instanceof OntologyTextElement) {
463 t += " " + ((OntologyTextElement) te).getUnderscoredText();
464 } else if (te.getText().matches("[.?]")) {
465 t += te.getText();
466 } else {
467 t += " " + te.getText();
468 }
469 }
470 if (t.length() > 0) {
471 t = t.substring(1);
472 }
473 return t;
474 }
475
476 String serialize() {
477 if (textContainer == null) {
478 tokenize();
479 }
480 String s;
481 if (integrated) {
482 s = "|";
483 } else {
484 s = "#";
485 }
486 for (TextElement te : textContainer.getTextElements()) {
487 if (te instanceof OntologyTextElement) {
488 OntologyTextElement ot = (OntologyTextElement) te;
489 s += " <" + ot.getOntologyElement().getId() + "," + ot.getWordNumber() + ">";
490 } else {
491 s += " " + te.getText();
492 }
493 }
494 return s + "\n";
495 }
496
497 public String toString() {
498 return getText();
499 }
500
501 }