001 // This file is part of the Attempto Java Packages.
002 // Copyright 2008, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
003 //
004 // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
005 // terms of the GNU Lesser General Public License as published by the Free Software Foundation,
006 // either version 3 of the License, or (at your option) any later version.
007 //
008 // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
009 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
010 // PURPOSE. See the GNU Lesser General Public License for more details.
011 //
012 // You should have received a copy of the GNU Lesser General Public License along with the Attempto
013 // Java Packages. If not, see http://www.gnu.org/licenses/.
014
015 package ch.uzh.ifi.attempto.acewiki.core.ontology;
016
017 import static ch.uzh.ifi.attempto.ape.OutputType.DRSPP;
018 import static ch.uzh.ifi.attempto.ape.OutputType.OWLRDF;
019 import static ch.uzh.ifi.attempto.ape.OutputType.OWLXML;
020 import static ch.uzh.ifi.attempto.ape.OutputType.PARAPHRASE1;
021 import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAX;
022 import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAXPP;
023
024 import java.util.ArrayList;
025 import java.util.Arrays;
026 import java.util.List;
027
028 import org.semanticweb.owl.model.OWLOntology;
029 import org.semanticweb.owl.model.OWLOntologyCreationException;
030
031 import ch.uzh.ifi.attempto.acewiki.core.text.OntologyTextElement;
032 import ch.uzh.ifi.attempto.acewiki.core.text.TextElemFactory;
033 import ch.uzh.ifi.attempto.ape.ACEParserResult;
034 import ch.uzh.ifi.attempto.ape.APELocal;
035 import ch.uzh.ifi.attempto.ape.Lexicon;
036 import ch.uzh.ifi.attempto.ape.LexiconEntry;
037 import ch.uzh.ifi.attempto.ape.MessageContainer;
038 import ch.uzh.ifi.attempto.preditor.text.BasicTextElement;
039 import ch.uzh.ifi.attempto.preditor.text.TextContainer;
040 import ch.uzh.ifi.attempto.preditor.text.TextElement;
041
042 /**
043 * This class represents an ACE sentence which is either a declarative statement or a question.
044 * Some declarative sentences can be translated into OWL and can participate in reasoning. Other
045 * sentences have no OWL representation and do not participate in reasoning.
046 *<p>
047 * Each sentence belongs to exactly one article of an ontology element (the owner).
048 *<p>
049 * Parsing of the sentence is done lasily, i.e. at the first time when a parsing result is required.
050 * Parsing fails silently. No exceptions are thrown if a sentence is not ACE compliant.
051 *
052 * @author Tobias Kuhn
053 */
054 public class Sentence {
055
056 private String text;
057 private Ontology ontology;
058 private OntologyElement owner;
059 private boolean integrated = false;
060
061 // These fields are evaluated lazily:
062 private TextContainer textContainer;
063 private ACEParserResult parserResult;
064 private String owlxml;
065 private Boolean reasonerParticipant;
066 private Boolean isOWL;
067 private Boolean isOWLSWRL;
068 private OWLOntology owlOntology;
069
070 private List<Individual> answerCache;
071 private long answerCacheStateID = -1;
072
073 /**
074 * Creates a new asserted sentence. Asserted sentences must have an owner.
075 *
076 * @param text The sentence text.
077 * @param owner The owner ontology element.
078 */
079 public Sentence(String text, OntologyElement owner) {
080 this.ontology = null;
081 this.owner = owner;
082 setText(text);
083 }
084
085 /**
086 * Creates a new inferred sentence. Inferred sentence have no owner.
087 *
088 * @param text The sentence text.
089 * @param ontology The ontology.
090 */
091 public Sentence(String text, Ontology ontology) {
092 this.ontology = ontology;
093 this.owner = null;
094 setText(text);
095 }
096
097 /**
098 * Generates sentence objects out of a text container.
099 *
100 * @param textContainer The text container.
101 * @param owner The owner ontology element of the sentences.
102 * @return A list of sentences.
103 */
104 public static List<Sentence> generateSentences(TextContainer textContainer, OntologyElement owner) {
105 ArrayList<Sentence> l = new ArrayList<Sentence>();
106 TextContainer c = new TextContainer();
107 for (TextElement e : textContainer.getTextElements()) {
108 c.addElement(e);
109 if (e.getText().matches("[.?]")) {
110 l.add(new Sentence(getUnderscoredText(c), owner));
111 c = new TextContainer();
112 }
113 }
114 return l;
115 }
116
117 /**
118 * Loads a sentence from a serialized form.
119 *
120 * @param serializedSentence The serialized sentence as a string.
121 * @param owner The owner ontology element of the sentence.
122 * @return A sentence object.
123 */
124 static Sentence loadSentence(String serializedSentence, OntologyElement owner) {
125 Sentence sentence = new Sentence(serializedSentence.substring(2), owner);
126 sentence.setIntegrated(serializedSentence.charAt(0) == '|');
127 return sentence;
128 }
129
130 private Ontology getOntology() {
131 if (ontology == null) {
132 ontology = owner.getOntology();
133 }
134 return ontology;
135 }
136
137 /**
138 * Returns a list of text elements that represent the tokens of this sentence.
139 *
140 * @return A token list.
141 */
142 public List<TextElement> getTextElements() {
143 if (textContainer == null) {
144 tokenize();
145 }
146 textContainer.updateConnections();
147 return textContainer.getTextElements();
148 }
149
150 /**
151 * Returns the owner ontology element of this sentence.
152 *
153 * @return The owner ontology element.
154 */
155 public OntologyElement getOwner() {
156 return owner;
157 }
158
159 private void setText(String text) {
160 // remove trailing blank spaces.
161 this.text = text.replaceFirst("\\s+$", "");
162 }
163
164 /**
165 * Returns the sentence text as a string. Underscores are used for compound words,
166 * e.g. "credit_card".
167 *
168 * @return The sentence text as a string.
169 */
170 public String getText() {
171 if (textContainer == null) {
172 tokenize();
173 }
174 return getUnderscoredText(textContainer);
175 }
176
177 /**
178 * Returns the sentence text as a string with underscores displayed as blanks. Compound
179 * words containing underscores like "credit_cards" are pretty-printed with blank characters:
180 * "credit card".
181 *
182 * @return The sentence text as a pretty-printed string.
183 */
184 public String getPrettyText() {
185 return textContainer.getText();
186 }
187
188 /**
189 * Returns the parser result object.
190 *
191 * @return The parser result object.
192 */
193 public ACEParserResult getParserResult() {
194 if (parserResult == null) {
195 parse();
196 }
197 return parserResult;
198 }
199
200 /**
201 * Returns the OWL/XML representation of this sentence as a string.
202 *
203 * @return The OWL/XML representation.
204 */
205 public String getOWLXML() {
206 if (owlxml == null) {
207 parse();
208 }
209 return owlxml;
210 }
211
212 /**
213 * Returns true if this sentence participates in reasoning.
214 *
215 * @return true if this sentence participates in reasoning.
216 */
217 public boolean isReasonerParticipant() {
218 if (reasonerParticipant == null) {
219 parse();
220 }
221 return reasonerParticipant;
222 }
223
224 /**
225 * Returns true if this sentence has an OWL representation.
226 *
227 * @return true if this sentence has an OWL representation.
228 */
229 public boolean isOWL() {
230 if (isOWL == null) {
231 parse();
232 }
233 return isOWL;
234 }
235
236 /**
237 * Returns true if this sentence has an OWL or SWRL representation.
238 *
239 * @return true if this sentence has an OWL or SWRL representation.
240 */
241 public boolean isOWLSWRL() {
242 if (isOWLSWRL == null) {
243 parse();
244 }
245 return isOWLSWRL;
246 }
247
248 /**
249 * Returns the OWL ontology object that contains the OWL representation of this
250 * sentence. Null is returned if there is no OWL representation of this sentence
251 * or if the creation of the OWL ontology object failed.
252 *
253 * @return The OWL ontology object.
254 */
255 public OWLOntology getOWLOntology() {
256 if (owlxml == null) {
257 parse();
258 }
259 return owlOntology;
260 }
261
262 /**
263 * Tokenizes the sentence text. A text container object is created.
264 */
265 private void tokenize() {
266 textContainer = new TextContainer();
267
268 String t = "&" + text + "&";
269 t = t.replaceAll(" ", "&");
270 t = t.replaceAll("\\.", "&.&");
271 t = t.replaceAll("\\?", "&?&");
272 t = t.replaceAll("&of&", " of&");
273 t = t.replaceAll("&by&", " by&");
274
275 ArrayList<String> tokens = new ArrayList<String>(Arrays.asList(t.split("&")));
276
277 while (tokens.contains("")) {
278 tokens.remove("");
279 }
280
281 toString();
282
283 for (String s : tokens) {
284 if (s.startsWith("<")) {
285 OntologyTextElement te;
286 try {
287 long oeId = new Long(s.substring(1, s.indexOf(",")));
288 int wordNumber = new Integer(s.substring(s.indexOf(",")+1, s.indexOf(">")));
289 OntologyElement oe = getOntology().get(oeId);
290 te = TextElemFactory.createTextElement(oe, wordNumber);
291 } catch (Exception ex) {
292 throw new RuntimeException("Could not resolve link: " + s, ex);
293 }
294 if (te != null) {
295 textContainer.addElement(te);
296 } else {
297 throw new RuntimeException("Could not resolve link: " + s);
298 }
299 } else {
300 OntologyElement oe = getOntology().get(s);
301
302 if (oe == null) {
303 textContainer.addElement(new BasicTextElement(s));
304 } else {
305 // not 100% clean solution (several word forms of the same word can be identical):
306 int wordId = Arrays.asList(oe.getWords()).indexOf(s);
307 if (oe instanceof Individual) {
308 // this should probably be done at a different place...
309 Individual ind = (Individual) oe;
310 if (ind.hasDefiniteArticle(wordId-1) && textContainer.getTextElementsCount() > 0) {
311 String precedingText = textContainer.getTextElement(textContainer.getTextElementsCount()-1).getText();
312 if (precedingText.equals("the") || precedingText.equals("The")) {
313 textContainer.removeLastElement();
314 wordId--;
315 }
316 }
317 }
318 textContainer.addElement(TextElemFactory.createTextElement(oe, wordId));
319 }
320 }
321 }
322 }
323
324 /**
325 * Parses the sentence text. The OWL and SWRL representations are calculated if possible.
326 * This method is called automatically the first time a parsing result is needed.
327 * Furthermore, it needs to be called each time a word form of an ontology element
328 * (that occurs in the sentence) has changed.
329 */
330 synchronized void parse() {
331 APELocal.getInstance().setURI(getOntology().getURI());
332 APELocal.getInstance().setClexEnabled(false);
333 Lexicon lexicon = new Lexicon();
334 for (TextElement te : getTextElements()) {
335 if (te instanceof OntologyTextElement) {
336 OntologyElement oe = ((OntologyTextElement) te).getOntologyElement();
337 for (LexiconEntry le : oe.getLexiconEntries()) {
338 lexicon.addEntry(le);
339 }
340 }
341 }
342 parserResult = APELocal.getInstance().getMultiOutput(getText(), lexicon, PARAPHRASE1, SYNTAX, SYNTAXPP, OWLXML, OWLRDF, DRSPP);
343 MessageContainer mc = parserResult.getMessageContainer();
344 owlxml = parserResult.get(OWLXML);
345 if (owlxml != null) {
346 // Every OWL ontology object needs its own URI:
347 long hashCode = (long) getText().hashCode() - Integer.MIN_VALUE;
348 owlxml = owlxml.replace("URI=\"" + ontology.getURI() + "\">", "URI=\"" + ontology.getURI() + "/" + hashCode + "\">");
349 }
350 reasonerParticipant =
351 (mc.getMessages("owl").size() == 0) &&
352 (owlxml.indexOf("<swrl:Imp>") < 0) &&
353 (owlxml.indexOf("<ObjectExistsSelf>") < 0) &&
354 (owlxml.indexOf("<TransitiveObjectProperty>") < 0) &&
355 (owlxml.indexOf("<SubObjectPropertyChain>") < 0) &&
356 (owlxml.length() > 0);
357 isOWL =
358 (mc.getMessages("owl").size() == 0) &&
359 (owlxml.indexOf("<swrl:Imp>") < 0) &&
360 (owlxml.length() > 0);
361 isOWLSWRL =
362 (mc.getMessages("owl").size() == 0) &&
363 (owlxml.length() > 0);
364 owlOntology = null;
365 if (isOWL) {
366 try {
367 owlOntology = getOntology().readOWLOntology(owlxml);
368 if (owlOntology.isEmpty()) {
369 reasonerParticipant = false;
370 isOWL = false;
371 isOWLSWRL = false;
372 }
373 } catch (OWLOntologyCreationException ex) {
374 ex.printStackTrace();
375 }
376 }
377 if (isQuestion()) {
378 reasonerParticipant = false;
379 }
380 String messages = mc.toString();
381 if (messages.length() > 0) {
382 System.err.println("Parser messages: " + messages);
383 }
384 }
385
386 /**
387 * This method tries to reassert a sentence that is not yet integrated. This is
388 * used for sentences that have an OWL representation but the integration failed
389 * because it introduced an inconsistency. Later, when the ontology has changed,
390 * the integration might succeed.
391 *
392 * @return An integer value denoting the success/failure of the operation.
393 * @see Ontology#commitSentence(Sentence)
394 */
395 public int reassert() {
396 int success = getOntology().commitSentence(this);
397 getOntology().save(owner);
398 return success;
399 }
400
401 /**
402 * Returns true if the sentence is integrated into the ontology.
403 *
404 * @return true if the sentence is integrated into the ontology.
405 */
406 public boolean isIntegrated() {
407 return integrated;
408 }
409
410 void setIntegrated(boolean integrated) {
411 this.integrated = integrated;
412 }
413
414 /**
415 * Returns true if the sentence is a question.
416 *
417 * @return true if the sentence is a question.
418 */
419 public boolean isQuestion() {
420 return text.substring(text.length()-1).equals("?");
421 }
422
423 /**
424 * Checks if the sentence is inferred or asserted.
425 *
426 * @return true if the sentence is inferred, false if it is asserted.
427 */
428 public boolean isInferred() {
429 return owner == null;
430 }
431
432 /**
433 * Checks whether the sentence contains the given word form (by word number) of the
434 * given ontology element.
435 *
436 * @param e The ontology element.
437 * @param wordNumber The word number.
438 * @return true if the word form occurs in this sentence.
439 */
440 public boolean contains(OntologyElement e, int wordNumber) {
441 if (textContainer == null) {
442 tokenize();
443 }
444 for (TextElement t : textContainer.getTextElements()) {
445 if (t instanceof OntologyTextElement) {
446 OntologyTextElement ot = (OntologyTextElement) t;
447 if (e == ot.getOntologyElement() && wordNumber == -1) return true;
448 if (e == ot.getOntologyElement() && wordNumber == ot.getWordNumber()) return true;
449 }
450 }
451 return false;
452 }
453
454 /**
455 * Checks whether the sentence contains the given ontology element (no matter which
456 * word form).
457 *
458 * @param e The ontology element.
459 * @return true if the ontology element occurs in this sentence.
460 */
461 public boolean contains(OntologyElement e) {
462 return contains(e, -1);
463 }
464
465 /**
466 * Returns all individuals that answer this question. Questions in AceWiki are "DL Queries".
467 * They describe a concept and the answer consists of all individuals that belong to this concept.
468 * The null value is returned if the sentence is not a question.
469 *
470 * @return A list of individuals that are the answer for the question.
471 * @see Ontology#getAnswer(Sentence)
472 */
473 public synchronized List<Individual> getAnswer() {
474 if (!isQuestion()) return null;
475
476 Ontology o = getOntology();
477 if (answerCacheStateID != o.getStateID()) {
478 answerCache = o.getAnswer(this);
479 answerCacheStateID = o.getStateID();
480 }
481 if (answerCache == null) {
482 return null;
483 } else {
484 return new ArrayList<Individual>(answerCache);
485 }
486 }
487
488 /**
489 * Returns true if the sentence is a question and the answer to the question is cached and does
490 * not have to be recalculated.
491 *
492 * @return true if the answer is cached.
493 */
494 public boolean isAnswerCached() {
495 if (!isQuestion()) return false;
496 return answerCacheStateID == getOntology().getStateID();
497 }
498
499 private static String getUnderscoredText(TextContainer textContainer) {
500 String t = "";
501 for (TextElement te : textContainer.getTextElements()) {
502 if (te instanceof OntologyTextElement) {
503 t += " " + ((OntologyTextElement) te).getUnderscoredText();
504 } else if (te.getText().matches("[.?]")) {
505 t += te.getText();
506 } else {
507 t += " " + te.getText();
508 }
509 }
510 if (t.length() > 0) {
511 t = t.substring(1);
512 }
513 return t;
514 }
515
516 String serialize() {
517 if (textContainer == null) {
518 tokenize();
519 }
520 String s;
521 if (integrated) {
522 s = "|";
523 } else {
524 s = "#";
525 }
526 for (TextElement te : textContainer.getTextElements()) {
527 if (te instanceof OntologyTextElement) {
528 OntologyTextElement ot = (OntologyTextElement) te;
529 s += " <" + ot.getOntologyElement().getId() + "," + ot.getWordNumber() + ">";
530 } else {
531 s += " " + te.getText();
532 }
533 }
534 return s + "\n";
535 }
536
537 public String toString() {
538 return getText();
539 }
540
541 }