001 // This file is part of the Attempto Java Packages. 002 // Copyright 2008-2009, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch). 003 // 004 // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the 005 // terms of the GNU Lesser General Public License as published by the Free Software Foundation, 006 // either version 3 of the License, or (at your option) any later version. 007 // 008 // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY 009 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 010 // PURPOSE. See the GNU Lesser General Public License for more details. 011 // 012 // You should have received a copy of the GNU Lesser General Public License along with the Attempto 013 // Java Packages. If not, see http://www.gnu.org/licenses/. 014 015 package ch.uzh.ifi.attempto.acewiki.core.ontology; 016 017 import static ch.uzh.ifi.attempto.ape.OutputType.DRSPP; 018 import static ch.uzh.ifi.attempto.ape.OutputType.OWLRDF; 019 import static ch.uzh.ifi.attempto.ape.OutputType.OWLXML; 020 import static ch.uzh.ifi.attempto.ape.OutputType.PARAPHRASE1; 021 import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAX; 022 import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAXPP; 023 024 import java.util.ArrayList; 025 import java.util.Arrays; 026 import java.util.List; 027 028 import org.semanticweb.owl.model.OWLOntology; 029 import org.semanticweb.owl.model.OWLOntologyCreationException; 030 031 import ch.uzh.ifi.attempto.ape.ACEParserResult; 032 import ch.uzh.ifi.attempto.ape.APELocal; 033 import ch.uzh.ifi.attempto.ape.Lexicon; 034 import ch.uzh.ifi.attempto.ape.LexiconEntry; 035 import ch.uzh.ifi.attempto.ape.MessageContainer; 036 import ch.uzh.ifi.attempto.preditor.text.ContextChecker; 037 import ch.uzh.ifi.attempto.preditor.text.EnglishContextChecker; 038 import ch.uzh.ifi.attempto.preditor.text.TextContainer; 039 import ch.uzh.ifi.attempto.preditor.text.TextElement; 040 041 /** 042 * This class represents an ACE sentence which is either a declarative statement or a question. 043 * Some declarative sentences can be translated into OWL and can participate in reasoning. Other 044 * sentences have no OWL representation and do not participate in reasoning. 045 *<p> 046 * ACE sentences can either have an ontology element as owner (in the case of asserted sentences) 047 * or it can be an independent statement that has no owner (in the case of inferred sentences). 048 *<p> 049 * Parsing of the sentence is done lasily, i.e. at the first time when a parsing result is required. 050 * Parsing fails silently. No exceptions are thrown if a sentence is not ACE compliant. 051 * 052 * @author Tobias Kuhn 053 */ 054 public class Sentence extends Statement { 055 056 /** 057 * The context checker used for AceWiki. 058 */ 059 public static final ContextChecker contextChecker = new EnglishContextChecker(true, true); 060 061 private String text; 062 private boolean integrated = false; 063 064 // These fields are evaluated lazily: 065 private TextContainer textContainer; 066 private ACEParserResult parserResult; 067 private String owlxml; 068 private Boolean reasonerParticipant; 069 private Boolean isOWL; 070 private Boolean isOWLSWRL; 071 private OWLOntology owlOntology; 072 073 private List<OntologyElement> answerCache; 074 private long answerCacheStateID = -1; 075 076 /** 077 * Creates a new asserted sentence. Asserted sentences must have an owner. 078 * 079 * @param text The sentence text. 080 * @param owner The owner ontology element. 081 */ 082 public Sentence(String text, OntologyElement owner) { 083 super(owner); 084 setText(text); 085 } 086 087 /** 088 * Creates a new inferred sentence. Inferred sentence have no owner. 089 * 090 * @param text The sentence text. 091 * @param ontology The ontology. 092 */ 093 public Sentence(String text, Ontology ontology) { 094 super(ontology); 095 setText(text); 096 } 097 098 /** 099 * Generates sentence objects out of a text container. 100 * 101 * @param textContainer The text container. 102 * @param owner The owner ontology element of the sentences. 103 * @return A list of sentences. 104 */ 105 public static List<Sentence> generateSentences(TextContainer textContainer, OntologyElement owner) { 106 ArrayList<Sentence> l = new ArrayList<Sentence>(); 107 TextContainer c = new TextContainer(contextChecker); 108 for (TextElement e : textContainer.getTextElements()) { 109 c.addElement(e); 110 if (e.getText().matches("[.?]")) { 111 l.add(new Sentence(getUnderscoredText(c), owner)); 112 c = new TextContainer(contextChecker); 113 } 114 } 115 return l; 116 } 117 118 /** 119 * Returns a list of text elements that represent the tokens of this sentence. 120 * 121 * @return A token list. 122 */ 123 public List<TextElement> getTextElements() { 124 if (textContainer == null) { 125 tokenize(); 126 } 127 return textContainer.getTextElements(); 128 } 129 130 private void setText(String text) { 131 // remove trailing blank spaces. 132 this.text = text.replaceFirst("\\s+$", ""); 133 } 134 135 /** 136 * Returns the sentence text as a string. Underscores are used for compound words, 137 * e.g. "credit_card". 138 * 139 * @return The sentence text as a string. 140 */ 141 public String getText() { 142 if (textContainer == null) { 143 tokenize(); 144 } 145 return getUnderscoredText(textContainer); 146 } 147 148 /** 149 * Returns the sentence text as a string with underscores displayed as blanks. Compound 150 * words containing underscores like "credit_cards" are pretty-printed with blank characters: 151 * "credit card". 152 * 153 * @return The sentence text as a pretty-printed string. 154 */ 155 public String getPrettyText() { 156 return textContainer.getText(); 157 } 158 159 /** 160 * Returns the parser result object. 161 * 162 * @return The parser result object. 163 */ 164 public ACEParserResult getParserResult() { 165 if (parserResult == null) { 166 parse(); 167 } 168 return parserResult; 169 } 170 171 /** 172 * Returns the OWL/XML representation of this sentence as a string. 173 * 174 * @return The OWL/XML representation. 175 */ 176 public String getOWLXML() { 177 if (owlxml == null) { 178 parse(); 179 } 180 return owlxml; 181 } 182 183 /** 184 * Returns true if this sentence participates in reasoning. 185 * 186 * @return true if this sentence participates in reasoning. 187 */ 188 public boolean isReasonerParticipant() { 189 if (reasonerParticipant == null) { 190 parse(); 191 } 192 return reasonerParticipant; 193 } 194 195 /** 196 * Returns true if this sentence has an OWL representation. 197 * 198 * @return true if this sentence has an OWL representation. 199 */ 200 public boolean isOWL() { 201 if (isOWL == null) { 202 parse(); 203 } 204 return isOWL; 205 } 206 207 /** 208 * Returns true if this sentence has an OWL or SWRL representation. 209 * 210 * @return true if this sentence has an OWL or SWRL representation. 211 */ 212 public boolean isOWLSWRL() { 213 if (isOWLSWRL == null) { 214 parse(); 215 } 216 return isOWLSWRL; 217 } 218 219 /** 220 * Returns the OWL ontology object that contains the OWL representation of this 221 * sentence. Null is returned if there is no OWL representation of this sentence 222 * or if the creation of the OWL ontology object failed. 223 * 224 * @return The OWL ontology object. 225 */ 226 public OWLOntology getOWLOntology() { 227 if (owlxml == null) { 228 parse(); 229 } 230 return owlOntology; 231 } 232 233 /** 234 * Tokenizes the sentence text. A text container object is created. 235 */ 236 private void tokenize() { 237 textContainer = new TextContainer(contextChecker); 238 239 String t = "&" + text + "&"; 240 t = t.replaceAll(" ", "&"); 241 t = t.replaceAll("\\.", "&.&"); 242 t = t.replaceAll("\\?", "&?&"); 243 t = t.replaceAll("&of&", " of&"); 244 t = t.replaceAll("&by&", " by&"); 245 246 ArrayList<String> tokens = new ArrayList<String>(Arrays.asList(t.split("&"))); 247 248 while (tokens.contains("")) { 249 tokens.remove(""); 250 } 251 252 toString(); 253 254 for (String s : tokens) { 255 if (s.startsWith("<")) { 256 OntologyTextElement te; 257 try { 258 long oeId = new Long(s.substring(1, s.indexOf(","))); 259 int wordNumber = new Integer(s.substring(s.indexOf(",")+1, s.indexOf(">"))); 260 OntologyElement oe = getOntology().get(oeId); 261 te = OntologyTextElement.createTextElement(oe, wordNumber); 262 } catch (Exception ex) { 263 throw new RuntimeException("Could not resolve link: " + s, ex); 264 } 265 if (te != null) { 266 textContainer.addElement(te); 267 } else { 268 throw new RuntimeException("Could not resolve link: " + s); 269 } 270 } else { 271 OntologyElement oe = getOntology().get(s); 272 273 if (oe == null) { 274 textContainer.addElement(new TextElement(s)); 275 } else { 276 // not 100% clean solution (several word forms of the same word can be identical): 277 int wordId = Arrays.asList(oe.getWords()).indexOf(s); 278 if (oe instanceof Individual) { 279 // this should probably be done at a different place... 280 Individual ind = (Individual) oe; 281 if (ind.hasDefiniteArticle(wordId-1) && textContainer.getTextElementsCount() > 0) { 282 String precedingText = textContainer.getTextElement(textContainer.getTextElementsCount()-1).getText(); 283 if (precedingText.equals("the") || precedingText.equals("The")) { 284 textContainer.removeLastElement(); 285 wordId--; 286 } 287 } 288 } 289 textContainer.addElement(OntologyTextElement.createTextElement(oe, wordId)); 290 } 291 } 292 } 293 } 294 295 /** 296 * Parses the sentence text. The OWL and SWRL representations are calculated if possible. 297 * This method is called automatically the first time a parsing result is needed. 298 * Furthermore, it needs to be called each time a word form of an ontology element 299 * (that occurs in the sentence) has changed. 300 */ 301 synchronized void parse() { 302 APELocal.getInstance().setURI(getOntology().getURI()); 303 APELocal.getInstance().setClexEnabled(false); 304 Lexicon lexicon = new Lexicon(); 305 for (TextElement te : getTextElements()) { 306 if (te instanceof OntologyTextElement) { 307 OntologyElement oe = ((OntologyTextElement) te).getOntologyElement(); 308 for (LexiconEntry le : oe.getLexiconEntries()) { 309 lexicon.addEntry(le); 310 } 311 } 312 } 313 parserResult = APELocal.getInstance().getMultiOutput(getText(), lexicon, PARAPHRASE1, SYNTAX, SYNTAXPP, OWLXML, OWLRDF, DRSPP); 314 MessageContainer mc = parserResult.getMessageContainer(); 315 owlxml = parserResult.get(OWLXML); 316 if (owlxml != null) { 317 // Every OWL ontology object needs its own URI: 318 long hashCode = (long) getText().hashCode() - Integer.MIN_VALUE; 319 String uri = getOntology().getURI(); 320 owlxml = owlxml.replace("URI=\"" + uri + "\">", "URI=\"" + uri + "/" + hashCode + "\">"); 321 } 322 reasonerParticipant = 323 (mc.getMessages("owl").size() == 0) && 324 (owlxml.indexOf("<swrl:Imp>") < 0) && 325 (owlxml.indexOf("<ObjectExistsSelf>") < 0) && 326 (owlxml.indexOf("<TransitiveObjectProperty>") < 0) && 327 (owlxml.indexOf("<SubObjectPropertyChain>") < 0) && 328 (owlxml.length() > 0); 329 isOWL = 330 (mc.getMessages("owl").size() == 0) && 331 (owlxml.indexOf("<swrl:Imp>") < 0) && 332 (owlxml.length() > 0); 333 isOWLSWRL = 334 (mc.getMessages("owl").size() == 0) && 335 (owlxml.length() > 0); 336 owlOntology = null; 337 if (isOWL) { 338 try { 339 owlOntology = getOntology().readOWLOntology(owlxml); 340 if (owlOntology.isEmpty()) { 341 reasonerParticipant = false; 342 isOWL = false; 343 isOWLSWRL = false; 344 } 345 } catch (OWLOntologyCreationException ex) { 346 ex.printStackTrace(); 347 } 348 } 349 if (isQuestion()) { 350 reasonerParticipant = false; 351 } 352 String messages = mc.toString(); 353 if (messages.length() > 0) { 354 System.err.println("Parser messages: " + messages); 355 } 356 } 357 358 /** 359 * This method tries to reassert a sentence that is not yet integrated. This is 360 * used for sentences that have an OWL representation but the integration failed 361 * because it introduced an inconsistency. Later, when the ontology has changed, 362 * the integration might succeed. 363 * 364 * @return An integer value denoting the success/failure of the operation. 365 * @see Ontology#commitSentence(Sentence) 366 */ 367 public int reassert() { 368 int success = getOntology().commitSentence(this); 369 getOntology().save(getOwner()); 370 return success; 371 } 372 373 /** 374 * Returns true if the sentence is integrated into the ontology. 375 * 376 * @return true if the sentence is integrated into the ontology. 377 */ 378 public boolean isIntegrated() { 379 return integrated; 380 } 381 382 void setIntegrated(boolean integrated) { 383 this.integrated = integrated; 384 } 385 386 /** 387 * Returns true if the sentence is a question. 388 * 389 * @return true if the sentence is a question. 390 */ 391 public boolean isQuestion() { 392 return text.substring(text.length()-1).equals("?"); 393 } 394 395 /** 396 * Checks if the sentence is inferred or asserted. 397 * 398 * @return true if the sentence is inferred, false if it is asserted. 399 */ 400 public boolean isInferred() { 401 return getOwner() == null; 402 } 403 404 /** 405 * Checks whether the sentence contains the given word form (by word number) of the 406 * given ontology element. 407 * 408 * @param e The ontology element. 409 * @param wordNumber The word number. 410 * @return true if the word form occurs in this sentence. 411 */ 412 public boolean contains(OntologyElement e, int wordNumber) { 413 if (textContainer == null) { 414 tokenize(); 415 } 416 for (TextElement t : textContainer.getTextElements()) { 417 if (t instanceof OntologyTextElement) { 418 OntologyTextElement ot = (OntologyTextElement) t; 419 if (e == ot.getOntologyElement() && wordNumber == -1) return true; 420 if (e == ot.getOntologyElement() && wordNumber == ot.getWordNumber()) return true; 421 } 422 } 423 return false; 424 } 425 426 /** 427 * Checks whether the sentence contains the given ontology element (no matter which 428 * word form). 429 * 430 * @param e The ontology element. 431 * @return true if the ontology element occurs in this sentence. 432 */ 433 public boolean contains(OntologyElement e) { 434 return contains(e, -1); 435 } 436 437 /** 438 * Returns all ontology elements that answer this question. In the case the sentence has the form 439 * "what is (Individual)?" then the answer contains all concepts the individual belongs to. 440 * Otherwise, the question is processed as a "DL Query" that describes a concept. In this case, 441 * the answer consists of all individuals that belong to the concept. 442 * The null value is returned if the sentence is not a question. 443 * 444 * @return A list of ontology elements that are the answer for the question. 445 * @see Ontology#getAnswer(Sentence) 446 */ 447 public synchronized List<OntologyElement> getAnswer() { 448 if (!isQuestion()) return null; 449 450 Ontology o = getOntology(); 451 if (answerCacheStateID != o.getStateID()) { 452 answerCache = o.getAnswer(this); 453 answerCacheStateID = o.getStateID(); 454 } 455 if (answerCache == null) { 456 return null; 457 } else { 458 return new ArrayList<OntologyElement>(answerCache); 459 } 460 } 461 462 /** 463 * Returns the cached answer if the sentence is a question. Null is returned if the the sentence 464 * is no question or there is no cached answer. This returned answer might not be up-to-date. 465 * 466 * @return A list of ontology elements that are the cached answer for the question. 467 */ 468 public List<OntologyElement> getCachedAnswer() { 469 if (!isQuestion() || answerCache == null) return null; 470 return new ArrayList<OntologyElement>(answerCache); 471 } 472 473 /** 474 * Returns true if the sentence is a question and the answer to the question is cached and up-to-date 475 * and thus does not have to be recalculated. 476 * 477 * @return true if the answer is cached. 478 */ 479 public boolean isAnswerCached() { 480 if (!isQuestion()) return false; 481 return answerCacheStateID == getOntology().getStateID(); 482 } 483 484 private static String getUnderscoredText(TextContainer textContainer) { 485 String t = ""; 486 for (TextElement te : textContainer.getTextElements()) { 487 if (te instanceof OntologyTextElement) { 488 t += " " + ((OntologyTextElement) te).getUnderscoredText(); 489 } else if (te.getText().matches("[.?]")) { 490 t += te.getText(); 491 } else { 492 t += " " + te.getText(); 493 } 494 } 495 if (t.length() > 0) { 496 t = t.substring(1); 497 } 498 return t; 499 } 500 501 String serialize() { 502 if (textContainer == null) { 503 tokenize(); 504 } 505 String s; 506 if (integrated) { 507 s = "|"; 508 } else { 509 s = "#"; 510 } 511 for (TextElement te : textContainer.getTextElements()) { 512 if (te instanceof OntologyTextElement) { 513 OntologyTextElement ot = (OntologyTextElement) te; 514 s += " <" + ot.getOntologyElement().getId() + "," + ot.getWordNumber() + ">"; 515 } else { 516 s += " " + te.getText(); 517 } 518 } 519 return s + "\n"; 520 } 521 522 public String toString() { 523 return getText(); 524 } 525 526 }