// This file is part of the Attempto Java Packages.
// Copyright 2008-2009, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
//
// The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
// terms of the GNU Lesser General Public License as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License along with the Attempto
// Java Packages. If not, see http://www.gnu.org/licenses/.

package ch.uzh.ifi.attempto.acewiki.core.ontology;

import static ch.uzh.ifi.attempto.ape.OutputType.DRSPP;
import static ch.uzh.ifi.attempto.ape.OutputType.OWLRDF;
import static ch.uzh.ifi.attempto.ape.OutputType.OWLXML;
import static ch.uzh.ifi.attempto.ape.OutputType.PARAPHRASE1;
import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAX;
import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAXPP;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.semanticweb.owl.model.OWLOntology;
import org.semanticweb.owl.model.OWLOntologyCreationException;

import ch.uzh.ifi.attempto.acewiki.core.text.OntologyTextElement;
import ch.uzh.ifi.attempto.acewiki.core.text.TextElemFactory;
import ch.uzh.ifi.attempto.ape.ACEParserResult;
import ch.uzh.ifi.attempto.ape.APELocal;
import ch.uzh.ifi.attempto.ape.Lexicon;
import ch.uzh.ifi.attempto.ape.LexiconEntry;
import ch.uzh.ifi.attempto.ape.MessageContainer;
import ch.uzh.ifi.attempto.preditor.text.BasicTextElement;
import ch.uzh.ifi.attempto.preditor.text.TextContainer;
import ch.uzh.ifi.attempto.preditor.text.TextElement;

/**
 * This class represents an ACE sentence which is either a declarative statement or a question.
 * Some declarative sentences can be translated into OWL and can participate in reasoning. Other
 * sentences have no OWL representation and do not participate in reasoning.
 *<p>
 * ACE sentences can either have an ontology element as owner (in the case of asserted sentences)
 * or they can be independent statements that have no owner (in the case of inferred sentences).
 *<p>
 * Parsing of the sentence is done lazily, i.e. at the first time when a parsing result is required.
 * Parsing fails silently. No exceptions are thrown if a sentence is not ACE compliant.
 *
 * @author Tobias Kuhn
 */
public class Sentence extends Statement {

	private String text;
	private boolean integrated = false;

	// These fields are evaluated lazily:
	private TextContainer textContainer;
	private ACEParserResult parserResult;
	private String owlxml;
	private Boolean reasonerParticipant;
	private Boolean isOWL;
	private Boolean isOWLSWRL;
	private OWLOntology owlOntology;

	private List<OntologyElement> answerCache;
	private long answerCacheStateID = -1;

	/**
	 * Creates a new asserted sentence. Asserted sentences must have an owner.
	 *
	 * @param text The sentence text.
	 * @param owner The owner ontology element.
	 */
	public Sentence(String text, OntologyElement owner) {
		super(owner);
		setText(text);
	}

	/**
	 * Creates a new inferred sentence. Inferred sentences have no owner.
	 *
	 * @param text The sentence text.
	 * @param ontology The ontology.
	 */
	public Sentence(String text, Ontology ontology) {
		super(ontology);
		setText(text);
	}

	/**
	 * Generates sentence objects out of a text container. The elements are collected until a
	 * "." or "?" element is reached, which closes the current sentence. Elements that follow
	 * the last such terminator do not end up in any sentence.
	 *
	 * @param textContainer The text container.
	 * @param owner The owner ontology element of the sentences.
	 * @return A list of sentences.
	 */
	public static List<Sentence> generateSentences(TextContainer textContainer, OntologyElement owner) {
		List<Sentence> sentences = new ArrayList<Sentence>();
		TextContainer current = new TextContainer();
		for (TextElement e : textContainer.getTextElements()) {
			current.addElement(e);
			if (isSentenceTerminator(e.getText())) {
				sentences.add(new Sentence(getUnderscoredText(current), owner));
				current = new TextContainer();
			}
		}
		return sentences;
	}

	/**
	 * Returns a list of text elements that represent the tokens of this sentence.
	 *
	 * @return A token list.
	 */
	public List<TextElement> getTextElements() {
		ensureTokenized();
		textContainer.updateConnections();
		return textContainer.getTextElements();
	}

	private void setText(String text) {
		// remove trailing blank spaces.
		this.text = text.replaceFirst("\\s+$", "");
	}

	/**
	 * Returns the sentence text as a string. Underscores are used for compound words,
	 * e.g. "credit_card".
	 *
	 * @return The sentence text as a string.
	 */
	public String getText() {
		ensureTokenized();
		return getUnderscoredText(textContainer);
	}

	/**
	 * Returns the sentence text as a string with underscores displayed as blanks. Compound
	 * words containing underscores like "credit_cards" are pretty-printed with blank characters:
	 * "credit card".
	 *
	 * @return The sentence text as a pretty-printed string.
	 */
	public String getPrettyText() {
		// Tokenize lazily like every other accessor; without this, calling this method before
		// any other accessor dereferenced a null text container.
		ensureTokenized();
		return textContainer.getText();
	}

	/**
	 * Returns the parser result object. The sentence is parsed first if that has not
	 * happened yet.
	 *
	 * @return The parser result object.
	 */
	public ACEParserResult getParserResult() {
		if (parserResult == null) {
			parse();
		}
		return parserResult;
	}

	/**
	 * Returns the OWL/XML representation of this sentence as a string. The string is empty
	 * if the parser delivered no OWL/XML output.
	 *
	 * @return The OWL/XML representation.
	 */
	public String getOWLXML() {
		if (owlxml == null) {
			parse();
		}
		return owlxml;
	}

	/**
	 * Returns true if this sentence participates in reasoning.
	 *
	 * @return true if this sentence participates in reasoning.
	 */
	public boolean isReasonerParticipant() {
		if (reasonerParticipant == null) {
			parse();
		}
		return reasonerParticipant;
	}

	/**
	 * Returns true if this sentence has an OWL representation.
	 *
	 * @return true if this sentence has an OWL representation.
	 */
	public boolean isOWL() {
		if (isOWL == null) {
			parse();
		}
		return isOWL;
	}

	/**
	 * Returns true if this sentence has an OWL or SWRL representation.
	 *
	 * @return true if this sentence has an OWL or SWRL representation.
	 */
	public boolean isOWLSWRL() {
		if (isOWLSWRL == null) {
			parse();
		}
		return isOWLSWRL;
	}

	/**
	 * Returns the OWL ontology object that contains the OWL representation of this
	 * sentence. Null is returned if there is no OWL representation of this sentence
	 * or if the creation of the OWL ontology object failed.
	 *
	 * @return The OWL ontology object.
	 */
	public OWLOntology getOWLOntology() {
		if (owlxml == null) {
			parse();
		}
		return owlOntology;
	}

	/**
	 * Makes sure that the text container exists by tokenizing the text on first use.
	 */
	private void ensureTokenized() {
		if (textContainer == null) {
			tokenize();
		}
	}

	/**
	 * Tokenizes the sentence text. A text container object is created. The container is
	 * built locally and published only when all tokens have been processed, so that a failed
	 * link resolution does not leave a half-filled container behind.
	 */
	private void tokenize() {
		TextContainer container = new TextContainer();

		// "&" serves as the internal token separator. These are literal replacements, so no
		// regular expressions are needed.
		String t = "&" + text + "&";
		t = t.replace(" ", "&");
		t = t.replace(".", "&.&");
		t = t.replace("?", "&?&");
		// "of" and "by" stay attached to the token in front of them:
		t = t.replace("&of&", " of&");
		t = t.replace("&by&", " by&");

		for (String token : t.split("&")) {
			if (token.length() == 0) {
				continue;
			}
			if (token.startsWith("<")) {
				container.addElement(resolveLink(token));
			} else {
				addWordToken(container, token);
			}
		}

		textContainer = container;
	}

	/**
	 * Resolves a serialized link of the form "&lt;id,wordNumber&gt;" to a text element.
	 *
	 * @param link The link token.
	 * @return The text element for the linked ontology element.
	 * @throws RuntimeException If the link is malformed or cannot be resolved.
	 */
	private OntologyTextElement resolveLink(String link) {
		OntologyTextElement te;
		try {
			int comma = link.indexOf(",");
			long oeId = Long.parseLong(link.substring(1, comma));
			int wordNumber = Integer.parseInt(link.substring(comma + 1, link.indexOf(">")));
			OntologyElement oe = getOntology().get(oeId);
			te = TextElemFactory.createTextElement(oe, wordNumber);
		} catch (Exception ex) {
			throw new RuntimeException("Could not resolve link: " + link, ex);
		}
		if (te == null) {
			throw new RuntimeException("Could not resolve link: " + link);
		}
		return te;
	}

	/**
	 * Adds the text element for a plain word token to the given container. Words that are
	 * unknown to the ontology become basic text elements; known words become text elements
	 * that refer to their ontology element.
	 *
	 * @param container The container to which the element is added.
	 * @param word The word token.
	 */
	private void addWordToken(TextContainer container, String word) {
		OntologyElement oe = getOntology().get(word);
		if (oe == null) {
			container.addElement(new BasicTextElement(word));
			return;
		}

		// not 100% clean solution (several word forms of the same word can be identical):
		int wordId = Arrays.asList(oe.getWords()).indexOf(word);
		if (oe instanceof Individual) {
			// this should probably be done at a different place...
			Individual ind = (Individual) oe;
			if (ind.hasDefiniteArticle(wordId - 1) && container.getTextElementsCount() > 0) {
				String precedingText = container.getTextElement(container.getTextElementsCount() - 1).getText();
				if (precedingText.equals("the") || precedingText.equals("The")) {
					// The article token is dropped and the preceding word form is used instead.
					container.removeLastElement();
					wordId--;
				}
			}
		}
		container.addElement(TextElemFactory.createTextElement(oe, wordId));
	}

	/**
	 * Parses the sentence text. The OWL and SWRL representations are calculated if possible.
	 * This method is called automatically the first time a parsing result is needed.
	 * Furthermore, it needs to be called each time a word form of an ontology element
	 * (that occurs in the sentence) has changed.
	 */
	synchronized void parse() {
		APELocal ape = APELocal.getInstance();
		ape.setURI(getOntology().getURI());
		ape.setClexEnabled(false);

		// The lexicon consists of the entries of all ontology elements occurring in the sentence.
		Lexicon lexicon = new Lexicon();
		for (TextElement te : getTextElements()) {
			if (te instanceof OntologyTextElement) {
				OntologyElement oe = ((OntologyTextElement) te).getOntologyElement();
				for (LexiconEntry le : oe.getLexiconEntries()) {
					lexicon.addEntry(le);
				}
			}
		}

		String sentenceText = getText();
		parserResult = ape.getMultiOutput(sentenceText, lexicon, PARAPHRASE1, SYNTAX, SYNTAXPP, OWLXML, OWLRDF, DRSPP);
		MessageContainer mc = parserResult.getMessageContainer();

		String xml = parserResult.get(OWLXML);
		if (xml == null) {
			// A missing OWL/XML output is handled like an empty one; the checks below
			// dereferenced null in this case before.
			xml = "";
		} else {
			// Every OWL ontology object needs its own URI:
			// (subtracting Integer.MIN_VALUE shifts the hash code into the non-negative range)
			long hashCode = (long) sentenceText.hashCode() - Integer.MIN_VALUE;
			String uri = getOntology().getURI();
			xml = xml.replace("URI=\"" + uri + "\">", "URI=\"" + uri + "/" + hashCode + "\">");
		}
		owlxml = xml;

		boolean hasOwlOutput = (mc.getMessages("owl").size() == 0) && (xml.length() > 0);
		isOWLSWRL = hasOwlOutput;
		isOWL = hasOwlOutput && (xml.indexOf("<swrl:Imp>") < 0);
		reasonerParticipant =
			isOWL &&
			(xml.indexOf("<ObjectExistsSelf>") < 0) &&
			(xml.indexOf("<TransitiveObjectProperty>") < 0) &&
			(xml.indexOf("<SubObjectPropertyChain>") < 0);

		owlOntology = null;
		if (isOWL) {
			try {
				owlOntology = getOntology().readOWLOntology(owlxml);
				if (owlOntology.isEmpty()) {
					reasonerParticipant = false;
					isOWL = false;
					isOWLSWRL = false;
				}
			} catch (OWLOntologyCreationException ex) {
				ex.printStackTrace();
			}
		}
		if (isQuestion()) {
			reasonerParticipant = false;
		}
		String messages = mc.toString();
		if (messages.length() > 0) {
			System.err.println("Parser messages: " + messages);
		}
	}

	/**
	 * This method tries to reassert a sentence that is not yet integrated. This is
	 * used for sentences that have an OWL representation but the integration failed
	 * because it introduced an inconsistency. Later, when the ontology has changed,
	 * the integration might succeed.
	 *
	 * @return An integer value denoting the success/failure of the operation.
	 * @see Ontology#commitSentence(Sentence)
	 */
	public int reassert() {
		int success = getOntology().commitSentence(this);
		getOntology().save(getOwner());
		return success;
	}

	/**
	 * Returns true if the sentence is integrated into the ontology.
	 *
	 * @return true if the sentence is integrated into the ontology.
	 */
	public boolean isIntegrated() {
		return integrated;
	}

	void setIntegrated(boolean integrated) {
		this.integrated = integrated;
	}

	/**
	 * Returns true if the sentence is a question, i.e. if its text ends with a question mark.
	 * An empty sentence text is not a question.
	 *
	 * @return true if the sentence is a question.
	 */
	public boolean isQuestion() {
		return text.endsWith("?");
	}

	/**
	 * Checks if the sentence is inferred or asserted.
	 *
	 * @return true if the sentence is inferred, false if it is asserted.
	 */
	public boolean isInferred() {
		return getOwner() == null;
	}

	/**
	 * Checks whether the sentence contains the given word form (by word number) of the
	 * given ontology element. The word number -1 matches every word form.
	 *
	 * @param e The ontology element.
	 * @param wordNumber The word number, or -1 for any word form.
	 * @return true if the word form occurs in this sentence.
	 */
	public boolean contains(OntologyElement e, int wordNumber) {
		ensureTokenized();
		for (TextElement t : textContainer.getTextElements()) {
			if (t instanceof OntologyTextElement) {
				OntologyTextElement ot = (OntologyTextElement) t;
				if (e == ot.getOntologyElement() && (wordNumber == -1 || wordNumber == ot.getWordNumber())) {
					return true;
				}
			}
		}
		return false;
	}

	/**
	 * Checks whether the sentence contains the given ontology element (no matter which
	 * word form).
	 *
	 * @param e The ontology element.
	 * @return true if the ontology element occurs in this sentence.
	 */
	public boolean contains(OntologyElement e) {
		return contains(e, -1);
	}

	/**
	 * Returns all ontology elements that answer this question. In the case the sentence has the form
	 * "what is (Individual)?" then the answer contains all concepts the individual belongs to.
	 * Otherwise, the question is processed as a "DL Query" that describes a concept. In this case,
	 * the answer consists of all individuals that belong to the concept.
	 * The null value is returned if the sentence is not a question.
	 *
	 * @return A list of ontology elements that are the answer for the question.
	 * @see Ontology#getAnswer(Sentence)
	 */
	public synchronized List<OntologyElement> getAnswer() {
		if (!isQuestion()) {
			return null;
		}

		// The answer is recalculated only if the state of the ontology has changed.
		Ontology o = getOntology();
		if (answerCacheStateID != o.getStateID()) {
			answerCache = o.getAnswer(this);
			answerCacheStateID = o.getStateID();
		}
		if (answerCache == null) {
			return null;
		}
		// A copy is returned so that callers cannot modify the cache.
		return new ArrayList<OntologyElement>(answerCache);
	}

	/**
	 * Returns the cached answer if the sentence is a question. Null is returned if the sentence
	 * is no question or there is no cached answer. This returned answer might not be up-to-date.
	 *
	 * @return A list of ontology elements that are the cached answer for the question.
	 */
	public List<OntologyElement> getCachedAnswer() {
		if (!isQuestion() || answerCache == null) {
			return null;
		}
		return new ArrayList<OntologyElement>(answerCache);
	}

	/**
	 * Returns true if the sentence is a question and the answer to the question is cached and up-to-date
	 * and thus does not have to be recalculated.
	 *
	 * @return true if the answer is cached.
	 */
	public boolean isAnswerCached() {
		if (!isQuestion()) {
			return false;
		}
		return answerCacheStateID == getOntology().getStateID();
	}

	/**
	 * Returns true if the given token text closes a sentence, i.e. if it is "." or "?".
	 */
	private static boolean isSentenceTerminator(String tokenText) {
		return ".".equals(tokenText) || "?".equals(tokenText);
	}

	/**
	 * Concatenates the elements of the given container to one string. The elements are
	 * separated by blanks, except that "." and "?" are attached directly to the preceding
	 * element. Ontology text elements contribute their underscored text.
	 */
	private static String getUnderscoredText(TextContainer textContainer) {
		StringBuilder sb = new StringBuilder();
		for (TextElement te : textContainer.getTextElements()) {
			if (te instanceof OntologyTextElement) {
				sb.append(' ').append(((OntologyTextElement) te).getUnderscoredText());
			} else if (isSentenceTerminator(te.getText())) {
				sb.append(te.getText());
			} else {
				sb.append(' ').append(te.getText());
			}
		}
		// Only the separator in front of the first element is dropped. The first character
		// was dropped unconditionally before, which swallowed a leading "." or "?".
		if (sb.length() > 0 && sb.charAt(0) == ' ') {
			return sb.substring(1);
		}
		return sb.toString();
	}

	/**
	 * Serializes this sentence into one line of text. The line starts with "|" for integrated
	 * sentences and with "#" otherwise. Ontology text elements are written as links of the
	 * form "&lt;id,wordNumber&gt;" and all other elements are written as plain text.
	 */
	String serialize() {
		ensureTokenized();
		StringBuilder sb = new StringBuilder(integrated ? "|" : "#");
		for (TextElement te : textContainer.getTextElements()) {
			if (te instanceof OntologyTextElement) {
				OntologyTextElement ot = (OntologyTextElement) te;
				sb.append(" <").append(ot.getOntologyElement().getId());
				sb.append(",").append(ot.getWordNumber()).append(">");
			} else {
				sb.append(" ").append(te.getText());
			}
		}
		return sb.append("\n").toString();
	}

	@Override
	public String toString() {
		return getText();
	}

}