001 // This file is part of the Attempto Java Packages. 002 // Copyright 2008, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch). 003 // 004 // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the 005 // terms of the GNU Lesser General Public License as published by the Free Software Foundation, 006 // either version 3 of the License, or (at your option) any later version. 007 // 008 // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY 009 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 010 // PURPOSE. See the GNU Lesser General Public License for more details. 011 // 012 // You should have received a copy of the GNU Lesser General Public License along with the Attempto 013 // Java Packages. If not, see http://www.gnu.org/licenses/. 014 015 package ch.uzh.ifi.attempto.acewiki.core.ontology; 016 017 import static ch.uzh.ifi.attempto.ape.OutputType.DRSPP; 018 import static ch.uzh.ifi.attempto.ape.OutputType.OWLRDF; 019 import static ch.uzh.ifi.attempto.ape.OutputType.OWLXML; 020 import static ch.uzh.ifi.attempto.ape.OutputType.PARAPHRASE1; 021 import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAX; 022 import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAXPP; 023 024 import java.util.ArrayList; 025 import java.util.Arrays; 026 import java.util.List; 027 028 import org.semanticweb.owl.model.OWLOntology; 029 import org.semanticweb.owl.model.OWLOntologyCreationException; 030 031 import ch.uzh.ifi.attempto.acewiki.core.text.OntologyTextElement; 032 import ch.uzh.ifi.attempto.acewiki.core.text.TextElemFactory; 033 import ch.uzh.ifi.attempto.ape.ACEParserResult; 034 import ch.uzh.ifi.attempto.ape.APELocal; 035 import ch.uzh.ifi.attempto.ape.Lexicon; 036 import ch.uzh.ifi.attempto.ape.LexiconEntry; 037 import ch.uzh.ifi.attempto.ape.MessageContainer; 038 import ch.uzh.ifi.attempto.preditor.text.BasicTextElement; 039 import ch.uzh.ifi.attempto.preditor.text.TextContainer; 040 import ch.uzh.ifi.attempto.preditor.text.TextElement; 041 042 /** 043 * This class represents an ACE sentence which is either a declarative statement or a question. 044 * Some declarative sentences can be translated into OWL and can participate in reasoning. Other 045 * sentences have no OWL representation and do not participate in reasoning. 046 *<p> 047 * Each sentence belongs to exactly one article of an ontology element (the owner). 048 *<p> 049 * Parsing of the sentence is done lasily, i.e. at the first time when a parsing result is required. 050 * Parsing fails silently. No exceptions are thrown if a sentence is not ACE compliant. 051 * 052 * @author Tobias Kuhn 053 */ 054 public class Sentence { 055 056 private String text; 057 private Ontology ontology; 058 private OntologyElement owner; 059 private boolean integrated = false; 060 061 // These fields are evaluated lazily: 062 private TextContainer textContainer; 063 private ACEParserResult parserResult; 064 private String owlxml; 065 private Boolean reasonerParticipant; 066 private Boolean isOWL; 067 private Boolean isOWLSWRL; 068 private OWLOntology owlOntology; 069 070 private List<Individual> answerCache; 071 private long answerCacheStateID = -1; 072 073 /** 074 * Creates a new asserted sentence. Asserted sentences must have an owner. 075 * 076 * @param text The sentence text. 077 * @param owner The owner ontology element. 078 */ 079 public Sentence(String text, OntologyElement owner) { 080 this.text = text; 081 this.ontology = null; 082 this.owner = owner; 083 } 084 085 /** 086 * Creates a new inferred sentence. Inferred sentence have no owner. 087 * 088 * @param text The sentence text. 089 * @param ontology The ontology. 090 */ 091 public Sentence(String text, Ontology ontology) { 092 this.text = text; 093 this.ontology = ontology; 094 this.owner = null; 095 } 096 097 /** 098 * Generates sentence objects out of a text container. 099 * 100 * @param textContainer The text container. 101 * @param owner The owner ontology element of the sentences. 102 * @return A list of sentences. 103 */ 104 public static List<Sentence> generateSentences(TextContainer textContainer, OntologyElement owner) { 105 ArrayList<Sentence> l = new ArrayList<Sentence>(); 106 TextContainer c = new TextContainer(); 107 for (TextElement e : textContainer.getTextElements()) { 108 c.addElement(e); 109 if (e.getText().matches("[.?]")) { 110 l.add(new Sentence(getUnderscoredText(c), owner)); 111 c = new TextContainer(); 112 } 113 } 114 return l; 115 } 116 117 /** 118 * Loads a sentence from a serialized form. 119 * 120 * @param serializedSentence The serialized sentence as a string. 121 * @param owner The owner ontology element of the sentence. 122 * @return A sentence object. 123 */ 124 static Sentence loadSentence(String serializedSentence, OntologyElement owner) { 125 Sentence sentence = new Sentence(serializedSentence.substring(2), owner); 126 sentence.setIntegrated(serializedSentence.charAt(0) == '|'); 127 return sentence; 128 } 129 130 private Ontology getOntology() { 131 if (ontology == null) { 132 ontology = owner.getOntology(); 133 } 134 return ontology; 135 } 136 137 /** 138 * Returns a list of text elements that represent the tokens of this sentence. 139 * 140 * @return A token list. 141 */ 142 public List<TextElement> getTextElements() { 143 if (textContainer == null) { 144 tokenize(); 145 } 146 return textContainer.getTextElements(); 147 } 148 149 /** 150 * Returns the owner ontology element of this sentence. 151 * 152 * @return The owner ontology element. 153 */ 154 public OntologyElement getOwner() { 155 return owner; 156 } 157 158 /** 159 * Returns the sentence text as a string. Underscores are used for compound words, 160 * e.g. "credit_card". 161 * 162 * @return The sentence text as a string. 163 */ 164 public String getText() { 165 if (textContainer == null) { 166 tokenize(); 167 } 168 return getUnderscoredText(textContainer); 169 } 170 171 /** 172 * Returns the sentence text as a string with underscores displayed as blanks. Compound 173 * words containing underscores like "credit_cards" are pretty-printed with blank characters: 174 * "credit card". 175 * 176 * @return The sentence text as a pretty-printed string. 177 */ 178 public String getPrettyText() { 179 return textContainer.getText(); 180 } 181 182 /** 183 * Returns the parser result object. 184 * 185 * @return The parser result object. 186 */ 187 public ACEParserResult getParserResult() { 188 if (parserResult == null) { 189 parse(); 190 } 191 return parserResult; 192 } 193 194 /** 195 * Returns the OWL/XML representation of this sentence as a string. 196 * 197 * @return The OWL/XML representation. 198 */ 199 public String getOWLXML() { 200 if (owlxml == null) { 201 parse(); 202 } 203 return owlxml; 204 } 205 206 /** 207 * Returns true if this sentence participates in reasoning. 208 * 209 * @return true if this sentence participates in reasoning. 210 */ 211 public boolean isReasonerParticipant() { 212 if (reasonerParticipant == null) { 213 parse(); 214 } 215 return reasonerParticipant; 216 } 217 218 /** 219 * Returns true if this sentence has an OWL representation. 220 * 221 * @return true if this sentence has an OWL representation. 222 */ 223 public boolean isOWL() { 224 if (isOWL == null) { 225 parse(); 226 } 227 return isOWL; 228 } 229 230 /** 231 * Returns true if this sentence has an OWL or SWRL representation. 232 * 233 * @return true if this sentence has an OWL or SWRL representation. 234 */ 235 public boolean isOWLSWRL() { 236 if (isOWLSWRL == null) { 237 parse(); 238 } 239 return isOWLSWRL; 240 } 241 242 /** 243 * Returns the OWL ontology object that contains the OWL representation of this 244 * sentence. 245 * 246 * @return The OWL ontology object. 247 * @throws OWLOntologyCreationException If the OWL ontology object creation failed. 248 */ 249 public OWLOntology getOWLOntology() throws OWLOntologyCreationException { 250 if (owlOntology == null) { 251 owlOntology = getOntology().readOWLOntology(getOWLXML()); 252 } 253 return owlOntology; 254 } 255 256 /** 257 * Tokenizes the sentence text. A text container object is created. 258 */ 259 private void tokenize() { 260 textContainer = new TextContainer(); 261 262 String t = text; 263 t = t.replaceAll(" ", "&"); 264 t = t.replaceAll("\\.", "&.&"); 265 t = t.replaceAll("\\?", "&?&"); 266 t = t.replaceAll("&of", " of"); 267 t = t.replaceAll("&by", " by"); 268 269 ArrayList<String> tokens = new ArrayList<String>(Arrays.asList(t.split("&"))); 270 271 while (tokens.contains("")) { 272 tokens.remove(""); 273 } 274 275 toString(); 276 277 for (String s : tokens) { 278 if (s.startsWith("<")) { 279 try { 280 long oeId = new Long(s.substring(1, s.indexOf(","))); 281 int wordNumber = new Integer(s.substring(s.indexOf(",")+1, s.indexOf(">"))); 282 OntologyElement oe = getOntology().get(oeId); 283 textContainer.addElement(TextElemFactory.createTextElement(oe, wordNumber)); 284 } catch (Exception ex) { 285 throw new RuntimeException("Could not resolve link: " + s); 286 } 287 } else { 288 OntologyElement oe = getOntology().get(s); 289 if (oe == null) { 290 textContainer.addElement(new BasicTextElement(s)); 291 } else if (oe instanceof Individual && ((Individual) oe).hasDefiniteArticle()) { 292 textContainer.removeLastElement(); 293 textContainer.addElement(TextElemFactory.createTextElement(oe, 0)); 294 } else { 295 // not 100% clean solution (several word forms of the same word can be identical): 296 int wordId = Arrays.asList(oe.getWords()).indexOf(s); 297 textContainer.addElement(TextElemFactory.createTextElement(oe, wordId)); 298 } 299 } 300 } 301 } 302 303 /** 304 * Parses the sentence text. The OWL and SWRL representations are calculated if possible. 305 * This method is called automatically the first time a parsing result is needed. 306 * Furthermore, it needs to be called each time a word form of an ontology element 307 * (that occurs in the sentence) has changed. 308 */ 309 synchronized void parse() { 310 APELocal.getInstance().setURI(getOntology().getURI()); 311 APELocal.getInstance().setClexEnabled(false); 312 Lexicon lexicon = new Lexicon(); 313 for (TextElement te : getTextElements()) { 314 if (te instanceof OntologyTextElement) { 315 OntologyElement oe = ((OntologyTextElement) te).getOntologyElement(); 316 for (LexiconEntry le : oe.getLexiconEntries()) { 317 lexicon.addEntry(le); 318 } 319 } 320 } 321 parserResult = APELocal.getInstance().getMultiOutput(getText(), lexicon, PARAPHRASE1, SYNTAX, SYNTAXPP, OWLXML, OWLRDF, DRSPP); 322 MessageContainer mc = parserResult.getMessageContainer(); 323 owlxml = parserResult.get(OWLXML); 324 if (owlxml != null) { 325 // Every OWL ontology object needs its own URI: 326 long hashCode = (long) getText().hashCode() - Integer.MIN_VALUE; 327 owlxml = owlxml.replace("URI=\"" + ontology.getURI() + "\">", "URI=\"" + ontology.getURI() + "/" + hashCode + "\">"); 328 } 329 reasonerParticipant = 330 (mc.getMessages("owl").size() == 0) && 331 (owlxml.indexOf("<swrl:Imp>") < 0) && 332 (owlxml.indexOf("<ObjectExistsSelf>") < 0) && 333 (owlxml.indexOf("<TransitiveObjectProperty>") < 0) && 334 (owlxml.length() > 0); 335 if (isQuestion()) { 336 reasonerParticipant = false; 337 } 338 isOWL = 339 (mc.getMessages("owl").size() == 0) && 340 (owlxml.indexOf("<swrl:Imp>") < 0) && 341 (owlxml.length() > 0); 342 isOWLSWRL = 343 (mc.getMessages("owl").size() == 0) && 344 (owlxml.length() > 0); 345 String messages = mc.toString(); 346 if (messages.length() > 0) { 347 System.err.println("Parser messages: " + messages); 348 } 349 owlOntology = null; 350 } 351 352 /** 353 * This method tries to reassert a sentence that is not yet integrated. This is 354 * used for sentences that have an OWL representation but the integration failed 355 * because it introduced an inconsistency. Later, when the ontology has changed, 356 * the integration might succeed. 357 * 358 * @return An integer value denoting the success/failure of the operation. 359 * @see Ontology#commitSentence(Sentence) 360 */ 361 public int reassert() { 362 return getOntology().commitSentence(this); 363 } 364 365 /** 366 * Returns true if the sentence is integrated into the ontology. 367 * 368 * @return true if the sentence is integrated into the ontology. 369 */ 370 public boolean isIntegrated() { 371 return integrated; 372 } 373 374 void setIntegrated(boolean integrated) { 375 this.integrated = integrated; 376 } 377 378 /** 379 * Returns true if the sentence is a question. 380 * 381 * @return true if the sentence is a question. 382 */ 383 public boolean isQuestion() { 384 return text.substring(text.length()-1).equals("?"); 385 } 386 387 /** 388 * Checks if the sentence is inferred or asserted. 389 * 390 * @return true if the sentence is inferred, false if it is asserted. 391 */ 392 public boolean isInferred() { 393 return owner == null; 394 } 395 396 /** 397 * Checks whether the sentence contains the given word form (by word number) of the 398 * given ontology element. 399 * 400 * @param e The ontology element. 401 * @param wordNumber The word number. 402 * @return true if the word form occurs in this sentence. 403 */ 404 public boolean contains(OntologyElement e, int wordNumber) { 405 if (textContainer == null) { 406 tokenize(); 407 } 408 for (TextElement t : textContainer.getTextElements()) { 409 if (t instanceof OntologyTextElement) { 410 OntologyTextElement ot = (OntologyTextElement) t; 411 if (e == ot.getOntologyElement() && wordNumber == -1) return true; 412 if (e == ot.getOntologyElement() && wordNumber == ot.getWordNumber()) return true; 413 } 414 } 415 return false; 416 } 417 418 /** 419 * Checks whether the sentence contains the given ontology element (no matter which 420 * word form). 421 * 422 * @param e The ontology element. 423 * @return true if the ontology element occurs in this sentence. 424 */ 425 public boolean contains(OntologyElement e) { 426 return contains(e, -1); 427 } 428 429 /** 430 * Returns all individuals that answer this question. Questions in AceWiki are "DL Queries". 431 * They describe a concept and the answer consists of all individuals that belong to this concept. 432 * The null value is returned if the sentence is not a question. 433 * 434 * @return A list of individuals that are the answer for the question. 435 * @see Ontology#getAnswer(Sentence) 436 */ 437 public List<Individual> getAnswer() { 438 if (!isQuestion()) return null; 439 440 Ontology o = getOntology(); 441 if (answerCacheStateID != o.getStateID()) { 442 answerCache = o.getAnswer(this); 443 answerCacheStateID = o.getStateID(); 444 } 445 return new ArrayList<Individual>(answerCache); 446 } 447 448 /** 449 * Returns true if the sentence is a question and the answer to the question is cached and does 450 * not have to be recalculated. 451 * 452 * @return true if the answer is cached. 453 */ 454 public boolean isAnswerCached() { 455 if (!isQuestion()) return false; 456 return answerCacheStateID == getOntology().getStateID(); 457 } 458 459 private static String getUnderscoredText(TextContainer textContainer) { 460 String t = ""; 461 for (TextElement te : textContainer.getTextElements()) { 462 if (te instanceof OntologyTextElement) { 463 t += " " + ((OntologyTextElement) te).getUnderscoredText(); 464 } else if (te.getText().matches("[.?]")) { 465 t += te.getText(); 466 } else { 467 t += " " + te.getText(); 468 } 469 } 470 if (t.length() > 0) { 471 t = t.substring(1); 472 } 473 return t; 474 } 475 476 String serialize() { 477 if (textContainer == null) { 478 tokenize(); 479 } 480 String s; 481 if (integrated) { 482 s = "|"; 483 } else { 484 s = "#"; 485 } 486 for (TextElement te : textContainer.getTextElements()) { 487 if (te instanceof OntologyTextElement) { 488 OntologyTextElement ot = (OntologyTextElement) te; 489 s += " <" + ot.getOntologyElement().getId() + "," + ot.getWordNumber() + ">"; 490 } else { 491 s += " " + te.getText(); 492 } 493 } 494 return s + "\n"; 495 } 496 497 public String toString() { 498 return getText(); 499 } 500 501 }