// This file is part of AceWiki.
// Copyright 2008-2012, AceWiki developers.
//
// AceWiki is free software: you can redistribute it and/or modify it under the terms of the GNU
// Lesser General Public License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// AceWiki is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
// even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License along with AceWiki. If
// not, see http://www.gnu.org/licenses/.

package ch.uzh.ifi.attempto.acewiki.aceowl;

import static ch.uzh.ifi.attempto.ape.OutputType.DRSPP;
import static ch.uzh.ifi.attempto.ape.OutputType.OWLFSSPP;
import static ch.uzh.ifi.attempto.ape.OutputType.OWLXML;
import static ch.uzh.ifi.attempto.ape.OutputType.PARAPHRASE1;
import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAX;
import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAXPP;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.lang.StringEscapeUtils;
import org.semanticweb.owlapi.apibinding.OWLManager;
import org.semanticweb.owlapi.io.StringDocumentSource;
import org.semanticweb.owlapi.model.OWLAxiom;
import org.semanticweb.owlapi.model.OWLOntology;
import org.semanticweb.owlapi.model.OWLOntologyCreationException;
import org.semanticweb.owlapi.model.OWLOntologyManager;
import org.semanticweb.owlapi.profiles.OWLProfile;
import org.semanticweb.owlapi.profiles.OWLProfileReport;
import org.semanticweb.owlapi.profiles.OWLProfileViolation;

import ch.uzh.ifi.attempto.acewiki.core.Declaration;
import ch.uzh.ifi.attempto.acewiki.core.MonolingualSentence;
import ch.uzh.ifi.attempto.acewiki.core.OntologyElement;
import ch.uzh.ifi.attempto.acewiki.core.OntologyTextElement;
import ch.uzh.ifi.attempto.acewiki.core.SentenceDetail;
import ch.uzh.ifi.attempto.acewiki.owl.AceWikiOWLReasoner;
import ch.uzh.ifi.attempto.acewiki.owl.OWLSentence;
import ch.uzh.ifi.attempto.ape.ACEParser;
import ch.uzh.ifi.attempto.ape.ACEParserResult;
import ch.uzh.ifi.attempto.ape.Lexicon;
import ch.uzh.ifi.attempto.ape.LexiconEntry;
import ch.uzh.ifi.attempto.ape.MessageContainer;
import ch.uzh.ifi.attempto.ape.SyntaxBoxes;
import ch.uzh.ifi.attempto.base.APE;
import ch.uzh.ifi.attempto.base.TextContainer;
import ch.uzh.ifi.attempto.base.TextElement;

/**
 * This class represents an ACE sentence, which can be either a declarative sentence or a question.
 * <p>
 * A sentence is created either from its serialized form or from a text container. The parser
 * result and the OWL-related properties derived from it are computed lazily by {@link #update()}.
 *
 * @author Tobias Kuhn
 */
public abstract class ACESentence extends MonolingualSentence implements OWLSentence {

    // Shared by all sentences; used by update() to load the OWL/XML output of the parser.
    // NOTE(review): this manager is accessed without synchronization, whereas the APE parser in
    // update() is locked explicitly - confirm whether update() can be called concurrently.
    private static final OWLOntologyManager ontologyManager = OWLManager.createOWLOntologyManager();

    // This field is either initialized when the object is created, or otherwise unused:
    private String serialized;

    // Unless initialized when the object is created, this field is evaluated lazily:
    private TextContainer textContainer;

    // These fields are evaluated lazily:
    private ACEParserResult parserResult;
    private Boolean reasonable;
    private Boolean isOWL;
    private Boolean isOWLSWRL;
    private Set<OWLAxiom> owlAxioms;

    /**
     * Initializes a new ACE sentence.
     *
     * @param serialized The serialized representation of the sentence.
     */
    protected ACESentence(String serialized) {
        this.serialized = serialized;
    }

    /**
     * Initializes a new ACE sentence.
     *
     * @param textContainer The text container with the sentence text.
     */
    protected ACESentence(TextContainer textContainer) {
        this.textContainer = textContainer;
    }

    /**
     * Returns the text elements of this sentence as a new list. Compared to the content of the
     * text container, ontology text elements are post-processed: a non-empty pre-text is split
     * off into a plain text element of its own, and for proper names with a definite article the
     * article is emitted as plain text so that it is not a part of the link.
     *
     * @return A new list of text elements.
     */
    public List<TextElement> getTextElements() {
        List<TextElement> list = new ArrayList<TextElement>();
        // TODO: this should be done in a different way
        for (TextElement e : getTextContainer().getTextElements()) {
            if (!(e instanceof OntologyTextElement)) {
                list.add(e);
                continue;
            }
            OntologyTextElement ote = (OntologyTextElement) e;
            OntologyElement oe = ote.getOntologyElement();
            if (ote.getPreText().length() > 0) {
                // The pre-text becomes a plain element; the link is re-created without it.
                list.add(new TextElement(ote.getPreText()));
                ote = new OntologyTextElement(oe, ote.getWordNumber());
            }
            if (oe instanceof ProperNameIndividual) {
                // Proper names with definite articles are handled differently: The "the" is
                // not a part of the link.
                ProperNameIndividual ind = (ProperNameIndividual) oe;
                int wn = ote.getWordNumber();
                if (ind.hasDefiniteArticle(wn)) {
                    // The first three characters of the element text are taken as the article.
                    list.add(new TextElement(e.getText().substring(0, 3)));
                    list.add(new OntologyTextElement(ind, wn + 1));
                } else {
                    list.add(ote);
                }
            } else {
                list.add(ote);
            }
        }
        return list;
    }

    /**
     * Returns the text container of this sentence. If the sentence was created from a serialized
     * string, the container is built from that string on first access.
     *
     * @return The text container.
     */
    protected TextContainer getTextContainer() {
        if (textContainer == null) {
            tokenize();
        }
        return textContainer;
    }

    /**
     * Builds the text container from the serialized representation. Tokens starting with "<" are
     * links of the form "<id,wordNumber>" that are resolved against the ontology; all other
     * tokens are turned into text elements by the text operator.
     *
     * @throws RuntimeException If a link token cannot be parsed or resolved.
     */
    private void tokenize() {
        textContainer = new TextContainer(getTextOperator());

        // TODO Remove legacy code at some point

        // Replace for legacy code below:
        //List<String> tokens = Arrays.asList(serialized.split(" "));

        // This is legacy code to support old acewikidata files:
        String t = "&" + serialized + "&";
        t = t.replace(" ", "&");
        t = t.replace(".", "&.&");
        t = t.replace("?", "&?&");
        t = t.replace("&of&", " of&");
        t = t.replace("&by&", " by&");

        List<String> tokens = new ArrayList<String>(Arrays.asList(t.split("&")));

        // Drop all empty tokens in a single pass.
        tokens.removeAll(Arrays.asList(""));
        // End of legacy code

        // Serializations that contain explicit links never need the legacy link handling.
        boolean hasExplicitLinks = serialized.indexOf("<") > -1;

        for (String s : tokens) {
            if (s.startsWith("<")) {
                OntologyTextElement te;
                try {
                    int comma = s.indexOf(",");
                    long oeId = Long.parseLong(s.substring(1, comma));
                    int wordNumber = Integer.parseInt(s.substring(comma + 1, s.indexOf(">")));
                    OntologyElement oe = getOntology().get(oeId);
                    te = new OntologyTextElement(oe, wordNumber);
                } catch (Exception ex) {
                    throw new RuntimeException("Could not resolve link: " + s, ex);
                }
                textContainer.addElement(te);
            } else {
                TextElement te = getTextOperator().createTextElement(s);
                if (!(te instanceof OntologyTextElement) || hasExplicitLinks) {
                    textContainer.addElement(te);
                } else {
                    // This is legacy code to support old acewikidata files:
                    OntologyTextElement ote = (OntologyTextElement) te;
                    OntologyElement oe = ote.getOntologyElement();
                    int wordId = ote.getWordNumber();
                    if (oe instanceof ProperNameIndividual) {
                        ProperNameIndividual ind = (ProperNameIndividual) oe;
                        int count = textContainer.getTextElementsCount();
                        if (ind.hasDefiniteArticle(wordId - 1) && count > 0) {
                            String precedingText = textContainer.getTextElement(count - 1).getText();
                            if (precedingText.equals("the") || precedingText.equals("The")) {
                                // Merge the preceding article into the proper name link.
                                textContainer.removeLastElement();
                                wordId--;
                            }
                        }
                    }
                    textContainer.addElement(new OntologyTextElement(oe, wordId));
                    // End of legacy code
                }
            }
        }
    }

    /**
     * Returns the parser result object.
     *
     * @return The parser result object.
     */
    public ACEParserResult getParserResult() {
        if (parserResult == null) {
            update();
        }
        return parserResult;
    }

    /**
     * Returns the OWLFSSPP output of the parser for this sentence.
     *
     * @return The value of the OWLFSSPP output of the parser result.
     */
    public String getPrettyOWL() {
        if (parserResult == null) {
            update();
        }
        return parserResult.get(OWLFSSPP);
    }

    /**
     * Returns whether this sentence can take part in reasoning, as determined by
     * {@link #update()}.
     *
     * @return true if the sentence is reasonable.
     */
    public boolean isReasonable() {
        if (reasonable == null) {
            update();
        }
        return reasonable;
    }

    /**
     * Returns whether this sentence has an OWL representation that contains no rules, as
     * determined by {@link #update()}.
     *
     * @return true if the sentence is OWL.
     */
    public boolean isOWL() {
        if (isOWL == null) {
            update();
        }
        return isOWL;
    }

    /**
     * Returns whether the parser produced a non-empty OWL/XML output without OWL messages for
     * this sentence, as determined by {@link #update()}.
     *
     * @return true if the sentence is OWL or SWRL.
     */
    public boolean isOWLSWRL() {
        if (isOWLSWRL == null) {
            update();
        }
        return isOWLSWRL;
    }

    /**
     * Returns the OWL axioms of this sentence.
     *
     * @return The set of axioms; an empty set if no axioms are available. Never null.
     */
    public Set<OWLAxiom> getOWLAxioms() {
        if (parserResult == null) {
            update();
        }
        if (owlAxioms == null) {
            owlAxioms = new HashSet<OWLAxiom>();
        }
        return owlAxioms;
    }

    /**
     * Parses the sentence text and recalculates the parser result, the OWL-related flags and the
     * OWL axioms. If the sentence turns out not to be reasonable while it is integrated, it is
     * marked as not integrated.
     */
    public void update() {
        // TODO: refactor and clean-up!
        AceWikiOWLReasoner reasoner = (AceWikiOWLReasoner) getOntology()
                .getReasoner().getWrappedReasoner();

        ACEParser ape = APE.getParser();
        synchronized (ape) {
            ape.setURI(getOntology().getURI());
            ape.setClexEnabled(false);
            // The lexicon consists of the entries of the ontology elements used in the sentence.
            Lexicon lexicon = new Lexicon();
            for (TextElement te : getTextContainer().getTextElements()) {
                if (te instanceof OntologyTextElement) {
                    OntologyElement oe = ((OntologyTextElement) te).getOntologyElement();
                    if (oe instanceof ACEOWLOntoElement) {
                        for (LexiconEntry le : ((ACEOWLOntoElement) oe).getLexiconEntries()) {
                            lexicon.addEntry(le);
                        }
                    }
                }
            }
            parserResult = ape.getMultiOutput(
                    getText(),
                    lexicon,
                    PARAPHRASE1,
                    SYNTAX,
                    SYNTAXPP,
                    OWLXML,
                    OWLFSSPP,
                    DRSPP
                );
        }
        MessageContainer mc = parserResult.getMessageContainer();
        String owlxml = parserResult.get(OWLXML);

        isOWLSWRL =
            (mc.getMessages("owl").size() == 0) &&
            (owlxml.length() > 0);

        isOWL = isOWLSWRL &&
            (owlxml.indexOf("<swrl:Imp>") < 0) &&
            (owlxml.indexOf("<DLSafeRule>") < 0);

        // Constant-first comparison: a missing policy must not cause a NullPointerException.
        if (isOWL && "no_chains".equals(reasoner.getGlobalRestrictionsPolicy())) {
            reasonable =
                (owlxml.indexOf("<TransitiveObjectProperty>") < 0) &&
                (owlxml.indexOf("<ObjectPropertyChain>") < 0);
        } else {
            reasonable = isOWL;
        }

        owlAxioms = null;
        OWLOntology owlOntology = null;
        try {
            if (isOWL) {
                try {
                    owlOntology = ontologyManager.loadOntologyFromOntologyDocument(
                            new StringDocumentSource(owlxml)
                        );
                    if (owlOntology.isEmpty()) {
                        reasonable = false;
                        isOWL = false;
                        isOWLSWRL = false;
                    } else {
                        owlAxioms = owlOntology.getAxioms();
                    }
                } catch (OWLOntologyCreationException ex) {
                    ex.printStackTrace();
                }
            }
            OWLProfile owlProfile = reasoner.getOWLProfile();
            if (reasonable && owlOntology != null && owlProfile != null
                    && this instanceof Declaration) {
                OWLProfileReport r = owlProfile.checkOntology(owlOntology);
                for (OWLProfileViolation v : r.getViolations()) {
                    // Violations whose description starts with "Use of undeclared" are tolerated.
                    if (!v.toString().startsWith("Use of undeclared")) {
                        reasonable = false;
                        break;
                    }
                }
            }
        } finally {
            // The temporary ontology must not stay in the shared manager, even if the checks
            // above fail with an exception.
            if (owlOntology != null) {
                ontologyManager.removeOntology(owlOntology);
            }
        }
        if (!reasonable && isIntegrated()) {
            super.setIntegrated(false);
        }
        //String messages = mc.toString();
        //if (messages.length() > 0) {
        //    System.err.println("Parser messages: " + messages);
        //}
    }

    /**
     * Sets the integration status of this sentence. A sentence that is already known not to be
     * reasonable is never marked as integrated.
     *
     * @param integrated The requested integration status.
     */
    public void setIntegrated(boolean integrated) {
        boolean knownUnreasonable = reasonable != null && !reasonable;
        super.setIntegrated(integrated && !knownUnreasonable);
    }

    /**
     * Checks whether one of the text elements of this sentence refers to the given ontology
     * element. The elements are compared by identity.
     *
     * @param e The ontology element to look for.
     * @return true if the sentence refers to the given ontology element.
     */
    public boolean contains(OntologyElement e) {
        for (TextElement t : getTextContainer().getTextElements()) {
            if (t instanceof OntologyTextElement) {
                if (e == ((OntologyTextElement) t).getOntologyElement()) return true;
            }
        }
        return false;
    }

    /**
     * Serializes this sentence. Ontology text elements are written as their pre-text, a link of
     * the form "<id,wordNumber>" and their post-text; other elements are written as their text.
     * The parts are separated by blanks.
     *
     * @return The serialized representation of the sentence.
     */
    public String serialize() {
        StringBuilder sb = new StringBuilder();
        for (TextElement te : getTextContainer().getTextElements()) {
            if (te instanceof OntologyTextElement) {
                OntologyTextElement ot = (OntologyTextElement) te;
                sb.append(ot.getPreText());
                sb.append("<").append(ot.getOntologyElement().getId());
                sb.append(",").append(ot.getWordNumber()).append("> ");
                sb.append(ot.getPostText());
            } else {
                sb.append(te.getText()).append(" ");
            }
        }
        // Remove the trailing separator.
        return sb.toString().replaceAll(" $", "");
    }

    /**
     * Returns the details of this sentence as HTML snippets: paraphrase, syntax boxes, syntax
     * tree, logical representation and - if the sentence is OWL or SWRL - the OWL representation.
     *
     * @return A new list of sentence details.
     */
    public List<SentenceDetail> getDetails() {
        List<SentenceDetail> l = new ArrayList<SentenceDetail>();
        l.add(new SentenceDetail(
                "Paraphrase",
                StringEscapeUtils.escapeHtml(getParserResult().get(PARAPHRASE1))
            ));
        l.add(new SentenceDetail(
                "Syntax Boxes",
                SyntaxBoxes.getBoxesHtml(getParserResult())
            ));
        // The syntax tree is parser output like the other details and is escaped in the same way
        // before it is embedded into HTML.
        l.add(new SentenceDetail(
                "Syntax Tree",
                "<pre>" + StringEscapeUtils.escapeHtml(getParserResult().get(SYNTAXPP)) + "</pre>"
            ));
        l.add(new SentenceDetail(
                "Logical representation",
                "<i><pre>" + StringEscapeUtils.escapeHtml(getParserResult().get(DRSPP)) + "</pre></i>"
            ));
        if (isOWLSWRL()) {
            l.add(new SentenceDetail(
                    "OWL",
                    "<i><pre>" + StringEscapeUtils.escapeHtml(getPrettyOWL()) + "</pre></i>"
                ));
        }
        return l;
    }

}