001    // This file is part of the Attempto Java Packages.
002    // Copyright 2008-2009, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
003    //
004    // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
005    // terms of the GNU Lesser General Public License as published by the Free Software Foundation,
006    // either version 3 of the License, or (at your option) any later version.
007    //
008    // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
009    // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
010    // PURPOSE. See the GNU Lesser General Public License for more details.
011    //
012    // You should have received a copy of the GNU Lesser General Public License along with the Attempto
013    // Java Packages. If not, see http://www.gnu.org/licenses/.
014    
015    package ch.uzh.ifi.attempto.acewiki.core.ontology;
016    
017    import static ch.uzh.ifi.attempto.ape.OutputType.DRSPP;
018    import static ch.uzh.ifi.attempto.ape.OutputType.OWLRDF;
019    import static ch.uzh.ifi.attempto.ape.OutputType.OWLXML;
020    import static ch.uzh.ifi.attempto.ape.OutputType.PARAPHRASE1;
021    import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAX;
022    import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAXPP;
023    
024    import java.util.ArrayList;
025    import java.util.Arrays;
026    import java.util.List;
027    
028    import org.semanticweb.owl.model.OWLOntology;
029    import org.semanticweb.owl.model.OWLOntologyCreationException;
030    
031    import ch.uzh.ifi.attempto.ape.ACEParserResult;
032    import ch.uzh.ifi.attempto.ape.APELocal;
033    import ch.uzh.ifi.attempto.ape.Lexicon;
034    import ch.uzh.ifi.attempto.ape.LexiconEntry;
035    import ch.uzh.ifi.attempto.ape.MessageContainer;
036    import ch.uzh.ifi.attempto.preditor.text.ContextChecker;
037    import ch.uzh.ifi.attempto.preditor.text.EnglishContextChecker;
038    import ch.uzh.ifi.attempto.preditor.text.TextContainer;
039    import ch.uzh.ifi.attempto.preditor.text.TextElement;
040    
041    /**
042     * This class represents an ACE sentence which is either a declarative statement or a question.
043     * Some declarative sentences can be translated into OWL and can participate in reasoning. Other
044     * sentences have no OWL representation and do not participate in reasoning.
045     *<p>
046     * ACE sentences can either have an ontology element as owner (in the case of asserted sentences)
047     * or it can be an independent statement that has no owner (in the case of inferred sentences).
048     *<p>
049     * Parsing of the sentence is done lasily, i.e. at the first time when a parsing result is required.
050     * Parsing fails silently. No exceptions are thrown if a sentence is not ACE compliant.
051     * 
052     * @author Tobias Kuhn
053     */
054    public class Sentence extends Statement {
055            
056            /**
057             * The context checker used for AceWiki.
058             */
059            public static final ContextChecker contextChecker = new EnglishContextChecker(true, true);
060            
061            private String text;
062            private boolean integrated = false;
063            
064            // These fields are evaluated lazily:
065            private TextContainer textContainer;
066            private ACEParserResult parserResult;
067            private String owlxml;
068            private Boolean reasonerParticipant;
069            private Boolean isOWL;
070            private Boolean isOWLSWRL;
071            private OWLOntology owlOntology;
072            
073            private List<OntologyElement> answerCache;
074            private long answerCacheStateID = -1;
075            
076            /**
077             * Creates a new asserted sentence. Asserted sentences must have an owner.
078             * 
079             * @param text The sentence text.
080             * @param owner The owner ontology element.
081             */
082            public Sentence(String text, OntologyElement owner) {
083                    super(owner);
084                    setText(text);
085            }
086            
087            /**
088             * Creates a new inferred sentence. Inferred sentence have no owner.
089             * 
090             * @param text The sentence text.
091             * @param ontology The ontology.
092             */
093            public Sentence(String text, Ontology ontology) {
094                    super(ontology);
095                    setText(text);
096            }
097            
098            /**
099             * Generates sentence objects out of a text container.
100             * 
101             * @param textContainer The text container.
102             * @param owner The owner ontology element of the sentences.
103             * @return A list of sentences.
104             */
105            public static List<Sentence> generateSentences(TextContainer textContainer, OntologyElement owner) {
106                    ArrayList<Sentence> l = new ArrayList<Sentence>();
107                    TextContainer c = new TextContainer(contextChecker);
108                    for (TextElement e : textContainer.getTextElements()) {
109                            c.addElement(e);
110                            if (e.getText().matches("[.?]")) {
111                                    l.add(new Sentence(getUnderscoredText(c), owner));
112                                    c = new TextContainer(contextChecker);
113                            }
114                    }
115                    return l;
116            }
117            
118            /**
119             * Returns a list of text elements that represent the tokens of this sentence.
120             * 
121             * @return A token list.
122             */
123            public List<TextElement> getTextElements() {
124                    if (textContainer == null) {
125                            tokenize();
126                    }
127                    return textContainer.getTextElements();
128            }
129            
130            private void setText(String text) {
131                    // remove trailing blank spaces.
132                    this.text = text.replaceFirst("\\s+$", "");
133            }
134            
135            /**
136             * Returns the sentence text as a string. Underscores are used for compound words,
137             * e.g. "credit_card".
138             * 
139             * @return The sentence text as a string.
140             */
141            public String getText() {
142                    if (textContainer == null) {
143                            tokenize();
144                    }
145                    return getUnderscoredText(textContainer);
146            }
147            
148            /**
149             * Returns the sentence text as a string with underscores displayed as blanks. Compound
150             * words containing underscores like "credit_cards" are pretty-printed with blank characters:
151             * "credit card".
152             * 
153             * @return The sentence text as a pretty-printed string.
154             */
155            public String getPrettyText() {
156                    return textContainer.getText();
157            }
158            
159            /**
160             * Returns the parser result object.
161             * 
162             * @return The parser result object.
163             */
164            public ACEParserResult getParserResult() {
165                    if (parserResult == null) {
166                            parse();
167                    }
168                    return parserResult;
169            }
170            
171            /**
172             * Returns the OWL/XML representation of this sentence as a string.
173             * 
174             * @return The OWL/XML representation.
175             */
176            public String getOWLXML() {
177                    if (owlxml == null) {
178                            parse();
179                    }
180                    return owlxml;
181            }
182            
183            /**
184             * Returns true if this sentence participates in reasoning.
185             * 
186             * @return true if this sentence participates in reasoning.
187             */
188            public boolean isReasonerParticipant() {
189                    if (reasonerParticipant == null) {
190                            parse();
191                    }
192                    return reasonerParticipant;
193            }
194            
195            /**
196             * Returns true if this sentence has an OWL representation.
197             * 
198             * @return true if this sentence has an OWL representation.
199             */
200            public boolean isOWL() {
201                    if (isOWL == null) {
202                            parse();
203                    }
204                    return isOWL;
205            }
206            
207            /**
208             * Returns true if this sentence has an OWL or SWRL representation.
209             * 
210             * @return true if this sentence has an OWL or SWRL representation.
211             */
212            public boolean isOWLSWRL() {
213                    if (isOWLSWRL == null) {
214                            parse();
215                    }
216                    return isOWLSWRL;
217            }
218            
219            /**
220             * Returns the OWL ontology object that contains the OWL representation of this
221             * sentence. Null is returned if there is no OWL representation of this sentence
222             * or if the creation of the OWL ontology object failed.
223             * 
224             * @return The OWL ontology object.
225             */
226            public OWLOntology getOWLOntology() {
227                    if (owlxml == null) {
228                            parse();
229                    }
230                    return owlOntology;
231            }
232            
233            /**
234             * Tokenizes the sentence text. A text container object is created.
235             */
236            private void tokenize() {
237                    textContainer = new TextContainer(contextChecker);
238                    
239                    String t = "&" + text + "&";
240                    t = t.replaceAll(" ", "&");
241                    t = t.replaceAll("\\.", "&.&");
242                    t = t.replaceAll("\\?", "&?&");
243                    t = t.replaceAll("&of&", " of&");
244                    t = t.replaceAll("&by&", " by&");
245                    
246                    ArrayList<String> tokens = new ArrayList<String>(Arrays.asList(t.split("&")));
247                    
248                    while (tokens.contains("")) {
249                            tokens.remove("");
250                    }
251                    
252                    toString();
253                    
254                    for (String s : tokens) {
255                            if (s.startsWith("<")) {
256                                    OntologyTextElement te;
257                                    try {
258                                            long oeId = new Long(s.substring(1, s.indexOf(",")));
259                                            int wordNumber = new Integer(s.substring(s.indexOf(",")+1, s.indexOf(">")));
260                                            OntologyElement oe = getOntology().get(oeId);
261                                            te = OntologyTextElement.createTextElement(oe, wordNumber);
262                                    } catch (Exception ex) {
263                                            throw new RuntimeException("Could not resolve link: " + s, ex);
264                                    }
265                                    if (te != null) {
266                                            textContainer.addElement(te);
267                                    } else {
268                                            throw new RuntimeException("Could not resolve link: " + s);
269                                    }
270                            } else {
271                                    OntologyElement oe = getOntology().get(s);
272                                    
273                                    if (oe == null) {
274                                            textContainer.addElement(new TextElement(s));
275                                    } else {
276                                            // not 100% clean solution (several word forms of the same word can be identical):
277                                            int wordId = Arrays.asList(oe.getWords()).indexOf(s);
278                                            if (oe instanceof Individual) {
279                                                    // this should probably be done at a different place...
280                                                    Individual ind = (Individual) oe;
281                                                    if (ind.hasDefiniteArticle(wordId-1) && textContainer.getTextElementsCount() > 0) {
282                                                            String precedingText = textContainer.getTextElement(textContainer.getTextElementsCount()-1).getText();
283                                                            if (precedingText.equals("the") || precedingText.equals("The")) {
284                                                                    textContainer.removeLastElement();
285                                                                    wordId--;
286                                                            }
287                                                    }
288                                            }
289                                            textContainer.addElement(OntologyTextElement.createTextElement(oe, wordId));
290                                    }
291                            }
292                    }
293            }
294            
295            /**
296             * Parses the sentence text. The OWL and SWRL representations are calculated if possible.
297             * This method is called automatically the first time a parsing result is needed.
298             * Furthermore, it needs to be called each time a word form of an ontology element
299             * (that occurs in the sentence) has changed.
300             */
301            synchronized void parse() {
302                    APELocal.getInstance().setURI(getOntology().getURI());
303                    APELocal.getInstance().setClexEnabled(false);
304                    Lexicon lexicon = new Lexicon();
305                    for (TextElement te : getTextElements()) {
306                            if (te instanceof OntologyTextElement) {
307                                    OntologyElement oe = ((OntologyTextElement) te).getOntologyElement();
308                                    for (LexiconEntry le : oe.getLexiconEntries()) {
309                                            lexicon.addEntry(le);
310                                    }
311                            }
312                    }
313                    parserResult = APELocal.getInstance().getMultiOutput(getText(), lexicon, PARAPHRASE1, SYNTAX, SYNTAXPP, OWLXML, OWLRDF, DRSPP);
314                    MessageContainer mc = parserResult.getMessageContainer();
315                    owlxml = parserResult.get(OWLXML);
316                    if (owlxml != null) {
317                            // Every OWL ontology object needs its own URI:
318                            long hashCode = (long) getText().hashCode() - Integer.MIN_VALUE;
319                            String uri = getOntology().getURI();
320                            owlxml = owlxml.replace("URI=\"" + uri + "\">", "URI=\"" + uri + "/" + hashCode + "\">");
321                    }
322                    reasonerParticipant =
323                            (mc.getMessages("owl").size() == 0) &&
324                            (owlxml.indexOf("<swrl:Imp>") < 0) &&
325                            (owlxml.indexOf("<ObjectExistsSelf>") < 0) &&
326                            (owlxml.indexOf("<TransitiveObjectProperty>") < 0) &&
327                            (owlxml.indexOf("<SubObjectPropertyChain>") < 0) &&
328                            (owlxml.length() > 0);
329                    isOWL =
330                            (mc.getMessages("owl").size() == 0) &&
331                            (owlxml.indexOf("<swrl:Imp>") < 0) &&
332                            (owlxml.length() > 0);
333                    isOWLSWRL =
334                            (mc.getMessages("owl").size() == 0) &&
335                            (owlxml.length() > 0);
336                    owlOntology = null;
337                    if (isOWL) {
338                    try {
339                                    owlOntology = getOntology().readOWLOntology(owlxml);
340                                    if (owlOntology.isEmpty()) {
341                                            reasonerParticipant = false;
342                                            isOWL = false;
343                                            isOWLSWRL = false;
344                                    }
345                            } catch (OWLOntologyCreationException ex) {
346                                    ex.printStackTrace();
347                            }
348                    }
349                    if (isQuestion()) {
350                            reasonerParticipant = false;
351                    }
352                    String messages = mc.toString();
353                    if (messages.length() > 0) {
354                            System.err.println("Parser messages: " + messages);
355                    }
356            }
357            
358            /**
359             * This method tries to reassert a sentence that is not yet integrated. This is
360             * used for sentences that have an OWL representation but the integration failed
361             * because it introduced an inconsistency. Later, when the ontology has changed,
362             * the integration might succeed.
363             * 
364             * @return An integer value denoting the success/failure of the operation.
365             * @see Ontology#commitSentence(Sentence)
366             */
367            public int reassert() {
368                    int success = getOntology().commitSentence(this);
369                    getOntology().save(getOwner());
370                    return success;
371            }
372            
373            /**
374             * Returns true if the sentence is integrated into the ontology.
375             * 
376             * @return true if the sentence is integrated into the ontology.
377             */
378            public boolean isIntegrated() {
379                    return integrated;
380            }
381            
382            void setIntegrated(boolean integrated) {
383                    this.integrated = integrated;
384            }
385            
386            /**
387             * Returns true if the sentence is a question.
388             * 
389             * @return true if the sentence is a question.
390             */
391            public boolean isQuestion() {
392                    return text.substring(text.length()-1).equals("?");
393            }
394            
395            /**
396             * Checks if the sentence is inferred or asserted.
397             * 
398             * @return true if the sentence is inferred, false if it is asserted.
399             */
400            public boolean isInferred() {
401                    return getOwner() == null;
402            }
403            
404            /**
405             * Checks whether the sentence contains the given word form (by word number) of the
406             * given ontology element.
407             * 
408             * @param e The ontology element.
409             * @param wordNumber The word number.
410             * @return true if the word form occurs in this sentence.
411             */
412            public boolean contains(OntologyElement e, int wordNumber) {
413                    if (textContainer == null) {
414                            tokenize();
415                    }
416                    for (TextElement t : textContainer.getTextElements()) {
417                            if (t instanceof OntologyTextElement) {
418                                    OntologyTextElement ot = (OntologyTextElement) t;
419                                    if (e == ot.getOntologyElement() && wordNumber == -1) return true;
420                                    if (e == ot.getOntologyElement() && wordNumber == ot.getWordNumber()) return true;
421                            }
422                    }
423                    return false;
424            }
425    
426            /**
427             * Checks whether the sentence contains the given ontology element (no matter which
428             * word form).
429             * 
430             * @param e The ontology element.
431             * @return true if the ontology element occurs in this sentence.
432             */
433            public boolean contains(OntologyElement e) {
434                    return contains(e, -1);
435            }
436            
437            /**
438             * Returns all ontology elements that answer this question. In the case the sentence has the form
439             * "what is (Individual)?" then the answer contains all concepts the individual belongs to.
440             * Otherwise, the question is processed as a "DL Query" that describes a concept. In this case,
441             * the answer consists of all individuals that belong to the concept. 
442             * The null value is returned if the sentence is not a question.
443             * 
444             * @return A list of ontology elements that are the answer for the question.
445             * @see Ontology#getAnswer(Sentence)
446             */
447            public synchronized List<OntologyElement> getAnswer() {
448                    if (!isQuestion()) return null;
449                    
450                    Ontology o = getOntology();
451                    if (answerCacheStateID != o.getStateID()) {
452                            answerCache = o.getAnswer(this);
453                            answerCacheStateID = o.getStateID();
454                    }
455                    if (answerCache == null) {
456                            return null;
457                    } else {
458                            return new ArrayList<OntologyElement>(answerCache);
459                    }
460            }
461            
462            /**
463             * Returns the cached answer if the sentence is a question. Null is returned if the the sentence
464             * is no question or there is no cached answer. This returned answer might not be up-to-date.
465             * 
466             * @return A list of ontology elements that are the cached answer for the question.
467             */
468            public List<OntologyElement> getCachedAnswer() {
469                    if (!isQuestion() || answerCache == null) return null;
470                    return new ArrayList<OntologyElement>(answerCache);
471            }
472            
473            /**
474             * Returns true if the sentence is a question and the answer to the question is cached and up-to-date
475             * and thus does not have to be recalculated.
476             * 
477             * @return true if the answer is cached.
478             */
479            public boolean isAnswerCached() {
480                    if (!isQuestion()) return false;
481                    return answerCacheStateID == getOntology().getStateID();
482            }
483            
484            private static String getUnderscoredText(TextContainer textContainer) {
485                    String t = "";
486                    for (TextElement te : textContainer.getTextElements()) {
487                            if (te instanceof OntologyTextElement) {
488                                    t += " " + ((OntologyTextElement) te).getUnderscoredText();
489                            } else if (te.getText().matches("[.?]")) {
490                                    t += te.getText();
491                            } else {
492                                    t += " " + te.getText();
493                            }
494                    }
495                    if (t.length() > 0) {
496                            t = t.substring(1);
497                    }
498                    return t;
499            }
500            
501            String serialize() {
502                    if (textContainer == null) {
503                            tokenize();
504                    }
505                    String s;
506                    if (integrated) {
507                            s = "|";
508                    } else {
509                            s = "#";
510                    }
511                    for (TextElement te : textContainer.getTextElements()) {
512                            if (te instanceof OntologyTextElement) {
513                                    OntologyTextElement ot = (OntologyTextElement) te;
514                                    s += " <" + ot.getOntologyElement().getId() + "," + ot.getWordNumber() + ">";
515                            } else {
516                                    s += " " + te.getText();
517                            }
518                    }
519                    return s + "\n";
520            }
521            
522            public String toString() {
523                    return getText();
524            }
525    
526    }