001    // This file is part of the Attempto Java Packages.
002    // Copyright 2008-2009, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
003    //
004    // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
005    // terms of the GNU Lesser General Public License as published by the Free Software Foundation,
006    // either version 3 of the License, or (at your option) any later version.
007    //
008    // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
009    // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
010    // PURPOSE. See the GNU Lesser General Public License for more details.
011    //
012    // You should have received a copy of the GNU Lesser General Public License along with the Attempto
013    // Java Packages. If not, see http://www.gnu.org/licenses/.
014    
015    package ch.uzh.ifi.attempto.acewiki.core.ontology;
016    
017    import static ch.uzh.ifi.attempto.ape.OutputType.DRSPP;
018    import static ch.uzh.ifi.attempto.ape.OutputType.OWLRDF;
019    import static ch.uzh.ifi.attempto.ape.OutputType.OWLXML;
020    import static ch.uzh.ifi.attempto.ape.OutputType.PARAPHRASE1;
021    import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAX;
022    import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAXPP;
023    
024    import java.util.ArrayList;
025    import java.util.Arrays;
026    import java.util.List;
027    
028    import org.semanticweb.owl.model.OWLOntology;
029    import org.semanticweb.owl.model.OWLOntologyCreationException;
030    
031    import ch.uzh.ifi.attempto.acewiki.core.text.OntologyTextElement;
032    import ch.uzh.ifi.attempto.acewiki.core.text.TextElemFactory;
033    import ch.uzh.ifi.attempto.ape.ACEParserResult;
034    import ch.uzh.ifi.attempto.ape.APELocal;
035    import ch.uzh.ifi.attempto.ape.Lexicon;
036    import ch.uzh.ifi.attempto.ape.LexiconEntry;
037    import ch.uzh.ifi.attempto.ape.MessageContainer;
038    import ch.uzh.ifi.attempto.preditor.text.BasicTextElement;
039    import ch.uzh.ifi.attempto.preditor.text.TextContainer;
040    import ch.uzh.ifi.attempto.preditor.text.TextElement;
041    
042    /**
043     * This class represents an ACE sentence which is either a declarative statement or a question.
044     * Some declarative sentences can be translated into OWL and can participate in reasoning. Other
045     * sentences have no OWL representation and do not participate in reasoning.
046     *<p>
047     * ACE sentences can either have an ontology element as owner (in the case of asserted sentences)
048     * or it can be an independent statement that has no owner (in the case of inferred sentences).
049     *<p>
050     * Parsing of the sentence is done lasily, i.e. at the first time when a parsing result is required.
051     * Parsing fails silently. No exceptions are thrown if a sentence is not ACE compliant.
052     * 
053     * @author Tobias Kuhn
054     */
055    public class Sentence extends Statement {
056            
057            private String text;
058            private boolean integrated = false;
059            
060            // These fields are evaluated lazily:
061            private TextContainer textContainer;
062            private ACEParserResult parserResult;
063            private String owlxml;
064            private Boolean reasonerParticipant;
065            private Boolean isOWL;
066            private Boolean isOWLSWRL;
067            private OWLOntology owlOntology;
068            
069            private List<OntologyElement> answerCache;
070            private long answerCacheStateID = -1;
071            
072            /**
073             * Creates a new asserted sentence. Asserted sentences must have an owner.
074             * 
075             * @param text The sentence text.
076             * @param owner The owner ontology element.
077             */
078            public Sentence(String text, OntologyElement owner) {
079                    super(owner);
080                    setText(text);
081            }
082            
083            /**
084             * Creates a new inferred sentence. Inferred sentence have no owner.
085             * 
086             * @param text The sentence text.
087             * @param ontology The ontology.
088             */
089            public Sentence(String text, Ontology ontology) {
090                    super(ontology);
091                    setText(text);
092            }
093            
094            /**
095             * Generates sentence objects out of a text container.
096             * 
097             * @param textContainer The text container.
098             * @param owner The owner ontology element of the sentences.
099             * @return A list of sentences.
100             */
101            public static List<Sentence> generateSentences(TextContainer textContainer, OntologyElement owner) {
102                    ArrayList<Sentence> l = new ArrayList<Sentence>();
103                    TextContainer c = new TextContainer();
104                    for (TextElement e : textContainer.getTextElements()) {
105                            c.addElement(e);
106                            if (e.getText().matches("[.?]")) {
107                                    l.add(new Sentence(getUnderscoredText(c), owner));
108                                    c = new TextContainer();
109                            }
110                    }
111                    return l;
112            }
113            
114            /**
115             * Returns a list of text elements that represent the tokens of this sentence.
116             * 
117             * @return A token list.
118             */
119            public List<TextElement> getTextElements() {
120                    if (textContainer == null) {
121                            tokenize();
122                    }
123                    textContainer.updateConnections();
124                    return textContainer.getTextElements();
125            }
126            
127            private void setText(String text) {
128                    // remove trailing blank spaces.
129                    this.text = text.replaceFirst("\\s+$", "");
130            }
131            
132            /**
133             * Returns the sentence text as a string. Underscores are used for compound words,
134             * e.g. "credit_card".
135             * 
136             * @return The sentence text as a string.
137             */
138            public String getText() {
139                    if (textContainer == null) {
140                            tokenize();
141                    }
142                    return getUnderscoredText(textContainer);
143            }
144            
145            /**
146             * Returns the sentence text as a string with underscores displayed as blanks. Compound
147             * words containing underscores like "credit_cards" are pretty-printed with blank characters:
148             * "credit card".
149             * 
150             * @return The sentence text as a pretty-printed string.
151             */
152            public String getPrettyText() {
153                    return textContainer.getText();
154            }
155            
156            /**
157             * Returns the parser result object.
158             * 
159             * @return The parser result object.
160             */
161            public ACEParserResult getParserResult() {
162                    if (parserResult == null) {
163                            parse();
164                    }
165                    return parserResult;
166            }
167            
168            /**
169             * Returns the OWL/XML representation of this sentence as a string.
170             * 
171             * @return The OWL/XML representation.
172             */
173            public String getOWLXML() {
174                    if (owlxml == null) {
175                            parse();
176                    }
177                    return owlxml;
178            }
179            
180            /**
181             * Returns true if this sentence participates in reasoning.
182             * 
183             * @return true if this sentence participates in reasoning.
184             */
185            public boolean isReasonerParticipant() {
186                    if (reasonerParticipant == null) {
187                            parse();
188                    }
189                    return reasonerParticipant;
190            }
191            
192            /**
193             * Returns true if this sentence has an OWL representation.
194             * 
195             * @return true if this sentence has an OWL representation.
196             */
197            public boolean isOWL() {
198                    if (isOWL == null) {
199                            parse();
200                    }
201                    return isOWL;
202            }
203            
204            /**
205             * Returns true if this sentence has an OWL or SWRL representation.
206             * 
207             * @return true if this sentence has an OWL or SWRL representation.
208             */
209            public boolean isOWLSWRL() {
210                    if (isOWLSWRL == null) {
211                            parse();
212                    }
213                    return isOWLSWRL;
214            }
215            
216            /**
217             * Returns the OWL ontology object that contains the OWL representation of this
218             * sentence. Null is returned if there is no OWL representation of this sentence
219             * or if the creation of the OWL ontology object failed.
220             * 
221             * @return The OWL ontology object.
222             */
223            public OWLOntology getOWLOntology() {
224                    if (owlxml == null) {
225                            parse();
226                    }
227                    return owlOntology;
228            }
229            
230            /**
231             * Tokenizes the sentence text. A text container object is created.
232             */
233            private void tokenize() {
234                    textContainer = new TextContainer();
235                    
236                    String t = "&" + text + "&";
237                    t = t.replaceAll(" ", "&");
238                    t = t.replaceAll("\\.", "&.&");
239                    t = t.replaceAll("\\?", "&?&");
240                    t = t.replaceAll("&of&", " of&");
241                    t = t.replaceAll("&by&", " by&");
242                    
243                    ArrayList<String> tokens = new ArrayList<String>(Arrays.asList(t.split("&")));
244                    
245                    while (tokens.contains("")) {
246                            tokens.remove("");
247                    }
248                    
249                    toString();
250                    
251                    for (String s : tokens) {
252                            if (s.startsWith("<")) {
253                                    OntologyTextElement te;
254                                    try {
255                                            long oeId = new Long(s.substring(1, s.indexOf(",")));
256                                            int wordNumber = new Integer(s.substring(s.indexOf(",")+1, s.indexOf(">")));
257                                            OntologyElement oe = getOntology().get(oeId);
258                                            te = TextElemFactory.createTextElement(oe, wordNumber);
259                                    } catch (Exception ex) {
260                                            throw new RuntimeException("Could not resolve link: " + s, ex);
261                                    }
262                                    if (te != null) {
263                                            textContainer.addElement(te);
264                                    } else {
265                                            throw new RuntimeException("Could not resolve link: " + s);
266                                    }
267                            } else {
268                                    OntologyElement oe = getOntology().get(s);
269                                    
270                                    if (oe == null) {
271                                            textContainer.addElement(new BasicTextElement(s));
272                                    } else {
273                                            // not 100% clean solution (several word forms of the same word can be identical):
274                                            int wordId = Arrays.asList(oe.getWords()).indexOf(s);
275                                            if (oe instanceof Individual) {
276                                                    // this should probably be done at a different place...
277                                                    Individual ind = (Individual) oe;
278                                                    if (ind.hasDefiniteArticle(wordId-1) && textContainer.getTextElementsCount() > 0) {
279                                                            String precedingText = textContainer.getTextElement(textContainer.getTextElementsCount()-1).getText();
280                                                            if (precedingText.equals("the") || precedingText.equals("The")) {
281                                                                    textContainer.removeLastElement();
282                                                                    wordId--;
283                                                            }
284                                                    }
285                                            }
286                                            textContainer.addElement(TextElemFactory.createTextElement(oe, wordId));
287                                    }
288                            }
289                    }
290            }
291            
292            /**
293             * Parses the sentence text. The OWL and SWRL representations are calculated if possible.
294             * This method is called automatically the first time a parsing result is needed.
295             * Furthermore, it needs to be called each time a word form of an ontology element
296             * (that occurs in the sentence) has changed.
297             */
298            synchronized void parse() {
299                    APELocal.getInstance().setURI(getOntology().getURI());
300                    APELocal.getInstance().setClexEnabled(false);
301                    Lexicon lexicon = new Lexicon();
302                    for (TextElement te : getTextElements()) {
303                            if (te instanceof OntologyTextElement) {
304                                    OntologyElement oe = ((OntologyTextElement) te).getOntologyElement();
305                                    for (LexiconEntry le : oe.getLexiconEntries()) {
306                                            lexicon.addEntry(le);
307                                    }
308                            }
309                    }
310                    parserResult = APELocal.getInstance().getMultiOutput(getText(), lexicon, PARAPHRASE1, SYNTAX, SYNTAXPP, OWLXML, OWLRDF, DRSPP);
311                    MessageContainer mc = parserResult.getMessageContainer();
312                    owlxml = parserResult.get(OWLXML);
313                    if (owlxml != null) {
314                            // Every OWL ontology object needs its own URI:
315                            long hashCode = (long) getText().hashCode() - Integer.MIN_VALUE;
316                            String uri = getOntology().getURI();
317                            owlxml = owlxml.replace("URI=\"" + uri + "\">", "URI=\"" + uri + "/" + hashCode + "\">");
318                    }
319                    reasonerParticipant =
320                            (mc.getMessages("owl").size() == 0) &&
321                            (owlxml.indexOf("<swrl:Imp>") < 0) &&
322                            (owlxml.indexOf("<ObjectExistsSelf>") < 0) &&
323                            (owlxml.indexOf("<TransitiveObjectProperty>") < 0) &&
324                            (owlxml.indexOf("<SubObjectPropertyChain>") < 0) &&
325                            (owlxml.length() > 0);
326                    isOWL =
327                            (mc.getMessages("owl").size() == 0) &&
328                            (owlxml.indexOf("<swrl:Imp>") < 0) &&
329                            (owlxml.length() > 0);
330                    isOWLSWRL =
331                            (mc.getMessages("owl").size() == 0) &&
332                            (owlxml.length() > 0);
333                    owlOntology = null;
334                    if (isOWL) {
335                    try {
336                                    owlOntology = getOntology().readOWLOntology(owlxml);
337                                    if (owlOntology.isEmpty()) {
338                                            reasonerParticipant = false;
339                                            isOWL = false;
340                                            isOWLSWRL = false;
341                                    }
342                            } catch (OWLOntologyCreationException ex) {
343                                    ex.printStackTrace();
344                            }
345                    }
346                    if (isQuestion()) {
347                            reasonerParticipant = false;
348                    }
349                    String messages = mc.toString();
350                    if (messages.length() > 0) {
351                            System.err.println("Parser messages: " + messages);
352                    }
353            }
354            
355            /**
356             * This method tries to reassert a sentence that is not yet integrated. This is
357             * used for sentences that have an OWL representation but the integration failed
358             * because it introduced an inconsistency. Later, when the ontology has changed,
359             * the integration might succeed.
360             * 
361             * @return An integer value denoting the success/failure of the operation.
362             * @see Ontology#commitSentence(Sentence)
363             */
364            public int reassert() {
365                    int success = getOntology().commitSentence(this);
366                    getOntology().save(getOwner());
367                    return success;
368            }
369            
370            /**
371             * Returns true if the sentence is integrated into the ontology.
372             * 
373             * @return true if the sentence is integrated into the ontology.
374             */
375            public boolean isIntegrated() {
376                    return integrated;
377            }
378            
379            void setIntegrated(boolean integrated) {
380                    this.integrated = integrated;
381            }
382            
383            /**
384             * Returns true if the sentence is a question.
385             * 
386             * @return true if the sentence is a question.
387             */
388            public boolean isQuestion() {
389                    return text.substring(text.length()-1).equals("?");
390            }
391            
392            /**
393             * Checks if the sentence is inferred or asserted.
394             * 
395             * @return true if the sentence is inferred, false if it is asserted.
396             */
397            public boolean isInferred() {
398                    return getOwner() == null;
399            }
400            
401            /**
402             * Checks whether the sentence contains the given word form (by word number) of the
403             * given ontology element.
404             * 
405             * @param e The ontology element.
406             * @param wordNumber The word number.
407             * @return true if the word form occurs in this sentence.
408             */
409            public boolean contains(OntologyElement e, int wordNumber) {
410                    if (textContainer == null) {
411                            tokenize();
412                    }
413                    for (TextElement t : textContainer.getTextElements()) {
414                            if (t instanceof OntologyTextElement) {
415                                    OntologyTextElement ot = (OntologyTextElement) t;
416                                    if (e == ot.getOntologyElement() && wordNumber == -1) return true;
417                                    if (e == ot.getOntologyElement() && wordNumber == ot.getWordNumber()) return true;
418                            }
419                    }
420                    return false;
421            }
422    
423            /**
424             * Checks whether the sentence contains the given ontology element (no matter which
425             * word form).
426             * 
427             * @param e The ontology element.
428             * @return true if the ontology element occurs in this sentence.
429             */
430            public boolean contains(OntologyElement e) {
431                    return contains(e, -1);
432            }
433            
434            /**
435             * Returns all ontology elements that answer this question. In the case the sentence has the form
436             * "what is (Individual)?" then the answer contains all concepts the individual belongs to.
437             * Otherwise, the question is processed as a "DL Query" that describes a concept. In this case,
438             * the answer consists of all individuals that belong to the concept. 
439             * The null value is returned if the sentence is not a question.
440             * 
441             * @return A list of ontology elements that are the answer for the question.
442             * @see Ontology#getAnswer(Sentence)
443             */
444            public synchronized List<OntologyElement> getAnswer() {
445                    if (!isQuestion()) return null;
446                    
447                    Ontology o = getOntology();
448                    if (answerCacheStateID != o.getStateID()) {
449                            answerCache = o.getAnswer(this);
450                            answerCacheStateID = o.getStateID();
451                    }
452                    if (answerCache == null) {
453                            return null;
454                    } else {
455                            return new ArrayList<OntologyElement>(answerCache);
456                    }
457            }
458            
459            /**
460             * Returns the cached answer if the sentence is a question. Null is returned if the the sentence
461             * is no question or there is no cached answer. This returned answer might not be up-to-date.
462             * 
463             * @return A list of ontology elements that are the cached answer for the question.
464             */
465            public List<OntologyElement> getCachedAnswer() {
466                    if (!isQuestion() || answerCache == null) return null;
467                    return new ArrayList<OntologyElement>(answerCache);
468            }
469            
470            /**
471             * Returns true if the sentence is a question and the answer to the question is cached and up-to-date
472             * and thus does not have to be recalculated.
473             * 
474             * @return true if the answer is cached.
475             */
476            public boolean isAnswerCached() {
477                    if (!isQuestion()) return false;
478                    return answerCacheStateID == getOntology().getStateID();
479            }
480            
481            private static String getUnderscoredText(TextContainer textContainer) {
482                    String t = "";
483                    for (TextElement te : textContainer.getTextElements()) {
484                            if (te instanceof OntologyTextElement) {
485                                    t += " " + ((OntologyTextElement) te).getUnderscoredText();
486                            } else if (te.getText().matches("[.?]")) {
487                                    t += te.getText();
488                            } else {
489                                    t += " " + te.getText();
490                            }
491                    }
492                    if (t.length() > 0) {
493                            t = t.substring(1);
494                    }
495                    return t;
496            }
497            
498            String serialize() {
499                    if (textContainer == null) {
500                            tokenize();
501                    }
502                    String s;
503                    if (integrated) {
504                            s = "|";
505                    } else {
506                            s = "#";
507                    }
508                    for (TextElement te : textContainer.getTextElements()) {
509                            if (te instanceof OntologyTextElement) {
510                                    OntologyTextElement ot = (OntologyTextElement) te;
511                                    s += " <" + ot.getOntologyElement().getId() + "," + ot.getWordNumber() + ">";
512                            } else {
513                                    s += " " + te.getText();
514                            }
515                    }
516                    return s + "\n";
517            }
518            
519            public String toString() {
520                    return getText();
521            }
522    
523    }