001    // This file is part of the Attempto Java Packages.
002    // Copyright 2008, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
003    //
004    // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
005    // terms of the GNU Lesser General Public License as published by the Free Software Foundation,
006    // either version 3 of the License, or (at your option) any later version.
007    //
008    // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
009    // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
010    // PURPOSE. See the GNU Lesser General Public License for more details.
011    //
012    // You should have received a copy of the GNU Lesser General Public License along with the Attempto
013    // Java Packages. If not, see http://www.gnu.org/licenses/.
014    
015    package ch.uzh.ifi.attempto.acewiki.core.ontology;
016    
017    import static ch.uzh.ifi.attempto.ape.OutputType.DRSPP;
018    import static ch.uzh.ifi.attempto.ape.OutputType.OWLRDF;
019    import static ch.uzh.ifi.attempto.ape.OutputType.OWLXML;
020    import static ch.uzh.ifi.attempto.ape.OutputType.PARAPHRASE1;
021    import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAX;
022    import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAXPP;
023    
024    import java.util.ArrayList;
025    import java.util.Arrays;
026    import java.util.List;
027    
028    import org.semanticweb.owl.model.OWLOntology;
029    import org.semanticweb.owl.model.OWLOntologyCreationException;
030    
031    import ch.uzh.ifi.attempto.acewiki.core.text.OntologyTextElement;
032    import ch.uzh.ifi.attempto.acewiki.core.text.TextElemFactory;
033    import ch.uzh.ifi.attempto.ape.ACEParserResult;
034    import ch.uzh.ifi.attempto.ape.APELocal;
035    import ch.uzh.ifi.attempto.ape.Lexicon;
036    import ch.uzh.ifi.attempto.ape.LexiconEntry;
037    import ch.uzh.ifi.attempto.ape.MessageContainer;
038    import ch.uzh.ifi.attempto.preditor.text.BasicTextElement;
039    import ch.uzh.ifi.attempto.preditor.text.TextContainer;
040    import ch.uzh.ifi.attempto.preditor.text.TextElement;
041    
042    /**
043     * This class represents an ACE sentence which is either a declarative statement or a question.
044     * Some declarative sentences can be translated into OWL and can participate in reasoning. Other
045     * sentences have no OWL representation and do not participate in reasoning.
046     *<p>
047     * Each sentence belongs to exactly one article of an ontology element (the owner).
048     *<p>
049     * Parsing of the sentence is done lasily, i.e. at the first time when a parsing result is required.
050     * Parsing fails silently. No exceptions are thrown if a sentence is not ACE compliant.
051     * 
052     * @author Tobias Kuhn
053     */
054    public class Sentence {
055            
056            private String text;
057            private Ontology ontology;
058            private OntologyElement owner;
059            private boolean integrated = false;
060            
061            // These fields are evaluated lazily:
062            private TextContainer textContainer;
063            private ACEParserResult parserResult;
064            private String owlxml;
065            private Boolean reasonerParticipant;
066            private Boolean isOWL;
067            private Boolean isOWLSWRL;
068            private OWLOntology owlOntology;
069            
070            private List<Individual> answerCache;
071            private long answerCacheStateID = -1;
072            
073            /**
074             * Creates a new asserted sentence. Asserted sentences must have an owner.
075             * 
076             * @param text The sentence text.
077             * @param owner The owner ontology element.
078             */
079            public Sentence(String text, OntologyElement owner) {
080                    this.ontology = null;
081                    this.owner = owner;
082                    setText(text);
083            }
084            
085            /**
086             * Creates a new inferred sentence. Inferred sentence have no owner.
087             * 
088             * @param text The sentence text.
089             * @param ontology The ontology.
090             */
091            public Sentence(String text, Ontology ontology) {
092                    this.ontology = ontology;
093                    this.owner = null;
094                    setText(text);
095            }
096            
097            /**
098             * Generates sentence objects out of a text container.
099             * 
100             * @param textContainer The text container.
101             * @param owner The owner ontology element of the sentences.
102             * @return A list of sentences.
103             */
104            public static List<Sentence> generateSentences(TextContainer textContainer, OntologyElement owner) {
105                    ArrayList<Sentence> l = new ArrayList<Sentence>();
106                    TextContainer c = new TextContainer();
107                    for (TextElement e : textContainer.getTextElements()) {
108                            c.addElement(e);
109                            if (e.getText().matches("[.?]")) {
110                                    l.add(new Sentence(getUnderscoredText(c), owner));
111                                    c = new TextContainer();
112                            }
113                    }
114                    return l;
115            }
116            
117            /**
118             * Loads a sentence from a serialized form.
119             * 
120             * @param serializedSentence The serialized sentence as a string.
121             * @param owner The owner ontology element of the sentence.
122             * @return A sentence object.
123             */
124            static Sentence loadSentence(String serializedSentence, OntologyElement owner) {
125                    Sentence sentence = new Sentence(serializedSentence.substring(2), owner);
126                    sentence.setIntegrated(serializedSentence.charAt(0) == '|');
127                    return sentence;
128            }
129            
130            private Ontology getOntology() {
131                    if (ontology == null) {
132                            ontology = owner.getOntology();
133                    }
134                    return ontology;
135            }
136            
137            /**
138             * Returns a list of text elements that represent the tokens of this sentence.
139             * 
140             * @return A token list.
141             */
142            public List<TextElement> getTextElements() {
143                    if (textContainer == null) {
144                            tokenize();
145                    }
146                    textContainer.updateConnections();
147                    return textContainer.getTextElements();
148            }
149            
150            /**
151             * Returns the owner ontology element of this sentence.
152             * 
153             * @return The owner ontology element.
154             */
155            public OntologyElement getOwner() {
156                    return owner;
157            }
158            
159            private void setText(String text) {
160                    // remove trailing blank spaces.
161                    this.text = text.replaceFirst("\\s+$", "");
162            }
163            
164            /**
165             * Returns the sentence text as a string. Underscores are used for compound words,
166             * e.g. "credit_card".
167             * 
168             * @return The sentence text as a string.
169             */
170            public String getText() {
171                    if (textContainer == null) {
172                            tokenize();
173                    }
174                    return getUnderscoredText(textContainer);
175            }
176            
177            /**
178             * Returns the sentence text as a string with underscores displayed as blanks. Compound
179             * words containing underscores like "credit_cards" are pretty-printed with blank characters:
180             * "credit card".
181             * 
182             * @return The sentence text as a pretty-printed string.
183             */
184            public String getPrettyText() {
185                    return textContainer.getText();
186            }
187            
188            /**
189             * Returns the parser result object.
190             * 
191             * @return The parser result object.
192             */
193            public ACEParserResult getParserResult() {
194                    if (parserResult == null) {
195                            parse();
196                    }
197                    return parserResult;
198            }
199            
200            /**
201             * Returns the OWL/XML representation of this sentence as a string.
202             * 
203             * @return The OWL/XML representation.
204             */
205            public String getOWLXML() {
206                    if (owlxml == null) {
207                            parse();
208                    }
209                    return owlxml;
210            }
211            
212            /**
213             * Returns true if this sentence participates in reasoning.
214             * 
215             * @return true if this sentence participates in reasoning.
216             */
217            public boolean isReasonerParticipant() {
218                    if (reasonerParticipant == null) {
219                            parse();
220                    }
221                    return reasonerParticipant;
222            }
223            
224            /**
225             * Returns true if this sentence has an OWL representation.
226             * 
227             * @return true if this sentence has an OWL representation.
228             */
229            public boolean isOWL() {
230                    if (isOWL == null) {
231                            parse();
232                    }
233                    return isOWL;
234            }
235            
236            /**
237             * Returns true if this sentence has an OWL or SWRL representation.
238             * 
239             * @return true if this sentence has an OWL or SWRL representation.
240             */
241            public boolean isOWLSWRL() {
242                    if (isOWLSWRL == null) {
243                            parse();
244                    }
245                    return isOWLSWRL;
246            }
247            
248            /**
249             * Returns the OWL ontology object that contains the OWL representation of this
250             * sentence. Null is returned if there is no OWL representation of this sentence
251             * or if the creation of the OWL ontology object failed.
252             * 
253             * @return The OWL ontology object.
254             */
255            public OWLOntology getOWLOntology() {
256                    if (owlxml == null) {
257                            parse();
258                    }
259                    return owlOntology;
260            }
261            
262            /**
263             * Tokenizes the sentence text. A text container object is created.
264             */
265            private void tokenize() {
266                    textContainer = new TextContainer();
267                    
268                    String t = "&" + text + "&";
269                    t = t.replaceAll(" ", "&");
270                    t = t.replaceAll("\\.", "&.&");
271                    t = t.replaceAll("\\?", "&?&");
272                    t = t.replaceAll("&of&", " of&");
273                    t = t.replaceAll("&by&", " by&");
274                    
275                    ArrayList<String> tokens = new ArrayList<String>(Arrays.asList(t.split("&")));
276                    
277                    while (tokens.contains("")) {
278                            tokens.remove("");
279                    }
280                    
281                    toString();
282                    
283                    for (String s : tokens) {
284                            if (s.startsWith("<")) {
285                                    OntologyTextElement te;
286                                    try {
287                                            long oeId = new Long(s.substring(1, s.indexOf(",")));
288                                            int wordNumber = new Integer(s.substring(s.indexOf(",")+1, s.indexOf(">")));
289                                            OntologyElement oe = getOntology().get(oeId);
290                                            te = TextElemFactory.createTextElement(oe, wordNumber);
291                                    } catch (Exception ex) {
292                                            throw new RuntimeException("Could not resolve link: " + s, ex);
293                                    }
294                                    if (te != null) {
295                                            textContainer.addElement(te);
296                                    } else {
297                                            throw new RuntimeException("Could not resolve link: " + s);
298                                    }
299                            } else {
300                                    OntologyElement oe = getOntology().get(s);
301                                    
302                                    if (oe == null) {
303                                            textContainer.addElement(new BasicTextElement(s));
304                                    } else {
305                                            // not 100% clean solution (several word forms of the same word can be identical):
306                                            int wordId = Arrays.asList(oe.getWords()).indexOf(s);
307                                            if (oe instanceof Individual) {
308                                                    // this should probably be done at a different place...
309                                                    Individual ind = (Individual) oe;
310                                                    if (ind.hasDefiniteArticle(wordId-1) && textContainer.getTextElementsCount() > 0) {
311                                                            String precedingText = textContainer.getTextElement(textContainer.getTextElementsCount()-1).getText();
312                                                            if (precedingText.equals("the") || precedingText.equals("The")) {
313                                                                    textContainer.removeLastElement();
314                                                                    wordId--;
315                                                            }
316                                                    }
317                                            }
318                                            textContainer.addElement(TextElemFactory.createTextElement(oe, wordId));
319                                    }
320                            }
321                    }
322            }
323            
324            /**
325             * Parses the sentence text. The OWL and SWRL representations are calculated if possible.
326             * This method is called automatically the first time a parsing result is needed.
327             * Furthermore, it needs to be called each time a word form of an ontology element
328             * (that occurs in the sentence) has changed.
329             */
330            synchronized void parse() {
331                    APELocal.getInstance().setURI(getOntology().getURI());
332                    APELocal.getInstance().setClexEnabled(false);
333                    Lexicon lexicon = new Lexicon();
334                    for (TextElement te : getTextElements()) {
335                            if (te instanceof OntologyTextElement) {
336                                    OntologyElement oe = ((OntologyTextElement) te).getOntologyElement();
337                                    for (LexiconEntry le : oe.getLexiconEntries()) {
338                                            lexicon.addEntry(le);
339                                    }
340                            }
341                    }
342                    parserResult = APELocal.getInstance().getMultiOutput(getText(), lexicon, PARAPHRASE1, SYNTAX, SYNTAXPP, OWLXML, OWLRDF, DRSPP);
343                    MessageContainer mc = parserResult.getMessageContainer();
344                    owlxml = parserResult.get(OWLXML);
345                    if (owlxml != null) {
346                            // Every OWL ontology object needs its own URI:
347                            long hashCode = (long) getText().hashCode() - Integer.MIN_VALUE;
348                            owlxml = owlxml.replace("URI=\"" + ontology.getURI() + "\">", "URI=\"" + ontology.getURI() + "/" + hashCode + "\">");
349                    }
350                    reasonerParticipant =
351                            (mc.getMessages("owl").size() == 0) &&
352                            (owlxml.indexOf("<swrl:Imp>") < 0) &&
353                            (owlxml.indexOf("<ObjectExistsSelf>") < 0) &&
354                            (owlxml.indexOf("<TransitiveObjectProperty>") < 0) &&
355                            (owlxml.indexOf("<SubObjectPropertyChain>") < 0) &&
356                            (owlxml.length() > 0);
357                    isOWL =
358                            (mc.getMessages("owl").size() == 0) &&
359                            (owlxml.indexOf("<swrl:Imp>") < 0) &&
360                            (owlxml.length() > 0);
361                    isOWLSWRL =
362                            (mc.getMessages("owl").size() == 0) &&
363                            (owlxml.length() > 0);
364                    owlOntology = null;
365                    if (isOWL) {
366                    try {
367                                    owlOntology = getOntology().readOWLOntology(owlxml);
368                                    if (owlOntology.isEmpty()) {
369                                            reasonerParticipant = false;
370                                            isOWL = false;
371                                            isOWLSWRL = false;
372                                    }
373                            } catch (OWLOntologyCreationException ex) {
374                                    ex.printStackTrace();
375                            }
376                    }
377                    if (isQuestion()) {
378                            reasonerParticipant = false;
379                    }
380                    String messages = mc.toString();
381                    if (messages.length() > 0) {
382                            System.err.println("Parser messages: " + messages);
383                    }
384            }
385            
386            /**
387             * This method tries to reassert a sentence that is not yet integrated. This is
388             * used for sentences that have an OWL representation but the integration failed
389             * because it introduced an inconsistency. Later, when the ontology has changed,
390             * the integration might succeed.
391             * 
392             * @return An integer value denoting the success/failure of the operation.
393             * @see Ontology#commitSentence(Sentence)
394             */
395            public int reassert() {
396                    int success = getOntology().commitSentence(this);
397                    getOntology().save(owner);
398                    return success;
399            }
400            
401            /**
402             * Returns true if the sentence is integrated into the ontology.
403             * 
404             * @return true if the sentence is integrated into the ontology.
405             */
406            public boolean isIntegrated() {
407                    return integrated;
408            }
409            
410            void setIntegrated(boolean integrated) {
411                    this.integrated = integrated;
412            }
413            
414            /**
415             * Returns true if the sentence is a question.
416             * 
417             * @return true if the sentence is a question.
418             */
419            public boolean isQuestion() {
420                    return text.substring(text.length()-1).equals("?");
421            }
422            
423            /**
424             * Checks if the sentence is inferred or asserted.
425             * 
426             * @return true if the sentence is inferred, false if it is asserted.
427             */
428            public boolean isInferred() {
429                    return owner == null;
430            }
431            
432            /**
433             * Checks whether the sentence contains the given word form (by word number) of the
434             * given ontology element.
435             * 
436             * @param e The ontology element.
437             * @param wordNumber The word number.
438             * @return true if the word form occurs in this sentence.
439             */
440            public boolean contains(OntologyElement e, int wordNumber) {
441                    if (textContainer == null) {
442                            tokenize();
443                    }
444                    for (TextElement t : textContainer.getTextElements()) {
445                            if (t instanceof OntologyTextElement) {
446                                    OntologyTextElement ot = (OntologyTextElement) t;
447                                    if (e == ot.getOntologyElement() && wordNumber == -1) return true;
448                                    if (e == ot.getOntologyElement() && wordNumber == ot.getWordNumber()) return true;
449                            }
450                    }
451                    return false;
452            }
453    
454            /**
455             * Checks whether the sentence contains the given ontology element (no matter which
456             * word form).
457             * 
458             * @param e The ontology element.
459             * @return true if the ontology element occurs in this sentence.
460             */
461            public boolean contains(OntologyElement e) {
462                    return contains(e, -1);
463            }
464            
465            /**
466             * Returns all individuals that answer this question. Questions in AceWiki are "DL Queries".
467             * They describe a concept and the answer consists of all individuals that belong to this concept.
468             * The null value is returned if the sentence is not a question.
469             * 
470             * @return A list of individuals that are the answer for the question.
471             * @see Ontology#getAnswer(Sentence)
472             */
473            public synchronized List<Individual> getAnswer() {
474                    if (!isQuestion()) return null;
475                    
476                    Ontology o = getOntology();
477                    if (answerCacheStateID != o.getStateID()) {
478                            answerCache = o.getAnswer(this);
479                            answerCacheStateID = o.getStateID();
480                    }
481                    if (answerCache == null) {
482                            return null;
483                    } else {
484                            return new ArrayList<Individual>(answerCache);
485                    }
486            }
487            
488            /**
489             * Returns true if the sentence is a question and the answer to the question is cached and does
490             * not have to be recalculated.
491             * 
492             * @return true if the answer is cached.
493             */
494            public boolean isAnswerCached() {
495                    if (!isQuestion()) return false;
496                    return answerCacheStateID == getOntology().getStateID();
497            }
498            
499            private static String getUnderscoredText(TextContainer textContainer) {
500                    String t = "";
501                    for (TextElement te : textContainer.getTextElements()) {
502                            if (te instanceof OntologyTextElement) {
503                                    t += " " + ((OntologyTextElement) te).getUnderscoredText();
504                            } else if (te.getText().matches("[.?]")) {
505                                    t += te.getText();
506                            } else {
507                                    t += " " + te.getText();
508                            }
509                    }
510                    if (t.length() > 0) {
511                            t = t.substring(1);
512                    }
513                    return t;
514            }
515            
516            String serialize() {
517                    if (textContainer == null) {
518                            tokenize();
519                    }
520                    String s;
521                    if (integrated) {
522                            s = "|";
523                    } else {
524                            s = "#";
525                    }
526                    for (TextElement te : textContainer.getTextElements()) {
527                            if (te instanceof OntologyTextElement) {
528                                    OntologyTextElement ot = (OntologyTextElement) te;
529                                    s += " <" + ot.getOntologyElement().getId() + "," + ot.getWordNumber() + ">";
530                            } else {
531                                    s += " " + te.getText();
532                            }
533                    }
534                    return s + "\n";
535            }
536            
537            public String toString() {
538                    return getText();
539            }
540    
541    }