001    // This file is part of the Attempto Java Packages.
002    // Copyright 2008, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
003    //
004    // The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
005    // terms of the GNU Lesser General Public License as published by the Free Software Foundation,
006    // either version 3 of the License, or (at your option) any later version.
007    //
008    // The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
009    // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
010    // PURPOSE. See the GNU Lesser General Public License for more details.
011    //
012    // You should have received a copy of the GNU Lesser General Public License along with the Attempto
013    // Java Packages. If not, see http://www.gnu.org/licenses/.
014    
015    package ch.uzh.ifi.attempto.acewiki.core.ontology;
016    
017    import static ch.uzh.ifi.attempto.ape.OutputType.DRSPP;
018    import static ch.uzh.ifi.attempto.ape.OutputType.OWLRDF;
019    import static ch.uzh.ifi.attempto.ape.OutputType.OWLXML;
020    import static ch.uzh.ifi.attempto.ape.OutputType.PARAPHRASE1;
021    import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAX;
022    import static ch.uzh.ifi.attempto.ape.OutputType.SYNTAXPP;
023    
024    import java.util.ArrayList;
025    import java.util.Arrays;
026    import java.util.List;
027    
028    import org.semanticweb.owl.model.OWLOntology;
029    import org.semanticweb.owl.model.OWLOntologyCreationException;
030    
031    import ch.uzh.ifi.attempto.acewiki.core.text.OntologyTextElement;
032    import ch.uzh.ifi.attempto.acewiki.core.text.TextElemFactory;
033    import ch.uzh.ifi.attempto.ape.ACEParserResult;
034    import ch.uzh.ifi.attempto.ape.APELocal;
035    import ch.uzh.ifi.attempto.ape.Lexicon;
036    import ch.uzh.ifi.attempto.ape.LexiconEntry;
037    import ch.uzh.ifi.attempto.ape.MessageContainer;
038    import ch.uzh.ifi.attempto.preditor.text.BasicTextElement;
039    import ch.uzh.ifi.attempto.preditor.text.TextContainer;
040    import ch.uzh.ifi.attempto.preditor.text.TextElement;
041    
042    /**
043     * This class represents an ACE sentence which is either a declarative statement or a question.
044     * Some declarative sentences can be translated into OWL and can participate in reasoning. Other
045     * sentences have no OWL representation and do not participate in reasoning.
046     *<p>
047     * Each sentence belongs to exactly one article of an ontology element (the owner).
048     *<p>
049     * Parsing of the sentence is done lasily, i.e. at the first time when a parsing result is required.
050     * Parsing fails silently. No exceptions are thrown if a sentence is not ACE compliant.
051     * 
052     * @author Tobias Kuhn
053     */
054    public class Sentence {
055            
056            private String text;
057            private Ontology ontology;
058            private OntologyElement owner;
059            private boolean integrated = false;
060            
061            // These fields are evaluated lazily:
062            private TextContainer textContainer;
063            private ACEParserResult parserResult;
064            private String owlxml;
065            private Boolean reasonerParticipant;
066            private Boolean isOWL;
067            private Boolean isOWLSWRL;
068            private OWLOntology owlOntology;
069            
070            private List<Individual> answerCache;
071            private long answerCacheStateID = -1;
072            
073            /**
074             * Creates a new asserted sentence. Asserted sentences must have an owner.
075             * 
076             * @param text The sentence text.
077             * @param owner The owner ontology element.
078             */
079            public Sentence(String text, OntologyElement owner) {
080                    this.text = text;
081                    this.ontology = null;
082                    this.owner = owner;
083            }
084            
085            /**
086             * Creates a new inferred sentence. Inferred sentence have no owner.
087             * 
088             * @param text The sentence text.
089             * @param ontology The ontology.
090             */
091            public Sentence(String text, Ontology ontology) {
092                    this.text = text;
093                    this.ontology = ontology;
094                    this.owner = null;
095            }
096            
097            /**
098             * Generates sentence objects out of a text container.
099             * 
100             * @param textContainer The text container.
101             * @param owner The owner ontology element of the sentences.
102             * @return A list of sentences.
103             */
104            public static List<Sentence> generateSentences(TextContainer textContainer, OntologyElement owner) {
105                    ArrayList<Sentence> l = new ArrayList<Sentence>();
106                    TextContainer c = new TextContainer();
107                    for (TextElement e : textContainer.getTextElements()) {
108                            c.addElement(e);
109                            if (e.getText().matches("[.?]")) {
110                                    l.add(new Sentence(getUnderscoredText(c), owner));
111                                    c = new TextContainer();
112                            }
113                    }
114                    return l;
115            }
116            
117            /**
118             * Loads a sentence from a serialized form.
119             * 
120             * @param serializedSentence The serialized sentence as a string.
121             * @param owner The owner ontology element of the sentence.
122             * @return A sentence object.
123             */
124            static Sentence loadSentence(String serializedSentence, OntologyElement owner) {
125                    Sentence sentence = new Sentence(serializedSentence.substring(2), owner);
126                    sentence.setIntegrated(serializedSentence.charAt(0) == '|');
127                    return sentence;
128            }
129            
130            private Ontology getOntology() {
131                    if (ontology == null) {
132                            ontology = owner.getOntology();
133                    }
134                    return ontology;
135            }
136            
137            /**
138             * Returns a list of text elements that represent the tokens of this sentence.
139             * 
140             * @return A token list.
141             */
142            public List<TextElement> getTextElements() {
143                    if (textContainer == null) {
144                            tokenize();
145                    }
146                    return textContainer.getTextElements();
147            }
148            
149            /**
150             * Returns the owner ontology element of this sentence.
151             * 
152             * @return The owner ontology element.
153             */
154            public OntologyElement getOwner() {
155                    return owner;
156            }
157            
158            /**
159             * Returns the sentence text as a string. Underscores are used for compound words,
160             * e.g. "credit_card".
161             * 
162             * @return The sentence text as a string.
163             */
164            public String getText() {
165                    if (textContainer == null) {
166                            tokenize();
167                    }
168                    return getUnderscoredText(textContainer);
169            }
170            
171            /**
172             * Returns the sentence text as a string with underscores displayed as blanks. Compound
173             * words containing underscores like "credit_cards" are pretty-printed with blank characters:
174             * "credit card".
175             * 
176             * @return The sentence text as a pretty-printed string.
177             */
178            public String getPrettyText() {
179                    return textContainer.getText();
180            }
181            
182            /**
183             * Returns the parser result object.
184             * 
185             * @return The parser result object.
186             */
187            public ACEParserResult getParserResult() {
188                    if (parserResult == null) {
189                            parse();
190                    }
191                    return parserResult;
192            }
193            
194            /**
195             * Returns the OWL/XML representation of this sentence as a string.
196             * 
197             * @return The OWL/XML representation.
198             */
199            public String getOWLXML() {
200                    if (owlxml == null) {
201                            parse();
202                    }
203                    return owlxml;
204            }
205            
206            /**
207             * Returns true if this sentence participates in reasoning.
208             * 
209             * @return true if this sentence participates in reasoning.
210             */
211            public boolean isReasonerParticipant() {
212                    if (reasonerParticipant == null) {
213                            parse();
214                    }
215                    return reasonerParticipant;
216            }
217            
218            /**
219             * Returns true if this sentence has an OWL representation.
220             * 
221             * @return true if this sentence has an OWL representation.
222             */
223            public boolean isOWL() {
224                    if (isOWL == null) {
225                            parse();
226                    }
227                    return isOWL;
228            }
229            
230            /**
231             * Returns true if this sentence has an OWL or SWRL representation.
232             * 
233             * @return true if this sentence has an OWL or SWRL representation.
234             */
235            public boolean isOWLSWRL() {
236                    if (isOWLSWRL == null) {
237                            parse();
238                    }
239                    return isOWLSWRL;
240            }
241            
242            /**
243             * Returns the OWL ontology object that contains the OWL representation of this
244             * sentence.
245             * 
246             * @return The OWL ontology object.
247             * @throws OWLOntologyCreationException If the OWL ontology object creation failed.
248             */
249            public OWLOntology getOWLOntology() throws OWLOntologyCreationException {
250                    if (owlOntology == null) {
251                            owlOntology = getOntology().readOWLOntology(getOWLXML());
252                    }
253                    return owlOntology;
254            }
255            
256            /**
257             * Tokenizes the sentence text. A text container object is created.
258             */
259            private void tokenize() {
260                    textContainer = new TextContainer();
261                    
262                    String t = text;
263                    t = t.replaceAll(" ", "&");
264                    t = t.replaceAll("\\.", "&.&");
265                    t = t.replaceAll("\\?", "&?&");
266                    t = t.replaceAll("&of", " of");
267                    t = t.replaceAll("&by", " by");
268                    
269                    ArrayList<String> tokens = new ArrayList<String>(Arrays.asList(t.split("&")));
270                    
271                    while (tokens.contains("")) {
272                            tokens.remove("");
273                    }
274                    
275                    toString();
276                    
277                    for (String s : tokens) {
278                            if (s.startsWith("<")) {
279                                    try {
280                                            long oeId = new Long(s.substring(1, s.indexOf(",")));
281                                            int wordNumber = new Integer(s.substring(s.indexOf(",")+1, s.indexOf(">")));
282                                            OntologyElement oe = getOntology().get(oeId);
283                                            textContainer.addElement(TextElemFactory.createTextElement(oe, wordNumber));
284                                    } catch (Exception ex) {
285                                            throw new RuntimeException("Could not resolve link: " + s);
286                                    }
287                            } else {
288                                    OntologyElement oe = getOntology().get(s);
289                                    if (oe == null) {
290                                            textContainer.addElement(new BasicTextElement(s));
291                                    } else if (oe instanceof Individual && ((Individual) oe).hasDefiniteArticle()) {
292                                            textContainer.removeLastElement();
293                                            textContainer.addElement(TextElemFactory.createTextElement(oe, 0));
294                                    } else {
295                                            // not 100% clean solution (several word forms of the same word can be identical):
296                                            int wordId = Arrays.asList(oe.getWords()).indexOf(s);
297                                            textContainer.addElement(TextElemFactory.createTextElement(oe, wordId));
298                                    }
299                            }
300                    }
301            }
302            
303            /**
304             * Parses the sentence text. The OWL and SWRL representations are calculated if possible.
305             * This method is called automatically the first time a parsing result is needed.
306             * Furthermore, it needs to be called each time a word form of an ontology element
307             * (that occurs in the sentence) has changed.
308             */
309            synchronized void parse() {
310                    APELocal.getInstance().setURI(getOntology().getURI());
311                    APELocal.getInstance().setClexEnabled(false);
312                    Lexicon lexicon = new Lexicon();
313                    for (TextElement te : getTextElements()) {
314                            if (te instanceof OntologyTextElement) {
315                                    OntologyElement oe = ((OntologyTextElement) te).getOntologyElement();
316                                    for (LexiconEntry le : oe.getLexiconEntries()) {
317                                            lexicon.addEntry(le);
318                                    }
319                            }
320                    }
321                    parserResult = APELocal.getInstance().getMultiOutput(getText(), lexicon, PARAPHRASE1, SYNTAX, SYNTAXPP, OWLXML, OWLRDF, DRSPP);
322                    MessageContainer mc = parserResult.getMessageContainer();
323                    owlxml = parserResult.get(OWLXML);
324                    if (owlxml != null) {
325                            // Every OWL ontology object needs its own URI:
326                            long hashCode = (long) getText().hashCode() - Integer.MIN_VALUE;
327                            owlxml = owlxml.replace("URI=\"" + ontology.getURI() + "\">", "URI=\"" + ontology.getURI() + "/" + hashCode + "\">");
328                    }
329                    reasonerParticipant =
330                            (mc.getMessages("owl").size() == 0) &&
331                            (owlxml.indexOf("<swrl:Imp>") < 0) &&
332                            (owlxml.indexOf("<ObjectExistsSelf>") < 0) &&
333                            (owlxml.indexOf("<TransitiveObjectProperty>") < 0) &&
334                            (owlxml.length() > 0);
335                    if (isQuestion()) {
336                            reasonerParticipant = false;
337                    }
338                    isOWL =
339                            (mc.getMessages("owl").size() == 0) &&
340                            (owlxml.indexOf("<swrl:Imp>") < 0) &&
341                            (owlxml.length() > 0);
342                    isOWLSWRL =
343                            (mc.getMessages("owl").size() == 0) &&
344                            (owlxml.length() > 0);
345                    String messages = mc.toString();
346                    if (messages.length() > 0) {
347                            System.err.println("Parser messages: " + messages);
348                    }
349                    owlOntology = null;
350            }
351            
352            /**
353             * This method tries to reassert a sentence that is not yet integrated. This is
354             * used for sentences that have an OWL representation but the integration failed
355             * because it introduced an inconsistency. Later, when the ontology has changed,
356             * the integration might succeed.
357             * 
358             * @return An integer value denoting the success/failure of the operation.
359             * @see Ontology#commitSentence(Sentence)
360             */
361            public int reassert() {
362                    return getOntology().commitSentence(this);
363            }
364            
365            /**
366             * Returns true if the sentence is integrated into the ontology.
367             * 
368             * @return true if the sentence is integrated into the ontology.
369             */
370            public boolean isIntegrated() {
371                    return integrated;
372            }
373            
374            void setIntegrated(boolean integrated) {
375                    this.integrated = integrated;
376            }
377            
378            /**
379             * Returns true if the sentence is a question.
380             * 
381             * @return true if the sentence is a question.
382             */
383            public boolean isQuestion() {
384                    return text.substring(text.length()-1).equals("?");
385            }
386            
387            /**
388             * Checks if the sentence is inferred or asserted.
389             * 
390             * @return true if the sentence is inferred, false if it is asserted.
391             */
392            public boolean isInferred() {
393                    return owner == null;
394            }
395            
396            /**
397             * Checks whether the sentence contains the given word form (by word number) of the
398             * given ontology element.
399             * 
400             * @param e The ontology element.
401             * @param wordNumber The word number.
402             * @return true if the word form occurs in this sentence.
403             */
404            public boolean contains(OntologyElement e, int wordNumber) {
405                    if (textContainer == null) {
406                            tokenize();
407                    }
408                    for (TextElement t : textContainer.getTextElements()) {
409                            if (t instanceof OntologyTextElement) {
410                                    OntologyTextElement ot = (OntologyTextElement) t;
411                                    if (e == ot.getOntologyElement() && wordNumber == -1) return true;
412                                    if (e == ot.getOntologyElement() && wordNumber == ot.getWordNumber()) return true;
413                            }
414                    }
415                    return false;
416            }
417    
418            /**
419             * Checks whether the sentence contains the given ontology element (no matter which
420             * word form).
421             * 
422             * @param e The ontology element.
423             * @return true if the ontology element occurs in this sentence.
424             */
425            public boolean contains(OntologyElement e) {
426                    return contains(e, -1);
427            }
428            
429            /**
430             * Returns all individuals that answer this question. Questions in AceWiki are "DL Queries".
431             * They describe a concept and the answer consists of all individuals that belong to this concept.
432             * The null value is returned if the sentence is not a question.
433             * 
434             * @return A list of individuals that are the answer for the question.
435             * @see Ontology#getAnswer(Sentence)
436             */
437            public List<Individual> getAnswer() {
438                    if (!isQuestion()) return null;
439                    
440                    Ontology o = getOntology();
441                    if (answerCacheStateID != o.getStateID()) {
442                            answerCache = o.getAnswer(this);
443                            answerCacheStateID = o.getStateID();
444                    }
445                    return new ArrayList<Individual>(answerCache);
446            }
447            
448            /**
449             * Returns true if the sentence is a question and the answer to the question is cached and does
450             * not have to be recalculated.
451             * 
452             * @return true if the answer is cached.
453             */
454            public boolean isAnswerCached() {
455                    if (!isQuestion()) return false;
456                    return answerCacheStateID == getOntology().getStateID();
457            }
458            
459            private static String getUnderscoredText(TextContainer textContainer) {
460                    String t = "";
461                    for (TextElement te : textContainer.getTextElements()) {
462                            if (te instanceof OntologyTextElement) {
463                                    t += " " + ((OntologyTextElement) te).getUnderscoredText();
464                            } else if (te.getText().matches("[.?]")) {
465                                    t += te.getText();
466                            } else {
467                                    t += " " + te.getText();
468                            }
469                    }
470                    if (t.length() > 0) {
471                            t = t.substring(1);
472                    }
473                    return t;
474            }
475            
476            String serialize() {
477                    if (textContainer == null) {
478                            tokenize();
479                    }
480                    String s;
481                    if (integrated) {
482                            s = "|";
483                    } else {
484                            s = "#";
485                    }
486                    for (TextElement te : textContainer.getTextElements()) {
487                            if (te instanceof OntologyTextElement) {
488                                    OntologyTextElement ot = (OntologyTextElement) te;
489                                    s += " <" + ot.getOntologyElement().getId() + "," + ot.getWordNumber() + ">";
490                            } else {
491                                    s += " " + te.getText();
492                            }
493                    }
494                    return s + "\n";
495            }
496            
497            public String toString() {
498                    return getText();
499            }
500    
501    }