001 // This file is part of AceWiki.
002 // Copyright 2008-2012, AceWiki developers.
003 //
004 // AceWiki is free software: you can redistribute it and/or modify it under the terms of the GNU
005 // Lesser General Public License as published by the Free Software Foundation, either version 3 of
006 // the License, or (at your option) any later version.
007 //
008 // AceWiki is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
009 // even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
010 // Lesser General Public License for more details.
011 //
012 // You should have received a copy of the GNU Lesser General Public License along with AceWiki. If
013 // not, see http://www.gnu.org/licenses/.
014
015 package ch.uzh.ifi.attempto.acewiki.aceowl;
016
import java.util.ArrayList;
import java.util.Collection;
import java.util.regex.Pattern;

import ch.uzh.ifi.attempto.acewiki.core.Ontology;
import ch.uzh.ifi.attempto.acewiki.core.OntologyElement;
import ch.uzh.ifi.attempto.base.AbstractOption;
import ch.uzh.ifi.attempto.chartparser.DynamicLexicon;
import ch.uzh.ifi.attempto.chartparser.LexicalRule;
025
026 /**
027 * This class manages the lexical entries for AceWiki.
028 *
029 * @author Tobias Kuhn
030 */
031 public class ACEOWLLexicon implements DynamicLexicon {
032
033 private Ontology ontology;
034
035 /**
036 * Creates a new lexicon manager.
037 */
038 public ACEOWLLexicon() {
039 }
040
041 /**
042 * This is the first method to be called and provides the ontology object.
043 *
044 * @param ontology The ontology object.
045 */
046 public void init(Ontology ontology) {
047 this.ontology = ontology;
048 }
049
050 public Collection<LexicalRule> getLexRules(AbstractOption option) {
051 String catName = option.getCategoryName();
052 Collection<LexicalRule> lexRules = new ArrayList<LexicalRule>();
053 if (catName.equals("variable")) {
054 addVariableEntries(lexRules, "variable");
055 } else if (catName.equals("reference")) {
056 addVariableEntries(lexRules, "reference");
057 } else if (catName.equals("number")) {
058 for (int i = 2 ; i < 100 ; i++) {
059 lexRules.add(new LexicalRule("number", i + ""));
060 }
061 } else {
062 for (OntologyElement el : ontology.getOntologyElements()) {
063 if (el instanceof ACEOWLOntoElement) {
064 ((ACEOWLOntoElement) el).collectLexicalRules(catName, lexRules);
065 }
066 }
067 }
068 for (LexicalRule r : lexRules) {
069 r.getCategory().setFeature("text", r.getWord().getName());
070 }
071 return lexRules;
072 }
073
074 public Collection<LexicalRule> getLexRules(String word) {
075 Collection<LexicalRule> lexRules = new ArrayList<LexicalRule>();
076 if (word.matches("[XYZ][0-9]*")) {
077 lexRules.add(new LexicalRule("variable", word));
078 lexRules.add(new LexicalRule("reference", word));
079 } else if (word.matches("[1-9][0-9]+|[2-9]")) {
080 lexRules.add(new LexicalRule("number", word));
081 } else {
082 OntologyElement oe = ontology.getElement(word);
083 if (word.startsWith("the ")) {
084 oe = ontology.getElement(word.substring(4));
085 }
086 if (oe != null && oe instanceof ACEOWLOntoElement) {
087 ((ACEOWLOntoElement) oe).collectLexicalRules(null, lexRules);
088 }
089 }
090 for (LexicalRule r : lexRules) {
091 r.getCategory().setFeature("text", r.getWord().getName());
092 }
093 return lexRules;
094 }
095
096 private static void addVariableEntries(Collection<LexicalRule> entries, String cat) {
097 for (String s : new String[] {"X", "Y", "Z"}) {
098 String t = s;
099 entries.add(new LexicalRule(cat, t));
100 }
101 }
102
103 /**
104 * Returns true if the string represents a valid word form.
105 *
106 * @param s The string.
107 * @return true if the string represents a valid word form.
108 */
109 public static boolean isValidWordOrEmpty(String s) {
110 return s.matches("([a-zA-Z][a-zA-Z0-9_-]*)?");
111 }
112
113 /**
114 * Normalizes the string. White space characters are replaced by underscores.
115 *
116 * @param s The input string.
117 * @return The normalized string.
118 */
119 public static String normalize(String s) {
120 return s.replaceAll("(\\s|_)+", "_").replaceAll("(^_|_$)", "");
121 }
122
123 }