001 // This file is part of AceWiki. 002 // Copyright 2008-2012, AceWiki developers. 003 // 004 // AceWiki is free software: you can redistribute it and/or modify it under the terms of the GNU 005 // Lesser General Public License as published by the Free Software Foundation, either version 3 of 006 // the License, or (at your option) any later version. 007 // 008 // AceWiki is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without 009 // even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 010 // Lesser General Public License for more details. 011 // 012 // You should have received a copy of the GNU Lesser General Public License along with AceWiki. If 013 // not, see http://www.gnu.org/licenses/. 014 015 package ch.uzh.ifi.attempto.acewiki.aceowl; 016 017 import java.util.ArrayList; 018 import java.util.Collection; 019 020 import ch.uzh.ifi.attempto.acewiki.core.Ontology; 021 import ch.uzh.ifi.attempto.acewiki.core.OntologyElement; 022 import ch.uzh.ifi.attempto.base.AbstractOption; 023 import ch.uzh.ifi.attempto.chartparser.DynamicLexicon; 024 import ch.uzh.ifi.attempto.chartparser.LexicalRule; 025 026 /** 027 * This class manages the lexical entries for AceWiki. 028 * 029 * @author Tobias Kuhn 030 */ 031 public class ACEOWLLexicon implements DynamicLexicon { 032 033 private Ontology ontology; 034 035 /** 036 * Creates a new lexicon manager. 037 */ 038 public ACEOWLLexicon() { 039 } 040 041 /** 042 * This is the first method to be called and provides the ontology object. 043 * 044 * @param ontology The ontology object. 045 */ 046 public void init(Ontology ontology) { 047 this.ontology = ontology; 048 } 049 050 public Collection<LexicalRule> getLexRules(AbstractOption option) { 051 String catName = option.getCategoryName(); 052 Collection<LexicalRule> lexRules = new ArrayList<LexicalRule>(); 053 if (catName.equals("variable")) { 054 addVariableEntries(lexRules, "variable"); 055 } else if (catName.equals("reference")) { 056 addVariableEntries(lexRules, "reference"); 057 } else if (catName.equals("number")) { 058 for (int i = 2 ; i < 100 ; i++) { 059 lexRules.add(new LexicalRule("number", i + "")); 060 } 061 } else { 062 for (OntologyElement el : ontology.getOntologyElements()) { 063 if (el instanceof ACEOWLOntoElement) { 064 ((ACEOWLOntoElement) el).collectLexicalRules(catName, lexRules); 065 } 066 } 067 } 068 for (LexicalRule r : lexRules) { 069 r.getCategory().setFeature("text", r.getWord().getName()); 070 } 071 return lexRules; 072 } 073 074 public Collection<LexicalRule> getLexRules(String word) { 075 Collection<LexicalRule> lexRules = new ArrayList<LexicalRule>(); 076 if (word.matches("[XYZ][0-9]*")) { 077 lexRules.add(new LexicalRule("variable", word)); 078 lexRules.add(new LexicalRule("reference", word)); 079 } else if (word.matches("[1-9][0-9]+|[2-9]")) { 080 lexRules.add(new LexicalRule("number", word)); 081 } else { 082 OntologyElement oe = ontology.getElement(word); 083 if (word.startsWith("the ")) { 084 oe = ontology.getElement(word.substring(4)); 085 } 086 if (oe != null && oe instanceof ACEOWLOntoElement) { 087 ((ACEOWLOntoElement) oe).collectLexicalRules(null, lexRules); 088 } 089 } 090 for (LexicalRule r : lexRules) { 091 r.getCategory().setFeature("text", r.getWord().getName()); 092 } 093 return lexRules; 094 } 095 096 private static void addVariableEntries(Collection<LexicalRule> entries, String cat) { 097 for (String s : new String[] {"X", "Y", "Z"}) { 098 String t = s; 099 entries.add(new LexicalRule(cat, t)); 100 } 101 } 102 103 /** 104 * Returns true if the string represents a valid word form. 105 * 106 * @param s The string. 107 * @return true if the string represents a valid word form. 108 */ 109 public static boolean isValidWordOrEmpty(String s) { 110 return s.matches("([a-zA-Z][a-zA-Z0-9_-]*)?"); 111 } 112 113 /** 114 * Normalizes the string. White space characters are replaced by underscores. 115 * 116 * @param s The input string. 117 * @return The normalized string. 118 */ 119 public static String normalize(String s) { 120 return s.replaceAll("(\\s|_)+", "_").replaceAll("(^_|_$)", ""); 121 } 122 123 }