// This file is part of AceWiki.
// Copyright 2008-2012, AceWiki developers.
//
// AceWiki is free software: you can redistribute it and/or modify it under the terms of the GNU
// Lesser General Public License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// AceWiki is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
// even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License along with AceWiki. If
// not, see http://www.gnu.org/licenses/.

package ch.uzh.ifi.attempto.acewiki.aceowl;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import ch.uzh.ifi.attempto.acewiki.core.OntologyTextElement;
import ch.uzh.ifi.attempto.acewiki.owl.OWLIndividual;
import ch.uzh.ifi.attempto.ape.Gender;
import ch.uzh.ifi.attempto.ape.LexiconEntry;
import ch.uzh.ifi.attempto.base.TextContainer;
import ch.uzh.ifi.attempto.chartparser.LexicalRule;

/**
 * This class stands for individuals that are represented by ACE proper names and OWL individuals.
 * Proper names can be used either with a definite article (e.g. "the United Nations") or without
 * (e.g. "Switzerland"). Furthermore, proper names can have an abbreviation that is a shorter
 * name with exactly the same meaning. This abbreviation can as well be used either with a
 * definite article (e.g. "the UN") or without (e.g. "ACE").
 *<p>
 * Proper names have four word forms. The first one is the proper name with the definite
 * article or just the proper name if no definite article is used for this proper name. The second
 * one is in each case just the bare proper name. The third form is the abbreviation with the
 * definite article if there is one. The fourth form, finally, is just the bare abbreviation. If
 * there is no abbreviation then the third and fourth form are identical to the first and second
 * form, respectively. For proper names that do not use a definite article and that have no
 * abbreviation, all four forms are identical.
 *<p>
 * 0: proper name, preceded by "the" if used with definite article.
 * 1: bare proper name.
 * 2: abbreviation, preceded by "the" if used with definite article; or the same as 0 if there is
 *    no abbreviation.
 * 3: bare abbreviation; or the same as 1 if there is no abbreviation.
 *<p>
 * Examples: ["the United Nations", "United Nations", "the UN", "UN"];
 *           ["the Nile", "Nile", "the Nile", "Nile"];
 *           ["Switzerland", "Switzerland", "Switzerland", "Switzerland"];
 *           ["Attempto Controlled English", "Attempto Controlled English", "ACE", "ACE"].
 *
 * @author Tobias Kuhn
 */
public class ProperNameIndividual extends OWLIndividual implements ACEOWLOntoElement {

	/** Prefix that marks a word form as carrying the definite article. */
	private static final String DEF_ART = "the ";

	private String word, abbrev;
	private boolean wordDefArt, abbrevDefArt;

	/**
	 * Creates a new individual that has no name yet and is not registered to an ontology.
	 */
	public ProperNameIndividual() {
		this.word = "";
		this.abbrev = null;
		this.wordDefArt = false;
		this.abbrevDefArt = false;
	}

	/**
	 * Returns the four word forms of this proper name in the order described in the class
	 * documentation. Without an abbreviation, forms 2 and 3 repeat forms 0 and 1.
	 *
	 * @return a new array of length four
	 */
	public String[] getWords() {
		String fullWord = wordDefArt ? DEF_ART + word : word;
		if (abbrev == null) {
			return new String[] {fullWord, word, fullWord, word};
		}
		String fullAbbrev = abbrevDefArt ? DEF_ART + abbrev : abbrev;
		return new String[] {fullWord, word, fullAbbrev, abbrev};
	}

	/**
	 * Returns word form 1 and, if an abbreviation is set, additionally word form 3, both obtained
	 * through the inherited {@code getWord(int)}.
	 *
	 * @return an array with one or two entries
	 */
	public String[] getHeadwords() {
		if (abbrev == null) {
			return new String[] {getWord(1)};
		} else {
			return new String[] {getWord(1), getWord(3)};
		}
	}

	/**
	 * Restores the name, the abbreviation and the article flags from a semicolon-separated list
	 * of word forms as produced by {@link #serializeWords()}.
	 *<p>
	 * A single segment is taken as the bare proper name without article. With two or more
	 * segments, the second one is the bare proper name and the definite article is assumed if the
	 * first one starts with "the ". An abbreviation is only set if a non-empty fourth segment
	 * exists and the third segment differs from the first one. Input that contains no segments at
	 * all (e.g. ";") results in an empty name.
	 *
	 * @param serializedWords the serialized word forms; must not be null
	 */
	public void setWords(String serializedWords) {
		// String.split drops trailing empty segments, so any length (including 0 and 3) can
		// show up here and must not lead to an out-of-range array access.
		String[] words = serializedWords.split(";");

		// Start from "no article, no abbreviation" and only refine what the input provides.
		wordDefArt = false;
		abbrev = null;
		abbrevDefArt = false;

		if (words.length == 0) {
			word = "";
			return;
		}
		if (words.length == 1) {
			word = words[0];
			return;
		}

		word = words[1];
		wordDefArt = words[0].startsWith(DEF_ART);

		// A missing or empty fourth form means "no abbreviation"; in that case the article flag
		// of the abbreviation stays false as well, so that both fields remain consistent.
		boolean hasAbbrev = words.length >= 4
				&& !words[0].equals(words[2])
				&& words[3].length() > 0;
		if (hasAbbrev) {
			abbrev = words[3];
			abbrevDefArt = words[2].startsWith(DEF_ART);
		}
	}

	/**
	 * Serializes the four word forms returned by {@link #getWords()}, each one followed by a
	 * semicolon.
	 *
	 * @return the serialized word forms
	 */
	public String serializeWords() {
		StringBuilder sb = new StringBuilder();
		for (String w : getWords()) {
			sb.append(w).append(";");
		}
		return sb.toString();
	}

	/**
	 * Creates the lexicon entries for this proper name: one entry for the name itself and, if an
	 * abbreviation is set, one for the abbreviation. The "DefSg" factory method is used for forms
	 * that take the definite article. The bare proper name is passed as second argument for both
	 * entries.
	 *
	 * @return a new list with one or two lexicon entries
	 */
	public List<LexiconEntry> getLexiconEntries() {
		List<LexiconEntry> entries = new ArrayList<LexiconEntry>();
		if (wordDefArt) {
			entries.add(LexiconEntry.createPropernameDefSgEntry(word, word, Gender.UNDEF));
		} else {
			entries.add(LexiconEntry.createPropernameSgEntry(word, word, Gender.UNDEF));
		}
		if (abbrev != null) {
			if (abbrevDefArt) {
				entries.add(LexiconEntry.createPropernameDefSgEntry(abbrev, word, Gender.UNDEF));
			} else {
				entries.add(LexiconEntry.createPropernameSgEntry(abbrev, word, Gender.UNDEF));
			}
		}
		return entries;
	}

	/**
	 * Returns the type name of this kind of ontology element.
	 *
	 * @return the string "Proper Name"
	 */
	public String getType() {
		return "Proper Name";
	}

	/**
	 * Returns the internal type identifier of this kind of ontology element.
	 *
	 * @return the string "propername"
	 */
	public String getInternalType() {
		return "propername";
	}

	/**
	 * Returns true if the proper name has to be used with the definite article "the".
	 *
	 * @return true if the definite article "the" has to be used.
	 */
	public boolean hasDefiniteArticle() {
		return wordDefArt;
	}

	/**
	 * Returns true if the given word form uses the definite article "the". This returns
	 * always false for 1 and 3. For word number 2 the article flag of the abbreviation is
	 * reported, which is false if there is no abbreviation, even though word form 2 then falls
	 * back to word form 0.
	 *
	 * @param wordNumber the word number
	 * @return true if the definite article "the" is used for the word form of the
	 *         given word number
	 */
	public boolean hasDefiniteArticle(int wordNumber) {
		if (wordNumber == 0) {
			return wordDefArt;
		} else if (wordNumber == 2) {
			return abbrevDefArt;
		} else {
			return false;
		}
	}

	/**
	 * Returns the abbreviation (without definite article) or null if there is no abbreviation.
	 *
	 * @return the abbreviation
	 */
	public String getAbbreviation() {
		return abbrev;
	}

	/**
	 * Returns the bare proper name as IRI suffix.
	 *
	 * @return the bare proper name
	 */
	public String getIRISuffix() {
		return word;
	}

	/**
	 * Adds the lexical rules of this proper name to the given collection. Rules are only added
	 * if the requested category is null or "propername": one rule for word form 0 and, if an
	 * abbreviation is set, one for word form 2.
	 *
	 * @param catName the requested category name, or null
	 * @param lexRules the collection to which the rules are added
	 */
	public void collectLexicalRules(String catName, Collection<LexicalRule> lexRules) {
		if (catName != null && !catName.equals("propername")) {
			return;
		}
		lexRules.add(new LexicalRule("propername", getWord(0)));
		if (getAbbreviation() != null) {
			lexRules.add(new LexicalRule("propername", getWord(2)));
		}
	}

	/**
	 * Returns a text container holding a single ontology text element that refers to this
	 * individual with word number 1.
	 *
	 * @return the answer text
	 */
	public TextContainer getAnswerText() {
		return new TextContainer(new OntologyTextElement(this, 1));
	}

}