001 // This file is part of AceWiki.
002 // Copyright 2008-2012, AceWiki developers.
003 //
004 // AceWiki is free software: you can redistribute it and/or modify it under the terms of the GNU
005 // Lesser General Public License as published by the Free Software Foundation, either version 3 of
006 // the License, or (at your option) any later version.
007 //
008 // AceWiki is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
009 // even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
010 // Lesser General Public License for more details.
011 //
012 // You should have received a copy of the GNU Lesser General Public License along with AceWiki. If
013 // not, see http://www.gnu.org/licenses/.
014
015 package ch.uzh.ifi.attempto.acewiki.aceowl;
016
017 import java.util.ArrayList;
018 import java.util.Collection;
019 import java.util.List;
020
021 import ch.uzh.ifi.attempto.acewiki.core.OntologyTextElement;
022 import ch.uzh.ifi.attempto.acewiki.owl.OWLIndividual;
023 import ch.uzh.ifi.attempto.ape.Gender;
024 import ch.uzh.ifi.attempto.ape.LexiconEntry;
025 import ch.uzh.ifi.attempto.base.TextContainer;
026 import ch.uzh.ifi.attempto.chartparser.LexicalRule;
027
028 /**
029 * This class stands for individuals that are represented by ACE proper names and OWL individuals.
030 * Proper names can be used either with a definite article (e.g. "the United Nations") or without
031 * (e.g. "Switzerland"). Furthermore, proper names can have an abbreviation that is a shorter
032 * name with exactly the same meaning. This abbreviation can also be used either with a definite
033 * article (e.g. "the UN") or without (e.g. "ACE").
034 *<p>
035 * Proper names have four word forms. The first one is the proper name with the definite
036 * article or just the proper name if no definite article is used for this proper name. The second
037 * one is in each case just the bare proper name. The third form is the abbreviation with the
038 * definite article if there is one. The fourth form, finally, is just the bare abbreviation. If
039 * there is no abbreviation then the third and fourth form are identical to the first and second
040 * form, respectively. For proper names that do not use a definite article and that have no
041 * abbreviation, all four forms are identical.
042 *<p>
043 * 0: proper name, preceded by "the" if used with definite article.
044 * 1: bare proper name.
045 * 2: abbreviation, preceded by "the" if used with definite article; or the same as 0 if there is
046 * no abbreviation.
047 * 3: bare abbreviation; or the same as 1 if there is no abbreviation.
048 *<p>
049 * Examples: ["the United Nations", "United Nations", "the UN", "UN"];
050 * ["the Nile", "Nile", "the Nile", "Nile"];
051 * ["Switzerland", "Switzerland", "Switzerland", "Switzerland"];
052 * ["Attempto Controlled English", "Attempto Controlled English", "ACE", "ACE"].
053 *
054 * @author Tobias Kuhn
055 */
056 public class ProperNameIndividual extends OWLIndividual implements ACEOWLOntoElement {
057
058 private String word, abbrev;
059 private boolean wordDefArt, abbrevDefArt;
060
061 /**
062 * Creates a new individual that has no name yet and is not registered to an ontology.
063 */
064 public ProperNameIndividual() {
065 this.word = "";
066 this.abbrev = null;
067 this.wordDefArt = false;
068 this.abbrevDefArt = false;
069 }
070
071 public String[] getWords() {
072 if (abbrev == null) {
073 if (wordDefArt) {
074 return new String[] {"the " + word, word, "the " + word, word};
075 } else {
076 return new String[] {word, word, word, word};
077 }
078 } else {
079 if (wordDefArt) {
080 if (abbrevDefArt) {
081 return new String[] {"the " + word, word, "the " + abbrev, abbrev};
082 } else {
083 return new String[] {"the " + word, word, abbrev, abbrev};
084 }
085 } else {
086 if (abbrevDefArt) {
087 return new String[] {word, word, "the " + abbrev, abbrev};
088 } else {
089 return new String[] {word, word, abbrev, abbrev};
090 }
091 }
092 }
093 }
094
095 public String[] getHeadwords() {
096 if (abbrev == null) {
097 return new String[] {getWord(1)};
098 } else {
099 return new String[] {getWord(1), getWord(3)};
100 }
101 }
102
103 public void setWords(String serializedWords) {
104 String[] words = serializedWords.split(";");
105 if (words.length == 1) {
106 word = words[0];
107 wordDefArt = false;
108 abbrev = null;
109 abbrevDefArt = false;
110 } else if (words.length == 2) {
111 word = words[1];
112 wordDefArt = words[0].startsWith("the ");
113 abbrev = null;
114 abbrevDefArt = false;
115 } else if (words[2] == null || words[0].equals(words[2])) {
116 word = words[1];
117 wordDefArt = words[0].startsWith("the ");
118 abbrev = null;
119 abbrevDefArt = false;
120 } else {
121 word = words[1];
122 wordDefArt = words[0].startsWith("the ");
123 abbrev = words[3];
124 abbrevDefArt = words[2].startsWith("the ");
125 }
126 if (abbrev != null && abbrev.length() == 0) {
127 abbrev = null;
128 }
129 }
130
131 public String serializeWords() {
132 String[] w = getWords();
133 return w[0] + ";" + w[1] + ";" + w[2] + ";" + w[3] + ";";
134 }
135
136 public List<LexiconEntry> getLexiconEntries() {
137 List<LexiconEntry> entries = new ArrayList<LexiconEntry>();
138 if (wordDefArt) {
139 entries.add(LexiconEntry.createPropernameDefSgEntry(word, word, Gender.UNDEF));
140 } else {
141 entries.add(LexiconEntry.createPropernameSgEntry(word, word, Gender.UNDEF));
142 }
143 if (abbrev != null) {
144 if (abbrevDefArt) {
145 entries.add(LexiconEntry.createPropernameDefSgEntry(abbrev, word, Gender.UNDEF));
146 } else {
147 entries.add(LexiconEntry.createPropernameSgEntry(abbrev, word, Gender.UNDEF));
148 }
149 }
150 return entries;
151 }
152
153 public String getType() {
154 return "Proper Name";
155 }
156
157 public String getInternalType() {
158 return "propername";
159 }
160
161 /**
162 * Returns true if the proper name has to be used with the definite article "the".
163 *
164 * @return true if the definite article "the" has to be used.
165 */
166 public boolean hasDefiniteArticle() {
167 return wordDefArt;
168 }
169
170 /**
171 * Returns true if the given word form uses the definite article "the". This returns
172 * always false for 1 and 3.
173 *
174 * @param wordNumber the word number
175 * @return true if the definite article "the" is used for the word form of the
176 * given word number
177 */
178 public boolean hasDefiniteArticle(int wordNumber) {
179 if (wordNumber == 0) {
180 return wordDefArt;
181 } else if (wordNumber == 2) {
182 return abbrevDefArt;
183 } else {
184 return false;
185 }
186 }
187
188 /**
189 * Returns the abbreviation (without definite article) or null if there is no abbreviation.
190 *
191 * @return the abbreviation
192 */
193 public String getAbbreviation() {
194 if (abbrev == null) return null;
195 return abbrev;
196 }
197
198 public String getIRISuffix() {
199 return word;
200 }
201
202 public void collectLexicalRules(String catName, Collection<LexicalRule> lexRules) {
203 if (catName == null || catName.equals("propername")) {
204 lexRules.add(new LexicalRule("propername", getWord(0)));
205 if (getAbbreviation() != null) {
206 lexRules.add(new LexicalRule("propername", getWord(2)));
207 }
208 }
209 }
210
211 public TextContainer getAnswerText() {
212 return new TextContainer(new OntologyTextElement(this, 1));
213 }
214
215 }