// This file is part of the Attempto Java Packages.
// Copyright 2008-2009, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
//
// The Attempto Java Packages is free software: you can redistribute it and/or modify it under the
// terms of the GNU Lesser General Public License as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// The Attempto Java Packages is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License along with the Attempto
// Java Packages. If not, see http://www.gnu.org/licenses/.

package ch.uzh.ifi.attempto.preditor.text;

import java.util.Locale;
import java.util.regex.Pattern;

import ch.uzh.ifi.attempto.ape.ACEUtils;
import ch.uzh.ifi.attempto.chartparser.StringRef;
import ch.uzh.ifi.attempto.chartparser.Terminal;

/**
 * This is a simple implementation of a context checker for the English language. The words "a" and
 * "an" are adapted according to the following word, e.g. "a","apple" becomes "an","apple", and
 * "an","customer" becomes "a","customer". Furthermore, words can be capitalized if they are at
 * sentence-initial position. Capitalization can be turned on or off. In both cases exceptions can
 * be defined using the feature "capitalize" that is read from the categories of the text element.
 * If exceptions are enabled and the value of "capitalize" is "true" or "false" then this overrides
 * the default capitalization behavior.
 *
 * @author Tobias Kuhn
 */
public class EnglishContextChecker implements ContextChecker {

	/** Name of the category feature that defines capitalization exceptions. */
	private static final String CAPITALIZE_FEATURE = "capitalize";

	/** Matches a preceding token that ends a sentence: ".", "?" or "!". */
	private static final Pattern SENTENCE_END = Pattern.compile("(\\.|\\?|\\!)");

	/** Matches the indefinite articles "a", "an", "A" and "An". */
	private static final Pattern INDEFINITE_ARTICLE = Pattern.compile("(A|a)n?");

	private boolean defaultCapitalize, exceptionsEnabled;

	/**
	 * Creates a new English context checker.
	 *
	 * @param defaultCapitalize true if words should be capitalized by default.
	 * @param exceptionsEnabled true if exceptions can be defined using the feature "capitalize".
	 */
	public EnglishContextChecker(boolean defaultCapitalize, boolean exceptionsEnabled) {
		setDefaultCapitalize(defaultCapitalize);
		setExceptionsEnabled(exceptionsEnabled);
	}

	/**
	 * Creates a new English context checker with no exceptions for capitalization.
	 *
	 * @param defaultCapitalize true if words should be capitalized by default.
	 */
	public EnglishContextChecker(boolean defaultCapitalize) {
		setDefaultCapitalize(defaultCapitalize);
	}

	/**
	 * Returns whether words are capitalized by default.
	 *
	 * @return true if words are capitalized by default.
	 */
	public boolean isDefaultCapitalize() {
		return defaultCapitalize;
	}

	/**
	 * Enables or disables the default capitalization.
	 *
	 * @param defaultCapitalize true if words should be capitalized by default.
	 */
	public void setDefaultCapitalize(boolean defaultCapitalize) {
		this.defaultCapitalize = defaultCapitalize;
	}

	/**
	 * Returns whether capitalization exceptions are enabled.
	 *
	 * @return true if capitalization exceptions are enabled.
	 */
	public boolean areExceptionsEnabled() {
		return exceptionsEnabled;
	}

	/**
	 * Enables or disables capitalization exceptions.
	 *
	 * @param exceptionsEnabled true if the feature "capitalize" should be used to define
	 *     capitalization exceptions.
	 */
	public void setExceptionsEnabled(boolean exceptionsEnabled) {
		this.exceptionsEnabled = exceptionsEnabled;
	}

	/**
	 * Returns the text of the given text element, adapted to its surrounding text.
	 * <p>
	 * The element is treated as sentence-initial if there is no preceding text or if the preceding
	 * text is exactly ".", "?" or "!". In that case the first character is upper-cased according
	 * to the default capitalization setting, which is inverted if exceptions are enabled and every
	 * category of the element carries the opposite "capitalize" value. Afterwards, if following
	 * text is given and the result is one of "a", "an", "A" or "An", the article is rewritten to
	 * the form selected by {@link ACEUtils#useIndefiniteArticleAn(String)} for the following text.
	 *
	 * @param textElement The text element whose text should be adapted.
	 * @param precedingText The text that precedes the element, or null if there is none.
	 * @param followingText The text that follows the element, or null if there is none.
	 * @return The text of the element adapted to its context.
	 */
	public String getTextInContext(TextElement textElement, String precedingText, String followingText) {
		String text = textElement.getOriginalText();

		boolean capitalize = false;
		if (precedingText == null || SENTENCE_END.matcher(precedingText).matches()) {
			capitalize = defaultCapitalize;
			if (exceptionsEnabled && isCapitalizationException(textElement)) {
				capitalize = !capitalize;
			}
		}

		String t = text;
		if (capitalize && text.length() > 0) {
			// Use a fixed locale: the default locale could upper-case English letters incorrectly
			// (e.g. "i" becomes a dotted capital I under a Turkish locale).
			t = text.substring(0, 1).toUpperCase(Locale.ENGLISH) + text.substring(1);
		}

		if (followingText != null && INDEFINITE_ARTICLE.matcher(t).matches()) {
			// Keep the (possibly capitalized) first letter and only add or drop the "n".
			String firstLetter = t.substring(0, 1);
			t = ACEUtils.useIndefiniteArticleAn(followingText) ? firstLetter + "n" : firstLetter;
		}
		return t;
	}

	/**
	 * Checks whether the given text element is an exception to the default capitalization, i.e.
	 * whether it has at least one category and every one of its categories has the "capitalize"
	 * feature set to the opposite of the default. An element without categories is never an
	 * exception, because it does not define the feature at all.
	 */
	private boolean isCapitalizationException(TextElement textElement) {
		String overridingValue = defaultCapitalize ? "false" : "true";
		boolean hasCategory = false;
		for (Terminal cat : textElement.getCategories()) {
			hasCategory = true;
			StringRef ref = cat.getFeature(CAPITALIZE_FEATURE);
			String value = (ref == null) ? null : ref.getString();
			if (!overridingValue.equals(value)) {
				return false;
			}
		}
		return hasCategory;
	}

}