// User:Dan Polansky/CFI.java
class CFI {
/** Determines whether a term should be included in English
Wiktionary in a given sense. */
boolean shouldBeIncluded(String term, String sense) {
return // Is attested, and
isAttested(term, sense) &&
// If it is from fictional universe, then it meets its criteria, and
(!fromFictionalUniverse(term, sense) || meetsFictionalUniverseCriteria(term, sense)) &&
// If it is a proper name, then it meets specific citeria for proper names, and
(!isProperName(term,sense) || shouldProperNameBeIncludedGivenAttestation(term, sense)) &&
// If it is not a proper name, then it is not a semantic sum of parts
(isProperName(term,sense) || !isSemanticSumOfParts(term, sense));
}
/** Determines whether the term is attested in a given sense.
Four criteria of attestation are considered. */
boolean isAttested(String term, String sense) {
return isInWidespreadUse(term, sense) ||
isInOneWellknownWork(term, sense) ||
hasThreeSuitableQuotations(term, sense) ||
( getLanguage(term).isExtict() && hasOneContemporaneousQuotation(term, sense));
// The fourth condition is broken; how would one know the language merely from the term?
// The language would have to be passed as an argument.
}
/** Determines whether the term has three suitable quotations
in a given sense, spanning at least a year. */
boolean hasThreeSuitableQuotations(term, sense) {
// The current implementation is extremely resource-wasting. To be reimplemented
// in such a way that humans can conveniently follow the method.
List allQuotations = Corpus.getAllQuotations(term); // A call of a super-mighty method
List candidateQuotations = new ArrayList();
// Filter all quotations by the requirements of being durably archived and used rather than mentioned
for (i=allQuotations.iterator(); i.hasNext();) {
String quotation = (String)i.next();
if (isFromDurablyArchivedSource(quotation) && // Broken; a string alone does not tell its source.
isUseRatherThanMention(quotation, term, sense))
candidateQuotations.add(quotation)
}
// Remove dependent quotations
for (i=candidateQuotations.iterator(); i.hasNext();) {
String quotation = (String)i.next();
for (j=candidateQuotations.iterator(); j.hasNext();) {
String quotation2 = (String)j.next();
if (quotation2.equals(quotation1))
break; // Loop only up to the same quotation, checking only the previous ones.
if (isDependentQuotation(quotation, quotation2)) {
candidateQuotations.remove(quotation);
break; }}}
// Determine time span
if (getTimeSpanInYears(candidateQuotations) < 1.0)
return false;
return candidateQuotations.size() >= 3; // At least three suitable quotations
}
/** Determines whether the term in the given sense is a semantic
sum of parts. Examples of sums of parts include "brown leaf". */
boolean isSemanticSumOfParts(String term, String sense) {
// To be implemented
if (hasNoSpaceAndNoHyphen(term)) {
return true; //This may be controversial for German.
} else {
// It is a multi-word term.
return true; // Dummy return value
}
}
/** Determines whether a proper name should be included in a given sense given
it is attested. */
boolean shouldProperNameBeIncludedGivenAttestation(String term, String sense) {
// Specific cases
if (isGivenName(term, sense) || isSurname(term, sense) || isPatronymic(term, sense))
return true;
if (isBrandOfPhysicalProduct(term, sense))
return shouldBrandOfPhysicalProductBeIncluded(term, sense);
// The general case
return true; // Dummy return value; the general case is unimplemented for lacking consensus.
}
/** Determines whether a brand of physical product should be included using
seven requirements on attesting quotations. */
boolean shouldBrandOfPhysicalProductBeIncluded(term, sense) {
return false; // Dummy return value; complex criteria that largely lead to exclusion.
}
}
/*
Known issues:
* Constructed languages are not covered.
* Company names are not covered (I don't care).
* Language and part of speech probably need to be added as parameters to all methods;
for simplicity's sake, I have omitted them.
*/