User:Dan Polansky/CFI.py

""" This is dated code modelling a dated version of the CFI (Wiktionary's criteria for inclusion).

Issues:
* Missing constructed languages
* Missing company names (I don't care)
* Etc.
"""

def shouldBeIncluded(term, sense):
  """ Determines whether a term should be included in English
      Wiktionary in a given sense. The sense knows the language and part of speech.

      The term must be attested, and additionally:
      * a term from a fictional universe must meet the fictional-universe criteria;
      * a proper name must pass the proper-name check;
      * a term that is not a proper name must not be a semantic sum of parts.

      The "not X or Y" forms below encode "if X then Y" AKA "X => Y".
  """
  # The whole expression is parenthesized so that it may span several lines;
  # a bare trailing "and" at the end of a line is a syntax error in Python.
  return (
    isAttested(term, sense)
    # fromFictionalUniverse => meetsFictionalUniverseCriteria
    and (not fromFictionalUniverse(term, sense) or meetsFictionalUniverseCriteria(term, sense))
    # isProperName => shouldProperNameBeIncludedGivenAttestation
    and (not isProperName(term, sense) or shouldProperNameBeIncludedGivenAttestation(term, sense))
    # not isProperName => not isSemanticSumOfParts
    and (isProperName(term, sense) or not isSemanticSumOfParts(term, sense))
  )

def isAttested(term, sense):
  """ Determines whether the term is attested in the given sense.

      Any one of the following is sufficient:
      * the term is in widespread use;
      * the term has three suitable quotations;
      * the language of the sense is extinct and the term has one
        contemporaneous quotation.

      Appearance in one well-known work used to be accepted as well,
      but that criterion was deleted via vote.
  """
  # Parenthesized so that the expression may span several lines.
  # NOTE(review): "isExtict" looks like a misspelling of "isExtinct"; it is left
  # unchanged here because the language class is not visible in this file.
  return (
    isInWidespreadUse(term, sense)
    or hasThreeSuitableQuotations(term, sense)
    or (sense.getLanguage().isExtict() and hasOneContemporaneousQuotation(term, sense))
  )

def hasThreeSuitableQuotations(term, sense):
  """ Determines whether the term has at least three suitable quotations
      for the given sense.

      A quotation is a candidate when it is from a durably archived source,
      uses the term rather than mentions it, and is in the language of the
      sense. Quotations that depend on an earlier candidate are dropped.
      At least three independent quotations must remain, and they must
      span at least one year.

      The current implementation is extremely resource-wasting.
  """
  allQuotations = Corpus.getAllQuotations(term) # A call of a super-mighty method

  # Filter all quotations by the requirements of being durably archived and used rather than mentioned
  candidateQuotations = [
    quotation for quotation in allQuotations
    if isFromDurablyArchivedSource(quotation) # Broken; a string alone does not tell its source.
    and isUseRatherThanMention(quotation, term, sense)
    and quotation.getLanguage() == sense.getLanguage()
  ]

  # Remove dependent quotations: keep a quotation only if it is independent of
  # every quotation kept before it. A new list is built instead of removing
  # items from the list being iterated, which would skip elements.
  independentQuotations = []
  for quotation in candidateQuotations:
    if not any(isDependentQuotation(quotation, kept) for kept in independentQuotations):
      independentQuotations.append(quotation)

  # At least three suitable quotations; checked first so that the time span
  # is never computed for a list that is too short (possibly empty).
  if len(independentQuotations) < 3:
    return False

  # Determine time span
  if getTimeSpanInYears(independentQuotations) < 1.0:
    return False

  return True

def isSemanticSumOfParts(term, sense):
  """ Determines whether the meaning of the term in the given sense is
      merely the combination of the meanings of its components.

      A term written without a space and without a hyphen is treated as a
      single word and therefore never as a sum of parts. A multi-word term
      is a sum of parts when the semantics of the sense equals the semantics
      composed from its components.
  """
  if hasNoSpaceAndNoHyphen(term):
    # A single word is not a sum of parts. Returning True here would make
    # shouldBeIncluded() reject every single-word term that is not a proper name.
    return False # This may be controversial for German.

  # It is a multi-word term.
  compoundSemantics = Semantics()
  for component in termComponents(term):
    compoundSemantics.expand(component.getSense())
  # The comparison below is simplified but seems to capture the gist
  return sense.getSemantics() == compoundSemantics

def shouldProperNameBeIncludedGivenAttestation(term, sense):
  """ Determines whether an attested proper name should be included.

      Given names, surnames and patronymics are always included. Brands of
      products or services are delegated to the brand-specific check.
      Everything else is currently accepted.
  """
  # Specific case: personal names of any of these kinds are included outright.
  personalNameChecks = (isGivenName, isSurname, isPatronymic)
  if any(check(term, sense) for check in personalNameChecks):
    return True

  # The general case
  if not isBrandOfProductOrService(term, sense):
    return True # Dummy return value; the general case is unimplemented for lacking consensus.

  # Specific case: brands have their own criteria.
  return shouldBrandOfProductOrServiceBeIncluded(term, sense)

def shouldBrandOfProductOrServiceBeIncluded(term, sense):
  """ Determines whether a brand of a product or service should be included.

      Placeholder: the real criteria are complex and largely lead to
      exclusion, so every brand is rejected regardless of the arguments.
  """
  isIncluded = False # Dummy value standing in for the unimplemented criteria.
  return isIncluded

def isDependentQuotation(quotation, quotation2):
  """ Determines whether two quotations are dependent on each other.

      They count as dependent when one is a verbatim or near-verbatim
      quotation of the other, when both are verbatim or near-verbatim
      quotations of a single original source, or when both have the same author.
  """
  # Parenthesized so that the expression may span several lines;
  # a bare trailing "or" at the end of a line is a syntax error in Python.
  return (
    verbatimOrNearVerbatimQuotation(quotation, quotation2)
    or verbatimOrNearVerbatimQuotationOfSingleOrigSource(quotation, quotation2)
    or quotation.getAuthor() == quotation2.getAuthor()
  )

def isUseRatherThanMention(quotation, term, sense):
  """ Determines whether the quotation uses the term rather than merely
      mentioning it.

      The quotation is considered a mention when the term is in the
      left-hand part of a definition, or when the quotation contains
      "is called <term>" or "<term> is a name". The remaining mention
      patterns are not implemented. The sense argument is currently unused.
  """
  # Imported locally because the file has no import section.
  import re

  # The term is escaped so that characters such as "." or "(" in it are matched
  # literally instead of being interpreted as regular-expression syntax.
  escapedTerm = re.escape(term)

  # re.search scans the whole quotation, including text after a line break,
  # which a leading ".*" with re.match would not reach.
  # NOTE(review): quotation is passed to re as in the original, so it is
  # assumed to be a string here - confirm against the quotation type.
  return (
    not termInLeftHandPartOfDef(term, quotation)
    and not re.search("is called " + escapedTerm, quotation)
    and not re.search(escapedTerm + " is a name", quotation)
    and not False # Dummy: the rest
  )
User:Dan Polansky/CFI.py

Navigation menu

Search