# User:SnowyCinema/findwords.py
# (Redirected from User:PseudoSkull/findwords.py)
import re
# Reads INPUT_PATH, extracts every word-like token (plus a few derived
# forms of it) and writes them to OUTPUT_PATH as a case-insensitively sorted
# list of {{l|en|...}} links, one entry per word.
INPUT_PATH = "file_to_process.txt"
OUTPUT_PATH = "generated_list.txt"

# Symbols that should never appear in entries.  Each one is replaced by a
# space so that the words on either side of it are split apart.
_SEPARATORS = '[](){}/\\!?…":;><|*•.—–,=”“@_#©$¢™®♂♀⚥+~«»‹›^'
_SEPARATOR_TABLE = str.maketrans(dict.fromkeys(_SEPARATORS, " "))

# Characters that are normalized to a plain ASCII apostrophe.
_APOSTROPHE_LIKE = ("’", "‘", "⚪")


def RepresentsInt(x):
    """Return True if the string *x* can be parsed by ``int()``."""
    try:
        int(x)
    except ValueError:
        return False
    return True


def clean_line(line):
    """Strip unwanted symbols from *line* and return its tokens.

    Every separator symbol and every double hyphen ("--") becomes a space,
    curly quotes (and the other apostrophe-like marks) become "'", and the
    result is split on whitespace.  Single hyphens are kept so hyphenated
    words survive as one token.
    """
    line = line.translate(_SEPARATOR_TABLE).replace("--", " ")
    for mark in _APOSTROPHE_LIKE:
        line = line.replace(mark, "'")
    return line.split()


def candidate_forms(item):
    """Return *item* followed by the alternative spellings derived from it.

    * hyphenated tokens also yield the joined form ("wellknown"), each
      hyphen-separated part, and the space-separated form ("well known");
    * tokens ending in "'s" also yield the form without the possessive;
    * tokens starting or ending with "'" also yield the unquoted form.
    """
    forms = [item]
    if "-" in item:
        forms.append(item.replace("-", ""))
        forms.extend(item.split("-"))
        # strip() keeps a leading/trailing hyphen from turning into an entry
        # with stray whitespace (e.g. "pre-" -> "pre ").
        forms.append(item.replace("-", " ").strip())
    if item.endswith("'s"):
        forms.append(item[:-2])
    if item.startswith("'") or item.endswith("'"):
        forms.append(item.strip("'"))
    return forms


def find_words(lines):
    """Collect the possible entries found in *lines*.

    *lines* is any iterable of strings (an open text file works).  The
    result is a list without duplicates, sorted case-insensitively; entries
    that differ only by case keep the order in which they were first seen.
    """
    entries = []
    seen = set()  # mirrors `entries` for O(1) duplicate checks

    def add(word):
        if word not in seen:
            seen.add(word)
            entries.append(word)

    def normal(x):
        # Skip duplicates, one-letter (or empty) strings, dangling prefixes
        # such as "pre-", and plain integers.
        if x in seen or len(x) <= 1 or x.endswith("-") or RepresentsInt(x):
            return
        add(x)
        # Add the lowercase item along with the uppercase item.
        if x[0].isupper():
            add(x.lower())
        # Fully upper-case words also get a capitalized variant.
        if x.isupper():
            add(x.lower().capitalize())

    for line in lines:
        for item in clean_line(line):
            for form in candidate_forms(item):
                normal(form)

    entries.sort(key=str.casefold)
    return entries


def main():
    """Read INPUT_PATH and write the generated link list to OUTPUT_PATH."""
    # Explicit UTF-8: the symbols handled above are non-ASCII, so relying on
    # the platform default encoding could mis-decode the input.
    with open(INPUT_PATH, "r", encoding="utf-8") as file_to_process:
        list_of_possible_entries = find_words(file_to_process)

    with open(OUTPUT_PATH, "w", encoding="utf-8") as generated_list:
        generated_list.writelines(
            "{{l|en|" + word + "}}\n\n" for word in list_of_possible_entries
        )


if __name__ == "__main__":
    main()