class Words:
    """Split a text string into lowercase words.

    ASCII digits and ASCII punctuation are treated as word separators and
    ASCII uppercase letters are folded to lowercase. The resulting word list
    is kept in ``self.words`` and is refreshed whenever the text changes.
    """

    # Characters that are replaced by a single space before splitting.
    _SEPARATORS = "0123456789!\"#$%&()*+,-./:;<=>?@[]^_`{|}~'\\"
    _UPPERCASE = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    _LOWERCASE = "abcdefghijklmnopqrstuvwxyz"
    # str.maketrans needs both strings to have the same length, so the run of
    # spaces is generated from the separator set instead of being typed out.
    _TRANSLATION = str.maketrans(
        _UPPERCASE + _SEPARATORS,
        _LOWERCASE + " " * len(_SEPARATORS),
    )

    def __init__(self, the_text=""):
        """Store ``the_text`` and tokenize it (an empty string gives no words)."""
        self.the_text = the_text
        self.words = self.text_to_words()

    def setText(self, the_text):
        """Replace the stored text and re-tokenize it.

        The word list is rebuilt here so that ``getWords`` and
        ``remove_dups`` never report words from the previous text.
        """
        self.the_text = the_text
        self.words = self.text_to_words()

    def getWords(self):
        """Return the list of words extracted from the current text."""
        return self.words

    def text_to_words(self):
        """Return the words of ``self.the_text`` as a list of strings.

        Digits and punctuation are replaced by spaces, uppercase ASCII
        letters are lowercased, and the result is split on whitespace.
        An apostrophe is a separator too, so ``"don't"`` yields
        ``["don", "t"]``.
        """
        cleaned_text = self.the_text.translate(self._TRANSLATION)
        return cleaned_text.split()

    def remove_dups(self):
        """Return a new list of the words with duplicates removed.

        The first occurrence of each word is kept and the original order
        is preserved; ``self.words`` itself is not modified.
        """
        # dict keys are unique and keep insertion order, so this drops
        # repeats in one pass instead of scanning a list for every word.
        return list(dict.fromkeys(self.words))
from words import Words
class WordProcessor:
    """Read a text file and derive its word count and vocabulary.

    Tokenizing and de-duplication are delegated to ``Words``; this class
    only stores the file name and the results of the last ``processText``
    call.
    """

    def __init__(self, filename):
        """Remember ``filename``; nothing is read until asked for."""
        self.filename = filename
        self.vocabulary = []
        # -1 marks "no text processed yet"; processText overwrites it.
        self.wordcount = -1

    def load_string_from_file(self):
        """Return the whole content of ``self.filename`` as one string.

        The file is opened in text mode for reading. Errors raised by
        ``open`` or ``read`` propagate to the caller.
        """
        # The with-block closes the file even if read() raises.
        with open(self.filename, "r") as f:
            return f.read()

    def processText(self, text):
        """Tokenize ``text`` and record its word count and vocabulary.

        ``wordcount`` becomes the number of words including repeats and
        ``vocabulary`` becomes the list of words with duplicates removed.
        """
        words = Words(text)
        self.wordcount = len(words.getWords())
        self.vocabulary = words.remove_dups()

    def getWordcount(self):
        """Return the number of words found by the last ``processText`` call.

        Duplicate words are counted. The value is -1 if ``processText``
        has not been called yet.
        """
        return self.wordcount

    def getVocabulary(self):
        """Return the vocabulary found by the last ``processText`` call.

        The vocabulary contains no duplicate words. It is an empty list
        if ``processText`` has not been called yet.
        """
        return self.vocabulary
# Driver: load "brooks.txt", process it, then report the total word count
# followed by the de-duplicated vocabulary, one per line.
wp = WordProcessor("brooks.txt")
filecontent = wp.load_string_from_file()
wp.processText(filecontent)
print(wp.getWordcount(), wp.getVocabulary(), sep="\n")