hübsche Wordcloud
Hier zeige ich noch einmal, wie man die Wordcloud über die Style-Einstellungen anpassen kann, um sie ein bisschen hübscher zu machen.
Außerdem zeige ich, wie man eine Maske benutzt.
def main():
    """Render a styled, mask-shaped word cloud from ``books/lafcadio.txt``.

    Pipeline:

    1. Read the book and tokenize it with ``nltk.tokenize.word_tokenize``.
    2. Lower-case every token, then drop English stop words and every token
       whose first character is not a letter.
    3. Reduce the remaining tokens to their Porter stems.
    4. Build a ``WordCloud`` using ``assets/cloud.png`` as the mask, save it
       to ``output/cloud_pretty.png`` and display it via ``plt.imshow`` /
       ``plt.show``.
    """
    # The context manager closes the file even if reading fails.
    # NOTE(review): the platform default encoding is used, as before; pass
    # ``encoding=...`` explicitly once the book's encoding is confirmed.
    with open("books/lafcadio.txt", "r") as f:
        book = f.read()

    words = nltk.tokenize.word_tokenize(book)
    # A set gives constant-time membership tests; a list would be scanned
    # once for every token of the book.
    stopwords = set(nltk.corpus.stopwords.words("english"))

    ## Filtering Step
    # Requiring an alphabetic first character also removes pure punctuation,
    # numbers and empty tokens, so one pass replaces the separate filters.
    # ``word[:1]`` (rather than ``word[0]``) stays safe for an empty token.
    words = [
        word
        for word in (token.lower() for token in words)
        if word not in stopwords and word[:1].isalpha()
    ]

    ## Stemming Step
    stemmer = nltk.stem.PorterStemmer()
    text = " ".join(stemmer.stem(word) for word in words)

    # Load the mask inside a ``with`` block so the image file handle is
    # released as soon as the pixel data has been copied into the array.
    with Image.open("assets/cloud.png") as mask_image:
        cloud_mask = np.array(mask_image)

    wordcloud = WordCloud(
        mask=cloud_mask,
        contour_color="grey",
        contour_width=10,
        background_color="white",
        # NOTE: according to the WordCloud documentation, height/width are
        # ignored when a mask is supplied (the mask's shape is used instead).
        height=1000,
        width=1000,
        max_words=200,
        random_state=5,  # fixed seed so repeated runs give the same layout
        colormap=colormaps["tab20"],
        # Further options worth experimenting with:
        #   font_path="assets/RubikSprayPaint-Regular.ttf",
        #   min_font_size=40,
        #   max_font_size=200,
        #   font_step=10,
        #   include_numbers=True,
    ).generate(text)

    # Save as a separate statement so the object passed to ``imshow`` does
    # not depend on the return value of ``to_file``.
    wordcloud.to_file("output/cloud_pretty.png")

    plt.imshow(wordcloud)
    plt.show()