========================================
== nltk.corpus.stopwords.words('english')
----------------------------------------
mustn more when now m their d too we her
couldn't ours with most were and off hasn haven him
shan't aren you've during being ve mightn should've because doesn't
its yours haven't should couldn didn't do hadn't s just
about she you'd for myself so other am had which
ourselves doesn who my did to won't have again at
all these few before wasn wasn't that very by needn
there or this doing further of re shan each an
itself shouldn't ain be weren same why ll only once
then above you'll mightn't isn't was where been our under
not if i no own your over are in you
can out having it's than hadn how you're me here
a y between isn does that'll both yourselves mustn't such
yourself until ma hasn't into whom don is some o
through herself needn't t shouldn what down himself against any
but won as she's themselves from on below aren't the
they wouldn theirs will weren't hers don't them his didn
up while has wouldn't he after it nor those
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
# Demo: remove English stopwords from a few tokenized sentences and print
# each token list before and after filtering.
sentences_with_stopwords = [
    "I am a boy",
    "you are a girl",
    "he is a man",
]

# First, normalize (lowercase, strip surrounding whitespace) and
# word-tokenize each sentence; the name is rebound to the token lists.
sentences_with_stopwords = [
    word_tokenize(sentence.lower().strip())
    for sentence in sentences_with_stopwords
]

# Build the stopword collection once, as a set. This avoids calling
# stopwords.words('english') again for every single token and turns each
# membership test into a hash lookup instead of a list scan.
english_stopwords = set(stopwords.words('english'))

# Drop every token that is a stopword.
sentences_without_stopwords = [
    [token for token in tokens if token not in english_stopwords]
    for tokens in sentences_with_stopwords
]

print("==" * 20)
for s1, s2 in zip(sentences_with_stopwords, sentences_without_stopwords):
    print(f"{s1} => {s2}")
print("==" * 20)
========================================
['i', 'am', 'a', 'boy'] => ['boy']
['you', 'are', 'a', 'girl'] => ['girl']
['he', 'is', 'a', 'man'] => ['man']
========================================
댓글남기기