# Train a small Word2Vec model on the NLTK movie_reviews corpus and inspect
# the learned vectors: a raw vector, pairwise similarities, nearest neighbours
# and two word-analogy queries.
from gensim.models.word2vec import Word2Vec
from nltk.corpus import movie_reviews

# Materialize every tokenized sentence as a plain list of tokens.
sentences = [list(s) for s in movie_reviews.sents()]

# `size` is the dimensionality of each word vector; you can think of it as
# the size of the network's final layer.
# I would like to try a deeper network, but I am not sure whether gensim
# handles that automatically.
model = Word2Vec(sentences, size=100)

# Training is finished: drop the memory that is no longer needed.
model.init_sims(replace=True)

print("vector of {}:".format('man'))
print(model.wv['man'])

for w1, w2 in [('actor', 'actress'), ('man', 'woman')]:
    print("similarity of {} and {}:".format(w1, w2))
    print(model.wv.similarity(w1, w2))
print()  # blank line between sections

print(model.wv.most_similar('man'))
print()

# NOTE: `negative` must be a list of words. The gensim API generation used
# here (`size=`, `init_sims`) iterates over the argument, so a bare string
# would be treated as a sequence of single-character words
# ('a', 'c', 't', ...) rather than as one word.

# actor + she - actress
print(model.wv.most_similar(positive=['actor', 'she'], negative=['actress'], topn=1))
# actress + he - actor
print(model.wv.most_similar(positive=['actress', 'he'], negative=['actor'], topn=1))
vector of man:
[ 0.0961192 0.00437185 0.03467375 0.26454127 0.02940031 0.02288906
0.02179242 0.02122602 -0.04023347 0.01882667 -0.02628091 0.03881754
-0.06698899 0.14932805 -0.0474763 0.02154444 -0.14287125 0.21027721
-0.02145271 -0.02302506 0.03378444 0.0153384 0.07713746 0.15382759
0.12019698 -0.1113359 -0.0593212 -0.19330981 0.00104285 -0.08455317
0.01631272 0.05338057 -0.05085417 -0.00311475 0.00962412 0.12170225
-0.09533398 0.09420326 -0.03652238 -0.13428254 -0.13235193 0.12731819
0.06866247 0.01878783 0.07961938 -0.02594641 0.1502616 -0.09824947
0.11498301 0.03205505 -0.13049409 -0.11235102 0.18670972 0.20496006
0.11508316 -0.08224259 0.07353425 0.05081542 0.07457628 -0.06560689
0.06979878 0.00345675 0.15629113 0.11683072 -0.07166454 -0.08964456
0.0704189 0.01116451 -0.07762457 -0.15145063 -0.23548466 0.17004918
-0.08410266 0.05517555 -0.12813565 0.08467376 0.15465082 -0.01966195
-0.126249 0.05783306 -0.10403755 -0.01165899 -0.19907828 0.07758326
-0.05870183 -0.04015943 0.01824598 -0.01141299 0.18254025 -0.01973889
-0.06274197 -0.03590406 -0.10662002 0.10698724 0.0145605 -0.12283614
-0.0986573 0.08289269 0.02273848 -0.09652561]
similarity of actor and actress:
0.871707394306
similarity of man and woman:
0.907711044147
[('woman', 0.9077110886573792), ('girl', 0.8656805157661438), ('boy', 0.8102990984916687), ('child', 0.8080374598503113), ('killer', 0.8013476729393005), ('doctor', 0.7803214192390442), ('kid', 0.7172398567199707), ('secret', 0.7040114998817444), ('person', 0.7038406729698181), ('guy', 0.6979759931564331)]
[('he', 0.3257312476634979)]
[('she', 0.3937234878540039)]
댓글남기기