class text_mining():
    """Small interactive toolkit for counting and extracting words in text files.

    Every public method asks on stdin for any argument that was not passed
    explicitly, so calling a method without arguments is fully interactive.
    Results are printed and/or written as JSON files; the methods return None.
    """

    # Prefix put in front of data-directory file names by plain string
    # concatenation, so it has to end with a path separator.
    data_dir = 'e:\\data\\'

    @staticmethod
    def _prompt_if_missing(value, prompt):
        """Return *value*, or ask the user with *prompt* when it is None."""
        if value is None:
            return input(prompt)
        return value

    @staticmethod
    def _clean_words(line):
        """Return the upper-cased words of *line* with every non-letter removed."""
        import re
        # The class is spelled A-Za-z on purpose: the range A-z also covers
        # the punctuation that sits between "Z" and "a" in ASCII (brackets,
        # backslash, caret, underscore, backtick), which would leak into words.
        letters_and_spaces = re.sub('[^A-Za-z ]', '', line)
        # Only letters and spaces are left, so split() never yields an empty word.
        return letters_and_spaces.upper().split()

    @staticmethod
    def _write_json(words, out_path):
        """Write *words* as JSON to *out_path*; the file is closed even on error."""
        try:
            import simplejson as json_module
        except ImportError:
            # Use the standard library when simplejson is not installed.
            import json as json_module
        with open(out_path, 'w') as out_file:
            json_module.dump(words, out_file)

    def _collect_lexicon_words(self, lexicon_name, txt, out_name, summary):
        """Find the words of *txt* that appear in a lexicon file.

        Args:
            lexicon_name: File name (inside ``data_dir``) with one word per line.
            txt: Path of the script to scan; opened exactly as given.
            out_name: File name (inside ``data_dir``) that receives the matched
                words as a JSON list, in order of appearance.
            summary: Format string with one ``{}`` placeholder for the count.
        """
        # A set gives constant-time membership tests for every scanned word.
        with open(self.data_dir + lexicon_name, 'r') as lexicon_file:
            lexicon = {entry.rstrip('\n').upper() for entry in lexicon_file}
        matched = []
        with open(txt, 'r') as script:
            for line in script:
                # Cleaned words are never empty, so blank lexicon lines cannot
                # match and the printed count always equals the saved list.
                matched.extend(
                    word for word in self._clean_words(line) if word in lexicon
                )
        print(summary.format(len(matched)))
        self._write_json(matched, self.data_dir + out_name)

    def find_word(self, txt=None, word=None):
        """Print a running, case-insensitive count of *word* inside a script.

        Args:
            txt: Path of the script file; prompted for when None.
            word: Text to search for; prompted for when None. It is matched
                as a substring of each space-separated token.
        """
        from time import sleep
        txt = self._prompt_if_missing(txt, '스크립트')
        word = self._prompt_if_missing(word, '단어')
        # The text is lower-cased before counting, so the query must be too.
        needle = word.lower()
        if not needle:
            # str.count('') reports len + 1 per token, which is not a word count.
            print('\r찾은 개수: 0', end='')
            return
        total = 0
        with open(txt, 'r') as script:
            for line in script:
                for token in line.split(' '):
                    total += token.lower().count(needle)
                    # Rewrite the same console line to show live progress.
                    print('\r찾은 개수: %d' % total, end='')
                    sleep(0.00001)

    def list_word(self, file=None):
        """Write every cleaned word of a script to ``output.txt`` as a JSON list.

        Args:
            file: File name inside ``data_dir``; prompted for when None.
        """
        file = self._prompt_if_missing(file, '스크립트')
        words = []
        with open(self.data_dir + file, 'r') as script:  # load the file
            for line in script:
                words.extend(self._clean_words(line))  # clean the text
        self._write_json(words, self.data_dir + 'output.txt')

    def positve(self, txt=None):
        """Count the positive words of a script and save them to ``out_pos.txt``.

        The misspelled method name is kept so existing callers keep working.

        Args:
            txt: Path of the script file; prompted for when None.
        """
        txt = self._prompt_if_missing(txt, '스크립트')
        self._collect_lexicon_words(
            'positive-words.txt', txt, 'out_pos.txt', '{}개의 긍정단어'
        )

    def negative(self, txt=None):
        """Count the negative words of a script and save them to ``out_neg.txt``.

        Args:
            txt: Path of the script file; prompted for when None.
        """
        txt = self._prompt_if_missing(txt, '스크립트')
        self._collect_lexicon_words(
            'negative-words.txt', txt, 'out_neg.txt', '{}개의 부정단어'
        )

    def show_cloud(self, file=None):
        """Render a word cloud of a UTF-8 text file in a matplotlib window.

        Args:
            file: File name inside ``data_dir``; prompted for when None.
        """
        from wordcloud import WordCloud, STOPWORDS
        import matplotlib.pyplot as plt
        from os import path
        file = self._prompt_if_missing(
            file, '워드 클라우드를 그릴 텍스트 파일을 입력하세요 ~ '
        )
        directory = path.dirname(self.data_dir)
        with open(path.join(directory, file), mode='r', encoding='UTF-8') as text_file:
            text = text_file.read()
        cloud = WordCloud(stopwords=STOPWORDS, background_color='white',
                          width=1000, height=800, colormap='PuRd').generate(text)
        plt.figure(figsize=(13, 13))
        plt.imshow(cloud)
        plt.axis('off')
        plt.show()

    def special_cnt(self, file=None):
        """Print how many characters are neither letters, digits nor whitespace.

        Args:
            file: File name inside ``data_dir``; prompted for when None.
        """
        file = self._prompt_if_missing(file, '스크립트를 입력하세요 ~ ')
        special = 0
        with open(self.data_dir + file, 'r') as text_file:
            for line in text_file:
                special += sum(
                    not (ch.isalpha() or ch.isdigit() or ch.isspace())
                    for ch in line
                )
        print(special)
# Module-level instance of the text-mining toolkit.
tp = text_mining()