Re:오늘 검사 받아야할 마지막 문제 텍스트 마이닝 클래스 모듈을 답글로 올려주세요 ~

"""Small text-mining helpers: word search, sentiment-word counts, word cloud.

Every public method can be called without arguments, in which case it asks
for its inputs interactively, or with explicit arguments for scripted use.
"""

import codecs
import locale
import os
import re

# Everything that is not an ASCII letter or a space is stripped before
# tokenizing.  The class is spelled out as ``A-Za-z`` because the shorter
# ``A-z`` range also admits the six characters between 'Z' and 'a'
# ( [ \ ] ^ _ ` ), which would stay glued to the words.
_NON_LETTER_RE = re.compile(r"[^A-Za-z ]")


def _ansi_encoding():
    """Return a codec name usable for "ANSI" text files on this platform.

    The "ANSI" codec name is only registered on Windows; elsewhere the
    lookup fails and the locale's preferred encoding is used instead.
    """
    try:
        codecs.lookup("ANSI")
    except LookupError:
        return locale.getpreferredencoding(False)
    return "ANSI"


class text_mining():
    """Collection of interactive text-mining utilities for plain-text files."""

    def __init__(self):
        print('mining package')

    def find_word(self, filepath=None, word=None):
        """Count case-insensitive occurrences of *word* in a text file.

        Each line is split on whitespace and the occurrences of *word*
        inside every token are added up, so substrings count as matches.

        Args:
            filepath: Path of the file to scan; prompted for when ``None``.
            word: Text to look for; prompted for when ``None``.

        Returns:
            The number of occurrences found (also printed).  An empty
            *word* yields 0.
        """
        if filepath is None:
            filepath = input("파일의 위치를 입력하세요: ")
        if word is None:
            word = input("찾고 싶은 단어를 입력하세요: ")

        needle = word.lower()
        cnt = 0
        with open(filepath) as text_file:
            # str.count('') returns len + 1, which is never what the caller
            # wants, so an empty search word simply matches nothing.
            if needle:
                for line in text_file:
                    cnt += sum(
                        token.lower().count(needle) for token in line.split()
                    )

        print('\r찾은 갯수 %d ' % cnt + '\n', end='')
        return cnt

    def _count_lexicon_words(self, lexicon_path, filepath,
                             all_words_path, matched_path):
        """Count the words of a text that appear in a lexicon file.

        The lexicon is loaded first (one entry per line, compared in upper
        case); only then is *filepath* prompted for when it is ``None``.
        Every cleaned word of the text is written to *all_words_path* and
        every word found in the lexicon to *matched_path*, one per line.

        Returns:
            The number of lexicon matches (also printed).
        """
        with open(lexicon_path, 'r') as lexicon_file:
            lexicon = {line.rstrip('\n').upper() for line in lexicon_file}
        # Blank lexicon lines must never turn empty tokens into matches.
        lexicon.discard('')

        if filepath is None:
            filepath = input("파일 경로를 입력하세요: ")

        words = []
        matched = []
        with open(filepath) as text_file:
            for line in text_file:
                cleaned = _NON_LETTER_RE.sub('', line).strip().upper()
                for token in cleaned.split(' '):
                    if token == '':
                        # Produced by blank lines and consecutive spaces.
                        continue
                    words.append(token)
                    if token in lexicon:
                        matched.append(token)

        with open(all_words_path, 'w') as out:
            out.writelines(token + '\n' for token in words)
        with open(matched_path, 'w') as out:
            out.writelines(token + '\n' for token in matched)

        print(len(matched))
        return len(matched)

    def positive_cnt(self, filepath=None,
                     lexicon_path="./mypackage/positive-words.txt"):
        """Count the positive words in a text file.

        Writes all cleaned words to ``./scriptkword.txt`` and the matched
        positive words to ``./pos_word.txt``.

        Args:
            filepath: Text file to analyse; prompted for when ``None``.
            lexicon_path: File listing the positive words, one per line.

        Returns:
            The number of positive words found (also printed).
        """
        return self._count_lexicon_words(
            lexicon_path, filepath, './scriptkword.txt', './pos_word.txt')

    def negative_cnt(self, filepath=None,
                     lexicon_path="./mypackage/negative-words.txt"):
        """Count the negative words in a text file.

        Writes all cleaned words to ``./sherlockword.txt`` and the matched
        negative words to ``./neg_word.txt``.

        Args:
            filepath: Text file to analyse; prompted for when ``None``.
            lexicon_path: File listing the negative words, one per line.

        Returns:
            The number of negative words found (also printed).
        """
        return self._count_lexicon_words(
            lexicon_path, filepath, './sherlockword.txt', './neg_word.txt')

    def show_cloud(self, filepath=None, filename=None):
        """Render a word cloud of a text file in a matplotlib window.

        Args:
            filepath: Directory containing the file (or any path inside
                that directory); prompted for when ``None``.
            filename: Name of the text file; prompted for when ``None``.
        """
        # Third-party imports stay local so the rest of the class works
        # without wordcloud / matplotlib installed.
        from wordcloud import WordCloud, STOPWORDS
        import matplotlib.pyplot as plt

        if filepath is None:
            filepath = input("파일이 있는 상위 경로를 입력하세요: ")
        # A directory typed without a trailing separator must be used as-is;
        # dirname() would silently drop its last component.
        if os.path.isdir(filepath):
            directory = filepath
        else:
            directory = os.path.dirname(filepath)

        if filename is None:
            filename = input("파일 이름을 입력하세요: ")

        with open(os.path.join(directory, filename), mode="r",
                  encoding=_ansi_encoding()) as text_file:
            text = text_file.read()

        wordcloud = WordCloud(
            stopwords=STOPWORDS,       # wordcloud's built-in stop-word set
            background_color='white',  # canvas colour
            width=1000,                # canvas size in pixels
            height=800,
            colormap='Blues',          # matplotlib colormap for the words
        ).generate(text)

        plt.figure(figsize=(13, 13))
        plt.imshow(wordcloud)
        plt.axis("off")  # hide the x / y axes around the image
        plt.show()

    def special_cnt(self, filepath=None):
        """Count the special characters in a text file.

        A special character is any character that is neither alphabetic,
        nor a digit, nor whitespace.  The total character count is printed
        first, then the special-character count.

        Args:
            filepath: File to analyse; prompted for when ``None``.

        Returns:
            The number of special characters.
        """
        if filepath is None:
            filepath = input("파일의 위치를 입력하세요: ")
        with open(filepath, 'r') as text_file:
            lines = text_file.readlines()

        total = sum(len(line) for line in lines)
        print(total)

        ordinary = sum(
            ch.isalpha() or ch.isdigit() or ch.isspace()
            for line in lines
            for ch in line
        )
        special = total - ordinary
        print(special)
        return special


# ---- Usage ----
# Create a package named "mypackage" and put this class in
# mypackage/text_mining.py, then:
#
#     from mypackage.text_mining import text_mining
#
#     a = text_mining()
#     a.find_word()      # d:/data/Sherlock.txt
#     a.positive_cnt()   # d:/data/Sherlock.txt
#     a.negative_cnt()   # d:/data/Sherlock.txt
#     a.show_cloud()     # d:/data/  then  pos_word.txt or neg_word.txt
#     a.special_cnt()    # d:/data/Sherlock.txt