Re: 오늘 검사 받아야 할 마지막 문제: 텍스트 마이닝 클래스 모듈을 답글로 올려주세요 ~

"""Small text-mining toolkit.

Provides word counting, positive/negative word extraction, word-cloud
rendering and special-character counting for plain-text scripts.
"""

import json
import re
from os import path  # used to build the paths of the text files that are opened
from time import sleep

# Default locations, used whenever a method is called without explicit directories.
DATA_DIR = "D:/ITWILL_sw/python/data/"
DOWNLOAD_DIR = "D:/ITWILL_sw/python/data/downloadfiles/"
FONT_PATH = 'C://Windows//Fonts//arial.TTF'

# Anything that is not an ASCII letter or a space is removed before words are
# matched.  (A range written as ``A-z`` would also keep ``[ \ ] ^ _`` and the
# backtick, because those characters sit between ``Z`` and ``a``.)
_NON_LETTER = re.compile(r'[^A-Za-z ]')


class Text_mining():
    """Collection of interactive text-mining helpers.

    Every method accepts its inputs as optional arguments.  When the file
    name (or search word) is omitted, the user is prompted for it on the
    console.
    """

    def __init__(self):
        self.something = 0

    def find_word(self, script=None, word=None, data_dir=DATA_DIR, delay=.0001):
        """Count case-insensitive occurrences of ``word`` in a script.

        The running total is redrawn on a single console line while the
        file is scanned.

        Args:
            script: File name inside ``data_dir``; prompted for when None.
            word: Text to look for; prompted for when None.
            data_dir: Directory that contains the script.
            delay: Seconds to pause after each redraw of the counter.

        Returns:
            The total number of occurrences found.
        """
        if script is None:
            script = input(" 스크립트명을 입력하세요 ~ ")
        if word is None:
            word = input(" 찾을 단어를 입력하세요 ~ ")

        # The text is lower-cased before counting, so the needle must be too;
        # otherwise a word containing a capital letter could never match.
        word = word.lower()
        if not word:
            # str.count('') reports len(token) + 1, which is not a word count.
            return 0

        summation = 0
        with open(path.join(data_dir, script), "r") as file:
            for line in file:
                for token in line.split(' '):
                    summation += token.lower().count(word)
                    msg = '\r %d 개' % summation
                    # Blank the line first, then let '\r' jump back to its start.
                    print(' ' * len(msg), end='')
                    print(msg, end='')
                    sleep(delay)
        return summation

    def _match_dictionary_words(self, dictionary_name, output_name, filename,
                                data_dir, output_dir):
        """Collect the words of a script that appear in a dictionary file.

        The matches are saved as a JSON list in ``output_dir/output_name``
        and returned as ``(matches, number_of_matches)``.
        """
        with open(path.join(data_dir, dictionary_name), 'r') as dictionary_file:
            dictionary = [entry.replace("\n", '').upper() for entry in dictionary_file]
        print(dictionary)

        # A set gives constant-time membership tests.  The empty string is
        # dropped so that runs of spaces in the script never count as a match.
        lookup = set(dictionary)
        lookup.discard('')

        matches = []
        with open(path.join(data_dir, filename), 'r') as script_file:
            for line in script_file:
                cleaned = _NON_LETTER.sub('', line).strip().upper()
                matches.extend(token for token in cleaned.split(' ') if token in lookup)

        with open(path.join(output_dir, output_name), "w") as output_file:
            json.dump(matches, output_file)
        return matches, len(matches)

    def positive(self, filename=None, data_dir=DATA_DIR, output_dir=DOWNLOAD_DIR):
        """Extract the positive words used in a script.

        Words are compared in upper case against ``positive-words.txt`` in
        ``data_dir``.  The matches are written to ``output_pos.txt`` in
        ``output_dir``.

        Args:
            filename: Script inside ``data_dir``; prompted for when None.
            data_dir: Directory holding the script and the dictionary.
            output_dir: Directory that receives ``output_pos.txt``.

        Returns:
            Tuple ``(matched_words, count)``.
        """
        if filename is None:
            filename = input("스크립트를 입력하세요 ~ ")
        return self._match_dictionary_words(
            "positive-words.txt", "output_pos.txt", filename, data_dir, output_dir)

    def negative(self, filename=None, data_dir=DATA_DIR, output_dir=DOWNLOAD_DIR):
        """Extract the negative words used in a script.

        Words are compared in upper case against ``negative-words.txt`` in
        ``data_dir``.  The matches are written to ``output_neg.txt`` in
        ``output_dir``.

        Args:
            filename: Script inside ``data_dir``; prompted for when None.
            data_dir: Directory holding the script and the dictionary.
            output_dir: Directory that receives ``output_neg.txt``.

        Returns:
            Tuple ``(matched_words, count)``.
        """
        if filename is None:
            filename = input("스크립트를 입력하세요 ~ ")
        return self._match_dictionary_words(
            "negative-words.txt", "output_neg.txt", filename, data_dir, output_dir)

    def wordcloud(self, filename=None, data_dir=DOWNLOAD_DIR, font_path=FONT_PATH):
        """Render a word cloud of a UTF-8 text file and display it.

        Args:
            filename: File inside ``data_dir``; prompted for when None.
            data_dir: Directory that contains the text file.
            font_path: Font file used to draw the words.

        Returns:
            Whatever ``matplotlib.pyplot.show()`` returns.
        """
        # Imported here so the counting methods remain usable on machines
        # where the plotting packages are not installed.
        import matplotlib.pyplot as plt
        from wordcloud import WordCloud, STOPWORDS

        if filename is None:
            filename = input("스크립트를 입력하세요 ~ ")
        with open(path.join(data_dir, filename), mode="r", encoding="UTF-8") as text_file:
            text = text_file.read()

        cloud = WordCloud(
            font_path=font_path,
            stopwords=STOPWORDS,        # words that are left out of the cloud
            background_color='white',   # canvas colour
            width=1000,                 # canvas size in pixels
            height=800,
            colormap='PuRd',            # matplotlib colormap used for the words
        ).generate(text)

        plt.figure(figsize=(13, 13))
        plt.imshow(cloud)
        plt.axis("off")                 # hide the x and y axes
        return plt.show()

    def special_cnt(self, filename=None, data_dir=DATA_DIR):
        """Count the characters that are neither digits, letters nor whitespace.

        Args:
            filename: File inside ``data_dir``; prompted for when None.
            data_dir: Directory that contains the file.

        Returns:
            Number of special characters in the file.
        """
        if filename is None:
            filename = input(" 스크립트를 입력하세요 ~ ")
        with open(path.join(data_dir, filename), "r") as text_file:
            return sum(
                1
                for line in text_file
                for char in line
                if not (char.isdigit() or char.isalpha() or char.isspace())
            )


if __name__ == "__main__":
    # Runs only when executed as a script, so importing the module never prompts.
    text_mining = Text_mining()
    text_mining.special_cnt()