import urllib.request as rq
from urllib.parse import quote

from bs4 import BeautifulSoup
def start(url):
    """Download *url* and return it parsed as a BeautifulSoup document.

    A desktop-Chrome ``User-Agent`` header is sent so the server does
    not reject the request as coming from a script.
    """
    ua = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',}
    request = rq.Request(url, headers=ua)
    html = rq.urlopen(request).read()
    return BeautifulSoup(html, "html.parser")
def fetch_list_url(url, tag):
    """Scrape the search-result list page at *url*.

    ``tag[0]`` is the CSS selector for the result links on the list
    page.  For each match, the link text is appended to the
    module-level ``result`` list and the linked article page is then
    scraped via ``news_fetch`` (which uses ``tag[1]``).
    """
    # NOTE: the original declared ``global result``, which is
    # unnecessary — we only mutate (append to) the list, never rebind
    # the name.  Dead commented-out debug code was also removed.
    soup = start(url)
    for link in soup.select(tag[0]):
        result.append(link.get_text())
        news_fetch(link['href'], tag)
def news_fetch(url, tag):
    """Scrape one article page at *url*.

    ``tag[1]`` is the CSS selector for the article body; the text of
    every match is appended to the module-level ``result`` list.
    """
    document = start(url)
    paragraphs = document.select(tag[1])
    for node in paragraphs:
        result.append(node.get_text())
# Accumulates scraped text (link titles and article paragraphs).
result = []

# [0] = search URL template, [1] = (list-page selector, article selector).
url_list = ['http://search.daum.net/search?w=news&q={search}&spacing=0&p={page}&cp=16bfGN9mQcFhOx4F5l&cpname=%EA%B2%BD%ED%96%A5%EC%8B%A0%EB%AC%B8', ["#clusterResultUL > li > div.wrap_cont > div > div > a", "#container > div.main_container > div.art_cont > div.art_body > p"]]

# Percent-encode the query as UTF-8 for embedding in the URL.  The
# original hand-rolled str(input().encode("utf-8"))[2:-1].replace('\\x', '%')
# hack produced equivalent encoding for non-ASCII characters but left
# spaces and other reserved ASCII characters raw; quote() handles both
# correctly.
search_text = quote(input("검색어를 입력하세요 : "), encoding="utf-8")

# Crawl the requested result pages (currently just page 1).
for page in range(1, 2):
    url = url_list[0].format(search=search_text, page=page)
    fetch_list_url(url, url_list[1])
print(result)