# First version: prompts for the URL template, page range and keyword.
def Mj_Webscroll():
    """Interactively collect article links from paginated search-result pages.

    Prompts on stdin, in this order, for:

    * a URL template containing two ``{}`` placeholders, which are filled
      positionally with the page number and the percent-encoded keyword;
    * the first page number;
    * the last page number (inclusive);
    * the search keyword.

    Every page in the range is downloaded, decoded as UTF-8 and parsed with
    BeautifulSoup.  The ``href`` of each anchor matched by the "pc_only" and
    "mb_only" headline selectors is accumulated into two separate lists.
    After all pages are processed, both lists and their lengths are printed.
    Nothing is returned.

    Raises:
        ValueError: if a page number typed at the prompt is not an integer.
        urllib.error.URLError: if a page cannot be fetched.
    """
    # urllib.parse is imported explicitly instead of relying on it being
    # loaded as a side effect of importing urllib.request.
    import urllib.parse
    import urllib.request

    from bs4 import BeautifulSoup

    url_name = input('url without page and keyword ')
    page_start = int(input('start page '))
    # +1 because range() excludes its stop value and the end page is inclusive.
    page_end = int(input('end page ')) + 1
    keyword = input('keyword ')
    # Encode the text the user actually typed; quoting the literal string
    # 'keyword' would make every search query the word "keyword".
    keyword = urllib.parse.quote(keyword)

    parmas_pc = []
    parmas_mb = []
    for page in range(page_start, page_end):
        list_url = url_name.format(page, keyword)
        request = urllib.request.Request(list_url)
        # The context manager closes the HTTP response even if read/decode fails.
        with urllib.request.urlopen(request) as response:
            html = response.read().decode("utf-8")
        soup = BeautifulSoup(html, "html.parser")

        parmas_pc.extend(
            anchor.get("href")
            for anchor in soup.select("div.inn > h3.board-list.h3.pc_only > a")
        )
        parmas_mb.extend(
            anchor.get("href")
            for anchor in soup.select("div.inn > h3.board-list.h3.mb_only > a")
        )

    print(parmas_pc)
    print(len(parmas_pc))
    print(parmas_mb)
    print(len(parmas_mb))
# Run the scraper only when this file is executed directly, so that importing
# it does not trigger stdin prompts or network requests.
if __name__ == "__main__":
    Mj_Webscroll()
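# URL template to enter at the first prompt when running the function above
# (the two {} placeholders are the page number and the search keyword, in that order):
# https://search.hankookilbo.com/Search?Page={}&tab=NEWS&sort=relation&searchText={}&searchTypeSet=TITLE,CONTENTS&selectedPeriod=%EC%A0%84%EC%B2%B4&filter=head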
# Second version: fixed URL and keyword, no prompts.
def Mj_Webscroll(keyword='인공지능', page_start=1, page_end=1):
    """Collect article links from Hankook Ilbo news search results.

    Downloads each search-result page in the requested range, decodes it as
    UTF-8, parses it with BeautifulSoup and gathers the ``href`` of every
    ``<a>`` found under an ``<h3 class="board-list h3 pc_only">`` that is
    itself under a ``<div class="inn">``.  The collected list and its length
    are printed once all pages are processed.  Nothing is returned.

    Args:
        keyword: Search text; it is percent-encoded before being placed in
            the URL.  Defaults to the originally hard-coded keyword.
        page_start: First result page to fetch.
        page_end: Last result page to fetch (inclusive).  With the defaults
            only page 1 is fetched, as in the original hard-coded version.

    Raises:
        urllib.error.URLError: if a page cannot be fetched.
    """
    # urllib.parse is imported explicitly instead of relying on it being
    # loaded as a side effect of importing urllib.request.
    import urllib.parse
    import urllib.request

    from bs4 import BeautifulSoup

    search_url = "https://search.hankookilbo.com/Search?Page={}&tab=NEWS&sort=recent&searchText={}&searchTypeSet=TITLE,CONTENTS&selectedPeriod=%EC%A0%84%EC%B2%B4&filter=head"
    encoded_keyword = urllib.parse.quote(keyword)

    parmas = []
    # +1 because range() excludes its stop value and page_end is inclusive.
    for page in range(page_start, page_end + 1):
        list_url = search_url.format(page, encoded_keyword)
        request = urllib.request.Request(list_url)
        # The context manager closes the HTTP response even if read/decode fails.
        with urllib.request.urlopen(request) as response:
            html = response.read().decode("utf-8")
        soup = BeautifulSoup(html, "html.parser")

        for container in soup.find_all("div", class_="inn"):
            # NOTE: a multi-word class_ string is compared against the exact
            # value of the class attribute, so the class order matters here.
            for headline in container.find_all("h3", class_="board-list h3 pc_only"):
                for anchor in headline.find_all("a"):
                    parmas.append(anchor.get("href"))

    print(parmas)
    print(len(parmas))
# Run the scraper only when this file is executed directly, so that importing
# it does not trigger network requests.
if __name__ == "__main__":
    Mj_Webscroll()