import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pandas as pd
import math
# Create a ChromeOptions object.
options = Options()
# Start Chrome with the path to the chromedriver executable.
driver = webdriver.Chrome(service=Service('chromedriver.exe'), options=options)
# Determine the number of result pages.
url = 'https://www.kbchachacha.com/public/search/main.kbc#!?_menu=buy&page=1&sort=-orderDate&makerCode=101&classCode=1101'
driver.get(url)
time.sleep(5)
soup = BeautifulSoup(driver.page_source, 'lxml')
# 25 listings per page; round up so the last partial page is included.
page_num = math.ceil(int(soup.find("span", attrs={"class": "__total"}).get_text().replace(',', '')) / 25)
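# For example, 1,234 listings give math.ceil(1234 / 25) = 50 pages.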
# Maximum time to wait for elements before timing out.
wait = WebDriverWait(driver, 10)
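# A minimal sketch (not called below) of how this explicit wait could replace the
# fixed time.sleep() pauses: block until the listing-count element is present.
# The "span.__total" selector is the same element parsed above; whether it is the
# best readiness signal for this site is an assumption.
def wait_for_listing(drv, timeout=10):
    WebDriverWait(drv, timeout).until(
        expected_conditions.presence_of_element_located((By.CSS_SELECTOR, "span.__total"))
    )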
# (URL template, number of pages to crawl); the commented variant crawls every page.
url = [('https://www.kbchachacha.com/public/search/main.kbc#!?_menu=buy&page={}&sort=-orderDate&makerCode=101&classCode=1101', 1)]
#url = [('https://www.kbchachacha.com/public/search/main.kbc#!?_menu=buy&page={}&sort=-orderDate&makerCode=101&classCode=1101', page_num)]
urls = []
# Build the list of page URLs.
for i in url:
    page = i[1]
    for j in range(1, page + 1):
        url = i[0].format(str(j))
        urls.append(url)
df_cars = []  # List that will hold one row of data per vehicle
# Vehicle information column names
info = ["이름", "차량번호", "링크", "연식", "주행거리", "연료", "배기량", "색상", "보증정보", "가격", "신차대비가격"]
# Option column names
coloptions = ["옵션_선루프", "옵션_파노라마선루프", "옵션_열선앞", "옵션_열선뒤", "옵션_전방센서", "옵션_후방센서", "옵션_전방캠", "옵션_후방캠", "옵션_어라운드뷰", "옵션_네비순정"]
# Insurance and accident history column names
colacci_info1 = ["보험이력등록", "소유자변경횟수", "사고상세_전손", "사고상세_침수전손", "사고상세_침수분손", "사고상세_도난", "보험_내차피해(횟수)", "보험_내차피해(가격)", "사고상세_타차가해(횟수)", "보험_타차가해(가격)"]
cols = info + coloptions + colacci_info1  # Combine all column names.
# Visit each page URL and extract the vehicle listings.
for url in urls:
    driver.get(url)
    time.sleep(5)  # Wait for the page to load completely.
    soup = BeautifulSoup(driver.page_source, "lxml")
    cars = soup.find_all("div", attrs={"class": "area"})
    # Toward the last pages only "sale pending" listings remain; if the normal
    # listing container is missing, skip this page.
    try:
        cars = soup.find("div", attrs={"class": "cs-list02 cs-list02--ratio small-tp generalRegist"}).find_all("div", attrs={"class": "area"})
    except AttributeError:
        continue
    links = []
    # Collect the detail-page link of every vehicle on this page.
    for car in cars:
        # Only real vehicle cards carry a data-car-seq attribute.
        if 'data-car-seq=' in str(car):
            link = "https://www.kbchachacha.com/" + car.a["href"]
            links.append(link)
    for link in links:
        print(link)
        driver.get(link)
        time.sleep(2)
        soup2 = BeautifulSoup(driver.page_source, "lxml")
        # infobox = soup2.find("div", attrs={"class": "info-util box"})
        # try:
        #     ratiopr = infobox.find("b")
        # except:
        #     continue
        name = soup2.find("strong", attrs={"class": "car-buy-name"})
        guarn = soup2.find("div", attrs={"class": "box-txt"})
        state = soup2.find("table", attrs={"class": "detail-info-table"})
        carnumber = state.find("th", text='차량정보').find_next_sibling("td")
        year = state.find("th", text='연식').find_next_sibling("td")
        km = state.find("th", text='주행거리').find_next_sibling("td")
        fuel = state.find("th", text='연료').find_next_sibling("td")
        amount = state.find("th", text='배기량').find_next_sibling("td")
        color = state.find("th", text='색상').find_next_sibling("td")
        price = soup2.find("div", attrs={"class": "car-buy-price"}).find('dd').find('strong')
        # option_table = soup2.find("div", attrs={"class": "tbl-option"})
        # checkoptions = []
        # if option_table.find("th", text='외관') != None:
        #     #for option in findoptions:
        #         #checkoptions.append(option_check(option_table, option))
        #     print(1)
        # else:
        #     checkoptions = ['']*len(coloptions)
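        # The block above refers to an option_check() helper that is not defined in
        # this file. A minimal commented sketch of what it might look like, assuming
        # the option table marks fitted items with an "on" CSS class (an assumption
        # about the site's markup, not verified here):
        # def option_check(option_table, option_name):
        #     cell = option_table.find(text=option_name)
        #     if cell is None:
        #         return ''
        #     holder = cell.find_parent()
        #     return 1 if holder is not None and "on" in holder.get("class", []) else 0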
        # if infobox.find("span", attrs={"class": "round-ln insurance"}).find_next("i").find_next("em") == None:
        #     acc1 = '미등록'
        # else:
        #     acc1 = '등록'
        # findacci_info1 = []
        # try:
        #     if acc1 == '등록':
        #         acc1table = soup2.find("div", attrs={"class": "info-insurance"})
        #         insurdt1 = acc1table.find("th", text="차량번호/소유자변경").find_next_sibling("td").get_text()[-2]
        #         insuraccis1 = acc1table.find("th", text="자동차보험 특수사고").find_next_sibling("td").get_text().split('/')
        #         insurdt2 = insuraccis1[0][-2]
        #         insurdt3 = insuraccis1[1][-2]
        #         insurdt4 = insuraccis1[2][-2]
        #         insurdt5 = insuraccis1[3][-1]
        #         insuraccis2 = acc1table.find("th", text="보험사고(내차피해)").find_next_sibling("td").get_text().split('회')
        #         insurdt6 = insuraccis2[0]
        #         insurdt7 = insuraccis2[1][2:-2]
        #         insuraccis3 = acc1table.find("th", text="보험사고(타차가해)").find_next_sibling("td").get_text().split('회')
        #         insurdt8 = insuraccis3[0]
        #         insurdt9 = insuraccis3[1][2:-2]
        #         findacci_info1 = [insurdt1, insurdt2, insurdt3, insurdt4, insurdt5, insurdt6, insurdt7, insurdt8, insurdt9]
        #     else:
        #         findacci_info1 = ['']*(len(colacci_info1)-1)
        # except:
        #     findacci_info1 = [''] * (len(colacci_info1)-1)
        # temp = [name.get_text(), carnumber.get_text(), link, year.get_text(), km.get_text(), fuel.get_text(), amount.get_text(),
        #         color.get_text(), guarn.get_text(), price.get_text(), ratiopr.get_text()] + checkoptions + [acc1] + findacci_info1
        # df_cars.append(temp)
# Convert the collected rows into a DataFrame.
df_cars = pd.DataFrame(data=df_cars, columns=cols)
# Save the DataFrame as a CSV file.
df_cars.to_csv('cars.csv')
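# Note: with Korean column names, Excel may garble the header unless a BOM is
# written; if that matters, the call above could be replaced with something like
# df_cars.to_csv('cars.csv', index=False, encoding='utf-8-sig').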
driver.quit()  # Shut down the web driver.