1. 쿠팡 상품 크롤링
import os
import time

from selenium import webdriver as wd
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Script: search Coupang for `keyword`, then print every product found on the
# first result page and save the same lines to a text file in the current
# working directory.
keyword = '피부 유수분 측정기'
main_url = 'https://www.coupang.com/'
# One dict (label -> value) per successfully scraped product, in page order.
item_list = []

# Output file is named after the first three characters of the keyword.
result_path = os.path.join(os.getcwd(), f"{keyword[:3]}_item_result.txt")

# (label, CSS selector, attribute) per scraped field; an attribute of None
# means "use the element's visible text".
ITEM_FIELDS = (
    ('썸네일', 'img', 'src'),
    ('상품명', 'div.name', None),
    ('배송비', 'div.badges', None),
    ('가격', 'strong', None),
)

# NOTE(review): passing the driver path positionally is the Selenium 3 calling
# convention; newer Selenium releases presumably need a Service object here.
# Confirm against the installed Selenium version.
driver = wd.Chrome("c:/python/chromedriver.exe")
try:
    driver.get(main_url)
    driver.find_element(By.ID, "headerSearchKeyword").send_keys(keyword, Keys.RETURN)

    # Fixed pause that gives the result page time to render before querying it.
    time.sleep(3)
    boxItems = driver.find_elements(
        By.CSS_SELECTOR, 'ul.search-product-list > li.search-product'
    )

    # `with` closes the file even when scraping fails half-way through.
    with open(result_path, mode='w', encoding='utf-8') as wfile:
        for li in boxItems:
            # Read every field first so that an entry missing one of them is
            # skipped as a whole instead of aborting the remaining products
            # or leaving a half-written record in the file.
            try:
                item = {}
                for label, selector, attribute in ITEM_FIELDS:
                    element = li.find_element(By.CSS_SELECTOR, selector)
                    value = element.get_attribute(attribute) if attribute else element.text
                    # get_attribute() returns None when the attribute is absent.
                    item[label] = value or ''
            except WebDriverException as exc:
                print('팝업메뉴는 제외', type(exc).__name__)
                continue

            item_list.append(item)
            for label, value in item.items():
                print(f'{label} : ', value)
                wfile.write(f'{label} : {value}\n')
            print("=" * 50)
            wfile.write("=" * 50 + '\n')
finally:
    # Always shut the browser down, including when the search itself failed.
    driver.quit()

import sys
sys.exit()  # terminate the Python process
2. 인터파크 투어 여행상품
from selenium import webdriver as wd
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import os
# Script: search Interpark Tour for `keyword`, open the full result list via
# the "more" button, then print every listed product and save the same lines
# to tour_result_all1.txt in the current working directory.
main_url = "http://tour.interpark.com"
keyword = '스페인'
# One dict (label -> value) per successfully scraped product, in page order.
tour_list = []

result_path = os.path.join(os.getcwd(), "tour_result_all1.txt")

# (label, CSS selector, attribute) per scraped field; an attribute of None
# means "use the element's visible text".
TOUR_FIELDS = (
    ('썸네일', 'img', 'src'),
    ('상품명', 'h5.infoTitle', None),
    ('커멘트', '.info', None),
    ('가격', '.infoPrice', None),
)

# NOTE(review): passing the driver path positionally is the Selenium 3 calling
# convention; newer Selenium releases presumably need a Service object here.
# Confirm against the installed Selenium version.
driver = wd.Chrome("c:/python/chromedriver.exe")
try:
    driver.get(main_url)
    # Type the keyword into the search box and submit it with RETURN.
    driver.find_element(By.ID, "SearchGNBText").send_keys(keyword, Keys.RETURN)

    # Explicit wait: block until the result container exists or 10 seconds
    # pass (the original author sized the timeout to allow for a Cloudflare
    # interstitial page).
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'searchAllBox'))
        )
    except WebDriverException as e:
        # Without the result container there is nothing to click or scrape,
        # so stop here instead of failing on the next lookup.
        print('오류 발생', e)
    else:
        # div.searchAllBox > div.moreBtnWrap > button.moreBtn
        driver.find_element(
            By.CSS_SELECTOR, '.searchAllBox > .moreBtnWrap > .moreBtn'
        ).click()

        # Fixed pause (not an implicit wait) that lets the full list load;
        # this is the sum of the two 2-second sleeps used previously.
        time.sleep(4)

        # div.searchAllBox > ul.boxList > li.boxItem
        boxItems = driver.find_elements(
            By.CSS_SELECTOR, '.searchAllBox > .boxList > .boxItem'
        )

        # `with` closes the file even when scraping fails half-way through.
        with open(result_path, mode='w', encoding='utf-8') as wfile:
            for li in boxItems:
                # Read every field first so that an entry missing one of them
                # is skipped as a whole instead of aborting the remaining
                # products or leaving a half-written record in the file.
                try:
                    tour = {}
                    for label, selector, attribute in TOUR_FIELDS:
                        element = li.find_element(By.CSS_SELECTOR, selector)
                        value = element.get_attribute(attribute) if attribute else element.text
                        # get_attribute() returns None when the attribute is absent.
                        tour[label] = value or ''
                except WebDriverException as exc:
                    print('팝업메뉴는 제외', type(exc).__name__)
                    continue

                tour_list.append(tour)
                for label, value in tour.items():
                    print(f'{label}: ', value)
                    wfile.write(f'{label}: {value}\n')
                print("=" * 50)
                wfile.write("=" * 50 + '\n')
finally:
    # Always shut the browser down, including when the search itself failed.
    driver.quit()

import sys
sys.exit()  # terminate the Python process