crawling-task-6

알바 정보 csv로 만들기

import os
import csv
import requests
from bs4 import BeautifulSoup

# Crawl the "super brand" listing on the alba.co.kr home page and write one
# CSV file per brand containing that brand's job postings
# (place, title, time, pay, date).


def _text_of(parent, tag, attrs=None):
    """Return the text of the first matching descendant, or "" when absent.

    BeautifulSoup's ``find`` returns None when nothing matches; chaining
    ``.find(...).get_text()`` directly would raise AttributeError, so every
    lookup goes through this guard instead.
    """
    if parent is None:
        return ""
    node = parent.find(tag, attrs or {})
    return node.get_text() if node is not None else ""


os.system("clear")
alba_url = "http://www.alba.co.kr"

home_res = requests.get(alba_url)
brand_box = BeautifulSoup(home_res.text, "html.parser").find('div', {'id': 'MainSuperBrand'})
# Defensive: if the brand container is missing, there is nothing to crawl.
links = brand_box.find_all('a', {'class': 'goodsBox-info'}) if brand_box is not None else []

for a in links:
    href = a.attrs.get('href')
    company = a.find('span', {'class': 'company'})
    # Defensive: skip anchors that lack a target URL or a company name.
    if not href or company is None:
        continue
    # '/' is a path separator and cannot appear in a file name.
    file_name = company.get_text().replace('/', ',')

    # `with` guarantees the file is flushed and closed even if a request or
    # parse step fails; newline="" is what the csv module expects so that it
    # controls line endings itself.
    with open(f"{file_name}.csv", mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["place", "title", "time", "pay", "date"])

        each_page = requests.get(href)
        tbody = BeautifulSoup(each_page.text, "html.parser").find('tbody')
        # Defensive: a brand page without a table yields a header-only CSV.
        contents = tbody.find_all('tr', {'class': ''}) if tbody is not None else []
        for content in contents:
            # Rows without a "local" cell are not job postings; skip them.
            bplace = content.find('td', {'class': 'local'})
            if bplace is None:
                continue
            place = bplace.get_text()
            title = _text_of(content, 'span', {'class': 'company'})
            TIS = _text_of(content.find('td', {'class': 'data'}), 'span')
            pay_cell = content.find('td', {'class': 'pay'})
            paystd = _text_of(pay_cell, 'span', {'class': 'payIcon'})
            payunit = _text_of(pay_cell, 'span', {'class': 'number'})
            pay = paystd + payunit
            post = _text_of(content, 'td', {'class': 'regDate'})
            writer.writerow([place, title, TIS, pay, post])

메모

에러가 나는 부분에 방어 코드 작성
- 같은 html 태그와 패턴으로 작성된 페이지에서
  내용도 같은데 에러가 나는 이유를 모르겠는 부분도 있었다
csv파일명으로 입력 불가한 문자를 js의 정규식처럼 방어하는 것도 좋을 것 같다
- 이번 경우 명확하게 에러가 나는 문자가 있으므로 간단하게 replace로 해결
확장자명 실수 확인
- => file = open(f"{file_name}.csv", mode="w")

crawling-task-6

알바 정보 csv로 만들기

메모

Further Reading

crawling-task-1

crawling-task-2

crawling-task-3

Trending Tags