crawling-task-1

Building a web app with Flask

main.py

from flask import Flask, render_template, request, redirect, send_file

# Import the modules we wrote
from scrapper import get_jobs
from exporter import save_to_file

app = Flask("SuperScrapper")

# In-memory cache: search word -> list of scraped jobs
db = {}

@app.route("/")
def home():
  return render_template("potato.html")

@app.route("/report")
def report():
  word = request.args.get('word')
  if word:
    word = word.lower()
    existingJobs = db.get(word)
    if existingJobs:
      jobs = existingJobs
    else:
      jobs = get_jobs(word)
      db[word] = jobs
  else:
    return redirect("/")
  return render_template(
    "report.html",
    searchingBy=word,
    resultsNumber=len(jobs),
    jobs=jobs
  )

@app.route("/export")
def export():
  try:
    word = request.args.get('word')
    if not word:
      raise Exception()
    word = word.lower()
    jobs = db.get(word)
    if not jobs:
      raise Exception()
    save_to_file(jobs)
    return send_file('jobs.csv')
  except Exception:
    return redirect("/")

app.run(host="0.0.0.0")
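The db dict is an in-memory cache: the first search for a word scrapes the site, and every repeat search is answered straight from the dict. A minimal standalone sketch of the same pattern (get_or_fetch and the lambda are illustrative names, not part of the project):

db = {}

def get_or_fetch(word, fetch):
  word = word.lower()
  if word not in db:
    db[word] = fetch(word)  # the expensive scrape runs only once per word
  return db[word]

print(get_or_fetch("Python", lambda w: [f"{w} job"]))  # triggers the fetch
print(get_or_fetch("python", lambda w: [f"{w} job"]))  # served from the dict

Note the cache lives only as long as the Flask process; restarting the server clears it.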

/templates

  • potato.html
<!DOCTYPE html>
<html>
  <head>
    <title>Job Search</title>
  </head>
  <body>
    <h1>Job Search</h1>
    <form action="/report" method="get">
      <input placeholder="Search for a job" required name="word">
      <button>Search</button>
    </form>
  </body>
</html>
  • report.html
<!DOCTYPE html>
<html>
  <head>
    <title>Job Search</title>
    <style>
      section {
        display: grid;
        gap: 20px;
        grid-template-columns: repeat(4, 1fr);
      }
    </style>
  </head>
  <body>
    <h1>Search Results</h1>
    <h3>Found {{resultsNumber}} results for: {{searchingBy}}</h3>
    <a href="/export?word={{searchingBy}}">Export to CSV</a>
    <section>
      <h4>Title</h4>
      <h4>Company</h4>
      <h4>Location</h4>
      <h4>Link</h4>
      {% for job in jobs %}
        <span>{{job.title}}</span>
        <span>{{job.company}}</span>
        <span>{{job.location}}</span>
        <a href="{{job.apply_link}}">Apply</a>
      {% endfor %}
    </section>
  </body>
</html>
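The form in potato.html submits with method="get", so the search term arrives as the query string /report?word=..., which the report route reads via request.args.get('word'). A quick way to see that mechanism in isolation, using Flask's built-in test client (demo is a throwaway app just for this sketch, not the project's app object):

from flask import Flask, request

demo = Flask(__name__)

@demo.route("/report")
def report():
  return f"word = {request.args.get('word')}"

with demo.test_client() as client:
  print(client.get("/report?word=python").get_data(as_text=True))  # word = python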

exporter.py

import csv

def save_to_file(jobs):
  # "with" closes the file automatically; newline="" avoids blank rows on Windows
  with open("jobs.csv", mode="w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["Title", "Company", "Location", "Link"])
    for job in jobs:
      writer.writerow(list(job.values()))
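A quick way to check exporter.py on its own: the sample dict below mirrors the keys extract_job returns, with made-up values.

from exporter import save_to_file

save_to_file([{
  'title': 'Backend Developer',
  'company': 'Acme Corp',
  'location': 'Seoul',
  'apply_link': 'https://stackoverflow.com/jobs/123'
}])
# jobs.csv now holds the header row plus this one job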

scrapper.py

import requests
from bs4 import BeautifulSoup

def get_last_page(url):
    result = requests.get(url)
    soup = BeautifulSoup(result.text, "html.parser")
    # The final pagination link is "next", so the second-to-last one
    # holds the last page number
    pages = soup.find("div", {"class": "s-pagination"}).find_all("a")
    last_page = pages[-2].get_text(strip=True)
    return int(last_page)


def extract_job(html):
    title = html.find("h2").find("a")["title"]
    company, location = html.find("h3").find_all(
        "span", recursive=False)
    company = company.get_text(strip=True)
    # The location span looks like "- Seoul\r\n": strip the dash and whitespace
    location = location.get_text(
        strip=True).strip("-").strip(" \r").strip("\n")
    job_id = html['data-jobid']
    return {
        'title': title,
        'company': company,
        'location': location,
        "apply_link": f"https://stackoverflow.com/jobs/{job_id}"
    }


def extract_jobs(last_page, url):
    jobs = []
    for page in range(last_page):
        print(f"Scraping SO: Page: {page}")
        result = requests.get(f"{url}&pg={page+1}")
        soup = BeautifulSoup(result.text, "html.parser")
        results = soup.find_all("div", {"class": "-job"})
        for result in results:
            job = extract_job(result)
            jobs.append(job)
    return jobs


def get_jobs(word):
    url = f"https://stackoverflow.com/jobs?q={word}&sort=i"
    last_page = get_last_page(url)
    jobs = extract_jobs(last_page, url)
    return jobs
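To see what extract_job expects, here is a self-contained run against a hand-written snippet shaped like one job card (the markup is a stand-in for illustration, not the site's real HTML):

from bs4 import BeautifulSoup
from scrapper import extract_job

snippet = """
<div class="-job" data-jobid="123">
  <h2><a title="Backend Developer">Backend Developer</a></h2>
  <h3><span>Acme Corp</span><span>- Seoul</span></h3>
</div>
"""
card = BeautifulSoup(snippet, "html.parser").find("div", {"class": "-job"})
print(extract_job(card))
# {'title': 'Backend Developer', 'company': 'Acme Corp',
#  'location': 'Seoul', 'apply_link': 'https://stackoverflow.com/jobs/123'}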

Notes

  • flask
      # return render_template("bbb.html", cc=word)
      # on the HTML side, {{cc}} pulls that value into the template
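The note above in runnable form, using render_template_string so the sketch is self-contained (with a real bbb.html file you would call render_template instead; bbb.html and cc are the placeholder names from the note):

from flask import Flask, render_template_string

app = Flask(__name__)

@app.route("/hello")
def hello():
  word = "python"
  # with a template file: return render_template("bbb.html", cc=word)
  return render_template_string("<h3>{{cc}}</h3>", cc=word)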