reddit 긁어 오기
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import requests
from flask import Flask, render_template, request
from bs4 import BeautifulSoup
# Browser-like User-Agent sent with every outgoing request made by this file.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/35.0.1916.47 Safari/537.36"
    ),
}

# Subreddit names offered as choices on the home page.
subreddits = (
    "javascript reactjs reactnative programming css golang flutter rust django"
).split()
def get_info(data, select):
    """Extract the title, upvote text and permalink of one scraped post.

    Args:
        data: Post container exposing ``find(tag, attrs)``; ``read`` passes
            BeautifulSoup elements.
        select: Label of the subreddit the post belongs to; copied verbatim
            into the result.

    Returns:
        A dict with the keys ``title``, ``upvotes``, ``url`` and ``select``,
        or ``None`` when the post has no usable link. ``title`` and
        ``upvotes`` are the element text, or ``None`` when the element is
        missing.
    """
    link = data.find("a", {"class": "SQnoC3ObvgnGjWt90zD9Z"})
    if link is None:
        # Containers without a post link are skipped by the caller.
        return None

    href = link.get("href")
    if not href:
        # An anchor without href would make the string concatenation below
        # raise TypeError; treat it like a missing link instead.
        return None

    upvotes = data.find("div", {"class": "_1rZYMD_4xY3gRcSS3p8ODO"})
    title = data.find("h3", {"class": "_eYtD2XCVieq6emjKBH3m"})

    return {
        "title": title.get_text() if title is not None else None,
        "upvotes": upvotes.get_text() if upvotes is not None else None,
        "url": "https://reddit.com" + href,
        "select": select,
    }
app = Flask("DayEleven")


@app.route("/")
def home():
    """Serve the landing page with the list of selectable subreddits."""
    page = render_template("home.html", subreddits=subreddits)
    return page
def _parse_upvotes(raw):
    """Convert displayed vote text into an integer usable as a sort key.

    Accepts plain numbers ("987"), thousands separators ("1,234") and the
    abbreviated "k"/"m" suffixes ("12.3k"). Anything that cannot be read as
    a number, including ``None``, counts as 0 so sorting never fails.
    """
    if isinstance(raw, int):
        return raw
    if not isinstance(raw, str):
        return 0

    text = raw.strip().lower().replace(",", "")
    multiplier = 1
    if text.endswith("k"):
        multiplier, text = 1000, text[:-1]
    elif text.endswith("m"):
        multiplier, text = 1000000, text[:-1]

    try:
        # round() rather than int(): float products can land a hair below
        # the intended whole number and would otherwise be truncated.
        return round(float(text) * multiplier)
    except (ValueError, OverflowError):
        return 0


@app.route("/read")
def read():
    """Fetch this month's top posts of the chosen subreddits, sorted by votes.

    Each checked box on the home form arrives as a query key named
    ``r/<subreddit>`` (the value is just "on"), so the keys are the selection.

    Returns:
        The rendered ``read.html`` page with ``lists`` (the accepted
        selections) and ``info`` (post dicts ordered by descending upvotes).
    """
    # Only fetch subreddits the home page offers; query keys are
    # user-controlled and end up inside the requested URL.
    allowed = {f"r/{name}" for name in subreddits}
    selects = [key for key in request.args.to_dict() if key in allowed]

    info = []
    for select in selects:
        try:
            res = requests.get(
                f"https://www.reddit.com/{select}/top/?t=month",
                headers=headers,
                timeout=10,  # without a timeout a stalled connection hangs the request
            )
        except requests.RequestException as err:
            # One unreachable subreddit should not fail the whole page.
            app.logger.warning("Skipping %s: %s", select, err)
            continue

        posts = BeautifulSoup(res.text, "html.parser").find_all(
            "div", {"class": "_1oQyIsiPHYt6nx7VOmd1sz"}
        )
        for post in posts:
            entry = get_info(post, select)
            if entry is None:
                # Defensive: get_info returns None for containers without a link.
                continue
            entry["upvotes"] = _parse_upvotes(entry["upvotes"])
            info.append(entry)

    info_mixed = sorted(info, key=lambda post: post["upvotes"], reverse=True)
    return render_template("read.html", lists=selects, info=info_mixed)
# Start Flask's built-in server. host "0.0.0.0" listens on every network
# interface instead of only localhost. This call sits at module top level,
# so it runs whenever this file is executed or imported.
app.run(host="0.0.0.0")
templates
- home.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>Reddit Reader</title>
    <!-- <link> is a void element and takes no closing tag. -->
    <link href="https://andybrewer.github.io/mvp/mvp.css" rel="stylesheet">
  </head>
  <body>
    <header>
      <h1>Reddit Reader</h1>
      <p>A service to aggregate all your favorite subreddits</p>
    </header>
    <main>
      {# Each checked box is submitted to /read as a query key "r/<name>". #}
      <form action="/read">
        <h4>Select the subreddits you're interested in:</h4>
        <ul>
          {% for subreddit in subreddits %}
          <li>
            <input type="checkbox" id="{{ subreddit }}" name="r/{{ subreddit }}">
            <label for="{{ subreddit }}">r/{{ subreddit }}</label>
          </li>
          {% endfor %}
        </ul>
        <button type="submit">Aggregate</button>
      </form>
    </main>
  </body>
</html>
- read.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>Reddit Reader</title>
    <!-- <link> is a void element and takes no closing tag. -->
    <link href="https://andybrewer.github.io/mvp/mvp.css" rel="stylesheet">
  </head>
  <body>
    <header>
      <h1>
        <a href="/">Reddit Reader</a>
      </h1>
      {# "lists" holds the selected subreddit labels passed in by the /read view. #}
      <h3>Reading:
        {% for name in lists %}
        {{ name }}
        {% endfor %}
      </h3>
    </header>
    <main>
      {# "info" is the list of post dicts with title, upvotes, url and select. #}
      {% for i in info %}
      <div>
        <h3>
          <!-- rel="noopener noreferrer" keeps the opened page from reaching window.opener. -->
          <a href="{{ i.url }}" target="_blank" rel="noopener noreferrer">{{ i.title }}</a>
        </h3>
        <h4>{{ i.upvotes }} upvotes · {{ i.select }}</h4>
        <hr>
      </div>
      {% endfor %}
    </main>
    <footer>
      <button type="button" onclick="location.href='/'">Back to Home</button>
    </footer>
  </body>
</html>
메모
- 가져온 내용이 JSON인지 HTML인지 구분해서 BeautifulSoup 필요 여부 판단
- 어려웠던 부분
- 가져오는 사이트마다 구조가 다르다는 점
- None type 방어하는 방법
- 파이썬 문법