Comparing sensitive data, confidential files or internal emails?

Most legal and privacy policies prohibit uploading sensitive data online. Diffchecker Desktop ensures your confidential information never leaves your computer. Work offline and compare documents securely.

3주차 과제 비교

Created Diff never expires
9 removals
15 lines
23 additions
29 lines
## Setup for web scraping: the requests and bs4 packages
import requests
from bs4 import BeautifulSoup

# Send a browser-like User-Agent header with the request.
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
# A timeout keeps the script from hanging forever if the server never answers.
data = requests.get('https://www.genie.co.kr/chart/top200?ditc=M&rtm=N&ymd=20210701', headers=headers, timeout=10)

soup = BeautifulSoup(data.text, 'html.parser')

## Scrape rank / song title / artist for Genie Music chart positions 1-50.
# CSS selectors copied from the page for the first row:
#   rank:   #body-content > div.newest-list > div > table > tbody > tr:nth-child(1) > td.number
#   title:  #body-content > div.newest-list > div > table > tbody > tr:nth-child(1) > td.info > a.title.ellipsis
#   artist: #body-content > div.newest-list > div > table > tbody > tr:nth-child(1) > td.info > a.artist.ellipsis
# Common prefix (one <tr> per chart entry):
#   #body-content > div.newest-list > div > table > tbody > tr

# Final version: select every chart row once, then read the three cells
# relative to each row.
musics = soup.select('#body-content > div.newest-list > div > table > tbody > tr')

for music in musics:
    # Only the first two characters of the rank cell are kept (as in both
    # original variants); strip() removes the whitespace/newline that
    # follows a one-digit rank.
    rank = music.select_one('td.number').text[:2].strip()
    title = music.select_one('td.info > a.title.ellipsis').text.strip()
    artist = music.select_one('td.info > a.artist.ellipsis').text.strip()

    print(rank, title, artist)