DataAnalysis/웹크롤링

커피베이 coffeebay 웹크롤링 매장정보 가져오기 (beautifulsoup)

_DAMI 2022. 4. 22. 22:55
import requests
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib

# Crawl the Coffeebay store-finder listing, collect one
# [region, branch name, address, phone] row per store, save the rows to a CSV
# file and print a per-region count.

# Listing URL with every search filter left blank; only the trailing
# ``per_page`` value changes between requests.
STORE_LIST_URL = 'https://www.coffeebay.com/home/store/store_area?sido=&gugun=&sc_column=&sc_string=&svc_wifi_at=&svc_catering_at=&svc_event_at=&svc_24hr_at=&svc_smok_at=&svc_park_at=&svc_kidsrm_at=&svc_bookrm_at=&svc_pwdrrm_at=&svc_meetrm_at=&svc_bizrm_at=&per_page=%s'

# Seconds to wait on the server before giving up on a single request, so a
# stalled connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 10


def _parse_store_row(tr):
    """Return ``[loc, name, addr, tel]`` for one table row, or ``None``.

    ``None`` is returned when any of the four expected cells is missing, so
    that a row with a different layout is skipped instead of aborting the
    crawl with ``AttributeError``.
    """
    cells = (
        tr.find('td', class_="only-mobile-hidden"),  # region
        tr.find('td', class_="mobile-store"),  # branch name
        tr.find('a', class_="address"),  # address
        tr.find('td', class_="mobile-tel"),  # phone number
    )
    if any(cell is None for cell in cells):
        return None
    return [cell.text for cell in cells]


result = []
# Coffeebay
for pageNum in range(1, 34):
    # NOTE(review): the value sent as ``per_page`` is pageNum + 20 (21..53).
    # Whether the site reads it as a page number or as a row offset is not
    # visible from this script -- confirm that no stores are skipped or
    # fetched twice.
    url = STORE_LIST_URL % (pageNum + 20)
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Report and move on, mirroring how non-200 responses are handled,
        # so one bad page does not discard the rows collected so far.
        print('request failed:', pageNum, exc)
        continue

    print('-------------------------- " ', pageNum)

    if response.status_code != 200:
        print(response.status_code)
        continue

    html = response.content
    soup = BeautifulSoup(html, 'html.parser', from_encoding='cp949')

    tbody = soup.find('tbody')
    if tbody is None:
        # find() returns None when the page has no <tbody> at all.
        print('no store table found:', pageNum)
        continue

    for tr in tbody.find_all('tr'):
        row = _parse_store_row(tr)
        if row is None:
            continue
        result.append(row)
        print(*row)

pdResult = pd.DataFrame(result, columns=['loc', 'name', 'addr', 'tel'])
pdResult.to_csv('caffenbay_stores.csv', encoding='cp949')

# describe() summarises every column per region; only the first summary
# column is printed.
acount = pdResult.groupby(['loc']).describe()
print(acount.iloc[:, 0:1])