概要

このスクリプトは,kw_list.txtの各行に書かれた検索語それぞれについてGoogle検索結果の1位のタイトルとURLを取得し,CSVファイル(url_list.csv,Shift-JIS)に1行ずつ追記します.

スクリプト

import csv
from urllib.parse import parse_qs, urlsplit

import requests
from bs4 import BeautifulSoup

def scraping(search_word):
    """Fetch the Google result page for ``search_word`` and return its result links.

    Args:
        search_word: Query text. Surrounding whitespace (for example the
            newline left on a line read from a file) is ignored.

    Returns:
        The ``<a>`` elements matched by ``div.kCrYT > a`` in page order, or an
        empty list when the query is blank.

    Raises:
        requests.exceptions.RequestException: If the request fails or does not
            complete within the timeout.
    """
    query = search_word.strip()
    if not query:
        # Nothing to search for; skip the network round trip.
        return []
    # Passing the query through ``params`` lets requests percent-encode it, so
    # keywords containing '&', '#', '+' or spaces reach Google intact.
    response = requests.get(
        'https://www.google.co.jp/search',
        params={'hl': 'ja', 'q': query},
        timeout=10,  # without a timeout a stalled connection blocks forever
    )
    soup = BeautifulSoup(response.text, "html.parser")
    return soup.select('div.kCrYT > a')

def _target_url(href):
    """Return the destination URL wrapped inside a Google redirect link.

    Result links have the form ``/url?q=<encoded target>&sa=...&ved=...``.
    Only the decoded ``q`` value is wanted; any href that does not have that
    shape is returned unchanged.
    """
    parts = urlsplit(href)
    if parts.path == '/url':
        targets = parse_qs(parts.query).get('q')
        if targets:
            return targets[0]
    return href


def output(search_site_list, pages_num=1):
    """Append the title and URL of the top search results to ./url_list.csv.

    Args:
        search_site_list: Result anchors in rank order. Each item must support
            ``item.select(css)`` and ``item['href']``.
        pages_num: Maximum number of results to write, counted from the top.

    Nothing is written (and the file is not created) when there are no
    results to record.
    """
    rows = []
    # zip() stops at whichever runs out first: the limit or the results.
    for _, site in zip(range(pages_num), search_site_list):
        headings = site.select('h3.zBAuLc')
        site_title = headings[0].text if headings else ''
        rows.append((site_title, _target_url(site['href'])))
    if not rows:
        return
    # newline='' is what the csv module expects; errors='replace' keeps a
    # title with characters outside Shift-JIS from aborting the whole run.
    with open('./url_list.csv', 'a', encoding='shift-jis',
              errors='replace', newline='') as f:
        # csv.writer quotes fields, so commas inside a title stay in one column.
        csv.writer(f).writerows(rows)

# Look up every non-blank line of kw_list.txt and record its top search hit.
# 'utf-8-sig' reads plain UTF-8 as well as files saved with a BOM, which would
# otherwise be glued to the front of the first keyword.
with open('./kw_list.txt', 'r', encoding='utf-8-sig') as f:
    for line in f:
        # Lines keep their trailing newline, so a blank line is '\n' and never
        # ''; strip first, then test for emptiness.
        search_word = line.strip()
        if not search_word:
            continue
        search_site_list = scraping(search_word=search_word)
        output(search_site_list)