概要
このスクリプトは、kw_list.txt の各行に書かれた検索語について、Google 検索結果 1 位のタイトルと URL を取得し、CSV ファイル(url_list.csv)に追記します。
スクリプト
"""Look up the top Google result for each keyword and append it to a CSV file.

Search terms are read from ``./kw_list.txt`` (one per line). For each term the
Google result page is fetched, and the title and URL of the first matched
result are appended to ``./url_list.csv``.
"""
import csv
from itertools import islice
from urllib.parse import parse_qs, urlsplit

import requests
from bs4 import BeautifulSoup

GOOGLE_SEARCH_URL = 'https://www.google.co.jp/search'
KEYWORD_FILE = './kw_list.txt'
OUTPUT_FILE = './url_list.csv'
REQUEST_TIMEOUT_SECONDS = 10


def scraping(search_word):
    """Fetch the Google result page for *search_word* and return its links.

    Args:
        search_word: The search term. It is passed through ``params`` so that
            spaces, ``&``, ``#`` and non-ASCII characters are URL-encoded
            instead of corrupting the query string.

    Returns:
        The elements matched by the CSS selector ``div.kCrYT > a``.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the request exceeds ``REQUEST_TIMEOUT_SECONDS``.
    """
    response = requests.get(
        GOOGLE_SEARCH_URL,
        params={'hl': 'ja', 'q': search_word},
        # Without a timeout a stalled connection would hang the whole run.
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # The HTTP status is deliberately not checked: an error page simply yields
    # no matches, so the keyword is skipped rather than aborting the run.
    soup = BeautifulSoup(response.text, 'html.parser')
    # NOTE(review): the class name is tied to Google's result markup and will
    # need updating whenever that markup changes.
    return soup.select('div.kCrYT > a')


def _extract_target_url(href):
    """Return the destination URL wrapped in a ``/url?q=...`` redirect link.

    Links that are not ``/url`` redirects, or that carry no ``q`` parameter,
    are returned unchanged.
    """
    parts = urlsplit(href)
    if parts.path != '/url':
        return href
    # parse_qs also drops the trailing tracking parameters and decodes
    # percent-escapes, which a plain prefix replacement left in place.
    targets = parse_qs(parts.query).get('q')
    return targets[0] if targets else href


def output(search_site_list, pages_num=1):
    """Append the title and URL of the leading results to the output CSV.

    Args:
        search_site_list: Link elements as returned by ``scraping``.
        pages_num: Maximum number of results to write, taken from the front
            of ``search_site_list``.
    """
    rows = []
    for site in islice(search_site_list, max(pages_num, 0)):
        heading = site.select_one('h3.zBAuLc')
        site_title = heading.text if heading is not None else ''
        rows.append([site_title, _extract_target_url(site['href'])])

    if not rows:
        # Nothing to record; leave the output file untouched.
        return

    # errors='replace' keeps one unencodable character in a title from
    # aborting the run; newline='' is required by the csv module.
    with open(OUTPUT_FILE, 'a', encoding='shift-jis', errors='replace',
              newline='') as f:
        # csv.writer quotes titles containing commas or quotes, so each row
        # stays two well-formed fields.
        csv.writer(f).writerows(rows)


def main():
    """Run a search for every non-blank keyword line and record the result."""
    # utf-8-sig reads plain UTF-8 too, and strips a BOM if an editor added one.
    with open(KEYWORD_FILE, 'r', encoding='utf-8-sig') as f:
        for line in f:
            # Lines keep their trailing newline, so strip before testing for
            # blank lines and before searching.
            search_word = line.strip()
            if not search_word:
                continue
            search_site_list = scraping(search_word=search_word)
            output(search_site_list)


if __name__ == '__main__':
    main()