22
loading...
This website collects cookies to deliver better user experience
BeautifulSoup
, Requests
, and lxml
libraries. An alternative API solution will be shown.
import requests, lxml
from bs4 import BeautifulSoup
from serpapi import GoogleSearch
CSS
selectors using SelectorGadget Chrome extension.
import requests, lxml
from bs4 import BeautifulSoup
# Scrape the streaming-provider entries from a Google results page and print
# each provider's name, link and price.

# Browser-like User-Agent sent with the request (presumably so Google serves
# the desktop HTML that the CSS selectors below were picked from -- confirm).
headers = {
    "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
}

# Query-string parameters of the search request.
params = {
    "q": "spider man watch online",  # search query
    "hl": "en",  # interface language
    "gl": "us",  # country to search from
}

# A timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
html = requests.get(
    "https://www.google.com/search",
    params=params,
    headers=headers,
    timeout=30,
)
html.raise_for_status()
soup = BeautifulSoup(html.text, 'lxml')

# Every element matching '.JkUS4b' is treated as one provider entry.
for result in soup.select('.JkUS4b'):
    name_tag = result.select_one('.i3LlFf')
    price_tag = result.select_one('.V8xno')
    link = result.get('href')

    # Google's class names change often; skip entries missing any expected
    # piece rather than crashing on `None.text` or a missing href.
    if name_tag is None or price_tag is None or link is None:
        continue

    name = name_tag.text
    price = price_tag.text
    print(f'{name}\n{link}\n{price}\n')
----------
'''
Hulu
https://www.hulu.com/watch/f82b95f5-13da-4acd-b378-7d3f6864919f
Premium subscription
Sling TV
https://watch.sling.com/1/program/ada829bcd452424d936dfd39a66a3f5e/watch?trackingid=google-feed
Premium subscription
...
'''
import json # just for pretty output
from serpapi import GoogleSearch
# Fetch the same search through SerpApi and pretty-print every entry of the
# response's 'available_on' list as JSON.

# Search parameters handed to the SerpApi client.
params = {
    "api_key": "API_KEY",  # presumably a placeholder for a real SerpApi key
    "engine": "google",
    "q": "spider man watch online",
    "gl": "us",  # country to use for the Google search
    "hl": "en",  # language to use for the Google search
}

search = GoogleSearch(params)
results = search.get_dict()

# Use a separate loop variable so the response dict is not overwritten, and
# fall back to an empty list when the response has no 'available_on' key.
for provider in results.get('available_on', []):
    print(json.dumps(provider, indent=2, ensure_ascii=False))
---------
'''
{
"name": "Hulu",
"link": "https://www.hulu.com/watch/f82b95f5-13da-4acd-b378-7d3f6864919f",
"price": "Premium subscription",
"thumbnail": "https://serpapi.com/searches/60ded28817f923a90145853c/images/c4dcca5efe0b92c0095874b9b1935aa7b3c0b903ab87dba3b1f2d3dea0f89733.png"
}
...
'''