32
loading...
This website collects cookies to deliver better user experience
beautifulsoup, requests, lxml libraries.
Note: the HTML layout might change in the future, so some of the CSS selectors might stop working. Let me know if something isn't working.
pip install requests
pip install lxml
pip install beautifulsoup4
CSS selectors because of the select()/select_one() beautifulsoup methods that accept CSS selectors. CSS selectors reference.
from bs4 import BeautifulSoup
import requests, lxml, json
for news_result in soup.select('#news-carousel .card'):
# further code..
CSS selectors:
from bs4 import BeautifulSoup
import requests, lxml, json
# Request headers sent with the search request: a desktop browser
# User-Agent string.
headers = {
    'User-agent': (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
    ),
}

# Query-string parameters passed to the search URL.
params = dict(q='dune 2021', source='web')
def _text_or_none(card, selector):
    """Return the stripped text of the first match for *selector*, or None."""
    element = card.select_one(selector)
    return element.text.strip() if element is not None else None


def _attr_or_none(card, selector, attribute):
    """Return *attribute* of the first match for *selector*, or None."""
    element = card.select_one(selector)
    return element.get(attribute) if element is not None else None


def _thumbnail_url(style):
    """Extract the image URL from an inline ``background-image`` style value."""
    if not style:
        return None
    # Only the first comma-separated entry of the style value is used.
    first_entry = style.split(', ')[0]
    return first_entry.replace("background-image: url('", "").replace("')", "")


def get_organic_news_results():
    """Scrape the Brave Search news carousel, print it as JSON and return it.

    Sends a GET request to ``https://search.brave.com/search`` with the
    module-level ``headers`` and ``params``, parses the response with the
    ``lxml`` parser and builds one dict per ``#news-carousel .card`` element.

    Each dict has the keys ``title``, ``link``, ``time_published``,
    ``source``, ``favicon`` and ``thumbnail``. A field whose element or
    attribute is missing from a card is set to ``None`` rather than raising,
    so one incomplete card does not discard the rest of the results.

    Returns:
        list[dict]: The extracted news results (also printed as indented
        JSON, as before).

    Raises:
        requests.exceptions.HTTPError: If the response has a 4xx/5xx status.
        requests.exceptions.Timeout: If the server does not answer within
            30 seconds.
    """
    # Without a timeout, requests may wait on an unresponsive server forever.
    response = requests.get(
        'https://search.brave.com/search',
        headers=headers,
        params=params,
        timeout=30,
    )
    # Fail loudly on an error page instead of silently parsing it to [].
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    data = [
        {
            'title': _text_or_none(card, '.title'),
            # The link is read from the card element's own href attribute.
            'link': card.get('href'),
            'time_published': _text_or_none(card, '.card-footer__timestamp'),
            'source': _text_or_none(card, '.anchor'),
            'favicon': _attr_or_none(card, '.favicon', 'src'),
            'thumbnail': _thumbnail_url(_attr_or_none(card, '.img-bg', 'style')),
        }
        for card in soup.select('#news-carousel .card')
    ]

    print(json.dumps(data, indent=2, ensure_ascii=False))
    return data
# Run the scraper; the function prints the extracted news results as JSON.
get_organic_news_results()
---------------
# part of the output
'''
[
{
"title": "Zendaya talks potential 'Dune' sequel, what she admires about Tom ...",
"link": "https://www.goodmorningamerica.com/culture/story/zendaya-talks-potential-dune-sequel-admires-tom-holland-80555190",
"time_published": "17 hours ago",
"source": "goodmorningamerica.com",
"favicon": "https://imgr.search.brave.com/NygzuIHo7PzzX-7H4OjswMN4xwJ7u3_eEXq55_xXDog/fit/32/32/ce/1/aHR0cDovL2Zhdmlj/b25zLnNlYXJjaC5i/cmF2ZS5jb20vaWNv/bnMvZDQwMjIyNDJk/MjRjZGRmNjI4NmY2/NzUzY2I5YTkyMzIz/YTM4OTJiOTM3YjBm/NDk3OTVjNTIwOTY0/Nzg0YmUwYy93d3cu/Z29vZG1vcm5pbmdh/bWVyaWNhLmNvbS8",
"thumbnail": "https://imgr.search.brave.com/z-Za3HgnUCgTAP8vloSHS33eC0UkjIM8JsMdngGw_Rk/fit/200/200/ce/1/aHR0cHM6Ly9zLmFi/Y25ld3MuY29tL2lt/YWdlcy9HTUEvemVu/ZGF5YS1maWxlLWd0/eS1qZWYtMjExMDEz/XzE2MzQxMzkxNzQw/MjNfaHBNYWluXzE2/eDlfOTkyLmpwZw"
}
...
]
'''