Sentiment analysis of BBC News headlines

Sentiment analysis is the process of computationally identifying and categorizing opinions expressed in a piece of text, especially in order to determine whether the writer's attitude towards a particular topic, product, etc. is positive, negative, or neutral.
import requests
import pandas
from bs4 import BeautifulSoup
import numpy as np

# Sentiment lexicon: 'word' column holds the lexicon words, 'sentiment'
# the label (0 = negative, anything else = positive).
df = pandas.read_csv('sentiment.csv')
sen = df['word']
cat = df['sentiment']

# Fetch the BBC News front page ONCE.  The original issued the identical
# request/parse sequence twice back to back; the second fetch was pure
# wasted network traffic and simply overwrote the same variables.
url = 'https://www.bbc.com/news'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
headlines = soup.find('body').find_all('h3')

# <h3> texts that are site chrome, not actual news headlines.
unwanted = ['BBC World News TV', 'BBC World Service Radio', 'News daily newsletter', 'Mobile app', 'Get in touch']
news = []
import requests
import pandas
from bs4 import BeautifulSoup
import numpy as np

# Load the word -> sentiment lexicon (0 = negative, otherwise positive).
df = pandas.read_csv('sentiment.csv')
sen = df['word']
cat = df['sentiment']

# Download the BBC News front page and grab every <h3> element.
url = 'https://www.bbc.com/news'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
headlines = soup.find('body').find_all('h3')

# Navigation/boilerplate <h3> texts to skip.
unwanted = ['BBC World News TV', 'BBC World Service Radio', 'News daily newsletter', 'Mobile app', 'Get in touch']
news = []
neutral = []
bad = []
good = []

# Collect unique, real headlines.  The original source had the loop body
# unindented, which is a SyntaxError in Python; the intended nesting is
# restored here.  Stripping once into a local avoids three repeated
# .text.strip() calls per headline.
for x in headlines:
    headline = x.text.strip()
    if headline not in unwanted and headline not in news:
        news.append(headline)
import requests
import pandas
from bs4 import BeautifulSoup
import numpy as np

# Sentiment lexicon: 'word' column holds lexicon words, 'sentiment'
# the label (0 = negative, anything else = positive).
df = pandas.read_csv('sentiment.csv')
sen = df['word']
cat = df['sentiment']

# Scrape every <h3> from the BBC News front page.
url = 'https://www.bbc.com/news'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
headlines = soup.find('body').find_all('h3')

# <h3> texts that are site chrome, not headlines.
unwanted = ['BBC World News TV', 'BBC World Service Radio', 'News daily newsletter', 'Mobile app', 'Get in touch']
news = []
neutral = []
bad = []
good = []

# The original source had explanatory prose fused into the code here
# (a stray duplicated `if` line and "condition.for i in range(...)");
# this is the reconstructed, syntactically valid loop.
for x in headlines:
    headline = x.text.strip()
    if headline not in unwanted and headline not in news:
        news.append(headline)
        # Lowercase once: the substring search below is case-sensitive.
        lowered = headline.lower()
        # range(len(sen)) is equivalent to the original range(len(df['n'])):
        # every column of the same DataFrame has the same length.
        for i in range(len(sen)):
            if sen[i] in lowered:
                if cat[i] == 0:
                    bad.append(lowered)
                else:
                    good.append(lowered)
The `for i in range(len(df['n']))` loop searches each headline for every word in the sentiment.csv dataset. The `if sen[i] in x.text.strip().lower():` condition tests whether a lexicon word occurs in the headline, and the `if cat[i] == 0:` condition then determines whether that word carries a negative or positive sentiment, appending the headline to either the bad or the good list. The `.lower()` call converts all the letters inside the headline to lowercase; this is done because the word-search algorithm is case-sensitive.
import requests
import requests
import pandas
from bs4 import BeautifulSoup
import numpy as np

# --- Sentiment lexicon --------------------------------------------------
# sentiment.csv: 'word' column holds lexicon words, 'sentiment' holds the
# label (0 = negative, anything else = positive).
df = pandas.read_csv('sentiment.csv')
sen = df['word']
cat = df['sentiment']

# --- Scrape BBC News headlines -------------------------------------------
url = 'https://www.bbc.com/news'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
headlines = soup.find('body').find_all('h3')

# <h3> texts that are site chrome, not headlines.
unwanted = ['BBC World News TV', 'BBC World Service Radio', 'News daily newsletter', 'Mobile app', 'Get in touch']
news = []
bad = []
good = []

for x in headlines:
    headline = x.text.strip()
    # Skip boilerplate entries and duplicates.
    if headline in unwanted or headline in news:
        continue
    news.append(headline)
    # Lowercase once: the substring search is case-sensitive.
    lowered = headline.lower()
    # BUG FIX: the original appended the headline for EVERY matching lexicon
    # word, so one headline could be counted several times (even in both
    # lists), and the "neutral" tally below could go negative.  Each
    # headline is now classified at most once: the first matching lexicon
    # word decides its sentiment.
    for i in range(len(sen)):
        if sen[i] in lowered:
            if cat[i] == 0:
                bad.append(lowered)
            else:
                good.append(lowered)
            break

badp = len(bad)
goodp = len(good)
# Headlines with no lexicon match at all are counted as neutral.
nep = len(news) - (badp + goodp)
print('Scraped headlines: ' + str(len(news)))
print('Headlines with negative sentiment: ' + str(badp) + '\nHeadlines with positive sentiment: ' + str(goodp) + '\nHeadlines with neutral sentiment: ' + str(nep))
import requests
import pandas
from bs4 import BeautifulSoup
import numpy as np

# Word -> sentiment lexicon (0 = negative, otherwise positive).
df = pandas.read_csv('sentiment.csv')
sen = df['word']
cat = df['sentiment']

# Pull the BBC News front page and take every <h3> element as a candidate.
url = 'https://www.bbc.com/news'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
headlines = soup.find('body').find_all('h3')

# <h3> texts that belong to the site chrome rather than the news feed.
unwanted = ['BBC World News TV', 'BBC World Service Radio', 'News daily newsletter', 'Mobile app', 'Get in touch']
news = []
neutral = []
bad = []
good = []

for tag in headlines:
    text = tag.text.strip()
    # Guard clause: ignore boilerplate and anything already collected.
    if text in unwanted or text in news:
        continue
    news.append(text)
    # Normalise to lowercase because the word search is case-sensitive.
    lowered = text.lower()
    # Walk the lexicon; the headline is recorded once per matching word,
    # in bad for label 0 and in good otherwise.
    for word, label in zip(sen, cat):
        if word in lowered:
            (bad if label == 0 else good).append(lowered)

badp = len(bad)
goodp = len(good)
nep = len(news) - (badp + goodp)
print('Scraped headlines: ' + str(len(news)))
print('Headlines with negative sentiment: ' + str(badp) + '\nHeadlines with positive sentiment: ' + str(goodp) + '\nHeadlines with neutral sentiment: ' + str(nep))