38
loading...
This website collects cookies to deliver better user experience
sklearn.datasets
comes with a few small standard datasets that do not require downloading any file from an external website.
# Load scikit-learn's datasets
from sklearn import datasets
# Load digits dataset
digits = datasets.load_digits()
from sklearn.datasets import load_iris
data = load_iris()
data.target[[10, 25, 50]]
# Import the seaborn module
import seaborn as sns
sns.get_dataset_names()
['anagrams', 'anscombe', 'attention', 'brain_networks', 'car_crashes',
'diamonds', 'dots', 'exercise',
'flights', 'fmri', 'gammas', 'geyser', 'iris',
'mpg', 'penguins', 'planets', 'tips', 'titanic']
load_dataset()
function, you can load the required dataset.
import pandas as pd
import seaborn as sns
df = sns.load_dataset('tips')
print(df.head())
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
sep = ' '
parameter allows us to define the delimiter used in the file.
# Load library
import pandas as pd
# Create URL
url = 'https://tinyurl.com/titanic-csv'
# Load dataset
dataframe = pd.read_csv(url)
# View first two rows
dataframe.head(2)
PassengerId Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 892 3 Kelly, Mr. James male 34.5 0 0 330911 7.8292 NaN Q
1 893 3 Wilkes, Mrs. James (Ellen Needs) female 47.0 1 0 363272 7.0000 NaN S
2 894 2 Myles, Mr. Thomas Francis male 62.0 0 0 240276 9.6875 NaN Q
3 895 3 Wirz, Mr. Albert male 27.0 0 0 315154 8.6625 NaN S
4 896 3 Hirvonen, Mrs. Alexander (Helga E Lindqvist) female 22.0 1 1 3101298 12.2875 NaN S
'c:\users\rainer\desktop'
location, then the URL to be used will be:
# Create url
url = 'c:\\users\\rainer\\desktop\\sample_1.csv'
# Load dataset
dataframe = pd.read_csv(url)
# Create url
url = 'c:\\users\\rainer\\desktop\\sample_1.csv'
# Load dataset
dataframe = pd.read_table(url, sep = ',')
# Load dataset
dataframe = pd.read_csv('sample_1.csv')
print(dataframe)
item white blue red yellow
0 ruler 1 3 5 2
1 cup 2 5 9 2
2 pen 6 1 4 0
3 book 4 1 2 1
pd.read_excel('data.xls')
which returns a DataFrame composed of the data tabulated in the spreadsheet. However, if you need to load the data from a specific sheet, then specify the name of that sheet or its number (index) as the second argument, as shown:
pd.read_excel('data.xls','Sheet2')
or pd.read_excel('data.xls',1)
import pandas as pd
# Load file into Data variable
Data = pd.read_excel("Book1.xlsx","Sheet1")
# Print
print(Data.head())
NAME AGE BEST COLOR BOOK NO. PAGE
0 Edwin 5yrs Green 7 2404
1 Muuo Ian 6yrs Purple 8 2405
2 Joel 5yrs Orange 9 2406
3 Mush 5yrs Red 10 2407
4 Nyandeng 10yrs Green 11 2408
import pandas as pd
# Load file into Data variable
Data = pd.read_excel("Book1.xlsx","Sheet2")
# Print
print(Data.head())
NAME AGE BEST COLOR BOOK NO. PAGE
0 Fred 10yrs Yellow 25 2410
1 Bianca 13yrs Blue 26 2412
2 Martin 6yrs Green 27 2413
3 Genevieve 3yrs Violet 28 2414
4 Karen 12yrs Yellow 29 2415