24
loading...
This website collects cookies to deliver a better user experience
Term | Frequency |
---|---|
going | 3 |
to | 3 |
i | 2 |
am | 2 |
today | 2 |
it | 1 |
is | 1 |
rain | 1 |
drink | 1 |
coffee | 1 |
capital | 1 |
Term/document No | going | it | to | i | am | is | rain | today | drink | coffee | capital |
---|---|---|---|---|---|---|---|---|---|---|---|
1. | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
2. | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 0 |
3. | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 1 |
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
# Build a bag-of-words (document-term count) matrix for three toy sentences.
vectorizer = CountVectorizer()
doc = [
    "It is going to rain today",
    "I am going to drink coffee",
    "I am going to capital today",
]
# fit_transform learns the vocabulary from `doc` and returns a sparse matrix
# of term counts: one row per document, one column per vocabulary term.
X = vectorizer.fit_transform(doc)
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() is the supported way to read the column labels.
column = vectorizer.get_feature_names_out()
# NOTE: with the default token_pattern only tokens of two or more characters
# are kept, so the one-letter word "I" does not get a column in this output.
df = pd.DataFrame(X.toarray(), columns=column)
df
Term | TF value(doc1) | TF value(doc2) | TF value(doc3) |
---|---|---|---|
going | 0.1666 | 0.1666 | 0.1666 |
to | 0.1666 | 0.1666 | 0.1666 |
i | 0 | 0.1666 | 0.1666 |
am | 0 | 0.1666 | 0.1666 |
it | 0.1666 | 0 | 0 |
is | 0.1666 | 0 | 0 |
rain | 0.1666 | 0 | 0 |
today | 0.1666 | 0 | 0.1666 |
drink | 0 | 0.1666 | 0 |
coffee | 0 | 0.1666 | 0 |
capital | 0 | 0 | 0.1666 |
Term | IDF value |
---|---|
going | 0 |
to | 0 |
i | 0.17609 |
am | 0.17609 |
today | 0.17609 |
it | 0.47712 |
is | 0.47712 |
rain | 0.47712 |
drink | 0.47712 |
coffee | 0.47712 |
capital | 0.47712 |
Term/document No | going | it | to | i | am | is | rain | today | drink | coffee | capital |
---|---|---|---|---|---|---|---|---|---|---|---|
1. | 0 | 0.07948 | 0 | 0 | 0 | 0.07948 | 0.07948 | 0.02933 | 0 | 0 | 0 |
2. | 0 | 0 | 0 | 0.02933 | 0.02933 | 0 | 0 | 0 | 0.07948 | 0.07948 | 0 |
3. | 0 | 0 | 0 | 0.02933 | 0.02933 | 0 | 0 | 0.02933 | 0 | 0 | 0.07948 |
from sklearn.feature_extraction.text import TfidfVectorizer
# Compute TF-IDF weights for the same documents with scikit-learn.
# NOTE: by default TfidfVectorizer uses a smoothed, natural-log IDF and
# L2-normalizes each row, so its numbers will not match a hand-computed
# table that uses plain log10(N / df) without normalization.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(doc)
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() is the supported way to read the column labels.
column = vectorizer.get_feature_names_out()
df = pd.DataFrame(X.toarray(), columns=column)