txtai: Train a text labeler

Install txtai and all dependencies:

pip install txtai
# Fine-tune a tiny BERT sentiment classifier on the GLUE SST-2 dataset,
# then measure its accuracy on the validation split.
from datasets import load_dataset
from txtai.pipeline import HFTrainer, Labels

trainer = HFTrainer()

# Load the Hugging Face GLUE SST-2 dataset
ds = load_dataset("glue", "sst2")

# Train on the "sentence" column with "label" as the target
model, tokenizer = trainer("google/bert_uncased_L-2_H-128_A-2", ds["train"], columns=("sentence", "label"))

# Wrap the trained (model, tokenizer) pair in a Labels pipeline; dynamic=False
# means the model's fixed label set is used rather than zero-shot labels
labels = Labels((model, tokenizer), dynamic=False)

# Determine accuracy on validation set: count rows where the top predicted
# label id matches the gold label
correct = sum(row["label"] == labels(row["sentence"])[0][0] for row in ds["validation"])
correct / len(ds["validation"])
Output (validation accuracy): 0.8268348623853211
# Training data can also be passed directly as a list of dicts with
# "text" and "label" keys
data = [{"text": "This is a test sentence", "label": 0}, {"text": "This is not a test", "label": 1}]
model, tokenizer = trainer("google/bert_uncased_L-2_H-128_A-2", data)

# ... or as a pandas DataFrame
import pandas as pd

df = pd.DataFrame(data)
# Fix: train on the DataFrame — the original re-used the list `data` here,
# leaving `df` unused and making this example a duplicate of the one above
model, tokenizer = trainer("google/bert_uncased_L-2_H-128_A-2", df)
# Regression example: fine-tune on GLUE STS-B, which scores the semantic
# similarity of a sentence pair
ds = load_dataset("glue", "stsb")
model, tokenizer = trainer("google/bert_uncased_L-2_H-128_A-2", ds["train"], columns=("sentence1", "sentence2", "label"))

labels = Labels((model, tokenizer), dynamic=False)

# Score two sentence pairs with the trained model
pairs = [
    ("Sailing to the arctic", "Dogs and cats don't get along"),
    ("Walking down the road", "Walking down the street"),
]
labels(pairs)
Output (similarity scores per pair): [[(0, 0.5648878216743469)], [(0, 0.97544926404953)]]