29
loading...
This website collects cookies to deliver better user experience
Install txtai and all dependencies.
pip install txtai datasets pandas
# Train the google/bert_uncased_L-2_H-128_A-2 checkpoint on part of the
# "squad_v2" dataset for the question-answering task.
from datasets import load_dataset
from txtai.pipeline import HFTrainer
# Load the "squad_v2" dataset; only its "train" split is used below.
ds = load_dataset("squad_v2")
trainer = HFTrainer()
# Restrict training to the rows at indices 0..2999 of the train split and pass
# "bert-tiny-squadv2" as the output directory. The call's return value is
# discarded here.
trainer("google/bert_uncased_L-2_H-128_A-2", ds["train"].select(range(3000)), task="question-answering", output_dir="bert-tiny-squadv2")
print("Training complete")
# Training data: five hand-written examples. Every record asks the same
# question ("What ingredient?") and pairs a short recipe step ("context")
# with the single answer word that appears in that step ("answers").
data = [
{"question": "What ingredient?", "context": "Pour 1 can whole tomatoes", "answers": "tomatoes"},
{"question": "What ingredient?", "context": "Dice 1 yellow onion", "answers": "onion"},
{"question": "What ingredient?", "context": "Cut 1 red pepper", "answers": "pepper"},
{"question": "What ingredient?", "context": "Peel and dice 1 clove garlic", "answers": "garlic"},
{"question": "What ingredient?", "context": "Put 1/2 lb beef", "answers": "beef"},
]
# Train again, starting from "bert-tiny-squadv2" (presumably the directory
# written by an earlier training run - confirm), for 10 epochs on the records
# above. The result is unpacked into an in-memory model and tokenizer.
# NOTE(review): no output_dir is passed in this call, so whether the updated
# weights are saved anywhere depends on HFTrainer defaults - confirm.
model, tokenizer = trainer("bert-tiny-squadv2", data, task="question-answering", num_train_epochs=10)
# Build a question-answering pipeline from the model identified by the string
# "bert-tiny-squadv2" and ask which ingredient a recipe step mentions.
from transformers import pipeline
questions = pipeline("question-answering", model="bert-tiny-squadv2")
questions("What ingredient?", "Peel and dice 1 shallot")
# Recorded result of the call above: the answer span is "dice 1 shallot"
# (characters 9-23 of the context) with a score of roughly 0.05.
{'answer': 'dice 1 shallot',
'end': 23,
'score': 0.05128436163067818,
'start': 9}
# Build the question-answering pipeline from the in-memory `model` and
# `tokenizer` objects instead of a model name, then ask the same question.
from transformers import pipeline
# model.to("cpu") moves the model to the CPU before handing it to the pipeline
# (presumably because training may have left it on another device - confirm).
questions = pipeline("question-answering", model=model.to("cpu"), tokenizer=tokenizer)
questions("What ingredient?", "Peel and dice 1 shallot")
# Recorded result of the call above: the answer span is "shallot"
# (characters 16-23 of the context) with a score of roughly 0.13.
{'answer': 'shallot', 'end': 23, 'score': 0.13187439739704132, 'start': 16}