Instructions to use Short-Answer-Feedback/bart-finetuned-saf-communication-networks with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Short-Answer-Feedback/bart-finetuned-saf-communication-networks with Transformers:
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("Short-Answer-Feedback/bart-finetuned-saf-communication-networks")
model = AutoModelForSeq2SeqLM.from_pretrained("Short-Answer-Feedback/bart-finetuned-saf-communication-networks")
```
- Notebooks
- Google Colab
- Kaggle
| import numpy as np | |
| import torch | |
| from evaluate import load as load_metric | |
| from sklearn.metrics import accuracy_score, f1_score | |
| from tqdm.auto import tqdm | |
# upper bound on the number of tokens generated per prediction
MAX_TARGET_LENGTH = 128

# evaluation metrics, loaded once at import time (same order as listed)
sacrebleu, rouge, meteor, bertscore = (
    load_metric(metric_name)
    for metric_name in ('sacrebleu', 'rouge', 'meteor', 'bertscore')
)

# run on the gpu when one is available, otherwise fall back to the cpu
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def flatten_list(l):
    """
    Flatten one level of nesting from a list of lists

    Params:
        l (list of lists): list to be flattened
    Returns:
        A new list holding the elements of every sublist, in their original order
    """
    flat = []
    for sublist in l:
        # extend consumes the sublist element by element, like the inner loop would
        flat.extend(sublist)
    return flat
def extract_feedback(predictions):
    """
    Utility function to extract the feedback from the predictions of the model

    The text after the first ':' is taken as the feedback. When a prediction
    contains no ':', the leading label words are dropped instead: two words for
    a prediction starting with 'partially correct' (case-insensitive), one word
    otherwise. If nothing is left after the label, the whole prediction is
    returned as the feedback.

    Params:
        predictions (list): complete model predictions
    Returns:
        feedback (list): extracted feedback from the model's predictions,
            stripped of surrounding whitespace
    """
    feedback = []
    # iterate through predictions and try to extract predicted feedback
    for pred in predictions:
        _, colon, after_colon = pred.partition(':')
        if colon:
            fb = after_colon
        else:
            # 'partially correct' spans two words, so two splits are needed to
            # step over the label (a single split can never yield a third item)
            label_words = 2 if pred.lower().startswith('partially correct') else 1
            parts = pred.split(' ', label_words)
            # fall back to the full prediction when there is no text after the label
            fb = parts[label_words] if len(parts) > label_words else pred
        feedback.append(fb.strip())
    return feedback
def extract_labels(predictions):
    """
    Utility function to read the verification label off the start of each prediction

    Matching is a case-insensitive prefix test against the three known labels;
    predictions that start with none of them are marked 'Unknown label'.

    Params:
        predictions (list): complete model predictions
    Returns:
        labels (list): one label per prediction, in the same order
    """
    known_labels = ('Correct', 'Partially correct', 'Incorrect')
    labels = []
    for pred in predictions:
        lowered = pred.lower()
        # none of the known labels is a prefix of another, so at most one matches
        matched = next(
            (name for name in known_labels if lowered.startswith(name.lower())),
            'Unknown label',
        )
        labels.append(matched)
    return labels
def compute_metrics(predictions, labels):
    """
    Score decoded model predictions against the decoded golden labels

    Text-generation metrics (sacrebleu, rouge2, meteor, bertscore f1) are computed
    on the feedback part; accuracy and f1 are computed on the label part.

    Params:
        predictions (list): complete model predictions (decoded strings)
        labels (list): golden labels as decoded strings; each one is split at
            its first 'Feedback:' marker
    Returns:
        results (dict): dictionary with the computed evaluation metrics
    """
    # split the model output into its feedback text and its label
    predicted_feedback = extract_feedback(predictions)
    predicted_labels = extract_labels(predictions)

    # split each golden string once: label before the marker, feedback after it
    reference_feedback, reference_labels = [], []
    for gold in labels:
        pieces = gold.split('Feedback:', 1)
        reference_feedback.append(pieces[1].strip())
        reference_labels.append(pieces[0].strip())

    # text-overlap metrics from HF evaluate; sacrebleu takes a list of references per prediction
    wrapped_references = [[ref] for ref in reference_feedback]
    sacrebleu_output = sacrebleu.compute(predictions=predicted_feedback, references=wrapped_references)
    rouge_output = rouge.compute(predictions=predicted_feedback, references=reference_feedback)
    meteor_output = meteor.compute(predictions=predicted_feedback, references=reference_feedback)
    bert_output = bertscore.compute(
        predictions=predicted_feedback,
        references=reference_feedback,
        lang='en',
        rescale_with_baseline=True)
    # bertscore reports one f1 value per sample; average them into a plain float
    mean_bert_f1 = np.array(bert_output['f1']).mean().item()

    # classification metrics from sklearn on the labels
    gold_label_array = np.array(reference_labels)
    macro_classes = ['Incorrect', 'Partially correct', 'Correct']

    return {
        'sacrebleu': sacrebleu_output['score'],
        'rouge': rouge_output['rouge2'],
        'meteor': meteor_output['meteor'],
        'bert_score': mean_bert_f1,
        'accuracy': accuracy_score(gold_label_array, predicted_labels),
        'f1_weighted': f1_score(gold_label_array, predicted_labels, average='weighted'),
        'f1_macro': f1_score(
            gold_label_array,
            predicted_labels,
            average='macro',
            labels=macro_classes),
    }
def evaluate(model, tokenizer, dataloader):
    """
    Evaluate model on the given dataset

    Generates a prediction for every batch, decodes predictions and golden
    labels to text, and scores them with compute_metrics.

    Params:
        model (PreTrainedModel): seq2seq model
        tokenizer (PreTrainedTokenizer): tokenizer from HuggingFace
        dataloader (torch Dataloader): dataloader of the dataset to be used for evaluation;
            each batch is read as a dict with 'input_ids', 'attention_mask' and 'labels'
    Returns:
        results (dict): dictionary with the computed evaluation metrics
        predictions (list): list of the decoded predictions of the model
    """
    decoded_preds, decoded_labels = [], []
    pad_token_id = tokenizer.pad_token_id

    model.eval()
    # iterate through batches in the dataloader
    for batch in tqdm(dataloader):
        with torch.no_grad():
            batch = {k: v.to(device) for k, v in batch.items()}
            # generate tokens from batch
            generated_tokens = model.generate(
                batch['input_ids'],
                attention_mask=batch['attention_mask'],
                max_length=MAX_TARGET_LENGTH
            )
        # get golden labels from batch
        labels_batch = batch['labels']
        # label padding is commonly -100 (ignored by the loss), which is not a
        # valid token id; swap it for the pad token so decoding cannot fail
        if pad_token_id is not None:
            labels_batch = labels_batch.masked_fill(labels_batch == -100, pad_token_id)
        # decode model predictions and golden labels
        decoded_preds_batch = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
        decoded_labels_batch = tokenizer.batch_decode(labels_batch, skip_special_tokens=True)
        decoded_preds.append(decoded_preds_batch)
        decoded_labels.append(decoded_labels_batch)

    # convert predictions and golden labels into flattened lists
    predictions = flatten_list(decoded_preds)
    labels = flatten_list(decoded_labels)
    # compute metrics based on predictions and golden labels
    results = compute_metrics(predictions, labels)
    return results, predictions