# Location of the cleaned train/val splits produced upstream in the pipeline.
cleaned_data = "../data/splits"

# DVCLive tracker: metrics/plots go under ../eval; no dvc.yaml or HTML report.
live = Live(dir="../eval", dvcyaml=False, report=None)
# --- Evaluating Model ---

# Which cross-validation fold to evaluate.
KFOLD = 1

# Load the held-out validation split for this fold.
val_dataset = pd.read_csv(f'{cleaned_data}/val/FAA-{KFOLD}.csv', header=0)
# Import the fine-tuned sequence-classification model and its tokenizer
# from the local checkpoint directory.
model_path = '../model/'
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Run inference over every validation example, collecting the raw logits
# and the argmax class id for each row of text.
text = val_dataset.text
predictions = []         # raw logits tensor per example
actual_predictions = []  # predicted class id (int) per example
for row in text:
    inputs = tokenizer(row, return_tensors="pt")
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(**inputs).logits
    predictions.append(logits)
    actual_predictions.append(logits.argmax().item())
# --- Visualizations ---
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
# --- Prediction Heat Maps ---
# Count correct predictions and accumulate a 7x7 confusion matrix.
# Rows are indexed as (6 - predicted) so the rendered y-axis reads
# bottom-up, matching the reversed y_labels used when plotting.
correct = 0
heat_map = np.zeros((7, 7), dtype=float)
for index, label in enumerate(val_dataset.label):
    if label == actual_predictions[index]:
        correct += 1
    heat_map[6 - actual_predictions[index]][label] += 1
print("Correct based on my actual predictions: ", correct / len(actual_predictions))
# --- Save Metrics ---
# Record fold accuracy via DVCLive's summary file.
metrics = {'accuracy': correct / len(actual_predictions)}
live.summary = metrics
live.make_summary()
Normalize heat map
for i, category in enumerate(heat_map):
= 0
total
for val in category:
= total + val
total
for j, val in enumerate(category):
= val / total heat_map[i][j]
# --- Plot heat map ---
fig, ax = plt.subplots(figsize=(11, 9))
fig.set_tight_layout(True)
# Category labels; y_labels is reversed to match the flipped row order
# used when the heat map was accumulated.
labels = ['II', 'ME', 'AU', 'AF', 'DE', 'EQ', 'AI']
y_labels = ['AI', 'EQ', 'DE', 'AF', 'AU', 'ME', 'II']
sb.heatmap(heat_map, cmap="Blues", xticklabels=labels, yticklabels=y_labels, annot=True)

# Log the confusion matrix to DVCLive as a sklearn-style plot.
actual = val_dataset.label.tolist()
predicted = actual_predictions
live.log_sklearn_plot("confusion_matrix", actual, predicted)