Evaluating the Model

import pandas as pd
import torch
from dvclive import Live
from transformers import AutoModelForSequenceClassification, AutoTokenizer

cleaned_data = "../data/splits"
live = Live(dir="../eval", dvcyaml=False, report=None)
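
Live(dir="../eval", dvcyaml=False, report=None) points dvclive at the ../eval directory and disables both dvc.yaml updates and automatic report generation; the metrics and plots logged below all land under that directory.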

Define which k-fold split to evaluate

KFOLD = 1

Load Evaluation Dataset

val_dataset = pd.read_csv(f'{cleaned_data}/val/FAA-{KFOLD}.csv', header=0)
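
The rest of the notebook assumes this CSV has text and label columns; a quick sanity check (a sketch, not part of the original pipeline) catches a bad split early:

# Sketch: verify the columns this notebook relies on actually exist.
assert {"text", "label"}.issubset(val_dataset.columns), val_dataset.columns
print(val_dataset.shape)
val_dataset.head()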

Load the fine-tuned model

model_path = '../model/'
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

text = val_dataset.text
predictions = []         # raw logits for each example
actual_predictions = []  # predicted class index (argmax of the logits)

for row in text:
    # truncation=True guards against inputs longer than the model's max length
    inputs = tokenizer(row, return_tensors="pt", truncation=True)

    with torch.no_grad():
        logits = model(**inputs).logits

    predictions.append(logits)
    actual_predictions.append(logits.argmax().item())
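
Tokenizing and classifying one row at a time keeps the loop simple but is slow on larger splits. A batched variant might look like the following sketch, assuming the tokenizer pads each batch to its longest sequence; the batch size of 32 is an arbitrary choice:

# Sketch: batched inference, trading memory for throughput.
batch_size = 32
batched_predictions = []

for start in range(0, len(text), batch_size):
    batch = text.iloc[start:start + batch_size].tolist()
    inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True)

    with torch.no_grad():
        logits = model(**inputs).logits

    batched_predictions.extend(logits.argmax(dim=-1).tolist())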

Visualizations

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb

Prediction Heat Maps

Count correct predictions and add to heat map

correct = 0
heat_map = np.zeros((7, 7), dtype=float)

# Rows are predicted classes (flipped so class 0 ends up on the bottom row
# of the plot); columns are true labels.
for index, label in enumerate(val_dataset.label):
    if label == actual_predictions[index]:
        correct += 1

    heat_map[6 - actual_predictions[index]][label] += 1

print("Correct based on my actual predictions: ", correct/len(actual_predictions))

Save Metrics

metrics = {'accuracy': correct / len(actual_predictions)}
live.summary = metrics
live.make_summary()
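
make_summary() writes the summary into the Live directory configured above. A quick way to inspect it (a sketch; the metrics.json filename is an assumption based on recent dvclive defaults):

# Sketch: read back what dvclive wrote. The filename is assumed from
# recent dvclive defaults and may differ across versions.
import json
from pathlib import Path

print(json.loads(Path("../eval/metrics.json").read_text()))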

Normalize heat map rows so each predicted class sums to 1

for i, category in enumerate(heat_map):
    heat_map[i] = category / category.sum()
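
The same row normalization can be written as one vectorized step; this is an alternative to the loop above, not an additional step (like the loop, it divides by zero for any class that was never predicted):

# Sketch: vectorized equivalent of the normalization loop.
heat_map = heat_map / heat_map.sum(axis=1, keepdims=True)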

Plot heat map

fig, ax = plt.subplots(figsize=(11, 9))
fig.set_tight_layout(True)
labels = ['II', 'ME', 'AU', 'AF', 'DE', 'EQ', 'AI']    # x axis: true labels, class 0 first
y_labels = ['AI', 'EQ', 'DE', 'AF', 'AU', 'ME', 'II']  # y axis: predictions, reversed to match the flipped rows
sb.heatmap(heat_map, cmap="Blues", xticklabels=labels, yticklabels=y_labels, annot=True)
ax.set_xlabel("True label")
ax.set_ylabel("Predicted label")
actual = val_dataset.label.tolist()
predicted = actual_predictions
live.log_sklearn_plot("confusion_matrix", actual, predicted)
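
log_sklearn_plot stores the confusion-matrix data for DVC, but the annotated seaborn figure itself is not persisted. Saving it next to the other evaluation outputs (a sketch; the file name is arbitrary) keeps everything in one place, and live.end() closes the run when the Live instance is not used as a context manager:

# Sketch: persist the annotated heat map alongside the dvclive outputs.
fig.savefig("../eval/heat_map.png", dpi=150)
plt.show()

live.end()  # finalize the dvclive run (recent dvclive versions)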