"""
This code is based on https://github.com/okankop/Efficient-3DCNNs
"""
import torch
from torch.autograd import Variable
import time
from utils import AverageMeter, calculate_accuracy
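
# Note: AverageMeter (a running value/average tracker) and calculate_accuracy
# (top-k precision for model outputs vs. targets) come from this repository's
# utils.py, following the Efficient-3DCNNs reference implementation; these
# descriptions are inferred from how the helpers are used below.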


def val_epoch_multimodal(
    epoch, data_loader, model, criterion, opt, logger, modality="both", dist=None
):
    # For evaluation with a single modality, specify which modality to keep and
    # which distortion to apply to the other modality: 'noise', 'addnoise' or
    # 'zeros'. For the paper's procedure: with the 'softhard' mask use 'zeros'
    # for evaluation; with the 'noise' mask use 'noise'.
    print("validation at epoch {}".format(epoch))
    assert modality in ["both", "audio", "video"]
    model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end_time = time.time()
    for i, (inputs_audio, inputs_visual, targets) in enumerate(data_loader):
        data_time.update(time.time() - end_time)
        if modality == "audio":
            print("Skipping video modality")
            if dist == "noise":
                print("Evaluating with full noise")
                # Replace the video stream entirely with standard Gaussian noise.
                inputs_visual = torch.randn(inputs_visual.size())
            elif dist == "addnoise":  # opt.mask == -4
                print("Evaluating with added noise")
                # Perturb the video stream with Gaussian noise scaled by its
                # own mean and standard deviation.
                inputs_visual = inputs_visual + (
                    torch.mean(inputs_visual)
                    + torch.std(inputs_visual) * torch.randn(inputs_visual.size())
                )
            elif dist == "zeros":
                inputs_visual = torch.zeros(inputs_visual.size())
            else:
                print("UNKNOWN DIST!")
        elif modality == "video":
            print("Skipping audio modality")
            if dist == "noise":
                print("Evaluating with full noise")
                inputs_audio = torch.randn(inputs_audio.size())
            elif dist == "addnoise":  # opt.mask == -4
                print("Evaluating with added noise")
                inputs_audio = inputs_audio + (
                    torch.mean(inputs_audio)
                    + torch.std(inputs_audio) * torch.randn(inputs_audio.size())
                )
            elif dist == "zeros":
                inputs_audio = torch.zeros(inputs_audio.size())
            else:
                print("UNKNOWN DIST!")
        # Rearrange (B, C, T, H, W) -> (B, T, C, H, W) and fold the temporal
        # dimension into the batch dimension, so the visual branch processes
        # one frame per sample: (B*T, C, H, W).
        inputs_visual = inputs_visual.permute(0, 2, 1, 3, 4)
        inputs_visual = inputs_visual.reshape(
            inputs_visual.shape[0] * inputs_visual.shape[1],
            inputs_visual.shape[2],
            inputs_visual.shape[3],
            inputs_visual.shape[4],
        )

        targets = targets.to(opt.device)
        with torch.no_grad():
            inputs_visual = Variable(inputs_visual)
            inputs_audio = Variable(inputs_audio)
            targets = Variable(targets)
            outputs = model(inputs_audio, inputs_visual)
            loss = criterion(outputs, targets)
            prec1, prec5 = calculate_accuracy(outputs.data, targets.data, topk=(1, 5))
            top1.update(prec1, inputs_audio.size(0))
            top5.update(prec5, inputs_audio.size(0))
            losses.update(loss.data, inputs_audio.size(0))

        batch_time.update(time.time() - end_time)
        end_time = time.time()
        print(
            "Epoch: [{0}][{1}/{2}]\t"
            "Time {batch_time.val:.5f} ({batch_time.avg:.5f})\t"
            "Data {data_time.val:.5f} ({data_time.avg:.5f})\t"
            "Loss {loss.val:.4f} ({loss.avg:.4f})\t"
            "Prec@1 {top1.val:.5f} ({top1.avg:.5f})\t"
            "Prec@5 {top5.val:.5f} ({top5.avg:.5f})".format(
                epoch,
                i + 1,
                len(data_loader),
                batch_time=batch_time,
                data_time=data_time,
                loss=losses,
                top1=top1,
                top5=top5,
            )
        )

    logger.log(
        {
            "epoch": epoch,
            "loss": losses.avg.item(),
            "prec1": top1.avg.item(),
            "prec5": top5.avg.item(),
        }
    )
    return losses.avg.item(), top1.avg.item()


def val_epoch(
    epoch, data_loader, model, criterion, opt, logger, modality="both", dist=None
):
    print("validation at epoch {}".format(epoch))
    # Only the multimodal CNN has a validation routine here; for any other
    # model this function falls through and returns None.
    if opt.model == "multimodalcnn":
        return val_epoch_multimodal(
            epoch, data_loader, model, criterion, opt, logger, modality, dist=dist
        )
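

# Usage sketch (illustrative; not part of the original file). The option names
# on `opt` and the loader/model/criterion construction are assumptions for
# demonstration; the real entry point lives in this repository's main script.
#
#   from types import SimpleNamespace
#   opt = SimpleNamespace(
#       device="cuda" if torch.cuda.is_available() else "cpu",
#       model="multimodalcnn",
#   )
#   criterion = torch.nn.CrossEntropyLoss()
#   # Evaluate audio-only performance, zeroing out the video stream:
#   loss, prec1 = val_epoch(
#       epoch, val_loader, model, criterion, opt, logger,
#       modality="audio", dist="zeros",
#   )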