Skip to content

Commit

Permalink
Balancing between obj and noobj part of confidence loss. Computation …
Browse files Browse the repository at this point in the history
…of mAP by class-wise average of APs.
  • Loading branch information
eriklindernoren committed Oct 1, 2018
1 parent 959e0ff commit 38e2ecc
Show file tree
Hide file tree
Showing 4 changed files with 370 additions and 238 deletions.
233 changes: 135 additions & 98 deletions models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,109 +15,120 @@
import matplotlib.pyplot as plt
import matplotlib.patches as patches


def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs.

    The first entry of ``module_defs`` is the hyperparameter block. It is popped
    off the list in place, so after the call ``module_defs`` holds only the layer
    definitions, one per entry of the returned module list.

    :param module_defs: list of dicts; entry 0 holds the hyperparameters
        (``channels`` is always read, ``height`` only when a ``yolo`` layer is
        present), every following entry describes one layer and has a ``type`` key
    :return: tuple ``(hyperparams, module_list)`` where ``module_list`` is an
        ``nn.ModuleList`` with one ``nn.Sequential`` per layer definition
    """
    hyperparams = module_defs.pop(0)
    # output_filters[0] is the input channel count; output_filters[k + 1] is the
    # channel count produced by layer k.
    output_filters = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()

    def filters_of(layer_idx):
        """Channel count of the layer a route/shortcut index points at."""
        # Negative indices are relative to the current layer and line up with
        # output_filters as-is. Non-negative indices are absolute layer numbers
        # and must skip the leading input-channel entry.
        if layer_idx >= 0:
            return output_filters[layer_idx + 1]
        return output_filters[layer_idx]

    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()
        layer_type = module_def["type"]
        # Layers that do not set a channel count themselves (upsample, yolo)
        # pass through the channel count of the previous layer.
        filters = output_filters[-1]

        if layer_type == "convolutional":
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2 if int(module_def["pad"]) else 0
            modules.add_module(
                "conv_%d" % i,
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    # The batch-norm layer supplies the bias term when present
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module("batch_norm_%d" % i, nn.BatchNorm2d(filters))
            if module_def["activation"] == "leaky":
                modules.add_module("leaky_%d" % i, nn.LeakyReLU(0.1))

        elif layer_type == "upsample":
            upsample = nn.Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
            modules.add_module("upsample_%d" % i, upsample)

        elif layer_type == "route":
            # Route concatenates the referenced layers along the channel axis,
            # so its channel count is the sum of theirs.
            layers = [int(x) for x in module_def["layers"].split(",")]
            filters = sum(filters_of(layer_i) for layer_i in layers)
            modules.add_module("route_%d" % i, EmptyLayer())

        elif layer_type == "shortcut":
            filters = filters_of(int(module_def["from"]))
            modules.add_module("shortcut_%d" % i, EmptyLayer())

        elif layer_type == "yolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors: flat "w0,h0,w1,h1,..." list -> (w, h) pairs,
            # then keep only the pairs selected by the mask
            flat = [int(x) for x in module_def["anchors"].split(",")]
            pairs = [(flat[k], flat[k + 1]) for k in range(0, len(flat), 2)]
            anchors = [pairs[k] for k in anchor_idxs]
            num_classes = int(module_def["classes"])
            img_height = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_height)
            modules.add_module("yolo_%d" % i, yolo_layer)

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list


class EmptyLayer(nn.Module):
    """Placeholder module registered for 'route' and 'shortcut' layers.

    It holds no parameters and defines no computation of its own.
    """

    def __init__(self):
        super().__init__()


class YOLOLayer(nn.Module):
"""Detection layer"""

def __init__(self, anchors, num_classes, img_dim):
super(YOLOLayer, self).__init__()
self.anchors = anchors
self.num_anchors = len(anchors)
self.num_classes = num_classes
self.bbox_attrs = 5 + num_classes
self.img_dim = img_dim
self.image_dim = img_dim
self.ignore_thres = 0.5
self.lambda_coord = 1

self.mse_loss = nn.MSELoss()
self.bce_loss = nn.BCELoss()
self.mse_loss = nn.MSELoss(size_average=True) # Coordinate loss
self.bce_loss = nn.BCELoss(size_average=True) # Confidence loss
self.ce_loss = nn.CrossEntropyLoss() # Class loss

def forward(self, x, targets=None):
bs = x.size(0)
g_dim = x.size(2)
stride = self.img_dim / g_dim
nA = self.num_anchors
nB = x.size(0)
nG = x.size(2)
stride = self.image_dim / nG

# Tensors for cuda support
FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

prediction = x.view(bs, self.num_anchors, self.bbox_attrs, g_dim, g_dim).permute(0, 1, 3, 4, 2).contiguous()
prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()

# Get outputs
x = torch.sigmoid(prediction[..., 0]) # Center x
y = torch.sigmoid(prediction[..., 1]) # Center y
w = prediction[..., 2] # Width
h = prediction[..., 3] # Height
conf = torch.sigmoid(prediction[..., 4]) # Conf
x = torch.sigmoid(prediction[..., 0]) # Center x
y = torch.sigmoid(prediction[..., 1]) # Center y
w = prediction[..., 2] # Width
h = prediction[..., 3] # Height
pred_conf = torch.sigmoid(prediction[..., 4]) # Conf
pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.

# Calculate offsets for each grid
grid_x = torch.linspace(0, g_dim-1, g_dim).repeat(g_dim,1).repeat(bs*self.num_anchors, 1, 1).view(x.shape).type(FloatTensor)
grid_y = torch.linspace(0, g_dim-1, g_dim).repeat(g_dim,1).t().repeat(bs*self.num_anchors, 1, 1).view(y.shape).type(FloatTensor)
scaled_anchors = [(a_w / stride, a_h / stride) for a_w, a_h in self.anchors]
anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0]))
anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1]))
anchor_w = anchor_w.repeat(bs, 1).repeat(1, 1, g_dim*g_dim).view(w.shape)
anchor_h = anchor_h.repeat(bs, 1).repeat(1, 1, g_dim*g_dim).view(h.shape)
grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)
grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor)
scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])
anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))

# Add offset and scale with anchors
pred_boxes = FloatTensor(prediction[..., :4].shape)
Expand All @@ -133,74 +144,100 @@ def forward(self, x, targets=None):
self.mse_loss = self.mse_loss.cuda()
self.bce_loss = self.bce_loss.cuda()

nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(pred_boxes.cpu().data,
targets.cpu().data,
scaled_anchors,
self.num_anchors,
self.num_classes,
g_dim,
self.ignore_thres,
self.img_dim)

nProposals = int((conf > 0.25).sum().item())
nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
pred_boxes=pred_boxes.cpu().data,
pred_conf=pred_conf.cpu().data,
pred_cls=pred_cls.cpu().data,
target=targets.cpu().data,
anchors=scaled_anchors.cpu().data,
num_anchors=nA,
num_classes=self.num_classes,
grid_size=nG,
ignore_thres=self.ignore_thres,
img_dim=self.image_dim,
)

nProposals = int((pred_conf > 0.25).sum().item())
recall = float(nCorrect / nGT) if nGT else 1

# Handle masks
mask = Variable(mask.type(FloatTensor))
cls_mask = Variable(mask.unsqueeze(-1).repeat(1, 1, 1, 1, self.num_classes).type(FloatTensor))
conf_mask = Variable(conf_mask.type(FloatTensor))
mask = Variable(mask.type(ByteTensor))
conf_mask = Variable(conf_mask.type(ByteTensor))

# Handle target variables
tx = Variable(tx.type(FloatTensor), requires_grad=False)
ty = Variable(ty.type(FloatTensor), requires_grad=False)
tw = Variable(tw.type(FloatTensor), requires_grad=False)
th = Variable(th.type(FloatTensor), requires_grad=False)
tx = Variable(tx.type(FloatTensor), requires_grad=False)
ty = Variable(ty.type(FloatTensor), requires_grad=False)
tw = Variable(tw.type(FloatTensor), requires_grad=False)
th = Variable(th.type(FloatTensor), requires_grad=False)
tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
tcls = Variable(tcls.type(FloatTensor), requires_grad=False)
tcls = Variable(tcls.type(LongTensor), requires_grad=False)

# Get conf mask where gt and where there is no gt
conf_mask_true = mask
conf_mask_false = conf_mask - mask

# Mask outputs to ignore non-existing objects
loss_x = self.lambda_coord * self.bce_loss(x * mask, tx * mask)
loss_y = self.lambda_coord * self.bce_loss(y * mask, ty * mask)
loss_w = self.lambda_coord * self.mse_loss(w * mask, tw * mask) / 2
loss_h = self.lambda_coord * self.mse_loss(h * mask, th * mask) / 2
loss_conf = self.bce_loss(conf * conf_mask, tconf * conf_mask)
loss_cls = self.bce_loss(pred_cls * cls_mask, tcls * cls_mask)
loss_x = self.mse_loss(x[mask], tx[mask])
loss_y = self.mse_loss(y[mask], ty[mask])
loss_w = self.mse_loss(w[mask], tw[mask])
loss_h = self.mse_loss(h[mask], th[mask])
loss_conf = (1 / conf_mask_false.sum().float()) * self.bce_loss(
pred_conf[conf_mask_false], tconf[conf_mask_false]
) + (1 / conf_mask_true.sum().float()) * self.bce_loss(pred_conf[conf_mask_true], tconf[conf_mask_true])
loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))
loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

return loss, loss_x.item(), loss_y.item(), loss_w.item(), loss_h.item(), loss_conf.item(), loss_cls.item(), recall
return (
loss,
loss_x.item(),
loss_y.item(),
loss_w.item(),
loss_h.item(),
loss_conf.item(),
loss_cls.item(),
recall,
)

else:
# If not in training phase return predictions
output = torch.cat((pred_boxes.view(bs, -1, 4) * stride, conf.view(bs, -1, 1), pred_cls.view(bs, -1, self.num_classes)), -1)
return output.data
output = torch.cat(
(
pred_boxes.view(nB, -1, 4) * stride,
pred_conf.view(nB, -1, 1),
pred_cls.view(nB, -1, self.num_classes),
),
-1,
)
return output


class Darknet(nn.Module):
"""YOLOv3 object detection model"""

def __init__(self, config_path, img_size=416):
    """Build the network from a model configuration file.

    :param config_path: path handed to parse_model_config to obtain the module definitions
    :param img_size: stored on the instance as ``img_size``
    """
    super(Darknet, self).__init__()
    self.module_defs = parse_model_config(config_path)
    # create_modules pops the hyperparameter block off self.module_defs, so the
    # remaining definitions line up one-to-one with self.module_list.
    self.hyperparams, self.module_list = create_modules(self.module_defs)
    self.img_size = img_size
    self.seen = 0
    # The weights-file header is read as five int32 values in load_weights and
    # written back with tofile in save_weights; keep the same dtype here so a
    # model that never loaded weights still writes a header of the same width.
    self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32)
    self.loss_names = ["x", "y", "w", "h", "conf", "cls", "recall"]

def forward(self, x, targets=None):
is_training = targets is not None
output = []
self.losses = defaultdict(float)
layer_outputs = []
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
if module_def['type'] in ['convolutional', 'upsample']:
if module_def["type"] in ["convolutional", "upsample"]:
x = module(x)
elif module_def['type'] == 'route':
layer_i = [int(x) for x in module_def['layers'].split(',')]
elif module_def["type"] == "route":
layer_i = [int(x) for x in module_def["layers"].split(",")]
x = torch.cat([layer_outputs[i] for i in layer_i], 1)
elif module_def['type'] == 'shortcut':
layer_i = int(module_def['from'])
elif module_def["type"] == "shortcut":
layer_i = int(module_def["from"])
x = layer_outputs[-1] + layer_outputs[layer_i]
elif module_def['type'] == 'yolo':
elif module_def["type"] == "yolo":
# Train phase: get loss
if is_training:
x, *losses = module[0](x, targets)
Expand All @@ -212,76 +249,76 @@ def forward(self, x, targets=None):
output.append(x)
layer_outputs.append(x)

self.losses['recall'] /= 3
self.losses["recall"] /= 3
return sum(output) if is_training else torch.cat(output, 1)


def load_weights(self, weights_path):
"""Parses and loads the weights stored in 'weights_path'"""

#Open the weights file
# Open the weights file
fp = open(weights_path, "rb")
header = np.fromfile(fp, dtype=np.int32, count=5) # First five are header values
header = np.fromfile(fp, dtype=np.int32, count=5) # First five are header values

# Needed to write header when saving weights
self.header_info = header

self.seen = header[3]
weights = np.fromfile(fp, dtype=np.float32) # The rest are weights
weights = np.fromfile(fp, dtype=np.float32) # The rest are weights
fp.close()

ptr = 0
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
if module_def['type'] == 'convolutional':
if module_def["type"] == "convolutional":
conv_layer = module[0]
if module_def['batch_normalize']:
if module_def["batch_normalize"]:
# Load BN bias, weights, running mean and running variance
bn_layer = module[1]
num_b = bn_layer.bias.numel() # Number of biases
num_b = bn_layer.bias.numel() # Number of biases
# Bias
bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias)
bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias)
bn_layer.bias.data.copy_(bn_b)
ptr += num_b
# Weight
bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight)
bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight)
bn_layer.weight.data.copy_(bn_w)
ptr += num_b
# Running Mean
bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean)
bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean)
bn_layer.running_mean.data.copy_(bn_rm)
ptr += num_b
# Running Var
bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var)
bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var)
bn_layer.running_var.data.copy_(bn_rv)
ptr += num_b
else:
# Load conv. bias
num_b = conv_layer.bias.numel()
conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias)
conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias)
conv_layer.bias.data.copy_(conv_b)
ptr += num_b
# Load conv. weights
num_w = conv_layer.weight.numel()
conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight)
conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight)
conv_layer.weight.data.copy_(conv_w)
ptr += num_w

"""
@:param path - path of the new weights file
@:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)
"""

def save_weights(self, path, cutoff=-1):

fp = open(path, 'wb')
fp = open(path, "wb")
self.header_info[3] = self.seen
self.header_info.tofile(fp)

# Iterate through layers
for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
if module_def['type'] == 'convolutional':
if module_def["type"] == "convolutional":
conv_layer = module[0]
# If batch norm, load bn first
if module_def['batch_normalize']:
if module_def["batch_normalize"]:
bn_layer = module[1]
bn_layer.bias.data.cpu().numpy().tofile(fp)
bn_layer.weight.data.cpu().numpy().tofile(fp)
Expand Down
Loading

0 comments on commit 38e2ecc

Please sign in to comment.