Skip to content

Commit

Permalink
adding new tiny-yolo
Browse files Browse the repository at this point in the history
  • Loading branch information
pjreddie committed Sep 8, 2016
1 parent b8eb8b0 commit 6b38dcd
Show file tree
Hide file tree
Showing 17 changed files with 131 additions and 63 deletions.
48 changes: 17 additions & 31 deletions cfg/yolo-tiny.cfg → cfg/tiny-yolo.cfg
Original file line number Diff line number Diff line change
@@ -1,27 +1,24 @@
[net]
batch=64
subdivisions=64
subdivisions=2
height=448
width=448
channels=3
momentum=0.9
decay=0.0005

learning_rate=0.0001
saturation=.75
exposure=.75
hue = .1

learning_rate=0.0005
policy=steps
steps=20,40,60,80,20000,30000
scales=5,5,2,2,.1,.1
steps=200,400,600,800,20000,30000
scales=2.5,2,2,2,.1,.1
max_batches = 40000

[crop]
crop_width=448
crop_height=448
flip=0
angle=0
saturation = 1.5
exposure = 1.5

[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
Expand All @@ -33,6 +30,7 @@ size=2
stride=2

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
Expand All @@ -44,6 +42,7 @@ size=2
stride=2

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
Expand All @@ -55,6 +54,7 @@ size=2
stride=2

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
Expand All @@ -66,6 +66,7 @@ size=2
stride=2

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
Expand All @@ -77,6 +78,7 @@ size=2
stride=2

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
Expand All @@ -88,37 +90,21 @@ size=2
stride=2

[convolutional]
filters=1024
batch_normalize=1
size=3
stride=1
pad=1
activation=leaky

[convolutional]
filters=1024
size=3
stride=1
pad=1
activation=leaky

[convolutional]
filters=1024
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[connected]
output=256
activation=linear

[connected]
output=4096
activation=leaky

[dropout]
probability=.5

[connected]
output= 1470
activation=linear
Expand Down
2 changes: 1 addition & 1 deletion src/activation_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ __device__ float logistic_activate_kernel(float x){return 1./(1. + exp(-x));}
/* Device-side "loggy" activation: returns 2/(1 + e^(-x)) - 1. */
__device__ float loggy_activate_kernel(float x)
{
    return 2./(1. + exp(-x)) - 1;
}
/* Device-side ReLU: x multiplied by the 0/1 result of (x > 0). */
__device__ float relu_activate_kernel(float x)
{
    const int is_positive = (x > 0);
    return x*is_positive;
}
/*
 * Device-side ELU: x for x >= 0, e^x - 1 otherwise.
 *
 * Written as a branch rather than as a sum of masked terms: the masked form
 * (x >= 0)*x + (x < 0)*(exp(x)-1) evaluates 0*inf once exp(x) overflows,
 * which turns large positive inputs into NaN.
 */
__device__ float elu_activate_kernel(float x)
{
    return (x >= 0) ? x : exp(x) - 1;
}
__device__ float relie_activate_kernel(float x){return x*(x>0);}
/* Device-side "relie" activation: identity for x > 0, 0.01*x otherwise. */
__device__ float relie_activate_kernel(float x)
{
    if (x > 0) return x;
    return .01*x;
}
/* Device-side ramp activation: the rectified part x*(x > 0) plus 0.1*x. */
__device__ float ramp_activate_kernel(float x)
{
    return x*(x > 0)
         + .1*x;
}
/* Device-side leaky ReLU: identity for x > 0, 0.1*x otherwise. */
__device__ float leaky_activate_kernel(float x)
{
    if (x > 0) return x;
    return .1*x;
}
/* Device-side tanh, computed as 2/(1 + e^(-2x)) - 1. */
__device__ float tanh_activate_kernel(float x)
{
    return 2/(1 + exp(-2*x)) - 1;
}
Expand Down
2 changes: 1 addition & 1 deletion src/activations.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ static inline float logistic_activate(float x){return 1./(1. + exp(-x));}
static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;}
/* ReLU: x multiplied by the 0/1 result of (x > 0). */
static inline float relu_activate(float x)
{
    const int is_positive = (x > 0);
    return x*is_positive;
}
static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
static inline float relie_activate(float x){return x*(x>0);}
/* "Relie" activation: identity for x > 0, 0.01*x otherwise. */
static inline float relie_activate(float x)
{
    if (x > 0) return x;
    return .01*x;
}
/* Ramp activation: the rectified part x*(x > 0) plus 0.1*x. */
static inline float ramp_activate(float x)
{
    return x*(x > 0)
         + .1*x;
}
/* Leaky ReLU: identity for x > 0, 0.1*x otherwise. */
static inline float leaky_activate(float x)
{
    if (x > 0) return x;
    return .1*x;
}
static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
Expand Down
1 change: 1 addition & 0 deletions src/classifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
args.min = net.min_crop;
args.max = net.max_crop;
args.angle = net.angle;
args.aspect = net.aspect;
args.exposure = net.exposure;
args.saturation = net.saturation;
args.hue = net.hue;
Expand Down
19 changes: 18 additions & 1 deletion src/connected_layer.c
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ void denormalize_connected_layer(layer l)
{
int i, j;
for(i = 0; i < l.outputs; ++i){
float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001);
float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .000001);
for(j = 0; j < l.inputs; ++j){
l.weights[i*l.inputs + j] *= scale;
}
Expand All @@ -198,6 +198,23 @@ void denormalize_connected_layer(layer l)
}
}


/*
 * Print summary statistics for the parameters of a connected layer.
 *
 * Each parameter array is preceded by a label on stdout and then passed to
 * print_statistics(). The batch-norm arrays (scales, rolling mean, rolling
 * variance) are reported only when l.batch_normalize is set.
 */
void statistics_connected_layer(layer l)
{
    if(l.batch_normalize){
        printf("Scales ");
        print_statistics(l.scales, l.outputs);
        printf("Rolling Mean ");
        print_statistics(l.rolling_mean, l.outputs);
        printf("Rolling Variance ");
        print_statistics(l.rolling_variance, l.outputs);
    }
    printf("Biases ");
    print_statistics(l.biases, l.outputs);
    printf("Weights ");
    /*
     * The weights are indexed as weights[i*l.inputs + j] (see
     * denormalize_connected_layer), i.e. there are outputs*inputs of them.
     * Passing only l.outputs would summarize just the first few weights.
     */
    print_statistics(l.weights, l.outputs*l.inputs);
}

#ifdef GPU

void pull_connected_layer(connected_layer l)
Expand Down
1 change: 1 addition & 0 deletions src/connected_layer.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ void forward_connected_layer(connected_layer layer, network_state state);
void backward_connected_layer(connected_layer layer, network_state state);
void update_connected_layer(connected_layer layer, int batch, float learning_rate, float momentum, float decay);
void denormalize_connected_layer(layer l);
void statistics_connected_layer(layer l);

#ifdef GPU
void forward_connected_layer_gpu(connected_layer layer, network_state state);
Expand Down
35 changes: 35 additions & 0 deletions src/darknet.c
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,39 @@ void normalize_net(char *cfgfile, char *weightfile, char *outfile)
save_weights(net, outfile);
}

/*
 * Parse a network from cfgfile, optionally load weightfile into it, and print
 * parameter statistics for its batch-normalized layers.
 *
 * gpu_index is set to -1 before the network is parsed. Only CONNECTED and GRU
 * layers with batch_normalize set are reported; for a GRU layer each of its
 * six internal connected layers is reported in turn. A blank line is printed
 * after every layer, whether or not it was reported.
 *
 * weightfile may be NULL, in which case no weights are loaded.
 */
void statistics_net(char *cfgfile, char *weightfile)
{
    gpu_index = -1;
    network net = parse_network_cfg(cfgfile);
    if(weightfile) load_weights(&net, weightfile);

    int idx = 0;
    while(idx < net.n){
        layer cur = net.layers[idx];
        if(cur.batch_normalize){
            switch(cur.type){
                case CONNECTED:
                    printf("Connected Layer %d\n", idx);
                    statistics_connected_layer(cur);
                    break;
                case GRU:
                    printf("GRU Layer %d\n", idx);
                    printf("Input Z\n");
                    statistics_connected_layer(*cur.input_z_layer);
                    printf("Input R\n");
                    statistics_connected_layer(*cur.input_r_layer);
                    printf("Input H\n");
                    statistics_connected_layer(*cur.input_h_layer);
                    printf("State Z\n");
                    statistics_connected_layer(*cur.state_z_layer);
                    printf("State R\n");
                    statistics_connected_layer(*cur.state_r_layer);
                    printf("State H\n");
                    statistics_connected_layer(*cur.state_h_layer);
                    break;
                default:
                    break;
            }
        }
        printf("\n");
        ++idx;
    }
}

void denormalize_net(char *cfgfile, char *weightfile, char *outfile)
{
gpu_index = -1;
Expand Down Expand Up @@ -374,6 +407,8 @@ int main(int argc, char **argv)
reset_normalize_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "denormalize")){
denormalize_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "statistics")){
statistics_net(argv[2], argv[3]);
} else if (0 == strcmp(argv[1], "normalize")){
normalize_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "rescale")){
Expand Down
23 changes: 12 additions & 11 deletions src/data.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ matrix load_image_paths(char **paths, int n, int w, int h)
return X;
}

matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure)
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
{
int i;
matrix X;
Expand All @@ -110,7 +110,7 @@ matrix load_image_augment_paths(char **paths, int n, int min, int max, int size,

for(i = 0; i < n; ++i){
image im = load_image_color(paths[i], 0, 0);
image crop = random_augment_image(im, angle, min, max, size);
image crop = random_augment_image(im, angle, aspect, min, max, size);
int flip = rand_r(&data_seed)%2;
if (flip) flip_image(crop);
random_distort_image(crop, hue, saturation, exposure);
Expand Down Expand Up @@ -676,15 +676,16 @@ void *load_thread(void *ptr)
load_args a = *(struct load_args*)ptr;
if(a.exposure == 0) a.exposure = 1;
if(a.saturation == 0) a.saturation = 1;
if(a.aspect == 0) a.aspect = 1;

if (a.type == OLD_CLASSIFICATION_DATA){
*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
} else if (a.type == CLASSIFICATION_DATA){
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
} else if (a.type == SUPER_DATA){
*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
} else if (a.type == STUDY_DATA){
*a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
*a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
} else if (a.type == WRITING_DATA){
*a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
} else if (a.type == REGION_DATA){
Expand All @@ -699,7 +700,7 @@ void *load_thread(void *ptr)
*(a.im) = load_image_color(a.path, 0, 0);
*(a.resized) = resize_image(*(a.im), a.w, a.h);
} else if (a.type == TAG_DATA){
*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
//*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
}
free(ptr);
Expand Down Expand Up @@ -741,13 +742,13 @@ data load_data(char **paths, int n, int m, char **labels, int k, int w, int h)
return d;
}

data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
{
data d = {0};
d.indexes = calloc(n, sizeof(int));
if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes);
d.shallow = 0;
d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
d.y = load_labels_paths(paths, n, labels, k);
if(m) free(paths);
return d;
Expand Down Expand Up @@ -783,25 +784,25 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
return d;
}

data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
{
if(m) paths = get_random_paths(paths, n, m);
data d = {0};
d.shallow = 0;
d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
d.y = load_labels_paths(paths, n, labels, k);
if(m) free(paths);
return d;
}

data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
{
if(m) paths = get_random_paths(paths, n, m);
data d = {0};
d.w = size;
d.h = size;
d.shallow = 0;
d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
d.y = load_tags_paths(paths, n, k);
if(m) free(paths);
return d;
Expand Down
9 changes: 5 additions & 4 deletions src/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ typedef struct load_args{
int scale;
float jitter;
float angle;
float aspect;
float saturation;
float exposure;
float hue;
Expand All @@ -76,11 +77,11 @@ data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
data load_data(char **paths, int n, int m, char **labels, int k, int w, int h);
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure);
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure);
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
data load_go(char *filename);

box_label *read_boxes(char *filename, int *n);
Expand Down
7 changes: 7 additions & 0 deletions src/detector.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ static void convert_detections(float *predictions, int classes, int num, int squ
int box_index = index * (classes + 5);
boxes[index].x = (predictions[box_index + 0] + col + .5) / side * w;
boxes[index].y = (predictions[box_index + 1] + row + .5) / side * h;
if(1){
boxes[index].x = (logistic_activate(predictions[box_index + 0]) + col) / side * w;
boxes[index].y = (logistic_activate(predictions[box_index + 1]) + row) / side * h;
}
boxes[index].w = pow(logistic_activate(predictions[box_index + 2]), (square?2:1)) * w;
boxes[index].h = pow(logistic_activate(predictions[box_index + 3]), (square?2:1)) * h;
for(j = 0; j < classes; ++j){
Expand Down Expand Up @@ -237,6 +241,9 @@ void validate_detector(char *cfgfile, char *weightfile)
free_image(val_resized[t]);
}
}
for(j = 0; j < classes; ++j){
fclose(fps[j]);
}
fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start));
}

Expand Down
Loading

0 comments on commit 6b38dcd

Please sign in to comment.