Skip to content

Commit

Permalink
Add drrg (open-mmlab#189)
Browse files Browse the repository at this point in the history
* merge drrg

* directory structure&fix redundant import

* docstrings

* fix isort

* drrg readme

* merge drrg

* directory structure&fix redundant import

* docstrings

* fix isort

* drrg readme

* add unittests&fix docstrings

* revert test_loss

* add unittest

* add unittests

* fix docstrings

* fix docstrings

* fix yapf

* fix yapf

* Update test_textdet_head.py

* Update test_textdet_head.py

* add unittests

* add unittests

* add unittests

* fix docstrings

* fix docstrings

* fix docstring

* fix unittests

* fix pytest

* fix pytest

* fix pytest

* fix variable names

Co-authored-by: Hongbin Sun <[email protected]>
  • Loading branch information
HolyCrap96 and cuhk-hbsun committed May 18, 2021
1 parent ed6b3b8 commit 2414c65
Show file tree
Hide file tree
Showing 28 changed files with 2,925 additions and 21 deletions.
23 changes: 23 additions & 0 deletions configs/textdet/drrg/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# DRRG

## Introduction

[ALGORITHM]

```bibtex
@inproceedings{zhang2020drrg,
title={Deep relational reasoning graph network for arbitrary shape text detection},
author={Zhang, Shi-Xue and Zhu, Xiaobin and Hou, Jie-Bo and Liu, Chang and Yang, Chun and Wang, Hongfa and Yin, Xu-Cheng},
booktitle={CVPR},
pages={9699-9708},
year={2020}
}
```

## Results and models

### CTW1500

| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download |
| :--------------------------------------------------------------: | :--------------: | :-----------: | :----------: | :-----: | :-------: | :----: | :-------: | :---: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [DRRG](/configs/textdet/drrg/drrg_r50_fpn_unet_1200e_ctw1500.py) | ImageNet | CTW1500 Train | CTW1500 Test | 1200 | 640 | 0.822 | 0.858 | 0.840 | [model](https://download.openmmlab.com/mmocr/textdet/drrg/drrg_r50_fpn_unet_1200e_ctw1500-1abf4f67.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/drrg/20210511_234719.log) |
110 changes: 110 additions & 0 deletions configs/textdet/drrg/drrg_r50_fpn_unet_1200e_ctw1500.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# DRRG text-detection config: ResNet-50 backbone, FPN_UNet neck and DRRGHead,
# with train/val/test data read from the CTW1500 directory below.
_base_ = [
    '../../_base_/schedules/schedule_1200e.py',
    '../../_base_/default_runtime.py'
]
model = dict(
    type='DRRG',
    pretrained='torchvision://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='caffe'),
    neck=dict(
        type='FPN_UNet', in_channels=[256, 512, 1024, 2048], out_channels=32),
    # The head's in_channels equals the neck's out_channels (32).
    bbox_head=dict(
        type='DRRGHead',
        in_channels=32,
        text_region_thr=0.3,
        center_region_thr=0.4,
        link_thr=0.80,
        loss=dict(type='DRRGLoss')))
train_cfg = None
test_cfg = None

dataset_type = 'IcdarDataset'
# No trailing slash: every path below is composed as f'{data_root}/...', so a
# trailing slash here would yield doubled separators ('data/ctw1500//imgs').
data_root = 'data/ctw1500'

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='LoadTextAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='RandomScaling', size=800, scale=(0.75, 2.5)),
    dict(
        type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
    dict(
        type='RandomCropPolyInstances',
        instance_key='gt_masks',
        crop_ratio=0.8,
        min_side_ratio=0.3),
    dict(
        type='RandomRotatePolyInstances',
        rotate_ratio=0.5,
        max_angle=60,
        pad_with_fixed_color=False),
    dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
    # Target generation is placed after all of the augmentation steps above.
    dict(type='DRRGTargets'),
    dict(type='Pad', size_divisor=32),
    # The same eight target keys are formatted here and collected below
    # (Collect additionally takes 'img').
    dict(
        type='CustomFormatBundle',
        keys=[
            'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
            'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map',
            'gt_cos_map', 'gt_comp_attribs'
        ],
        visualize=dict(flag=False, boundary_key='gt_text_mask')),
    dict(
        type='Collect',
        keys=[
            'img', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
            'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map',
            'gt_cos_map', 'gt_comp_attribs'
        ])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1024, 640),
        flip=False,
        transforms=[
            dict(type='Resize', img_scale=(1024, 640), keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# val and test point at the same annotation file and use the same pipeline.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        ann_file=f'{data_root}/instances_training.json',
        img_prefix=f'{data_root}/imgs',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=f'{data_root}/instances_test.json',
        img_prefix=f'{data_root}/imgs',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=f'{data_root}/instances_test.json',
        img_prefix=f'{data_root}/imgs',
        pipeline=test_pipeline))

evaluation = dict(interval=20, metric='hmean-iou')
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
norm_eval=True,
style='caffe'),
neck=dict(
type='FPN_UNET', in_channels=[256, 512, 1024, 2048], out_channels=32),
type='FPN_UNet', in_channels=[256, 512, 1024, 2048], out_channels=32),
bbox_head=dict(
type='TextSnakeHead',
in_channels=32,
Expand Down Expand Up @@ -96,18 +96,18 @@
workers_per_gpu=4,
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
ann_file=f'{data_root}/instances_training.json',
img_prefix=f'{data_root}/imgs',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
ann_file=f'{data_root}/instances_test.json',
img_prefix=f'{data_root}/imgs',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
ann_file=f'{data_root}/instances_test.json',
img_prefix=f'{data_root}/imgs',
pipeline=test_pipeline))

evaluation = dict(interval=10, metric='hmean-iou')
3 changes: 2 additions & 1 deletion mmocr/datasets/pipelines/textdet_targets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from .base_textdet_targets import BaseTextDetTargets
from .dbnet_targets import DBNetTargets
from .drrg_targets import DRRGTargets
from .fcenet_targets import FCENetTargets
from .panet_targets import PANetTargets
from .psenet_targets import PSENetTargets
from .textsnake_targets import TextSnakeTargets

__all__ = [
'BaseTextDetTargets', 'PANetTargets', 'PSENetTargets', 'DBNetTargets',
'FCENetTargets', 'TextSnakeTargets'
'FCENetTargets', 'TextSnakeTargets', 'DRRGTargets'
]
Loading

0 comments on commit 2414c65

Please sign in to comment.