# det_mv3_east.yml -- EAST text-detection config with a MobileNetV3 backbone
  # Global run settings: device, schedule, logging cadence, and model/output paths.
  Global:
    use_gpu: true
    epoch_num: 10000
    log_smooth_window: 20
    print_batch_step: 2
    save_model_dir: ./output/east_mv3/
    save_epoch_step: 1000
    # evaluation is run every 5000 iterations after the 4000th iteration
    eval_batch_step: [4000, 5000]
    # if pretrained_model was saved in static mode, load_static_weights must be set to true
    load_static_weights: true
    cal_metric_during_train: false
    pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
    # the null entries below are intentionally left unset in this config
    checkpoints: null
    save_inference_dir: null
    use_visualdl: false
    infer_img: null
    save_res_path: ./output/det_east/predicts_east.txt
  # Model definition: a detection model using the EAST algorithm, described as
  # Backbone -> Neck -> Head components (no Transform configured).
  Architecture:
    model_type: det
    algorithm: EAST
    Transform: null
    Backbone:
      name: MobileNetV3
      scale: 0.5
      model_name: large
    Neck:
      name: EASTFPN
      model_name: small
    Head:
      name: EASTHead
      model_name: small
  # Loss selected by name; no further parameters are set here.
  Loss:
    name: EASTLoss
  # Adam optimizer with a fixed learning_rate; the Cosine schedule name and
  # warmup_epoch entries under lr are kept commented out.
  Optimizer:
    name: Adam
    beta1: 0.9
    beta2: 0.999
    lr:
      # name: Cosine
      learning_rate: 0.001
      # warmup_epoch: 0
    regularizer:
      name: 'L2'
      factor: 0
  # Post-processing step selected by name, with its three thresholds.
  PostProcess:
    name: EASTPostProcess
    score_thresh: 0.8
    cover_thresh: 0.1
    nms_thresh: 0.2
  # Evaluation metric; hmean is named as the main indicator.
  Metric:
    name: DetMetric
    main_indicator: hmean
  # Training data: ICDAR2015 text-localization label file, a transform
  # pipeline applied in list order, and the loader settings.
  Train:
    dataset:
      name: SimpleDataSet
      data_dir: ./train_data/icdar2015/text_localization/
      label_file_list:
        - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
      ratio_list: [1.0]
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: false
        - DetLabelEncode: null  # label-handling class (takes no parameters here)
        - EASTProcessTrain:
            image_shape: [512, 512]
            background_ratio: 0.125
            min_crop_side_ratio: 0.1
            min_text_size: 10
        - KeepKeys:
            # dataloader will return a list in this order
            keep_keys: ['image', 'score_map', 'geo_map', 'training_mask']
    loader:
      shuffle: true
      drop_last: false
      batch_size_per_card: 16
      num_workers: 8
  # Evaluation data: ICDAR2015 test label file, a transform pipeline applied
  # in list order, and the loader settings.
  Eval:
    dataset:
      name: SimpleDataSet
      data_dir: ./train_data/icdar2015/text_localization/
      label_file_list:
        - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: false
        - DetLabelEncode: null  # label-handling class (takes no parameters here)
        - DetResizeForTest:
            limit_side_len: 2400
            limit_type: max
        - NormalizeImage:
            # NOTE(review): "1./255." is a plain string scalar in YAML, not a
            # number; presumably the consumer evaluates it -- confirm.
            scale: 1./255.
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - ToCHWImage: null
        - KeepKeys:
            keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
    loader:
      shuffle: false
      drop_last: false
      batch_size_per_card: 1  # must be 1
      num_workers: 2