# det_r50_vd_sast_totaltext.yml
  # Run-wide settings: device, training length, logging cadence, and the
  # paths used for pretrained weights, checkpoints, and outputs.
  Global:
    use_gpu: true
    epoch_num: 5000
    log_smooth_window: 20
    print_batch_step: 2
    save_model_dir: ./output/sast_r50_vd_tt/
    save_epoch_step: 1000
    # evaluation is run every 5000 iterations after the 4000th iteration
    eval_batch_step: [4000, 5000]
    # if pretrained_model was saved in static mode, load_static_weights must
    # be set to true
    load_static_weights: true
    cal_metric_during_train: false
    pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
    # The next keys were left empty in the original; an empty value loads as
    # null, so it is written out explicitly here.
    checkpoints: null
    save_inference_dir: null
    use_visualdl: false
    infer_img: null
    save_res_path: ./output/sast_r50_vd_tt/predicts_sast.txt
  # Model definition: a detection ("det") model using the SAST algorithm,
  # assembled from a backbone, a neck, and a head.
  Architecture:
    model_type: det
    algorithm: SAST
    # Empty in the original (loads as null); written explicitly.
    Transform: null
    Backbone:
      name: ResNet_SAST
      layers: 50
    Neck:
      name: SASTFPN
      with_cab: true
    Head:
      name: SASTHead
  # Training loss selection.
  Loss:
    name: SASTLoss
  # Optimizer settings: Adam with a fixed learning rate; the schedule name
  # and warmup entries under `lr` are commented out in the original.
  Optimizer:
    name: Adam
    beta1: 0.9
    beta2: 0.999
    lr:
      # name: Cosine
      learning_rate: 0.001
      # warmup_epoch: 0
    regularizer:
      name: 'L2'
      factor: 0
  # Post-processing applied to the network output (thresholds and geometry
  # parameters for SASTPostProcess).
  PostProcess:
    name: SASTPostProcess
    score_thresh: 0.5
    sample_pts_num: 6
    nms_thresh: 0.2
    expand_scale: 1.2
    shrink_ratio_of_width: 0.2
  # Evaluation metric; `hmean` is the indicator designated as the main one.
  Metric:
    name: DetMetric
    main_indicator: hmean
  # Training data pipeline: dataset sources, the ordered list of transforms
  # applied to each sample, and data-loader settings.
  Train:
    dataset:
      name: SimpleDataSet
      data_dir: ./train_data/
      label_file_list:
        - ./train_data/art_latin_icdar_14pt/train_no_tt_test/train_label_json.txt
        - ./train_data/total_text_icdar_14pt/train_label_json.txt
      # NOTE(review): presumably one sampling ratio per label_file_list
      # entry, in the same order - confirm against the dataset loader.
      ratio_list: [0.5, 0.5]
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: false
        - DetLabelEncode: null  # Class handling label
        - SASTProcessTrain:
            image_shape: [512, 512]
            min_crop_side_ratio: 0.3
            min_crop_size: 24
            min_text_size: 4
            max_text_size: 512
        - KeepKeys:
            # dataloader will return list in this order
            keep_keys:
              - 'image'
              - 'score_map'
              - 'border_map'
              - 'training_mask'
              - 'tvo_map'
              - 'tco_map'
    loader:
      shuffle: true
      drop_last: false
      batch_size_per_card: 4
      num_workers: 4
  # Evaluation data pipeline: dataset source, the ordered list of transforms
  # applied to each sample, and data-loader settings.
  Eval:
    dataset:
      name: SimpleDataSet
      data_dir: ./train_data/
      label_file_list:
        - ./train_data/total_text_icdar_14pt/test_label_json.txt
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: false
        - DetLabelEncode: null  # Class handling label
        - DetResizeForTest:
            resize_long: 768
        - NormalizeImage:
            # Plain scalar that is not a valid YAML number, so it loads as
            # the string "1./255.".
            # NOTE(review): presumably evaluated to a float by the
            # consumer - confirm before changing its form.
            scale: 1./255.
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - ToCHWImage: null
        - KeepKeys:
            keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
    loader:
      shuffle: false
      drop_last: false
      batch_size_per_card: 1  # must be 1
      num_workers: 2