random_crop_data.py 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. # -*- coding:utf-8 -*-
  2. from __future__ import absolute_import
  3. from __future__ import division
  4. from __future__ import print_function
  5. from __future__ import unicode_literals
  6. import numpy as np
  7. import cv2
  8. import random
  9. def is_poly_in_rect(poly, x, y, w, h):
  10. poly = np.array(poly)
  11. if poly[:, 0].min() < x or poly[:, 0].max() > x + w:
  12. return False
  13. if poly[:, 1].min() < y or poly[:, 1].max() > y + h:
  14. return False
  15. return True
  16. def is_poly_outside_rect(poly, x, y, w, h):
  17. poly = np.array(poly)
  18. if poly[:, 0].max() < x or poly[:, 0].min() > x + w:
  19. return True
  20. if poly[:, 1].max() < y or poly[:, 1].min() > y + h:
  21. return True
  22. return False
  23. def split_regions(axis):
  24. regions = []
  25. min_axis = 0
  26. for i in range(1, axis.shape[0]):
  27. if axis[i] != axis[i - 1] + 1:
  28. region = axis[min_axis:i]
  29. min_axis = i
  30. regions.append(region)
  31. return regions
  32. def random_select(axis, max_size):
  33. xx = np.random.choice(axis, size=2)
  34. xmin = np.min(xx)
  35. xmax = np.max(xx)
  36. xmin = np.clip(xmin, 0, max_size - 1)
  37. xmax = np.clip(xmax, 0, max_size - 1)
  38. return xmin, xmax
  39. def region_wise_random_select(regions, max_size):
  40. selected_index = list(np.random.choice(len(regions), 2))
  41. selected_values = []
  42. for index in selected_index:
  43. axis = regions[index]
  44. xx = int(np.random.choice(axis, size=1))
  45. selected_values.append(xx)
  46. xmin = min(selected_values)
  47. xmax = max(selected_values)
  48. return xmin, xmax
  49. def crop_area(im, text_polys, min_crop_side_ratio, max_tries):
  50. h, w, _ = im.shape
  51. h_array = np.zeros(h, dtype=np.int32)
  52. w_array = np.zeros(w, dtype=np.int32)
  53. for points in text_polys:
  54. points = np.round(points, decimals=0).astype(np.int32)
  55. minx = np.min(points[:, 0])
  56. maxx = np.max(points[:, 0])
  57. w_array[minx:maxx] = 1
  58. miny = np.min(points[:, 1])
  59. maxy = np.max(points[:, 1])
  60. h_array[miny:maxy] = 1
  61. # ensure the cropped area not across a text
  62. h_axis = np.where(h_array == 0)[0]
  63. w_axis = np.where(w_array == 0)[0]
  64. if len(h_axis) == 0 or len(w_axis) == 0:
  65. return 0, 0, w, h
  66. h_regions = split_regions(h_axis)
  67. w_regions = split_regions(w_axis)
  68. for i in range(max_tries):
  69. if len(w_regions) > 1:
  70. xmin, xmax = region_wise_random_select(w_regions, w)
  71. else:
  72. xmin, xmax = random_select(w_axis, w)
  73. if len(h_regions) > 1:
  74. ymin, ymax = region_wise_random_select(h_regions, h)
  75. else:
  76. ymin, ymax = random_select(h_axis, h)
  77. if xmax - xmin < min_crop_side_ratio * w or ymax - ymin < min_crop_side_ratio * h:
  78. # area too small
  79. continue
  80. num_poly_in_rect = 0
  81. for poly in text_polys:
  82. if not is_poly_outside_rect(poly, xmin, ymin, xmax - xmin,
  83. ymax - ymin):
  84. num_poly_in_rect += 1
  85. break
  86. if num_poly_in_rect > 0:
  87. return xmin, ymin, xmax - xmin, ymax - ymin
  88. return 0, 0, w, h
  89. class EastRandomCropData(object):
  90. def __init__(self,
  91. size=(640, 640),
  92. max_tries=10,
  93. min_crop_side_ratio=0.1,
  94. keep_ratio=True,
  95. **kwargs):
  96. self.size = size
  97. self.max_tries = max_tries
  98. self.min_crop_side_ratio = min_crop_side_ratio
  99. self.keep_ratio = keep_ratio
  100. def __call__(self, data):
  101. img = data['image']
  102. text_polys = data['polys']
  103. ignore_tags = data['ignore_tags']
  104. texts = data['texts']
  105. all_care_polys = [
  106. text_polys[i] for i, tag in enumerate(ignore_tags) if not tag
  107. ]
  108. # 计算crop区域
  109. crop_x, crop_y, crop_w, crop_h = crop_area(
  110. img, all_care_polys, self.min_crop_side_ratio, self.max_tries)
  111. # crop 图片 保持比例填充
  112. scale_w = self.size[0] / crop_w
  113. scale_h = self.size[1] / crop_h
  114. scale = min(scale_w, scale_h)
  115. h = int(crop_h * scale)
  116. w = int(crop_w * scale)
  117. if self.keep_ratio:
  118. padimg = np.zeros((self.size[1], self.size[0], img.shape[2]),
  119. img.dtype)
  120. padimg[:h, :w] = cv2.resize(
  121. img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h))
  122. img = padimg
  123. else:
  124. img = cv2.resize(
  125. img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w],
  126. tuple(self.size))
  127. # crop 文本框
  128. text_polys_crop = []
  129. ignore_tags_crop = []
  130. texts_crop = []
  131. for poly, text, tag in zip(text_polys, texts, ignore_tags):
  132. poly = ((poly - (crop_x, crop_y)) * scale).tolist()
  133. if not is_poly_outside_rect(poly, 0, 0, w, h):
  134. text_polys_crop.append(poly)
  135. ignore_tags_crop.append(tag)
  136. texts_crop.append(text)
  137. data['image'] = img
  138. data['polys'] = np.array(text_polys_crop)
  139. data['ignore_tags'] = ignore_tags_crop
  140. data['texts'] = texts_crop
  141. return data
  142. class PSERandomCrop(object):
  143. def __init__(self, size, **kwargs):
  144. self.size = size
  145. def __call__(self, data):
  146. imgs = data['imgs']
  147. h, w = imgs[0].shape[0:2]
  148. th, tw = self.size
  149. if w == tw and h == th:
  150. return imgs
  151. # label中存在文本实例,并且按照概率进行裁剪,使用threshold_label_map控制
  152. if np.max(imgs[2]) > 0 and random.random() > 3 / 8:
  153. # 文本实例的左上角点
  154. tl = np.min(np.where(imgs[2] > 0), axis=1) - self.size
  155. tl[tl < 0] = 0
  156. # 文本实例的右下角点
  157. br = np.max(np.where(imgs[2] > 0), axis=1) - self.size
  158. br[br < 0] = 0
  159. # 保证选到右下角点时,有足够的距离进行crop
  160. br[0] = min(br[0], h - th)
  161. br[1] = min(br[1], w - tw)
  162. for _ in range(50000):
  163. i = random.randint(tl[0], br[0])
  164. j = random.randint(tl[1], br[1])
  165. # 保证shrink_label_map有文本
  166. if imgs[1][i:i + th, j:j + tw].sum() <= 0:
  167. continue
  168. else:
  169. break
  170. else:
  171. i = random.randint(0, h - th)
  172. j = random.randint(0, w - tw)
  173. # return i, j, th, tw
  174. for idx in range(len(imgs)):
  175. if len(imgs[idx].shape) == 3:
  176. imgs[idx] = imgs[idx][i:i + th, j:j + tw, :]
  177. else:
  178. imgs[idx] = imgs[idx][i:i + th, j:j + tw]
  179. data['imgs'] = imgs
  180. return data