sast_process.py 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774
  1. #copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. #Licensed under the Apache License, Version 2.0 (the "License");
  4. #you may not use this file except in compliance with the License.
  5. #You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. #Unless required by applicable law or agreed to in writing, software
  10. #distributed under the License is distributed on an "AS IS" BASIS,
  11. #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. #See the License for the specific language governing permissions and
  13. #limitations under the License.
  14. import math
  15. import cv2
  16. import numpy as np
  17. import json
  18. import sys
  19. import os
  20. __all__ = ['SASTProcessTrain']
  21. class SASTProcessTrain(object):
  22. def __init__(self,
  23. image_shape=[512, 512],
  24. min_crop_size=24,
  25. min_crop_side_ratio=0.3,
  26. min_text_size=10,
  27. max_text_size=512,
  28. **kwargs):
  29. self.input_size = image_shape[1]
  30. self.min_crop_size = min_crop_size
  31. self.min_crop_side_ratio = min_crop_side_ratio
  32. self.min_text_size = min_text_size
  33. self.max_text_size = max_text_size
  def quad_area(self, poly):
      """
      Signed area of a four-point polygon.
      :param poly: four points, each indexable as [0] (x) and [1] (y)
      :return: half the sum over the four edges of
          (x_next - x_cur) * (y_next + y_cur); the sign flips when the
          point order is reversed
      """
      terms = []
      for cur in range(4):
          nxt = (cur + 1) % 4  # wraps the last vertex back to the first
          terms.append(
              (poly[nxt][0] - poly[cur][0]) * (poly[nxt][1] + poly[cur][1]))
      return np.sum(terms) / 2.
  def gen_quad_from_poly(self, poly):
      """
      Generate min area quad from poly.
      The polygon is cast to int32 and passed to cv2.minAreaRect; the four
      box corners are then rotated so that they line up as well as possible
      with poly[0], poly[n // 2 - 1], poly[n // 2] and poly[-1]
      (n = number of points).
      :param poly: array of shape (n, 2)
      :return: float32 array of shape (4, 2)
      """
      point_num = poly.shape[0]
      # rect is (center (x, y), (width, height), angle of rotation)
      rect = cv2.minAreaRect(poly.astype(np.int32))
      box = np.array(cv2.boxPoints(rect))

      # Cost of starting the box at each of its four corners: summed distance
      # between the rotated corners and the four reference polygon points.
      costs = [
          np.linalg.norm(box[(start + 0) % 4] - poly[0]) +
          np.linalg.norm(box[(start + 1) % 4] - poly[point_num // 2 - 1]) +
          np.linalg.norm(box[(start + 2) % 4] - poly[point_num // 2]) +
          np.linalg.norm(box[(start + 3) % 4] - poly[-1])
          for start in range(4)
      ]
      # argmin returns the first minimum, like the former strict "<" scan,
      # but without the fixed 1e4 ceiling that made the scan fall back to
      # corner 0 whenever every cost was at or above that value.
      first_point_idx = int(np.argmin(costs))

      order = [(first_point_idx + k) % 4 for k in range(4)]
      return box[order].astype(np.float32)
  69. def check_and_validate_polys(self, polys, tags, xxx_todo_changeme):
  70. """
  71. check so that the text poly is in the same direction,
  72. and also filter some invalid polygons
  73. :param polys:
  74. :param tags:
  75. :return:
  76. """
  77. (h, w) = xxx_todo_changeme
  78. if polys.shape[0] == 0:
  79. return polys, np.array([]), np.array([])
  80. polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1)
  81. polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1)
  82. validated_polys = []
  83. validated_tags = []
  84. hv_tags = []
  85. for poly, tag in zip(polys, tags):
  86. quad = self.gen_quad_from_poly(poly)
  87. p_area = self.quad_area(quad)
  88. if abs(p_area) < 1:
  89. print('invalid poly')
  90. continue
  91. if p_area > 0:
  92. if tag == False:
  93. print('poly in wrong direction')
  94. tag = True # reversed cases should be ignore
  95. poly = poly[(0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2,
  96. 1), :]
  97. quad = quad[(0, 3, 2, 1), :]
  98. len_w = np.linalg.norm(quad[0] - quad[1]) + np.linalg.norm(quad[3] -
  99. quad[2])
  100. len_h = np.linalg.norm(quad[0] - quad[3]) + np.linalg.norm(quad[1] -
  101. quad[2])
  102. hv_tag = 1
  103. if len_w * 2.0 < len_h:
  104. hv_tag = 0
  105. validated_polys.append(poly)
  106. validated_tags.append(tag)
  107. hv_tags.append(hv_tag)
  108. return np.array(validated_polys), np.array(validated_tags), np.array(
  109. hv_tags)
  110. def crop_area(self,
  111. im,
  112. polys,
  113. tags,
  114. hv_tags,
  115. crop_background=False,
  116. max_tries=25):
  117. """
  118. make random crop from the input image
  119. :param im:
  120. :param polys:
  121. :param tags:
  122. :param crop_background:
  123. :param max_tries: 50 -> 25
  124. :return:
  125. """
  126. h, w, _ = im.shape
  127. pad_h = h // 10
  128. pad_w = w // 10
  129. h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
  130. w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
  131. for poly in polys:
  132. poly = np.round(poly, decimals=0).astype(np.int32)
  133. minx = np.min(poly[:, 0])
  134. maxx = np.max(poly[:, 0])
  135. w_array[minx + pad_w:maxx + pad_w] = 1
  136. miny = np.min(poly[:, 1])
  137. maxy = np.max(poly[:, 1])
  138. h_array[miny + pad_h:maxy + pad_h] = 1
  139. # ensure the cropped area not across a text
  140. h_axis = np.where(h_array == 0)[0]
  141. w_axis = np.where(w_array == 0)[0]
  142. if len(h_axis) == 0 or len(w_axis) == 0:
  143. return im, polys, tags, hv_tags
  144. for i in range(max_tries):
  145. xx = np.random.choice(w_axis, size=2)
  146. xmin = np.min(xx) - pad_w
  147. xmax = np.max(xx) - pad_w
  148. xmin = np.clip(xmin, 0, w - 1)
  149. xmax = np.clip(xmax, 0, w - 1)
  150. yy = np.random.choice(h_axis, size=2)
  151. ymin = np.min(yy) - pad_h
  152. ymax = np.max(yy) - pad_h
  153. ymin = np.clip(ymin, 0, h - 1)
  154. ymax = np.clip(ymax, 0, h - 1)
  155. # if xmax - xmin < ARGS.min_crop_side_ratio * w or \
  156. # ymax - ymin < ARGS.min_crop_side_ratio * h:
  157. if xmax - xmin < self.min_crop_size or \
  158. ymax - ymin < self.min_crop_size:
  159. # area too small
  160. continue
  161. if polys.shape[0] != 0:
  162. poly_axis_in_area = (polys[:, :, 0] >= xmin) & (polys[:, :, 0] <= xmax) \
  163. & (polys[:, :, 1] >= ymin) & (polys[:, :, 1] <= ymax)
  164. selected_polys = np.where(
  165. np.sum(poly_axis_in_area, axis=1) == 4)[0]
  166. else:
  167. selected_polys = []
  168. if len(selected_polys) == 0:
  169. # no text in this area
  170. if crop_background:
  171. return im[ymin : ymax + 1, xmin : xmax + 1, :], \
  172. polys[selected_polys], tags[selected_polys], hv_tags[selected_polys]
  173. else:
  174. continue
  175. im = im[ymin:ymax + 1, xmin:xmax + 1, :]
  176. polys = polys[selected_polys]
  177. tags = tags[selected_polys]
  178. hv_tags = hv_tags[selected_polys]
  179. polys[:, :, 0] -= xmin
  180. polys[:, :, 1] -= ymin
  181. return im, polys, tags, hv_tags
  182. return im, polys, tags, hv_tags
  def generate_direction_map(self, poly_quads, direction_map):
      """
      Paint one three-value label per quad into direction_map.
      The label is (dx, dy, 1 / average height): (dx, dy) is the vector from
      the midpoint of edge 0-3 to the midpoint of edge 1-2, rescaled to the
      average quad width; both averages are floored at 1.0.
      :param poly_quads: sequence of (4, 2) arrays
      :param direction_map: image array drawn into with cv2.fillPoly
      :return: the same direction_map object
      """
      # (width, height) of every quad, each the mean of two opposite edges.
      sizes = []
      for item in poly_quads:
          across = (np.linalg.norm(item[0] - item[1]) +
                    np.linalg.norm(item[2] - item[3])) / 2.0
          along = (np.linalg.norm(item[0] - item[3]) +
                   np.linalg.norm(item[2] - item[1])) / 2.0
          sizes.append((across, along))

      # The epsilon keeps the division defined for an empty input.
      count = len(sizes) + 1e-6
      norm_width = max(sum(s[0] for s in sizes) / count, 1.0)
      average_height = max(sum(s[1] for s in sizes) / count, 1.0)
      inverse_height = float(1.0 / (average_height + 1e-6))

      for quad in poly_quads:
          full_vector = ((quad[1] + quad[2]) - (quad[0] + quad[3])) / 2.0
          scaled = full_vector / (
              np.linalg.norm(full_vector) + 1e-6) * norm_width
          label = (float(scaled[0]), float(scaled[1]), inverse_height)
          outline = quad.round().astype(np.int32)[np.newaxis, :, :]
          cv2.fillPoly(direction_map, outline, label)
      return direction_map
  def calculate_average_height(self, poly_quads):
      """
      Average height of a list of quads, floored at 1.0.
      The height of one quad is the mean length of its edges 0-3 and 2-1.
      :param poly_quads: sequence of (4, 2) arrays; may be empty
      :return: max(mean height, 1.0); 1.0 for an empty input (this used to
          raise ZeroDivisionError, while generate_direction_map already
          yields 1.0 in the same situation)
      """
      height_list = [(np.linalg.norm(quad[0] - quad[3]) +
                      np.linalg.norm(quad[2] - quad[1])) / 2.0
                     for quad in poly_quads]
      if not height_list:
          return 1.0
      return max(sum(height_list) / len(height_list), 1.0)
  221. def generate_tcl_label(self,
  222. hw,
  223. polys,
  224. tags,
  225. ds_ratio,
  226. tcl_ratio=0.3,
  227. shrink_ratio_of_width=0.15):
  228. """
  229. Generate polygon.
  230. """
  231. h, w = hw
  232. h, w = int(h * ds_ratio), int(w * ds_ratio)
  233. polys = polys * ds_ratio
  234. score_map = np.zeros(
  235. (
  236. h,
  237. w, ), dtype=np.float32)
  238. tbo_map = np.zeros((h, w, 5), dtype=np.float32)
  239. training_mask = np.ones(
  240. (
  241. h,
  242. w, ), dtype=np.float32)
  243. direction_map = np.ones((h, w, 3)) * np.array([0, 0, 1]).reshape(
  244. [1, 1, 3]).astype(np.float32)
  245. for poly_idx, poly_tag in enumerate(zip(polys, tags)):
  246. poly = poly_tag[0]
  247. tag = poly_tag[1]
  248. # generate min_area_quad
  249. min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly)
  250. min_area_quad_h = 0.5 * (
  251. np.linalg.norm(min_area_quad[0] - min_area_quad[3]) +
  252. np.linalg.norm(min_area_quad[1] - min_area_quad[2]))
  253. min_area_quad_w = 0.5 * (
  254. np.linalg.norm(min_area_quad[0] - min_area_quad[1]) +
  255. np.linalg.norm(min_area_quad[2] - min_area_quad[3]))
  256. if min(min_area_quad_h, min_area_quad_w) < self.min_text_size * ds_ratio \
  257. or min(min_area_quad_h, min_area_quad_w) > self.max_text_size * ds_ratio:
  258. continue
  259. if tag:
  260. # continue
  261. cv2.fillPoly(training_mask,
  262. poly.astype(np.int32)[np.newaxis, :, :], 0.15)
  263. else:
  264. tcl_poly = self.poly2tcl(poly, tcl_ratio)
  265. tcl_quads = self.poly2quads(tcl_poly)
  266. poly_quads = self.poly2quads(poly)
  267. # stcl map
  268. stcl_quads, quad_index = self.shrink_poly_along_width(
  269. tcl_quads,
  270. shrink_ratio_of_width=shrink_ratio_of_width,
  271. expand_height_ratio=1.0 / tcl_ratio)
  272. # generate tcl map
  273. cv2.fillPoly(score_map,
  274. np.round(stcl_quads).astype(np.int32), 1.0)
  275. # generate tbo map
  276. for idx, quad in enumerate(stcl_quads):
  277. quad_mask = np.zeros((h, w), dtype=np.float32)
  278. quad_mask = cv2.fillPoly(
  279. quad_mask,
  280. np.round(quad[np.newaxis, :, :]).astype(np.int32), 1.0)
  281. tbo_map = self.gen_quad_tbo(poly_quads[quad_index[idx]],
  282. quad_mask, tbo_map)
  283. return score_map, tbo_map, training_mask
  284. def generate_tvo_and_tco(self,
  285. hw,
  286. polys,
  287. tags,
  288. tcl_ratio=0.3,
  289. ds_ratio=0.25):
  290. """
  291. Generate tcl map, tvo map and tbo map.
  292. """
  293. h, w = hw
  294. h, w = int(h * ds_ratio), int(w * ds_ratio)
  295. polys = polys * ds_ratio
  296. poly_mask = np.zeros((h, w), dtype=np.float32)
  297. tvo_map = np.ones((9, h, w), dtype=np.float32)
  298. tvo_map[0:-1:2] = np.tile(np.arange(0, w), (h, 1))
  299. tvo_map[1:-1:2] = np.tile(np.arange(0, w), (h, 1)).T
  300. poly_tv_xy_map = np.zeros((8, h, w), dtype=np.float32)
  301. # tco map
  302. tco_map = np.ones((3, h, w), dtype=np.float32)
  303. tco_map[0] = np.tile(np.arange(0, w), (h, 1))
  304. tco_map[1] = np.tile(np.arange(0, w), (h, 1)).T
  305. poly_tc_xy_map = np.zeros((2, h, w), dtype=np.float32)
  306. poly_short_edge_map = np.ones((h, w), dtype=np.float32)
  307. for poly, poly_tag in zip(polys, tags):
  308. if poly_tag == True:
  309. continue
  310. # adjust point order for vertical poly
  311. poly = self.adjust_point(poly)
  312. # generate min_area_quad
  313. min_area_quad, center_point = self.gen_min_area_quad_from_poly(poly)
  314. min_area_quad_h = 0.5 * (
  315. np.linalg.norm(min_area_quad[0] - min_area_quad[3]) +
  316. np.linalg.norm(min_area_quad[1] - min_area_quad[2]))
  317. min_area_quad_w = 0.5 * (
  318. np.linalg.norm(min_area_quad[0] - min_area_quad[1]) +
  319. np.linalg.norm(min_area_quad[2] - min_area_quad[3]))
  320. # generate tcl map and text, 128 * 128
  321. tcl_poly = self.poly2tcl(poly, tcl_ratio)
  322. # generate poly_tv_xy_map
  323. for idx in range(4):
  324. cv2.fillPoly(
  325. poly_tv_xy_map[2 * idx],
  326. np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
  327. float(min(max(min_area_quad[idx, 0], 0), w)))
  328. cv2.fillPoly(
  329. poly_tv_xy_map[2 * idx + 1],
  330. np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
  331. float(min(max(min_area_quad[idx, 1], 0), h)))
  332. # generate poly_tc_xy_map
  333. for idx in range(2):
  334. cv2.fillPoly(
  335. poly_tc_xy_map[idx],
  336. np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
  337. float(center_point[idx]))
  338. # generate poly_short_edge_map
  339. cv2.fillPoly(
  340. poly_short_edge_map,
  341. np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
  342. float(max(min(min_area_quad_h, min_area_quad_w), 1.0)))
  343. # generate poly_mask and training_mask
  344. cv2.fillPoly(poly_mask,
  345. np.round(tcl_poly[np.newaxis, :, :]).astype(np.int32),
  346. 1)
  347. tvo_map *= poly_mask
  348. tvo_map[:8] -= poly_tv_xy_map
  349. tvo_map[-1] /= poly_short_edge_map
  350. tvo_map = tvo_map.transpose((1, 2, 0))
  351. tco_map *= poly_mask
  352. tco_map[:2] -= poly_tc_xy_map
  353. tco_map[-1] /= poly_short_edge_map
  354. tco_map = tco_map.transpose((1, 2, 0))
  355. return tvo_map, tco_map
  def adjust_point(self, poly):
      """
      Rotate the point order by one position when the polygon appears to
      start on a short side.
      For 4 points: rotate when 1.5 * (edge 0-1 + edge 2-3) is shorter than
      (edge 1-2 + edge 3-0).
      For more than 4 points: rotate when the angle between the vectors
      poly[0] - poly[1] and poly[1] - poly[2] exceeds 70 degrees.
      :param poly: array of shape (point_num, 2)
      :return: poly itself when nothing changes, otherwise a new array whose
          order is 1, 2, ..., point_num - 1, 0
      """
      point_num = poly.shape[0]
      if point_num < 4:
          return poly

      if point_num == 4:
          # sides[k] is the length of the edge from point k to point k + 1.
          sides = [
              np.linalg.norm(poly[k] - poly[(k + 1) % 4]) for k in range(4)
          ]
          needs_shift = (sides[0] + sides[2]) * 1.5 < (sides[1] + sides[3])
      else:
          first = poly[0] - poly[1]
          second = poly[1] - poly[2]
          cos_theta = np.dot(first, second) / (
              np.linalg.norm(first) * np.linalg.norm(second) + 1e-6)
          # Rounding keeps the cosine inside arccos' domain.
          theta = np.arccos(np.round(cos_theta, decimals=4))
          needs_shift = abs(theta) > (70 / 180 * math.pi)

      if needs_shift:
          shifted_order = [(k + 1) % point_num for k in range(point_num)]
          poly = poly[shifted_order, :]
      return poly
  def gen_min_area_quad_from_poly(self, poly):
      """
      Generate min area quad from poly.
      A 4-point polygon is returned as-is (same object) with the mean of its
      points as center. Otherwise the polygon is cast to int32 and passed to
      cv2.minAreaRect, and the box corners are rotated so that they line up
      as well as possible with poly[0], poly[n // 2 - 1], poly[n // 2] and
      poly[-1] (n = number of points).
      :param poly: array of shape (n, 2)
      :return: (min_area_quad, center_point); for n != 4 the quad is a
          float32 (4, 2) array and the center is rect[0] from minAreaRect
      """
      point_num = poly.shape[0]
      if point_num == 4:
          # No scratch array is needed on this path.
          return poly, np.sum(poly, axis=0) / 4

      # rect is (center (x, y), (width, height), angle of rotation)
      rect = cv2.minAreaRect(poly.astype(np.int32))
      center_point = rect[0]
      box = np.array(cv2.boxPoints(rect))

      # Cost of starting the box at each of its four corners.
      costs = [
          np.linalg.norm(box[(start + 0) % 4] - poly[0]) +
          np.linalg.norm(box[(start + 1) % 4] - poly[point_num // 2 - 1]) +
          np.linalg.norm(box[(start + 2) % 4] - poly[point_num // 2]) +
          np.linalg.norm(box[(start + 3) % 4] - poly[-1])
          for start in range(4)
      ]
      # First minimum, as with the former strict "<" scan, but without the
      # fixed 1e4 ceiling that forced corner 0 when every cost reached it.
      first_point_idx = int(np.argmin(costs))

      order = [(first_point_idx + k) % 4 for k in range(4)]
      min_area_quad = box[order].astype(np.float32)
      return min_area_quad, center_point
  def shrink_quad_along_width(self,
                              quad,
                              begin_width_ratio=0.,
                              end_width_ratio=1.):
      """
      Cut a quad along its width.
      The new left/right borders sit at the given fractions of the upper
      edge (point 0 -> 1) and of the lower edge (point 3 -> 2).
      :param quad: four points, order 0, 1, 2, 3
      :param begin_width_ratio: fraction at which the result starts
      :param end_width_ratio: fraction at which the result ends
      :return: (4, 2) array in the same point order
      """
      # Column vector so one expression yields both the begin and end point.
      fractions = np.array(
          [[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
      upper = quad[0] + (quad[1] - quad[0]) * fractions
      lower = quad[3] + (quad[2] - quad[3]) * fractions
      upper_begin, upper_end = upper[0], upper[1]
      lower_begin, lower_end = lower[0], lower[1]
      return np.array([upper_begin, upper_end, lower_end, lower_begin])
  417. def shrink_poly_along_width(self,
  418. quads,
  419. shrink_ratio_of_width,
  420. expand_height_ratio=1.0):
  421. """
  422. shrink poly with given length.
  423. """
  424. upper_edge_list = []
  425. def get_cut_info(edge_len_list, cut_len):
  426. for idx, edge_len in enumerate(edge_len_list):
  427. cut_len -= edge_len
  428. if cut_len <= 0.000001:
  429. ratio = (cut_len + edge_len_list[idx]) / edge_len_list[idx]
  430. return idx, ratio
  431. for quad in quads:
  432. upper_edge_len = np.linalg.norm(quad[0] - quad[1])
  433. upper_edge_list.append(upper_edge_len)
  434. # length of left edge and right edge.
  435. left_length = np.linalg.norm(quads[0][0] - quads[0][
  436. 3]) * expand_height_ratio
  437. right_length = np.linalg.norm(quads[-1][1] - quads[-1][
  438. 2]) * expand_height_ratio
  439. shrink_length = min(left_length, right_length,
  440. sum(upper_edge_list)) * shrink_ratio_of_width
  441. # shrinking length
  442. upper_len_left = shrink_length
  443. upper_len_right = sum(upper_edge_list) - shrink_length
  444. left_idx, left_ratio = get_cut_info(upper_edge_list, upper_len_left)
  445. left_quad = self.shrink_quad_along_width(
  446. quads[left_idx], begin_width_ratio=left_ratio, end_width_ratio=1)
  447. right_idx, right_ratio = get_cut_info(upper_edge_list, upper_len_right)
  448. right_quad = self.shrink_quad_along_width(
  449. quads[right_idx], begin_width_ratio=0, end_width_ratio=right_ratio)
  450. out_quad_list = []
  451. if left_idx == right_idx:
  452. out_quad_list.append(
  453. [left_quad[0], right_quad[1], right_quad[2], left_quad[3]])
  454. else:
  455. out_quad_list.append(left_quad)
  456. for idx in range(left_idx + 1, right_idx):
  457. out_quad_list.append(quads[idx])
  458. out_quad_list.append(right_quad)
  459. return np.array(out_quad_list), list(range(left_idx, right_idx + 1))
  def vector_angle(self, A, B):
      """
      Angle of the vector from A to B, measured from the positive x axis.
      :param A: start point (x, y)
      :param B: end point (x, y)
      :return: np.arctan2(dy, dx)
      """
      delta = np.asarray(B) - np.asarray(A)
      return np.arctan2(delta[1], delta[0])
  def theta_line_cross_point(self, theta, point):
      """
      Line through `point` with direction angle `theta`, written as
      a*x + b*y + c = 0.
      :param theta: direction angle
      :param point: (x, y) on the line
      :return: [a, b, c] = [sin, -cos, cos * y - sin * x]
      """
      px, py = point
      cos_t, sin_t = np.cos(theta), np.sin(theta)
      offset = cos_t * py - sin_t * px
      return [sin_t, -cos_t, offset]
  474. def line_cross_two_point(self, A, B):
  475. """
  476. Calculate the line through given point A and B in ax + by + c =0 form.
  477. """
  478. angle = self.vector_angle(A, B)
  479. return self.theta_line_cross_point(angle, A)
  480. def average_angle(self, poly):
  481. """
  482. Calculate the average angle between left and right edge in given poly.
  483. """
  484. p0, p1, p2, p3 = poly
  485. angle30 = self.vector_angle(p3, p0)
  486. angle21 = self.vector_angle(p2, p1)
  487. return (angle30 + angle21) / 2
  def line_cross_point(self, line1, line2):
      """
      Intersection of two lines given as (a, b, c) with a*x + b*y + c = 0.
      :param line1: (a1, b1, c1)
      :param line2: (a2, b2, c2)
      :return: float32 array [x, y]; when the determinant a1*b2 - a2*b1 is
          exactly 0 a message is printed and [0, 0] is returned
      """
      a1, b1, c1 = line1
      a2, b2, c2 = line2
      det = a1 * b2 - a2 * b1
      if det != 0:
          cross_x = (b1 * c2 - b2 * c1) / det
          cross_y = (a2 * c1 - a1 * c2) / det
          return np.array([cross_x, cross_y], dtype=np.float32)
      # Parallel (or identical) lines.
      print('Cross point does not exist')
      return np.array([0, 0], dtype=np.float32)
  def quad2tcl(self, poly, ratio):
      """
      Center-line band of a quad given as four clock-wise points. (4, 2)
      The band keeps the fraction `ratio` of the height, centred between the
      upper edge (points 0, 1) and the lower edge (points 3, 2).
      :param poly: four points, order 0, 1, 2, 3
      :param ratio: fraction of the height to keep
      :return: (4, 2) array in the same point order
      """
      half = ratio / 2
      # Positions of the band's two borders along each side edge.
      band = np.array([[0.5 - half], [0.5 + half]], dtype=np.float32)
      left_side = poly[0] + (poly[3] - poly[0]) * band
      right_side = poly[1] + (poly[2] - poly[1]) * band
      return np.array(
          [left_side[0], right_side[0], right_side[1], left_side[1]])
  def poly2tcl(self, poly, ratio):
      """
      Center-line band of a polygon given as clock-wise points.
      Point k is paired with point (point_num - 1 - k); each pair is
      replaced by the two points at 0.5 -/+ ratio / 2 along the segment
      between them. With an odd number of points the middle one has no
      partner and stays zero in the result.
      :param poly: array of shape (point_num, 2)
      :param ratio: fraction of the height to keep
      :return: array with the shape and dtype of poly
      """
      band = np.array(
          [[0.5 - ratio / 2], [0.5 + ratio / 2]], dtype=np.float32)
      near, far = band[0], band[1]

      tcl_poly = np.zeros_like(poly)
      point_num = poly.shape[0]
      half = point_num // 2

      # First half of the outline and, in matching order, its partners
      # taken from the end of the outline.
      head = poly[:half]
      partners = poly[point_num - half:][::-1]
      span = partners - head

      tcl_poly[:half] = head + span * near
      tcl_poly[point_num - half:] = (head + span * far)[::-1]
      return tcl_poly
  527. def gen_quad_tbo(self, quad, tcl_mask, tbo_map):
  528. """
  529. Generate tbo_map for give quad.
  530. """
  531. # upper and lower line function: ax + by + c = 0;
  532. up_line = self.line_cross_two_point(quad[0], quad[1])
  533. lower_line = self.line_cross_two_point(quad[3], quad[2])
  534. quad_h = 0.5 * (np.linalg.norm(quad[0] - quad[3]) +
  535. np.linalg.norm(quad[1] - quad[2]))
  536. quad_w = 0.5 * (np.linalg.norm(quad[0] - quad[1]) +
  537. np.linalg.norm(quad[2] - quad[3]))
  538. # average angle of left and right line.
  539. angle = self.average_angle(quad)
  540. xy_in_poly = np.argwhere(tcl_mask == 1)
  541. for y, x in xy_in_poly:
  542. point = (x, y)
  543. line = self.theta_line_cross_point(angle, point)
  544. cross_point_upper = self.line_cross_point(up_line, line)
  545. cross_point_lower = self.line_cross_point(lower_line, line)
  546. ##FIX, offset reverse
  547. upper_offset_x, upper_offset_y = cross_point_upper - point
  548. lower_offset_x, lower_offset_y = cross_point_lower - point
  549. tbo_map[y, x, 0] = upper_offset_y
  550. tbo_map[y, x, 1] = upper_offset_x
  551. tbo_map[y, x, 2] = lower_offset_y
  552. tbo_map[y, x, 3] = lower_offset_x
  553. tbo_map[y, x, 4] = 1.0 / max(min(quad_h, quad_w), 1.0) * 2
  554. return tbo_map
  def poly2quads(self, poly):
      """
      Split poly into quads.
      Point k is paired with point (point_num - 1 - k); every two
      neighbouring pairs form one quad.
      :param poly: array of shape (point_num, 2)
      :return: array of shape (point_num // 2 - 1, 4, 2); an empty array
          when there are fewer than two pairs
      """
      point_num = poly.shape[0]
      half = point_num // 2

      # pairs[k] = [point k, its partner counted from the end]
      pairs = np.array(
          [[poly[k], poly[point_num - 1 - k]] for k in range(half)])

      # Two neighbouring pairs flatten to (k, partner k, k+1, partner k+1);
      # the index list reorders them to clock-wise.
      quads = [
          pairs[[k, k + 1]].reshape(4, 2)[[0, 2, 3, 1]]
          for k in range(half - 1)
      ]
      return np.array(quads)
  def __call__(self, data):
      """
      Augment one sample and attach the training label maps to it.
      Reads data['image'], data['polys'] and data['ignore_tags'].
      On success the same dict is returned with 'image' replaced by the
      normalized, padded, channel-first image and with 'score_map',
      'border_map', 'training_mask', 'tvo_map' and 'tco_map' added.
      Returns None when the sample is rejected: no image, no polygons
      (initially, after validation or after cropping), an image whose
      shorter side is below 16 after rescaling, all remaining polygons
      flagged as ignore, or a final image whose shorter side is below half
      of input_size.
      NOTE(review): text_polys is scaled in place below; whether that array
      is still the caller's data['polys'] depends on what
      check_and_validate_polys returned -- confirm before relying on it.
      :param data: dict with the keys listed above
      :return: the updated dict, or None
      """
      im = data['image']
      text_polys = data['polys']
      text_tags = data['ignore_tags']
      if im is None:
          return None
      if text_polys.shape[0] == 0:
          return None

      h, w, _ = im.shape
      text_polys, text_tags, hv_tags = self.check_and_validate_polys(
          text_polys, text_tags, (h, w))

      if text_polys.shape[0] == 0:
          return None

      # Random aspect-ratio change that keeps the area fixed: width is
      # scaled by sqrt(s) and height by 1 / sqrt(s), with s drawn from
      # np.arange(1.0, 1.55, 0.1) and inverted with probability 0.5.
      asp_scales = np.arange(1.0, 1.55, 0.1)
      asp_scale = np.random.choice(asp_scales)

      if np.random.rand() < 0.5:
          asp_scale = 1.0 / asp_scale
      asp_scale = math.sqrt(asp_scale)

      asp_wx = asp_scale
      asp_hy = 1.0 / asp_scale
      im = cv2.resize(im, dsize=None, fx=asp_wx, fy=asp_hy)
      text_polys[:, :, 0] *= asp_wx
      text_polys[:, :, 1] *= asp_hy

      # Limit the longer side to 2048 pixels.
      h, w, _ = im.shape
      if max(h, w) > 2048:
          rd_scale = 2048.0 / max(h, w)
          im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
          text_polys *= rd_scale
      h, w, _ = im.shape
      # Reject images that are too small to be useful.
      if min(h, w) < 16:
          return None

      # Random crop; crops without text are not accepted
      # (crop_background=False).
      im, text_polys, text_tags, hv_tags = self.crop_area(im, \
          text_polys, text_tags, hv_tags, crop_background=False)

      if text_polys.shape[0] == 0:
          return None
      # Reject the sample when every remaining polygon is flagged as ignore.
      if np.sum((text_tags * 1.0)) >= text_tags.size:
          return None
      new_h, new_w, _ = im.shape
      if (new_h is None) or (new_w is None):
          return None
      # Resize so that the longer side becomes input_size times a random
      # factor; 1.0 appears five times in the list and is therefore the most
      # likely factor.
      std_ratio = float(self.input_size) / max(new_w, new_h)
      rand_scales = np.array(
          [0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0, 1.0, 1.0, 1.0, 1.0])
      rz_scale = std_ratio * np.random.choice(rand_scales)
      im = cv2.resize(im, dsize=None, fx=rz_scale, fy=rz_scale)
      text_polys[:, :, 0] *= rz_scale
      text_polys[:, :, 1] *= rz_scale

      # Gaussian blur with probability 0.05; the kernel size is forced to an
      # odd value in {1, 3, 5}.
      if np.random.rand() < 0.1 * 0.5:
          ks = np.random.permutation(5)[0] + 1
          ks = int(ks / 2) * 2 + 1
          im = cv2.GaussianBlur(im, ksize=(ks, ks), sigmaX=0, sigmaY=0)
      # Brighten by a factor in [1.0, 1.5) with probability 0.05.
      if np.random.rand() < 0.1 * 0.5:
          im = im * (1.0 + np.random.rand() * 0.5)
          im = np.clip(im, 0.0, 255.0)
      # Darken by a factor in (0.5, 1.0] with probability 0.05.
      if np.random.rand() < 0.1 * 0.5:
          im = im * (1.0 - np.random.rand() * 0.5)
          im = np.clip(im, 0.0, 255.0)

      # Padding the im to [input_size, input_size]
      new_h, new_w, _ = im.shape
      # Reject images whose shorter side would fill less than half of the
      # output.
      if min(new_w, new_h) < self.input_size * 0.5:
          return None
      # The canvas is pre-filled with the per-channel mean that is
      # subtracted further down, so padded pixels end up as 0.
      im_padded = np.ones(
          (self.input_size, self.input_size, 3), dtype=np.float32)
      im_padded[:, :, 2] = 0.485 * 255
      im_padded[:, :, 1] = 0.456 * 255
      im_padded[:, :, 0] = 0.406 * 255

      # Random the start position
      del_h = self.input_size - new_h
      del_w = self.input_size - new_w
      sh, sw = 0, 0
      if del_h > 1:
          sh = int(np.random.rand() * del_h)
      if del_w > 1:
          sw = int(np.random.rand() * del_w)

      # Padding: paste the image at (sh, sw) and shift the polygons with it.
      im_padded[sh:sh + new_h, sw:sw + new_w, :] = im.copy()
      text_polys[:, :, 0] += sw
      text_polys[:, :, 1] += sh

      # Label maps at a quarter of the input resolution (ds_ratio 0.25).
      score_map, border_map, training_mask = self.generate_tcl_label(
          (self.input_size, self.input_size), text_polys, text_tags, 0.25)

      # SAST head
      tvo_map, tco_map = self.generate_tvo_and_tco(
          (self.input_size, self.input_size),
          text_polys,
          text_tags,
          tcl_ratio=0.3,
          ds_ratio=0.25)
      # print("test--------tvo_map shape:", tvo_map.shape)

      # Per-channel normalization: subtract the mean, divide by the std.
      im_padded[:, :, 2] -= 0.485 * 255
      im_padded[:, :, 1] -= 0.456 * 255
      im_padded[:, :, 0] -= 0.406 * 255
      im_padded[:, :, 2] /= (255.0 * 0.229)
      im_padded[:, :, 1] /= (255.0 * 0.224)
      im_padded[:, :, 0] /= (255.0 * 0.225)
      # HWC -> CHW; the [::-1] below then reverses the channel axis
      # (presumably BGR -> RGB -- confirm against the image loader).
      im_padded = im_padded.transpose((2, 0, 1))

      data['image'] = im_padded[::-1, :, :]
      # Single-channel maps get a leading channel axis; multi-channel maps
      # are moved from HWC to CHW.
      data['score_map'] = score_map[np.newaxis, :, :]
      data['border_map'] = border_map.transpose((2, 0, 1))
      data['training_mask'] = training_mask[np.newaxis, :, :]
      data['tvo_map'] = tvo_map.transpose((2, 0, 1))
      data['tco_map'] = tco_map.transpose((2, 0, 1))
      return data