|
@@ -96,7 +96,7 @@ class OCRSystem:
|
|
continue
|
|
continue
|
|
|
|
|
|
# 图片预处理
|
|
# 图片预处理
|
|
- img = self.resizeImg(img, 800)
|
|
|
|
|
|
+ img = self.resizeImg(img, 640)
|
|
|
|
|
|
starttime = time.time()
|
|
starttime = time.time()
|
|
dt_boxes, rec_res = self.text_sys(img)
|
|
dt_boxes, rec_res = self.text_sys(img)
|
|
@@ -111,10 +111,11 @@ class OCRSystem:
|
|
text, score = rec_res[dno]
|
|
text, score = rec_res[dno]
|
|
if score > 0.8:
|
|
if score > 0.8:
|
|
text_list.append(text)
|
|
text_list.append(text)
|
|
- # print(text)
|
|
|
|
|
|
+ print(text)
|
|
self.getInformation(text, kwargs['invoice_type'])
|
|
self.getInformation(text, kwargs['invoice_type'])
|
|
|
|
|
|
inv_text = ''.join(text_list)
|
|
inv_text = ''.join(text_list)
|
|
|
|
+ print(inv_text)
|
|
self.getInformationAgain(inv_text, kwargs['invoice_type'])
|
|
self.getInformationAgain(inv_text, kwargs['invoice_type'])
|
|
all_results.append({
|
|
all_results.append({
|
|
'no': self.inv_no,
|
|
'no': self.inv_no,
|
|
@@ -134,6 +135,10 @@ class OCRSystem:
|
|
pt = re.compile(r'N[\w|\s]?(\d{8})', re.M)
|
|
pt = re.compile(r'N[\w|\s]?(\d{8})', re.M)
|
|
information_list = pt.findall(string)
|
|
information_list = pt.findall(string)
|
|
self.inv_no = information_list[0] if len(information_list) != 0 else ""
|
|
self.inv_no = information_list[0] if len(information_list) != 0 else ""
|
|
|
|
+ if self.inv_no == "":
|
|
|
|
+ pt = re.compile(r'号码:(\d{8})', re.M)
|
|
|
|
+ information_list = pt.findall(string)
|
|
|
|
+ self.inv_no = information_list[0] if len(information_list) != 0 else ""
|
|
|
|
|
|
if self.inv_id == "":
|
|
if self.inv_id == "":
|
|
if invoice_type == 1:
|
|
if invoice_type == 1:
|
|
@@ -142,15 +147,29 @@ class OCRSystem:
|
|
pt = re.compile(r'(\d{10})N', re.M)
|
|
pt = re.compile(r'(\d{10})N', re.M)
|
|
information_list = pt.findall(string)
|
|
information_list = pt.findall(string)
|
|
self.inv_id = information_list[0] if len(information_list) != 0 else ""
|
|
self.inv_id = information_list[0] if len(information_list) != 0 else ""
|
|
|
|
+ if self.inv_id == "":
|
|
|
|
+ if invoice_type == 1:
|
|
|
|
+ pt = re.compile(r'代码:(\d{12})', re.M)
|
|
|
|
+ else:
|
|
|
|
+ pt = re.compile(r'代码:(\d{10})', re.M)
|
|
|
|
+ information_list = pt.findall(string)
|
|
|
|
+ self.inv_id = information_list[0] if len(information_list) != 0 else ""
|
|
|
|
|
|
if self.inv_company[1] == '':
|
|
if self.inv_company[1] == '':
|
|
- pt = re.compile(r'称:(.*?)[-*+></\d]?[纳税]', re.M)
|
|
|
|
|
|
+ pt = re.compile(r'称:(.*?)[-*+></\d购]?[纳税]', re.M)
|
|
information_list = pt.findall(string)
|
|
information_list = pt.findall(string)
|
|
if len(information_list) != 0:
|
|
if len(information_list) != 0:
|
|
for i in range(len(self.inv_company)):
|
|
for i in range(len(self.inv_company)):
|
|
- if self.inv_company[i] == '':
|
|
|
|
- if len(information_list) != 0:
|
|
|
|
- self.inv_company[i] = information_list.pop(0)
|
|
|
|
|
|
+ if len(information_list) != 0:
|
|
|
|
+ self.inv_company[i] = information_list.pop(0)
|
|
|
|
+
|
|
|
|
+ if self.inv_identifier[1] == '':
|
|
|
|
+ pt = re.compile(r'别号:([a-zA-Z\d]{18})', re.M)
|
|
|
|
+ information_list = pt.findall(string)
|
|
|
|
+ if len(information_list) != 0:
|
|
|
|
+ for i in range(len(self.inv_identifier)):
|
|
|
|
+ if len(information_list) != 0:
|
|
|
|
+ self.inv_identifier[i] = information_list.pop(0)
|
|
|
|
|
|
|
|
|
|
if self.inv_payee == "":
|
|
if self.inv_payee == "":
|
|
@@ -195,28 +214,28 @@ class OCRSystem:
|
|
return True
|
|
return True
|
|
|
|
|
|
if self.inv_payee == "":
|
|
if self.inv_payee == "":
|
|
- pt = re.compile(r'款人:(.*)', re.M)
|
|
|
|
|
|
+ pt = re.compile(r'款人:(.*)$', re.M)
|
|
information_list = pt.findall(string)
|
|
information_list = pt.findall(string)
|
|
self.inv_payee = information_list[0] if len(information_list) != 0 else ""
|
|
self.inv_payee = information_list[0] if len(information_list) != 0 else ""
|
|
if self.inv_payee != "":
|
|
if self.inv_payee != "":
|
|
return True
|
|
return True
|
|
|
|
|
|
if self.inv_review == "":
|
|
if self.inv_review == "":
|
|
- pt = re.compile(r'复核:(.*)', re.M)
|
|
|
|
|
|
+ pt = re.compile(r'复核:(.*)$', re.M)
|
|
information_list = pt.findall(string)
|
|
information_list = pt.findall(string)
|
|
self.inv_review = information_list[0] if len(information_list) != 0 else ""
|
|
self.inv_review = information_list[0] if len(information_list) != 0 else ""
|
|
if self.inv_review != "":
|
|
if self.inv_review != "":
|
|
return True
|
|
return True
|
|
|
|
|
|
if self.inv_drawer == "":
|
|
if self.inv_drawer == "":
|
|
- pt = re.compile(r'票人:(.*)', re.M)
|
|
|
|
|
|
+ pt = re.compile(r'票人:(.*)$', re.M)
|
|
information_list = pt.findall(string)
|
|
information_list = pt.findall(string)
|
|
self.inv_drawer = information_list[0] if len(information_list) != 0 else ""
|
|
self.inv_drawer = information_list[0] if len(information_list) != 0 else ""
|
|
if self.inv_drawer != "":
|
|
if self.inv_drawer != "":
|
|
return True
|
|
return True
|
|
|
|
|
|
if self.inv_identifier[1] == '':
|
|
if self.inv_identifier[1] == '':
|
|
- pt = re.compile(r'[别号:]?([a-zA-Z\d]{18})$', re.M)
|
|
|
|
|
|
+ pt = re.compile(r'^[纳税人识别号:]?([a-zA-Z\d]{18})$', re.M)
|
|
information_list = pt.findall(string)
|
|
information_list = pt.findall(string)
|
|
if len(information_list) != 0:
|
|
if len(information_list) != 0:
|
|
for i in range(len(self.inv_identifier)):
|
|
for i in range(len(self.inv_identifier)):
|
|
@@ -225,7 +244,7 @@ class OCRSystem:
|
|
return True
|
|
return True
|
|
|
|
|
|
if self.inv_identifier[1] == '':
|
|
if self.inv_identifier[1] == '':
|
|
- pt = re.compile(r'称:(.*)', re.M)
|
|
|
|
|
|
+ pt = re.compile(r'称:(.*)$', re.M)
|
|
information_list = pt.findall(string)
|
|
information_list = pt.findall(string)
|
|
if len(information_list) != 0:
|
|
if len(information_list) != 0:
|
|
for i in range(len(self.inv_company)):
|
|
for i in range(len(self.inv_company)):
|