Overview of OCR (Optical Character Recognition).
Optical Character Recognition (OCR) is a business solution for automating data extraction: it reads the text in a scanned document or image file and converts it into a machine-readable form, so that it can be used for data processing such as editing or searching.
PaddleOCR was developed by the Chinese company Baidu in September 2020. It is built on the PaddlePaddle deep learning framework.
# Quick start: run PaddleOCR (with the text-angle classifier enabled) on a
# single image, print every recognised line, and save an annotated copy.
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image

# PaddleOCR supports Chinese, English, French, German, Korean and Japanese.
# Pick the language model through `lang`: `ch`, `en`, `french`, `german`,
# `korean` or `japan`.
# Build the engine only once: this step downloads the model and loads it
# into memory.
ocr = PaddleOCR(use_angle_cls=True, lang='en')

img_path = 'image.jpeg'
result = ocr.ocr(img_path, cls=True)
for line in result:
    print(line)

# Draw the result.
# Each entry looks like [box, (text, score)], so split it into the three
# parallel lists that draw_ocr() takes.
image = Image.open(img_path).convert('RGB')
boxes, txts, scores = [], [], []
for entry in result:
    recognised = entry[1]
    boxes.append(entry[0])
    txts.append(recognised[0])
    scores.append(recognised[1])

annotated = draw_ocr(image, boxes, txts, scores, font_path='fonts/cyrillic.ttf')
im_show = Image.fromarray(annotated)
im_show.save('result.jpg')
Namespace(benchmark=False, cls_batch_num=6, cls_image_shape='3, 48, 192', cls_model_dir='.paddleocr/', cls_thresh=0.9, cpu_threads=10, det=True, det_algorithm='DB', det_db_box_thresh=0.6, det_db_score_mode='fast', det_db_thresh=0.3, det_db_unclip_ratio=1.5, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_east_score_thresh=0.8, det_limit_side_len=960, det_limit_type='max', det_model_dir='.paddleocr/', det_sast_nms_thresh=0.2, det_sast_polygon=False, det_sast_score_thresh=0.5, drop_score=0.5, e2e_algorithm='PGNet', e2e_char_dict_path='./ppocr/utils/ic15_dict.txt', e2e_limit_side_len=768, e2e_limit_type='max', e2e_model_dir=None, e2e_pgnet_mode='fast', e2e_pgnet_polygon=True, e2e_pgnet_score_thresh=0.5, e2e_pgnet_valid_set='totaltext', enable_mkldnn=False, gpu_mem=500, help='==SUPPRESS==', image_dir=None, ir_optim=True, label_list=['0', '180'], lang='en', layout_path_model='lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config', max_batch_size=10, max_text_length=25, min_subgraph_size=10, output='./output/table', precision='fp32', process_id=0, rec=True, rec_algorithm='CRNN', rec_batch_num=6, rec_char_dict_path='local/lib/python3.6/site-packages/paddleocr/ppocr/utils/en_dict.txt', rec_char_type='ch', rec_image_shape='3, 32, 320', rec_model_dir='.paddleocr/', save_log_path='./log_output/', show_log=True, table_char_dict_path=None, table_char_type='en', table_max_len=488, table_model_dir=None, total_process_num=1, type='ocr', use_angle_cls=True, use_dilation=False, use_gpu=True, use_mp=False, use_pdserving=False, use_space_char=True, use_tensorrt=False, vis_font_path='./doc/fonts/simfang.ttf', warmup=True) [2021/08/13 19:21:01] root DEBUG: dt_boxes num : 10, elapse : 0.4250295162200928 [2021/08/13 19:21:01] root DEBUG: cls num : 10, elapse : 0.06311750411987305 [2021/08/13 19:21:01] root DEBUG: rec_res num : 10, elapse : 0.16244864463806152 [[[104.0, 93.0], [256.0, 103.0], [254.0, 134.0], [102.0, 124.0]], ('MALA', 0.98211896)] [[[66.0, 161.0], [265.0, 157.0], 
[266.0, 181.0], [66.0, 184.0]], ('00418-00-6418', 0.9912834)] [[[60.0, 360.0], [152.0, 360.0], [152.0, 378.0], [60.0, 378.0]], ('BINTLAR', 0.9820388)] [[[158.0, 364.0], [200.0, 364.0], [200.0, 378.0], [158.0, 378.0]], ('RN', 0.8610319)] [[[56.0, 403.0], [140.0, 406.0], [139.0, 427.0], [55.0, 424.0]], ('LOT2272', 0.99408233)] [[[54.0, 429.0], [265.0, 427.0], [266.0, 455.0], [54.0, 457.0]], ('4B400PETALING JAYA', 0.9694132)] [[[521.0, 438.0], [663.0, 436.0], [663.0, 450.0], [521.0, 452.0]], ('WARGANEGARA', 0.9971807)] [[[55.0, 461.0], [162.0, 461.0], [162.0, 485.0], [55.0, 485.0]], ('SELANGOR', 0.9964472)] [[[586.0, 454.0], [697.0, 452.0], [697.0, 466.0], [587.0, 468.0]], ('PEREMPUAN', 0.9980258)]
# Second walkthrough: run PaddleOCR without the angle classifier, print the
# recognised lines, then save and display an annotated copy of the image.
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image

# Switch the language by modifying the `lang` parameter.
# The model file is downloaded automatically when executed for the first time.
ocr = PaddleOCR(lang="en")

img_path = 'image.jpeg'

# Run OCR once. The earlier version called ocr.ocr() twice on the same image
# and discarded the first result, doubling the inference time for nothing.
result = ocr.ocr(img_path)

# Print detection frame and recognition result.
for line in result:
    print(line)

# Visualization.
# Image.open() expects a file path or a file object. Passing an
# already-opened PIL image (the `blurImage` left over from a blur experiment)
# fails with "AttributeError: read", so open the source file by path.
# To try a blurred preview, filter the opened image instead, e.g.
# `image.filter(ImageFilter.BLUR)`, rather than re-opening it.
image = Image.open(img_path).convert('RGB')

# Each entry looks like [box, (text, score)].
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
scores = [line[1][1] for line in result]

im_show = draw_ocr(image, boxes, txts, scores, font_path='PaddleOCR-release-2.2/doc/fonts/cyrillic.ttf')
im_show = Image.fromarray(im_show)
im_show.save('img_path.jpg')
im_show.show()
Namespace(benchmark=False, cls_batch_num=6, cls_image_shape='3, 48, 192', cls_model_dir='.paddleocr/', cls_thresh=0.9, cpu_threads=10, det=True, det_algorithm='DB', det_db_box_thresh=0.6, det_db_score_mode='fast', det_db_thresh=0.3, det_db_unclip_ratio=1.5, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_east_score_thresh=0.8, det_limit_side_len=960, det_limit_type='max', det_model_dir='.paddleocr/', det_sast_nms_thresh=0.2, det_sast_polygon=False, det_sast_score_thresh=0.5, drop_score=0.5, e2e_algorithm='PGNet', e2e_char_dict_path='./ppocr/utils/ic15_dict.txt', e2e_limit_side_len=768, e2e_limit_type='max', e2e_model_dir=None, e2e_pgnet_mode='fast', e2e_pgnet_polygon=True, e2e_pgnet_score_thresh=0.5, e2e_pgnet_valid_set='totaltext', enable_mkldnn=False, gpu_mem=500, help='==SUPPRESS==', image_dir=None, ir_optim=True, label_list=['0', '180'], lang='en', layout_path_model='lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config', max_batch_size=10, max_text_length=25, min_subgraph_size=10, output='./output/table', precision='fp32', process_id=0, rec=True, rec_algorithm='CRNN', rec_batch_num=6, rec_char_dict_path='.local/lib/python3.6/site-packages/paddleocr/ppocr/utils/en_dict.txt', rec_char_type='ch', rec_image_shape='3, 32, 320', rec_model_dir='.paddleocr/', save_log_path='./log_output/', show_log=True, table_char_dict_path=None, table_char_type='en', table_max_len=488, table_model_dir=None, total_process_num=1, type='ocr', use_angle_cls=False, use_dilation=False, use_gpu=True, use_mp=False, use_pdserving=False, use_space_char=True, use_tensorrt=False, vis_font_path='./doc/fonts/simfang.ttf', warmup=True) [2021/08/13 19:23:56] root WARNING: Since the angle classifier is not initialized, the angle classifier will not be uesd during the forward process [2021/08/13 19:23:57] root DEBUG: dt_boxes num : 10, elapse : 0.32827091217041016 [2021/08/13 19:23:57] root DEBUG: rec_res num : 10, elapse : 0.12045407295227051 [2021/08/13 19:23:57] root WARNING: Since the 
angle classifier is not initialized, the angle classifier will not be uesd during the forward process [2021/08/13 19:23:57] root DEBUG: dt_boxes num : 10, elapse : 0.34050536155700684 [2021/08/13 19:23:57] root DEBUG: rec_res num : 10, elapse : 0.16071629524230957 [[[104.0, 93.0], [256.0, 103.0], [254.0, 134.0], [102.0, 124.0]], ('MALA', 0.98211896)] [[[66.0, 161.0], [265.0, 157.0], [266.0, 181.0], [66.0, 184.0]], ('00418-00-6418', 0.9912834)] [[[60.0, 360.0], [152.0, 360.0], [152.0, 378.0], [60.0, 378.0]], ('BINTLAR', 0.9820388)] [[[158.0, 364.0], [200.0, 364.0], [200.0, 378.0], [158.0, 378.0]], ('RN', 0.8610319)] [[[56.0, 403.0], [140.0, 406.0], [139.0, 427.0], [55.0, 424.0]], ('LOT2272', 0.99408233)] [[[54.0, 429.0], [265.0, 427.0], [266.0, 455.0], [54.0, 457.0]], ('4B400PETALING JAYA', 0.9694132)] [[[521.0, 438.0], [663.0, 436.0], [663.0, 450.0], [521.0, 452.0]], ('WARGANEGARA', 0.9971807)] [[[55.0, 461.0], [162.0, 461.0], [162.0, 485.0], [55.0, 485.0]], ('SELANGOR', 0.9964472)] [[[586.0, 454.0], [697.0, 452.0], [697.0, 466.0], [587.0, 468.0]], ('PEREMPUAN', 0.9980258)]
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-79-f33617dd18df> in <module> 22 # Visualization 23 from PIL import Image ---> 24 image = Image.open(blurImage).convert('RGB') 25 boxes = [line[0] for line in result] 26 txts = [line[1][0] for line in result] ~/.local/lib/python3.6/site-packages/PIL/Image.py in open(fp, mode, formats) 2975 exclusive_fp = True 2976 -> 2977 prefix = fp.read(16) 2978 2979 preinit() ~/.local/lib/python3.6/site-packages/PIL/Image.py in __getattr__(self, name) 544 ) 545 return self._category --> 546 raise AttributeError(name) 547 548 @property AttributeError: read