# ...
#
# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
#
# This work is licensed under the terms of the GNU GPL, version 2 or
# later. See the COPYING file in the top-level directory.

import re
import logging
import shlex

from . import has_cmd, run_cmd


def tesseract_available(expected_version):
    """Tell whether a recent enough ``tesseract`` binary can be used.

    :param expected_version: minimum acceptable major version (an int)
    :returns: True if ``tesseract --version`` succeeds and reports a major
              version greater than or equal to *expected_version*; False
              if the command is missing, exits with a non-zero status, or
              prints no recognizable version number
    """
    if not has_cmd('tesseract'):
        return False
    (stdout, stderr, ret) = run_cmd(['tesseract', '--version'])
    if ret:
        return False
    # The version banner is normally on stdout, but look at stderr as well
    # so that an empty stdout yields False instead of an IndexError.  An
    # optional leading 'v' (e.g. "tesseract v5.0.0") is tolerated.
    for text in (stdout, stderr):
        if not text:
            continue
        found = re.search(r'tesseract\S*\s+v?(\d+)', text, re.IGNORECASE)
        if found:
            return int(found.group(1)) >= expected_version
    return False


def tesseract_ocr(image_path, tesseract_args=''):
    """Run ``tesseract`` on an image and return the recognized text lines.

    The image path and every non-empty recognized line are logged at debug
    level on the 'console' logger.

    :param image_path: path of the image handed to tesseract
    :param tesseract_args: extra command line options, given as a single
                           string that is split shell-style and appended
                           after the ``stdout`` output base; the default
                           empty string adds nothing
    :returns: list of the non-empty output lines with surrounding
              whitespace stripped, or None if tesseract exits with a
              non-zero status
    """
    console_logger = logging.getLogger('console')
    console_logger.debug(image_path)
    command = ['tesseract', image_path, 'stdout']
    if tesseract_args:
        # Previously this parameter was accepted but silently dropped.
        command += shlex.split(tesseract_args)
    (stdout, stderr, ret) = run_cmd(command)
    if ret:
        return None
    lines = []
    for line in stdout.split('\n'):
        sline = line.strip()
        if sline:
            console_logger.debug(sline)
            lines.append(sline)
    return lines