# ...
#
# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
#
# This work is licensed under the terms of the GNU GPL, version 2 or
# later.  See the COPYING file in the top-level directory.

import re
import logging
import shlex

from . import has_cmd, run_cmd


def tesseract_available(expected_version):
    """Check whether a sufficiently recent ``tesseract`` binary is usable.

    :param expected_version: minimum acceptable major version (integer).
    :returns: True when ``tesseract --version`` succeeds and reports a
              major version greater than or equal to ``expected_version``;
              False when the command is missing, exits with a non-zero
              status, or prints a version that cannot be parsed.
    """
    (has_tesseract, _) = has_cmd('tesseract')
    if not has_tesseract:
        return False
    (stdout, stderr, ret) = run_cmd(['tesseract', '--version'])
    if ret:
        return False
    # The version is the second whitespace-separated token of the output
    # (e.g. "tesseract 4.1.1").  Guard against short/empty output instead
    # of raising IndexError.
    tokens = stdout.split()
    if len(tokens) < 2:
        return False
    # Some builds prefix the number with "v" or append a suffix
    # (e.g. "v5.0.0-alpha"), so extract only the leading major number.
    match = re.match(r'v?(\d+)', tokens[1])
    if match is None:
        return False
    return int(match.group(1)) >= expected_version


def tesseract_ocr(image_path, tesseract_args=''):
    """Run ``tesseract`` on an image and return the recognised text lines.

    :param image_path: path of the image file handed to tesseract.
    :param tesseract_args: extra command line options for tesseract, either
                           as a shell-style string (split with
                           ``shlex.split``) or as a sequence of already
                           separated arguments.  Empty by default.
    :returns: list of the non-empty, whitespace-stripped output lines, or
              None when tesseract exits with a non-zero status.
    """
    console_logger = logging.getLogger('console')
    console_logger.debug(image_path)
    if isinstance(tesseract_args, str):
        extra_args = shlex.split(tesseract_args)
    else:
        extra_args = list(tesseract_args)
    # Options follow the output base ("stdout"), matching tesseract's
    # documented synopsis: tesseract FILE OUTPUTBASE [OPTIONS]...
    (stdout, stderr, ret) = run_cmd(['tesseract', image_path,
                                     'stdout'] + extra_args)
    if ret:
        return None
    # Drop blank lines and surrounding whitespace from the OCR output.
    lines = [sline for sline in (line.strip() for line in stdout.split('\n'))
             if sline]
    for sline in lines:
        console_logger.debug(sline)
    return lines