# ...
#
# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
#
# This work is licensed under the terms of the GNU GPL, version 2 or
# later. See the COPYING file in the top-level directory.

import re
import logging

from . import has_cmd, run_cmd


def tesseract_available(expected_version):
    """Check whether a sufficiently recent ``tesseract`` binary is usable.

    :param expected_version: minimum acceptable *major* version (an int).
    :returns: True if ``has_cmd`` reports the command as present,
              ``tesseract --version`` exits with status 0 and the major
              version it prints is >= ``expected_version``; False otherwise
              (including when the version banner cannot be parsed).
    """
    (has_tesseract, _) = has_cmd('tesseract')
    if not has_tesseract:
        return False
    (stdout, stderr, ret) = run_cmd(['tesseract', '--version'])
    if ret:
        return False
    # The banner is expected to look like "tesseract 4.1.1" or
    # "tesseract v5.3.0": the version is the second whitespace-separated
    # token, optionally prefixed with "v".
    # NOTE(review): some older tesseract releases are believed to print the
    # banner on stderr instead of stdout, hence the fallback - confirm.
    banner = stdout or stderr or ''
    match = re.match(r'\s*\S+\s+v?(\d+)', banner)
    if match is None:
        # Unrecognised output: treat as "not available" rather than
        # raising IndexError/ValueError out of an availability probe.
        return False
    return int(match.group(1)) >= expected_version


def tesseract_ocr(image_path, tesseract_args=''):
    """Run ``tesseract`` on an image and return the recognised text lines.

    :param image_path: path of the image file handed to ``tesseract``.
    :param tesseract_args: optional extra command-line options, given as a
                           single whitespace-separated string; they are
                           appended after the output base. The default
                           (empty string) adds nothing.
    :returns: list of non-empty, whitespace-stripped lines printed by
              ``tesseract`` on stdout, or None if the command exits with a
              non-zero status.
    """
    console_logger = logging.getLogger('console')
    console_logger.debug(image_path)
    # 'stdout' as the output base makes tesseract print the recognised
    # text instead of writing it to a file.
    cmd = ['tesseract', image_path, 'stdout']
    cmd += (tesseract_args or '').split()
    (stdout, stderr, ret) = run_cmd(cmd)
    if ret:
        return None
    lines = []
    for line in stdout.split('\n'):
        sline = line.strip()
        if sline:
            console_logger.debug(sline)
            lines.append(sline)
    return lines