# This script is used as a bitbake task to create a new python manifest
# $ bitbake python -c create_manifest
#
# Our goal is to keep python-core as small as possible and add other python
# packages only when the user needs them, hence why we split upstream python
# into several packages.
#
# In a very simplistic way what this does is:
# Launch python and see specifically what is required for it to run at a minimum
#
# Go through the python-manifest file and launch a separate task for every single
# one of the files on each package, this task will check what was required for that
# specific module to run, these modules will be called dependencies.
# The output of such task will be a list of the modules or dependencies that were
# found for that file.
#
# Such output will be parsed by this script, we will look for each dependency on the
# manifest and if we find that another package already includes it, then we will add
# that package as an RDEPENDS to the package we are currently checking; in case we don't
# find the current dependency on any other package we will add it to the current package
# as part of FILES.
#
#
# This way we will create a new manifest from the data structure that was built during
# this process, on this new manifest each package will contain specifically only
# what it needs to run.
#
# There are some caveats which we try to deal with, such as repeated files on different
# packages, packages that include folders, wildcards, and special packages.
# It's also important to note that this method only works for python files, and shared
# libraries. Static libraries, header files and binaries need to be dealt with manually.
#
# This script differs from its python2 version mostly on how shared libraries are handled
# The manifest file for python3 has an extra field which contains the cached files for
# each package.
# The method to handle cached files does not work when a module includes a folder which
# itself contains the pycache folder, gladly this is almost never the case.
#
# Author: Alejandro Enedino Hernandez Samaniego <alejandro at enedino dot org>


import sys
import subprocess
import json
import os
import collections

if '-d' in sys.argv:
    debugFlag = '-d'
else:
    debugFlag = ''

# Get python version from ${PYTHON_MAJMIN}
pyversion = str(sys.argv[1])

# Hack to get native python search path (for folders), not fond of it but it works for now
pivot = 'recipe-sysroot-native'
nativelibfolder = None
for p in sys.path:
    if pivot in p:
        # Keep everything up to and including the pivot directory name.
        # No break on purpose: as before, the last matching entry wins.
        nativelibfolder = p[:p.find(pivot)+len(pivot)]

# Without the native sysroot isFolder() cannot resolve ${libdir}; fail here with
# a clear message instead of a NameError on the first isFolder() call.
if nativelibfolder is None:
    sys.exit('ERROR: no entry of sys.path contains "%s", '
             'unable to locate the native python library folder' % pivot)

# Empty dict to hold the whole manifest
new_manifest = collections.OrderedDict()

# Check for repeated files, folders and wildcards
allfiles = []
repeated = []
wildcards = []

hasfolders = []
allfolders = []


def isFolder(value):
    """Return True if manifest entry `value` is a directory in the native sysroot.

    ${PYTHON_MAJMIN} is expanded to `pyversion`, and ${libdir} is tried in turn
    as <nativelibfolder>/usr/lib, /usr/lib64 and /usr/lib32.
    """
    value = value.replace('${PYTHON_MAJMIN}', pyversion)
    return any(
        os.path.isdir(value.replace('${libdir}', nativelibfolder + libdir))
        for libdir in ('/usr/lib', '/usr/lib64', '/usr/lib32')
    )


def isCached(item):
    """Return True if `item` contains '__pycache__'."""
    return '__pycache__' in item


def prepend_comments(comments, json_manifest):
    """Rewrite the file `json_manifest` so that `comments` precedes its contents."""
    with open(json_manifest, 'r+') as manifest:
        json_contents = manifest.read()
        manifest.seek(0, 0)
        manifest.write(comments + json_contents)


def print_indent(msg, offset):
    """Print every line of `msg` indented by `offset` spaces.

    An empty `msg` prints nothing.
    """
    indent = ' ' * offset
    for line in msg.splitlines():
        print(indent + line)


# Read existing JSON manifest
with open('python3-manifest.json') as manifest:
    manifest_str = manifest.read()

# The JSON format doesn't allow comments so we hack the call to keep the comments using a marker
eoc_marker = '# EOC'
marker_pos = manifest_str.find(eoc_marker)
if marker_pos == -1:
    # No comment header: the whole file is JSON and there is nothing to carry over.
    json_start = 0
else:
    json_start = marker_pos + len(eoc_marker) + 1  # EOC + \n
comments = manifest_str[:json_start]
old_manifest = json.loads(manifest_str[json_start:], object_pairs_hook=collections.OrderedDict)

#
# First pass to get core-package functionality, because we base everything on the fact that core is actually working
# Not exactly the same so it should not be a function
#

print_indent('Getting dependencies for package: core', 0)


# This special call gets the core dependencies and
# appends to the old manifest so it doesn't hurt what it
# currently holds.
# This way when other packages check for dependencies
# on the new core package, they will still find them
# even when checking the old_manifest

output = subprocess.check_output([sys.executable, 'get_module_deps3.py', 'python-core-package', '%s' % debugFlag]).decode('utf8')
for coredep in output.split():
    coredep = coredep.replace(pyversion, '${PYTHON_MAJMIN}')
    # Entries containing __pycache__ go to 'cached', everything else to 'files'.
    core_list = old_manifest['core']['cached' if isCached(coredep) else 'files']
    if coredep not in core_list:
        core_list.append(coredep)


# The second step is to loop through the existing files contained in the core package
# according to the old manifest, identify if they are modules, or some other type
# of file that we can't import (directories, binaries, configs) in which case we
# can only assume they were added correctly (manually) so we ignore those and
# pass them to the manifest directly.

core_files = old_manifest['core']['files']
core_cached = old_manifest['core']['cached']

# NOTE: core_files is extended while it is being iterated, so dependencies
# appended below are themselves visited later by this same loop.
for filedep in core_files:
    if isFolder(filedep):
        # Directories cannot be imported; only record the ones that are cache folders.
        if isCached(filedep) and filedep not in core_cached:
            core_cached.append(filedep)
        continue
    # Binaries, empty entries and headers are already in the manifest and
    # cannot be imported either, so there is nothing to do for them.
    if '${bindir}' in filedep or filedep == '' or '${includedir}' in filedep:
        continue

    # Get actual module name, shouldn't be affected by libdir/bindir, etc.
    pymodule = os.path.splitext(os.path.basename(os.path.normpath(filedep)))[0]

    # We now know that we're dealing with a python module, so we can import it
    # and check what its dependencies are.
    # We launch a separate task for each module for deterministic behavior.
    # Each module will only import what is necessary for it to work in specific.
    # The output of each task will contain each module's dependencies

    print_indent('Getting dependencies for module: %s' % pymodule, 2)
    output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % pymodule, '%s' % debugFlag]).decode('utf8')
    print_indent('The following dependencies were found for module %s:\n' % pymodule, 4)
    print_indent(output, 6)

    for found_dep in output.split():
        found_dep = found_dep.replace(pyversion, '${PYTHON_MAJMIN}')
        bucket = core_cached if isCached(found_dep) else core_files
        if found_dep not in bucket:
            bucket.append(found_dep)


# At this point we are done with the core package.
# The old_manifest dictionary is updated only for the core package because
# all others will use this a base.


print('\n\nChecking for directories...\n')
# To improve the script speed, we check which packages contain directories
# since we will be looping through (only) those later.
for pypkg, pkg_data in old_manifest.items():
    for filedep in pkg_data['files']:
        if not isFolder(filedep):
            continue
        print_indent('%s is a directory' % filedep, 2)
        # Remember both the owning package and the directory itself, once each.
        if pypkg not in hasfolders:
            hasfolders.append(pypkg)
        if filedep not in allfolders:
            allfolders.append(filedep)



# This is the main loop that will handle each package.
# It works in a similar fashion than the step before, but
# we will now be updating a new dictionary that will eventually
# become the new manifest.
#
# The following loops through all packages in the manifest,
# through all files on each of them, and checks whether or not
# they are modules and can be imported.
# If they can be imported, then it checks for dependencies for
# each of them by launching a separate task.
# The output of that task is then parsed and the manifest is updated
# accordingly, whether it should add the module on FILES for the current package
# or if that module already belongs to another package then the current one
# will RDEPEND on it

# Handle special cases, we assume that when they were manually added
# to the manifest we knew what we were doing.
special_packages = ['misc', 'modules', 'dev', 'tests']

for pypkg in old_manifest:
    # Use an empty dict as data structure to hold data for each package and fill it up
    new_manifest[pypkg] = collections.OrderedDict()
    new_manifest[pypkg]['summary'] = old_manifest[pypkg]['summary']
    new_manifest[pypkg]['rdepends'] = []
    new_manifest[pypkg]['files'] = []

    if pypkg == 'core':
        # Core was completed by the earlier passes: keep its cached entries.
        # Copy the list so the new manifest does not alias the old one.
        new_manifest[pypkg]['cached'] = list(old_manifest[pypkg]['cached'])
    else:
        # All packages should depend on core
        new_manifest[pypkg]['rdepends'].append('core')
        new_manifest[pypkg]['cached'] = []

    print('\n')
    print('--------------------------')
    print('Handling package %s' % pypkg)
    print('--------------------------')

    if pypkg in special_packages or 'staticdev' in pypkg:
        print_indent('Passing %s package directly' % pypkg, 2)
        new_manifest[pypkg] = old_manifest[pypkg]
        continue

    for filedep in old_manifest[pypkg]['files']:
        # We already handled core on the first pass, we can ignore it now
        if pypkg == 'core':
            if filedep not in new_manifest[pypkg]['files']:
                new_manifest[pypkg]['files'].append(filedep)
            continue

        # Handle/ignore what we can't import
        if isFolder(filedep):
            new_manifest[pypkg]['files'].append(filedep)
            # Asyncio (and others) are both the package and the folder name, we should not skip those...
            path, mod = os.path.split(filedep)
            if mod != pypkg:
                continue
        if '${bindir}' in filedep:
            if filedep not in new_manifest[pypkg]['files']:
                new_manifest[pypkg]['files'].append(filedep)
            continue
        if filedep == '':
            continue
        if '${includedir}' in filedep:
            if filedep not in new_manifest[pypkg]['files']:
                new_manifest[pypkg]['files'].append(filedep)
            continue

        # Get actual module name, shouldn't be affected by libdir/bindir, etc.
        # We need to check if the imported module comes from another (e.g. sqlite3.dump)
        path, pymodule = os.path.split(filedep)
        path = os.path.basename(path)
        pymodule = os.path.splitext(os.path.basename(pymodule))[0]

        # If this condition is met, it means we need to import it from another module
        # or it's the folder itself (e.g. unittest)
        if path == pypkg:
            if pymodule:
                pymodule = path + '.' + pymodule
            else:
                pymodule = path

        # We now know that we're dealing with a python module, so we can import it
        # and check what its dependencies are.
        # We launch a separate task for each module for deterministic behavior.
        # Each module will only import what is necessary for it to work in specific.
        # The output of each task will contain each module's dependencies

        print_indent('\nGetting dependencies for module: %s' % pymodule, 2)
        output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % pymodule, '%s' % debugFlag]).decode('utf8')
        print_indent('The following dependencies were found for module %s:\n' % pymodule, 4)
        print_indent(output, 6)

        reportFILES = []
        reportRDEPS = []

        for pymodule_dep in output.split():

            # Warning: This first part is ugly
            # One of the dependencies that was found, could be inside of one of the folders included by another package
            # We need to check if this happens so we can add the package containing the folder as an rdependency
            # e.g. Folder encodings contained in codecs
            # This would be solved if no packages included any folders

            # This can be done in two ways:
            # 1 - We assume that if we take out the filename from the path we would get
            #   the folder string, then we would check if folder string is in the list of folders
            #   This would not work if a package contains a folder which contains another folder
            #   e.g. path/folder1/folder2/filename  folder_string= path/folder1/folder2
            #   folder_string would not match any value contained in the list of folders
            #
            # 2 - We do it the other way around, checking if the folder is contained in the path
            #   e.g. path/folder1/folder2/filename  folder_string= path/folder1/folder2
            #   is folder_string inside path/folder1/folder2/filename?,
            #   Yes, it works, but we waste a couple of milliseconds.

            pymodule_dep = pymodule_dep.replace(pyversion, '${PYTHON_MAJMIN}')

            # The module could have a directory named after it, e.g. xml, if we take out the filename from the path
            # we'll end up with ${libdir}, and we want ${libdir}/xml
            # (this does not depend on the folder being checked, so compute it once)
            if isFolder(pymodule_dep):
                check_path = pymodule_dep
            else:
                check_path = os.path.dirname(pymodule_dep)

            inFolders = False
            for folder in allfolders:
                # NOTE: substring match (approach 2 above), not a path-component match.
                if folder not in check_path:
                    continue
                inFolders = True  # Did we find a folder?
                # Loop only through packages which contain folders, and stop at
                # the first package that lists this folder.
                for pypkg_with_folder in hasfolders:
                    # Look in both lists. The previous
                    # "for x in files or x in cached" evaluated to
                    # "for x in (files or ...)" and never searched 'cached'.
                    folder_deps = (old_manifest[pypkg_with_folder]['files']
                                   + old_manifest[pypkg_with_folder]['cached'])
                    if folder in folder_deps:
                        print('%s directory found in %s' % (folder, pypkg_with_folder))
                        if pypkg_with_folder not in new_manifest[pypkg]['rdepends'] and pypkg_with_folder != pypkg:
                            new_manifest[pypkg]['rdepends'].append(pypkg_with_folder)
                        break

            # A folder was found so we're done with this item, we can go on
            if inFolders:
                continue

            # No directories beyond this point
            # We might already have this module on the dictionary since it could depend on a (previously checked) module
            if pymodule_dep in new_manifest[pypkg]['files'] or pymodule_dep in new_manifest[pypkg]['cached']:
                continue

            # The core package never gets here (its files are passed through at the
            # top of the file loop), so only the FILES/RDEPENDS decision remains.

            # Last step: Figure out if this belongs to FILES or RDEPENDS
            # We check if this module is already contained on another package, so we add that one
            # as an RDEPENDS, or if it's not, it means it should be contained on the current
            # package, and we should add it to FILES
            for possible_rdep in old_manifest:
                if pymodule_dep in old_manifest[possible_rdep]['files'] or pymodule_dep in old_manifest[possible_rdep]['cached']:
                    # Since we're nesting, we need to check it's not the same pypkg
                    if possible_rdep != pypkg:
                        if possible_rdep not in new_manifest[pypkg]['rdepends']:
                            # Add it to the new manifest data struct as RDEPENDS since it contains something this module needs
                            reportRDEPS.append('Adding %s to %s RDEPENDS, because it contains %s\n' % (possible_rdep, pypkg, pymodule_dep))
                            new_manifest[pypkg]['rdepends'].append(possible_rdep)
                        break
            else:
                # Since this module wasn't found on another package, it is not an RDEP,
                # so we add it to FILES for this package.
                # A module shouldn't contain itself (${libdir}/python3/sqlite3 shouldn't be on sqlite3 files)
                if os.path.basename(pymodule_dep) != pypkg:
                    reportFILES.append(('Adding %s to %s FILES\n' % (pymodule_dep, pypkg)))
                    if isCached(pymodule_dep):
                        new_manifest[pypkg]['cached'].append(pymodule_dep)
                    else:
                        new_manifest[pypkg]['files'].append(pymodule_dep)
                    if pymodule_dep.endswith('*'):
                        wildcards.append(pymodule_dep)
                    # Check for repeated files
                    if pymodule_dep not in allfiles:
                        allfiles.append(pymodule_dep)
                    elif pymodule_dep not in repeated:
                        repeated.append(pymodule_dep)

        print('\n')
        print('#################################')
        print('Summary for module %s' % pymodule)
        print('FILES found for module %s:' % pymodule)
        print(''.join(reportFILES))
        print('RDEPENDS found for module %s:' % pymodule)
        print(''.join(reportRDEPS))
        print('#################################')

print('The following FILES contain wildcards, please check if they are necessary')
print(wildcards)
print('The following FILES contain folders, please check if they are necessary')
print(hasfolders)


# Sort it just so it looks nicer
for pypkg in new_manifest:
    new_manifest[pypkg]['files'].sort()
    new_manifest[pypkg]['cached'].sort()
    new_manifest[pypkg]['rdepends'].sort()

# Create the manifest from the data structure that was built
with open('python3-manifest.json.new', 'w') as outfile:
    json.dump(new_manifest, outfile, indent=4)
    outfile.write('\n')

# JSON has no comments, so the header kept from the old manifest is put back afterwards.
prepend_comments(comments, 'python3-manifest.json.new')

# Exit with a non-zero status and an explanation if any file ended up in more than one package.
if repeated:
    error_msg = '\n\nERROR:\n'
    error_msg += 'The following files were found in more than one package),\n'
    error_msg += 'this is likely to happen when new files are introduced after an upgrade,\n'
    error_msg += 'please check which package should get it,\n modify the manifest accordingly and re-run the create_manifest task:\n'
    error_msg += '\n'.join(repeated)
    error_msg += '\n'
    sys.exit(error_msg)