# This script is used as a bitbake task to create a new python manifest
# $ bitbake python -c create_manifest
#
# Our goal is to keep python-core as small as possible and add other python
# packages only when the user needs them, hence why we split upstream python
# into several packages.
#
# In a very simplistic way what this does is:
# Launch python and see specifically what is required for it to run at a minimum
#
# Go through the python-manifest file and launch a separate task for every single
# one of the files on each package, this task will check what was required for that
# specific module to run, these modules will be called dependencies.
# The output of such task will be a list of the modules or dependencies that were
# found for that file.
#
# Such output will be parsed by this script, we will look for each dependency on the
# manifest and if we find that another package already includes it, then we will add
# that package as an RDEPENDS to the package we are currently checking; in case we don't
# find the current dependency on any other package we will add it to the current package
# as part of FILES.
#
#
# This way we will create a new manifest from the data structure that was built during
# this process, on this new manifest each package will contain specifically only
# what it needs to run.
#
# There are some caveats which we try to deal with, such as repeated files on different
# packages, packages that include folders, wildcards, and special packages.
# It's also important to note that this method only works for python files, and shared
# libraries. Static libraries, header files and binaries need to be dealt with manually.
#
# This script differs from its python2 version mostly on how shared libraries are handled
# The manifest file for python3 has an extra field which contains the cached files for
# each package.
# The method to handle cached files does not work when a module includes a folder which
# itself contains the pycache folder, gladly this is almost never the case.
#
# Author: Alejandro Enedino Hernandez Samaniego "aehs29" <aehs29 at gmail dot com>


import sys
import subprocess
import json
import os
import collections

# Get python version from ${PYTHON_MAJMIN}
if len(sys.argv) < 2:
    sys.exit('Usage: %s <PYTHON_MAJMIN>' % sys.argv[0])
pyversion = str(sys.argv[1])

# Hack to get native python search path (for folders), not fond of it but it works for now
pivot = 'recipe-sysroot-native'
nativelibfolder = None
for p in sys.path:
    if pivot in p:
        # Keep everything up to (and including) the pivot directory name.
        # The last matching sys.path entry wins.
        nativelibfolder = p[:p.find(pivot)+len(pivot)]
if nativelibfolder is None:
    # Without this path isFolder() cannot resolve ${libdir}; fail early with a
    # clear message instead of a NameError on the first isFolder() call.
    sys.exit('ERROR: no entry containing "%s" was found in sys.path' % pivot)

# Empty dict to hold the whole manifest
new_manifest = collections.OrderedDict()

# Check for repeated files, folders and wildcards
allfiles = []
repeated = []
wildcards = []

hasfolders = []
allfolders = []


def isFolder(value):
    """Return True if the manifest entry `value` is an existing directory.

    ${PYTHON_MAJMIN} is replaced with `pyversion`, and ${libdir} is tried as
    usr/lib, usr/lib64 and usr/lib32 (in that order) under `nativelibfolder`.
    """
    value = value.replace('${PYTHON_MAJMIN}', pyversion)
    return any(
        os.path.isdir(value.replace('${libdir}', nativelibfolder + libdir))
        for libdir in ('/usr/lib', '/usr/lib64', '/usr/lib32')
    )


def isCached(item):
    """Return True if `item` contains '__pycache__'."""
    return '__pycache__' in item


def prepend_comments(comments, json_manifest):
    """Rewrite the file `json_manifest` in place with `comments` placed before its contents."""
    with open(json_manifest, 'r+') as manifest:
        json_contents = manifest.read()
        manifest.seek(0, 0)
        manifest.write(comments + json_contents)


# Read existing JSON manifest
with open('python3-manifest.json') as manifest:
    manifest_str = manifest.read()

# The JSON format doesn't allow comments so we hack the call to keep the comments using a marker
eoc_marker = manifest_str.find('# EOC')
if eoc_marker == -1:
    # No marker: treat the whole file as JSON with no leading comments
    json_start = 0
else:
    json_start = eoc_marker + 6 # EOC + \n
comments = manifest_str[:json_start]
old_manifest = json.loads(manifest_str[json_start:], object_pairs_hook=collections.OrderedDict)

#
# First pass to get core-package functionality, because we base everything on the fact that core is actually working
# Not exactly the same so it should not be a function
#

print ('Getting dependencies for package: core')


# This special call gets the core dependencies and
# appends to the old manifest so it doesn't hurt what it
# currently holds.
# This way when other packages check for dependencies
# on the new core package, they will still find them
# even when checking the old_manifest

output = subprocess.check_output([sys.executable, 'get_module_deps3.py', 'python-core-package']).decode('utf8')
for coredep in output.split():
    coredep = coredep.replace(pyversion, '${PYTHON_MAJMIN}')
    # Cached (__pycache__) entries and regular files live in separate lists
    core_list = old_manifest['core']['cached' if isCached(coredep) else 'files']
    if coredep not in core_list:
        core_list.append(coredep)


# The second step is to loop through the existing files contained in the core package
# according to the old manifest, identify if they are modules, or some other type
# of file that we can't import (directories, binaries, configs) in which case we
# can only assume they were added correctly (manually) so we ignore those and
# pass them to the manifest directly.

# NOTE: dependencies found below are appended to old_manifest['core']['files']
# while that same list is being iterated, so they are visited later in this loop.
for filedep in old_manifest['core']['files']:
    if isFolder(filedep):
        # Folders can't be imported. The entry is already in 'files' (we are
        # iterating it); only a __pycache__ folder is also recorded as cached.
        if isCached(filedep) and filedep not in old_manifest['core']['cached']:
            old_manifest['core']['cached'].append(filedep)
        continue
    # Binaries, empty entries and headers can't be imported either, and they
    # are already part of the list being iterated, so there is nothing to add.
    if '${bindir}' in filedep or filedep == '' or '${includedir}' in filedep:
        continue

    # Get actual module name, shouldn't be affected by libdir/bindir, etc.
    pymodule = os.path.splitext(os.path.basename(os.path.normpath(filedep)))[0]


    # We now know that we're dealing with a python module, so we can import it
    # and check what its dependencies are.
    # We launch a separate task for each module for deterministic behavior.
    # Each module will only import what is necessary for it to work in specific.
    # The output of each task will contain each module's dependencies

    print ('Getting dependencies for module: %s' % pymodule)
    output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % pymodule]).decode('utf8')
    print ('The following dependencies were found for module %s:\n' % pymodule)
    print (output)


    for pymodule_dep in output.split():
        pymodule_dep = pymodule_dep.replace(pyversion, '${PYTHON_MAJMIN}')

        # Cached (__pycache__) entries and regular files live in separate lists
        core_list = old_manifest['core']['cached' if isCached(pymodule_dep) else 'files']
        if pymodule_dep not in core_list:
            core_list.append(pymodule_dep)


# At this point we are done with the core package.
# The old_manifest dictionary is updated only for the core package because
# all others will use this as a base.


# To improve the script speed, we check which packages contain directories
# since we will be looping through (only) those later.
for pypkg in old_manifest:
    for filedep in old_manifest[pypkg]['files']:
        if isFolder(filedep):
            print ('%s is a folder' % filedep)
            # hasfolders holds package names, allfolders holds the folder entries
            if pypkg not in hasfolders:
                hasfolders.append(pypkg)
            if filedep not in allfolders:
                allfolders.append(filedep)



# This is the main loop that will handle each package.
# It works in a similar fashion to the step before, but
# we will now be updating a new dictionary that will eventually
# become the new manifest.
#
# The following loops through all packages in the manifest,
# through all files on each of them, and checks whether or not
# they are modules and can be imported.
# If they can be imported, then it checks for dependencies for
# each of them by launching a separate task.
# The output of that task is then parsed and the manifest is updated
# accordingly, whether it should add the module on FILES for the current package
# or if that module already belongs to another package then the current one
# will RDEPEND on it

# Handle special cases, we assume that when they were manually added
# to the manifest we knew what we were doing.
special_packages = ['misc', 'modules', 'dev', 'tests']

for pypkg in old_manifest:
    # Use an empty dict as data structure to hold data for each package and fill it up
    new_manifest[pypkg] = collections.OrderedDict()
    new_manifest[pypkg]['summary'] = old_manifest[pypkg]['summary']
    new_manifest[pypkg]['rdepends'] = []
    new_manifest[pypkg]['files'] = []
    new_manifest[pypkg]['cached'] = old_manifest[pypkg]['cached']

    # All packages should depend on core
    if pypkg != 'core':
        new_manifest[pypkg]['rdepends'].append('core')
        new_manifest[pypkg]['cached'] = []

    print('\n')
    print('--------------------------')
    print ('Handling package %s' % pypkg)
    print('--------------------------')

    if pypkg in special_packages or 'staticdev' in pypkg:
        print('Passing %s package directly' % pypkg)
        new_manifest[pypkg] = old_manifest[pypkg]
        continue

    for filedep in old_manifest[pypkg]['files']:
        # We already handled core on the first pass, we can ignore it now
        if pypkg == 'core':
            if filedep not in new_manifest[pypkg]['files']:
                new_manifest[pypkg]['files'].append(filedep)
            continue

        # Handle/ignore what we can't import
        if isFolder(filedep):
            new_manifest[pypkg]['files'].append(filedep)
            # Asyncio (and others) are both the package and the folder name, we should not skip those...
            if os.path.basename(filedep) != pypkg:
                continue
        if '${bindir}' in filedep:
            if filedep not in new_manifest[pypkg]['files']:
                new_manifest[pypkg]['files'].append(filedep)
            continue
        if filedep == '':
            continue
        if '${includedir}' in filedep:
            if filedep not in new_manifest[pypkg]['files']:
                new_manifest[pypkg]['files'].append(filedep)
            continue

        # Get actual module name, shouldn't be affected by libdir/bindir, etc.
        # We need to check if the imported module comes from another (e.g. sqlite3.dump)
        path, pymodule = os.path.split(filedep)
        path = os.path.basename(path)
        pymodule = os.path.splitext(os.path.basename(pymodule))[0]

        # If this condition is met, it means we need to import it from another module
        # or it's the folder itself (e.g. unittest)
        if path == pypkg:
            if pymodule:
                pymodule = path + '.' + pymodule
            else:
                pymodule = path



        # We now know that we're dealing with a python module, so we can import it
        # and check what its dependencies are.
        # We launch a separate task for each module for deterministic behavior.
        # Each module will only import what is necessary for it to work in specific.
        # The output of each task will contain each module's dependencies

        print ('\nGetting dependencies for module: %s' % pymodule)
        output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % pymodule]).decode('utf8')
        print ('The following dependencies were found for module %s:\n' % pymodule)
        print (output)

        reportFILES = []
        reportRDEPS = []

        for pymodule_dep in output.split():

            # Warning: This first part is ugly
            # One of the dependencies that was found, could be inside of one of the folders included by another package
            # We need to check if this happens so we can add the package containing the folder as an rdependency
            # e.g. Folder encodings contained in codecs
            # This would be solved if no packages included any folders

            # This can be done in two ways:
            # 1 - We assume that if we take out the filename from the path we would get
            #   the folder string, then we would check if folder string is in the list of folders
            #   This would not work if a package contains a folder which contains another folder
            #   e.g. path/folder1/folder2/filename  folder_string= path/folder1/folder2
            #   folder_string would not match any value contained in the list of folders
            #
            # 2 - We do it the other way around, checking if the folder is contained in the path
            #   e.g. path/folder1/folder2/filename  folder_string= path/folder1/folder2
            #   is folder_string inside path/folder1/folder2/filename?,
            #   Yes, it works, but we waste a couple of milliseconds.

            pymodule_dep = pymodule_dep.replace(pyversion, '${PYTHON_MAJMIN}')

            # The module could have a directory named after it, e.g. xml, if we take out the filename from the path
            # we'll end up with ${libdir}, and we want ${libdir}/xml
            # (computed once per dependency, it does not depend on the folder being checked)
            if isFolder(pymodule_dep):
                check_path = pymodule_dep
            else:
                check_path = os.path.dirname(pymodule_dep)

            inFolders = False
            for folder in allfolders:
                if folder not in check_path:
                    continue
                inFolders = True # Did we find a folder?
                # Loop only through packages which contain folders and stop at
                # the first one that lists this folder in FILES or cached.
                for pypkg_with_folder in hasfolders:
                    folder_owner = old_manifest[pypkg_with_folder]
                    if folder in folder_owner['files'] or folder in folder_owner['cached']:
                        print ('%s folder found in %s' % (folder, pypkg_with_folder))
                        if pypkg_with_folder not in new_manifest[pypkg]['rdepends'] and pypkg_with_folder != pypkg:
                            new_manifest[pypkg]['rdepends'].append(pypkg_with_folder)
                        break

            # A folder was found so we're done with this item, we can go on
            if inFolders:
                continue



            # No directories beyond this point
            # We might already have this module on the dictionary since it could depend on a (previously checked) module
            if pymodule_dep in new_manifest[pypkg]['files'] or pymodule_dep in new_manifest[pypkg]['cached']:
                continue

            # core never gets here: its entries are passed through at the top
            # of the files loop, so only non-core packages are handled below.

            # Last step: Figure out if this belongs to FILES or RDEPENDS
            # We check if this module is already contained on another package, so we add that one
            # as an RDEPENDS, or if it's not, it means it should be contained on the current
            # package, and we should add it to FILES
            for possible_rdep in old_manifest:
                # Debug
                # print('Checking %s ' % pymodule_dep + ' in %s' % possible_rdep)
                if pymodule_dep in old_manifest[possible_rdep]['files'] or pymodule_dep in old_manifest[possible_rdep]['cached']:
                    # Since we're nesting, we need to check it's not the same pypkg
                    if possible_rdep != pypkg:
                        if possible_rdep not in new_manifest[pypkg]['rdepends']:
                            # Add it to the new manifest data struct as RDEPENDS since it contains something this module needs
                            reportRDEPS.append('Adding %s to %s RDEPENDS, because it contains %s\n' % (possible_rdep, pypkg, pymodule_dep))
                            new_manifest[pypkg]['rdepends'].append(possible_rdep)
                        break
            else:
                # for/else: only reached when the loop above did not break.
                # Since this module wasn't found on another package, it is not an RDEP,
                # so we add it to FILES for this package.
                # A module shouldn't contain itself (${libdir}/python3/sqlite3 shouldn't be on sqlite3 files)
                if os.path.basename(pymodule_dep) != pypkg:
                    reportFILES.append(('Adding %s to %s FILES\n' % (pymodule_dep, pypkg)))
                    if isCached(pymodule_dep):
                        new_manifest[pypkg]['cached'].append(pymodule_dep)
                    else:
                        new_manifest[pypkg]['files'].append(pymodule_dep)
                    if pymodule_dep.endswith('*'):
                        wildcards.append(pymodule_dep)
                    # Check for repeated files
                    if pymodule_dep not in allfiles:
                        allfiles.append(pymodule_dep)
                    elif pymodule_dep not in repeated:
                        repeated.append(pymodule_dep)

        print('\n')
        print('#################################')
        print('Summary for module %s' % pymodule)
        print('FILES found for module %s:' % pymodule)
        print(''.join(reportFILES))
        print('RDEPENDS found for module %s:' % pymodule)
        print(''.join(reportRDEPS))
        print('#################################')

print('The following FILES contain wildcards, please check if they are necessary')
print(wildcards)
print('The following FILES contain folders, please check if they are necessary')
print(hasfolders)


# Sort it just so it looks nicer
for pypkg in new_manifest:
    new_manifest[pypkg]['files'].sort()
    new_manifest[pypkg]['cached'].sort()
    new_manifest[pypkg]['rdepends'].sort()

# Create the manifest from the data structure that was built
with open('python3-manifest.json.new', 'w') as outfile:
    json.dump(new_manifest, outfile, indent=4)
    outfile.write('\n')

prepend_comments(comments, 'python3-manifest.json.new')

# The new manifest is written first; repeated files then make the script exit with an error message
if repeated:
    error_msg = '\n\nERROR:\n'
    error_msg += 'The following files are repeated (contained in more than one package),\n'
    error_msg += 'this is likely to happen when new files are introduced after an upgrade,\n'
    error_msg += 'please check which package should get it,\n modify the manifest accordingly and re-run the create_manifest task:\n'
    error_msg += '\n'.join(repeated)
    error_msg += '\n'
    sys.exit(error_msg)