# This script is used as a bitbake task to create a new python manifest
# $ bitbake python -c create_manifest
#
# Our goal is to keep python-core as small as possible and add other python
# packages only when the user needs them, hence why we split upstream python
# into several packages.
#
# In a very simplistic way what this does is: 
# Launch python and see specifically what is required for it to run at a minimum
#
# Go through the python-manifest file and launch a separate task for every single
# one of the files on each package, this task will check what was required for that
# specific module to run, these modules will be called dependencies.
# The output of such task will be a list of the modules or dependencies that were
# found for that file.
#
# Such output will be parsed by this script, we will look for each dependency on the
# manifest and if we find that another package already includes it, then we will add
# that package as an RDEPENDS to the package we are currently checking; in case we don't
# find the current dependency in any other package we will add it to the current package
# as part of FILES.
#
#
# This way we will create a new manifest from the data structure that was built during
# this process, on this new manifest each package will contain specifically only
# what it needs to run.
#
# There are some caveats which we try to deal with, such as repeated files on different
# packages, packages that include folders, wildcards, and special packages.
# It's also important to note that this method only works for python files, and shared
# libraries. Static libraries, header files and binaries need to be dealt with manually.
#
# This script differs from its python2 version mostly on how shared libraries are handled
# The manifest file for python3 has an extra field which contains the cached files for
# each package.
# The method to handle cached files does not work when a module includes a folder which
# itself contains the pycache folder, fortunately this is almost never the case.
#
# Author: Alejandro Enedino Hernandez Samaniego "aehs29" <aehs29 at gmail dot com>


import sys
import subprocess
import json
import os
import collections

# Get python version from ${PYTHON_MAJMIN}, it is expected as the first
# command line argument.
if len(sys.argv) < 2:
    sys.exit('Usage: %s <PYTHON_MAJMIN>' % sys.argv[0])
pyversion = str(sys.argv[1])

# Hack to get native python search path (for folders), not fond of it but it works for now.
# Every sys.path entry that mentions the pivot is cut right after it; when
# several entries match, the last one wins.
pivot = 'recipe-sysroot-native'
nativelibfolder = None
for p in sys.path:
    if pivot in p:
        nativelibfolder = p[:p.find(pivot)+len(pivot)]

# isFolder() builds every path it checks from nativelibfolder, so stop here
# with a clear message instead of failing later on an undefined name.
if nativelibfolder is None:
    sys.exit('ERROR: no entry of sys.path contains "%s", unable to locate the native sysroot' % pivot)

# Empty dict to hold the whole manifest
new_manifest = collections.OrderedDict()

# Check for repeated files, folders and wildcards
allfiles = []
repeated = []
wildcards = []

# Packages that ship at least one folder, and the folders themselves
hasfolders = []
allfolders = []

def isFolder(value):
    """Tell whether a manifest entry names a directory in the native sysroot.

    ${PYTHON_MAJMIN} is replaced by pyversion, then ${libdir} is tried as
    /usr/lib, /usr/lib64 and /usr/lib32 (in that order) below
    nativelibfolder.  Returns True as soon as one candidate is an existing
    directory, False otherwise.
    """
    expanded = value.replace('${PYTHON_MAJMIN}', pyversion)
    for libdir in ('/usr/lib', '/usr/lib64', '/usr/lib32'):
        candidate = expanded.replace('${libdir}', nativelibfolder + libdir)
        if os.path.isdir(candidate):
            return True
    return False

def isCached(item):
    """Return True when item mentions a __pycache__ folder, False otherwise."""
    return '__pycache__' in item

def prepend_comments(comments, json_manifest):
    """Rewrite the file json_manifest so that it starts with comments.

    The current contents are read first, then the file is rewritten from
    its beginning with comments followed by those contents.
    """
    with open(json_manifest, 'r+') as handle:
        existing = handle.read()
        combined = comments + existing
        # Go back to the start so the header lands before the old contents
        handle.seek(0)
        handle.write(combined)

# Read existing JSON manifest
# The JSON format doesn't allow comments, so the file starts with a comment
# header whose last line carries the '# EOC' marker; everything after that
# line is the JSON document. The header is kept in `comments` so it can be
# written back on top of the new manifest.
with open('python3-manifest.json') as manifest:
    manifest_str = manifest.read()

marker_pos = manifest_str.find('# EOC')
if marker_pos == -1:
    # No marker means there is no header to preserve, parse the whole file
    json_start = 0
else:
    # Cut right after the end of the marker line, whatever follows the marker on it
    marker_eol = manifest_str.find('\n', marker_pos)
    json_start = len(manifest_str) if marker_eol == -1 else marker_eol + 1

comments = manifest_str[:json_start]
manifest_str = manifest_str[json_start:]
old_manifest = json.loads(manifest_str, object_pairs_hook=collections.OrderedDict)

#
# First pass: find out what the core package itself needs, because everything
# else is based on the fact that core is actually working.
# Not exactly the same as the other passes so it is not a function.
#

print ('Getting dependencies for package: core')


# The core dependencies reported by this special call are merged into the
# old manifest, nothing it currently holds is removed.
# This way, when other packages look for their dependencies in the new core
# package, they will still find them even though they check old_manifest.

output = subprocess.check_output([sys.executable, 'get_module_deps3.py', 'python-core-package']).decode('utf8')
for reported in output.split():
    coredep = reported.replace(pyversion,'${PYTHON_MAJMIN}')
    core_entry = old_manifest['core']
    # Cached files and regular files are tracked in separate lists
    target = core_entry['cached'] if isCached(coredep) else core_entry['files']
    if coredep not in target:
        target.append(coredep)


# The second step is to loop through the existing files contained in the core package
# according to the old manifest, identify if they are modules, or some other type
# of file that we can't import (directories, binaries, configs) in which case we
# can only assume they were added correctly (manually) so we leave them alone.
#
# NOTE: the list being iterated grows inside the loop, so the dependencies that
# get appended below are visited as well later in this same loop.

for filedep in old_manifest['core']['files']:
    if isFolder(filedep):
        # The folder is already part of FILES (that is where it came from),
        # only make sure a cache folder is tracked as cached too.
        if isCached(filedep) and filedep not in old_manifest['core']['cached']:
            old_manifest['core']['cached'].append(filedep)
        continue

    # Binaries, empty entries and headers can't be imported and are already
    # part of FILES, there is nothing to do for them.
    if '${bindir}' in filedep or filedep == '' or '${includedir}' in filedep:
        continue

    # Get actual module name, shouldn't be affected by libdir/bindir, etc.
    pymodule = os.path.splitext(os.path.basename(os.path.normpath(filedep)))[0]


    # We now know that we're dealing with a python module, so we can import it
    # and check what its dependencies are.
    # We launch a separate task for each module for deterministic behavior.
    # Each module will only import what is necessary for it to work in specific.
    # The output of each task will contain each module's dependencies

    print ('Getting dependencies for module: %s' % pymodule)
    output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % pymodule]).decode('utf8')
    print ('The following dependencies were found for module %s:\n' % pymodule)
    print (output)


    for pymodule_dep in output.split():
        pymodule_dep = pymodule_dep.replace(pyversion,'${PYTHON_MAJMIN}')

        # Cached files and regular files are tracked in separate lists
        if isCached(pymodule_dep):
            target = old_manifest['core']['cached']
        else:
            target = old_manifest['core']['files']
        if pymodule_dep not in target:
            target.append(pymodule_dep)


# At this point we are done with the core package.
# The old_manifest dictionary is updated only for the core package because
# all others will use it as a base.


# To improve the script speed, we record which packages contain directories
# (hasfolders) and which directories those are (allfolders), since we will be
# looping through (only) those later.
for pkg_name in old_manifest:
    for entry in old_manifest[pkg_name]['files']:
        if not isFolder(entry):
            continue
        print ('%s is a folder' % entry)
        if pkg_name not in hasfolders:
            hasfolders.append(pkg_name)
        if entry not in allfolders:
            allfolders.append(entry)



# This is the main loop that will handle each package.
# It works in a similar fashion to the step before, but
# we will now be updating a new dictionary that will eventually
# become the new manifest.
#
# The following loops through all packages in the manifest,
# through all files on each of them, and checks whether or not
# they are modules and can be imported.
# If they can be imported, then it checks for dependencies for
# each of them by launching a separate task.
# The output of that task is then parsed and the manifest is updated
# accordingly, whether it should add the module on FILES for the current package
# or if that module already belongs to another package then the current one
# will RDEPEND on it

# Special cases, we assume that when they were manually added
# to the manifest we knew what we were doing.
special_packages = ['misc', 'modules', 'dev', 'tests']

for pypkg in old_manifest:
    # Use an empty dict as data structure to hold data for each package and fill it up
    new_manifest[pypkg] = collections.OrderedDict()
    new_manifest[pypkg]['summary'] = old_manifest[pypkg]['summary']
    new_manifest[pypkg]['rdepends'] = []
    new_manifest[pypkg]['files'] = []
    new_manifest[pypkg]['cached'] = old_manifest[pypkg]['cached']

    # All packages should depend on core
    if pypkg != 'core':
        new_manifest[pypkg]['rdepends'].append('core')
        new_manifest[pypkg]['cached'] = []

    print('\n')
    print('--------------------------')
    print ('Handling package %s' % pypkg)
    print('--------------------------')

    # Special packages are passed along exactly as the old manifest has them
    if pypkg in special_packages or 'staticdev' in pypkg:
        print('Passing %s package directly' % pypkg)
        new_manifest[pypkg] = old_manifest[pypkg]
        continue

    # We already handled core on the first pass, its FILES are simply
    # carried over (without duplicates) and no module is inspected again.
    if pypkg == 'core':
        for filedep in old_manifest[pypkg]['files']:
            if filedep not in new_manifest[pypkg]['files']:
                new_manifest[pypkg]['files'].append(filedep)
        continue

    for filedep in old_manifest[pypkg]['files']:
        # Handle/ignore what we can't import
        if isFolder(filedep):
            new_manifest[pypkg]['files'].append(filedep)
            # Asyncio (and others) are both the package and the folder name, we should not skip those...
            path,mod = os.path.split(filedep)
            if mod != pypkg:
                continue
        if '${bindir}' in filedep or '${includedir}' in filedep:
            if filedep not in new_manifest[pypkg]['files']:
                new_manifest[pypkg]['files'].append(filedep)
            continue
        if filedep == '':
            continue

        # Get actual module name, shouldn't be affected by libdir/bindir, etc.
        # We need to check if the imported module comes from another (e.g. sqlite3.dump)
        path,pymodule = os.path.split(filedep)
        path = os.path.basename(path)
        pymodule = os.path.splitext(os.path.basename(pymodule))[0]

        # If this condition is met, it means we need to import it from another module
        # or it's the folder itself (e.g. unittest)
        if path == pypkg:
            if pymodule:
                pymodule = path + '.' + pymodule
            else:
                pymodule = path



        # We now know that we're dealing with a python module, so we can import it
        # and check what its dependencies are.
        # We launch a separate task for each module for deterministic behavior.
        # Each module will only import what is necessary for it to work in specific.
        # The output of each task will contain each module's dependencies

        print ('\nGetting dependencies for module: %s' % pymodule)
        output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % pymodule]).decode('utf8')
        print ('The following dependencies were found for module %s:\n' % pymodule)
        print (output)

        reportFILES = []
        reportRDEPS = []

        for pymodule_dep in output.split():

            # Warning: This first part is ugly
            # One of the dependencies that was found, could be inside of one of the folders included by another package
            # We need to check if this happens so we can add the package containing the folder as an rdependency
            # e.g. Folder encodings contained in codecs
            # This would be solved if no packages included any folders

            # This can be done in two ways:
            # 1 - We assume that if we take out the filename from the path we would get
            #   the folder string, then we would check if folder string is in the list of folders
            #   This would not work if a package contains a folder which contains another folder
            #   e.g. path/folder1/folder2/filename  folder_string= path/folder1/folder2
            #   folder_string would not match any value contained in the list of folders
            #
            # 2 - We do it the other way around, checking if the folder is contained in the path
            #   e.g. path/folder1/folder2/filename  folder_string= path/folder1/folder2
            #   is folder_string inside path/folder1/folder2/filename?,
            #   Yes, it works, but we waste a couple of milliseconds.

            pymodule_dep = pymodule_dep.replace(pyversion,'${PYTHON_MAJMIN}')

            # The module could have a directory named after it, e.g. xml, if we take out the filename from the path
            # we'll end up with ${libdir}, and we want ${libdir}/xml
            # (this does not depend on the folder being checked, so it is computed only once)
            if isFolder(pymodule_dep):
                check_path = pymodule_dep
            else:
                check_path = os.path.dirname(pymodule_dep)

            inFolders = False
            for folder in allfolders:
                if folder not in check_path:
                    continue
                inFolders = True # Did we find a folder?
                # Loop only through packages which contain folders, the first
                # one that lists this folder (in FILES or in cached) becomes an RDEPENDS
                for pypkg_with_folder in hasfolders:
                    owner = old_manifest[pypkg_with_folder]
                    if folder in owner['files'] or folder in owner['cached']:
                        print ('%s folder found in %s' % (folder, pypkg_with_folder))
                        if pypkg_with_folder not in new_manifest[pypkg]['rdepends'] and pypkg_with_folder != pypkg:
                            new_manifest[pypkg]['rdepends'].append(pypkg_with_folder)
                        break

            # A folder was found so we're done with this item, we can go on
            if inFolders:
                continue



            # No directories beyond this point
            # We might already have this module on the dictionary since it could depend on a (previously checked) module
            if pymodule_dep in new_manifest[pypkg]['files'] or pymodule_dep in new_manifest[pypkg]['cached']:
                continue

            # Last step: Figure out if this belongs to FILES or RDEPENDS
            # We check if this module is already contained on another package, so we add that one
            # as an RDEPENDS, or if it's not, it means it should be contained on the current
            # package, and we should add it to FILES
            for possible_rdep in old_manifest:
                # Debug
                # print('Checking %s ' % pymodule_dep + ' in %s' % possible_rdep)
                if pymodule_dep in old_manifest[possible_rdep]['files'] or pymodule_dep in old_manifest[possible_rdep]['cached']:
                    # Since we're nesting, we need to check it's not the same pypkg
                    if possible_rdep != pypkg:
                        if possible_rdep not in new_manifest[pypkg]['rdepends']:
                            # Add it to the new manifest data struct as RDEPENDS since it contains something this module needs
                            reportRDEPS.append('Adding %s to %s RDEPENDS, because it contains %s\n' % (possible_rdep, pypkg, pymodule_dep))
                            new_manifest[pypkg]['rdepends'].append(possible_rdep)
                        break
            else:
                # The loop above ended without a break: this module wasn't found on
                # another package, it is not an RDEP, so we add it to FILES for this package.
                # A module shouldn't contain itself (${libdir}/python3/sqlite3 shouldn't be on sqlite3 files)
                if os.path.basename(pymodule_dep) != pypkg:
                    reportFILES.append(('Adding %s to %s FILES\n' % (pymodule_dep, pypkg)))
                    if isCached(pymodule_dep):
                        new_manifest[pypkg]['cached'].append(pymodule_dep)
                    else:
                        new_manifest[pypkg]['files'].append(pymodule_dep)
                    if pymodule_dep.endswith('*'):
                        wildcards.append(pymodule_dep)
                    # Check for repeated files
                    if pymodule_dep not in allfiles:
                        allfiles.append(pymodule_dep)
                    else:
                        if pymodule_dep not in repeated:
                            repeated.append(pymodule_dep)

        print('\n')
        print('#################################')
        print('Summary for module %s' % pymodule)
        print('FILES found for module %s:' % pymodule)
        print(''.join(reportFILES))
        print('RDEPENDS found for module %s:' % pymodule)
        print(''.join(reportRDEPS))
        print('#################################')

print('The following FILES contain wildcards, please check if they are necessary')
print(wildcards)
print('The following FILES contain folders, please check if they are necessary')
print(hasfolders)


# Sort every list of every package, just so the result looks nicer
for pkg_entry in new_manifest.values():
    for field in ('files', 'cached', 'rdepends'):
        pkg_entry[field].sort()

# Create the manifest from the data structure that was built
with open('python3-manifest.json.new','w') as outfile:
    json.dump(new_manifest,outfile, indent=4)
    outfile.write('\n')

# Put the comment header of the old manifest back on top of the new one
prepend_comments(comments,'python3-manifest.json.new')

# Files that ended up in more than one package need manual attention,
# exit with an error message that lists them.
if repeated:
    error_parts = [
        '\n\nERROR:\n',
        'The following files are repeated (contained in more than one package),\n',
        'this is likely to happen when new files are introduced after an upgrade,\n',
        'please check which package should get it,\n modify the manifest accordingly and re-run the create_manifest task:\n',
        '\n'.join(repeated),
        '\n',
    ]
    sys.exit(''.join(error_parts))