Skip to content

Commit

Permalink
Merge pull request #437 from jherland/egg-metadata-work
Browse files Browse the repository at this point in the history
Fix .files and inferred packages_distributions for .egg-info packages
  • Loading branch information
jaraco committed Apr 10, 2023
2 parents 5811d73 + b8a8b5d commit 3112653
Show file tree
Hide file tree
Showing 5 changed files with 226 additions and 17 deletions.
6 changes: 6 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
v6.3.0
======

* #115: Support ``installed-files.txt`` for ``Distribution.files``
when present.

v6.2.1
======

Expand Down
58 changes: 51 additions & 7 deletions importlib_metadata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import functools
import itertools
import posixpath
import contextlib
import collections
import inspect

Expand Down Expand Up @@ -461,8 +462,8 @@ def files(self):
:return: List of PackagePath for this distribution or None
Result is `None` if the metadata file that enumerates files
(i.e. RECORD for dist-info or SOURCES.txt for egg-info) is
missing.
(i.e. RECORD for dist-info, or installed-files.txt or
SOURCES.txt for egg-info) is missing.
Result may be empty if the metadata exists but is empty.
"""

Expand All @@ -475,9 +476,19 @@ def make_file(name, hash=None, size_str=None):

@pass_none
def make_files(lines):
return list(starmap(make_file, csv.reader(lines)))
return starmap(make_file, csv.reader(lines))

return make_files(self._read_files_distinfo() or self._read_files_egginfo())
@pass_none
def skip_missing_files(package_paths):
return list(filter(lambda path: path.locate().exists(), package_paths))

return skip_missing_files(
make_files(
self._read_files_distinfo()
or self._read_files_egginfo_installed()
or self._read_files_egginfo_sources()
)
)

def _read_files_distinfo(self):
"""
Expand All @@ -486,10 +497,43 @@ def _read_files_distinfo(self):
text = self.read_text('RECORD')
return text and text.splitlines()

def _read_files_egginfo(self):
def _read_files_egginfo_installed(self):
"""
SOURCES.txt might contain literal commas, so wrap each line
in quotes.
Read installed-files.txt and return lines in a similar
CSV-parsable format as RECORD: each file must be placed
relative to the site-packages directory, and must also be
quoted (since file names can contain literal commas).
This file is written when the package is installed by pip,
but it might not be written for other installation methods.
Hence, even if we can assume that this file is accurate
when it exists, we cannot assume that it always exists.
"""
text = self.read_text('installed-files.txt')
# We need to prepend the .egg-info/ subdir to the lines in this file.
# But this subdir is only available in the PathDistribution's self._path
# which is not easily accessible from this base class...
subdir = getattr(self, '_path', None)
if not text or not subdir:
return
with contextlib.suppress(Exception):
ret = [
str((subdir / line).resolve().relative_to(self.locate_file('')))
for line in text.splitlines()
]
return map('"{}"'.format, ret)

def _read_files_egginfo_sources(self):
"""
Read SOURCES.txt and return lines in a similar CSV-parsable
format as RECORD: each file name must be quoted (since it
might contain literal commas).
Note that SOURCES.txt is not a reliable source for what
files are installed by a package. This file is generated
for a source archive, and the files that are present
there (e.g. setup.py) may not correctly reflect the files
that are present after the package has been installed.
"""
text = self.read_text('SOURCES.txt')
return text and map('"{}"'.format, text.splitlines())
Expand Down
97 changes: 95 additions & 2 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,10 @@ def setUp(self):


# Except for python/mypy#731, prefer to define
# FilesDef = Dict[str, Union['FilesDef', str]]
FilesDef = Dict[str, Union[Dict[str, Union[Dict[str, str], str]], str]]
# FilesDef = Dict[str, Union['FilesDef', str, bytes]]
FilesDef = Dict[
str, Union[Dict[str, Union[Dict[str, Union[str, bytes]], str, bytes]], str, bytes]
]


class DistInfoPkg(OnSysPath, SiteDir):
Expand Down Expand Up @@ -214,6 +216,97 @@ def setUp(self):
build_files(EggInfoPkg.files, prefix=self.site_dir)


class EggInfoPkgPipInstalledNoToplevel(OnSysPath, SiteDir):
# Fixture describing an .egg-info distribution named "egg_with_module-pkg":
# its metadata directory carries PKG-INFO, SOURCES.txt and
# installed-files.txt but no top_level.txt, and a single module file,
# egg_with_module.py, sits next to the metadata directory.
files: FilesDef = {
"egg_with_module_pkg.egg-info": {
"PKG-INFO": "Name: egg_with_module-pkg",
# SOURCES.txt is made from the source archive, and contains files
# (setup.py) that are not present after installation.
"SOURCES.txt": """
egg_with_module.py
setup.py
egg_with_module_pkg.egg-info/PKG-INFO
egg_with_module_pkg.egg-info/SOURCES.txt
egg_with_module_pkg.egg-info/top_level.txt
""",
# installed-files.txt is written by pip, and is a strictly more
# accurate source than SOURCES.txt as to the installed contents of
# the package.
# Its entries are written relative to the .egg-info directory, hence
# the "../" prefix on the module. Note that it lists top_level.txt
# even though this fixture does not create that file.
"installed-files.txt": """
../egg_with_module.py
PKG-INFO
SOURCES.txt
top_level.txt
""",
# missing top_level.txt (to trigger fallback to installed-files.txt)
},
"egg_with_module.py": """
def main():
print("hello world")
""",
}

def setUp(self):
# Run the base-class setUp first, then pass the tree declared above to
# build_files with this test's site_dir as the prefix.
super().setUp()
build_files(EggInfoPkgPipInstalledNoToplevel.files, prefix=self.site_dir)


class EggInfoPkgPipInstalledNoModules(OnSysPath, SiteDir):
# Fixture describing an .egg-info distribution named
# "egg_with_no_modules-pkg": only the metadata directory is declared (no
# module files), and its top_level.txt holds nothing but a newline.
files: FilesDef = {
"egg_with_no_modules_pkg.egg-info": {
"PKG-INFO": "Name: egg_with_no_modules-pkg",
# SOURCES.txt is made from the source archive, and contains files
# (setup.py) that are not present after installation.
"SOURCES.txt": """
setup.py
egg_with_no_modules_pkg.egg-info/PKG-INFO
egg_with_no_modules_pkg.egg-info/SOURCES.txt
egg_with_no_modules_pkg.egg-info/top_level.txt
""",
# installed-files.txt is written by pip, and is a strictly more
# accurate source than SOURCES.txt as to the installed contents of
# the package.
"installed-files.txt": """
PKG-INFO
SOURCES.txt
top_level.txt
""",
# top_level.txt correctly reflects that no modules are installed
# NOTE(review): given as bytes rather than str -- presumably so that
# build_files writes the lone newline verbatim; confirm against
# build_files.
"top_level.txt": b"\n",
},
}

def setUp(self):
# Run the base-class setUp first, then pass the tree declared above to
# build_files with this test's site_dir as the prefix.
super().setUp()
build_files(EggInfoPkgPipInstalledNoModules.files, prefix=self.site_dir)


class EggInfoPkgSourcesFallback(OnSysPath, SiteDir):
# Fixture describing an .egg-info distribution named "sources_fallback-pkg":
# its metadata directory carries only PKG-INFO and SOURCES.txt (neither
# installed-files.txt nor top_level.txt), and a single module file,
# sources_fallback.py, sits next to the metadata directory.
files: FilesDef = {
"sources_fallback_pkg.egg-info": {
"PKG-INFO": "Name: sources_fallback-pkg",
# SOURCES.txt is made from the source archive, and contains files
# (setup.py) that are not present after installation.
"SOURCES.txt": """
sources_fallback.py
setup.py
sources_fallback_pkg.egg-info/PKG-INFO
sources_fallback_pkg.egg-info/SOURCES.txt
""",
# missing installed-files.txt (i.e. not installed by pip) and
# missing top_level.txt (to trigger fallback to SOURCES.txt)
},
"sources_fallback.py": """
def main():
print("hello world")
""",
}

def setUp(self):
# Run the base-class setUp first, then pass the tree declared above to
# build_files with this test's site_dir as the prefix.
super().setUp()
build_files(EggInfoPkgSourcesFallback.files, prefix=self.site_dir)


class EggInfoFile(OnSysPath, SiteDir):
files: FilesDef = {
"egginfo_file.egg-info": """
Expand Down
33 changes: 26 additions & 7 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ def suppress_known_deprecation():

class APITests(
fixtures.EggInfoPkg,
fixtures.EggInfoPkgPipInstalledNoToplevel,
fixtures.EggInfoPkgPipInstalledNoModules,
fixtures.EggInfoPkgSourcesFallback,
fixtures.DistInfoPkg,
fixtures.DistInfoPkgWithDot,
fixtures.EggInfoFile,
Expand Down Expand Up @@ -62,15 +65,28 @@ def test_prefix_not_matched(self):
distribution(prefix)

def test_for_top_level(self):
self.assertEqual(
distribution('egginfo-pkg').read_text('top_level.txt').strip(), 'mod'
)
tests = [
('egginfo-pkg', 'mod'),
('egg_with_no_modules-pkg', ''),
]
for pkg_name, expect_content in tests:
with self.subTest(pkg_name):
self.assertEqual(
distribution(pkg_name).read_text('top_level.txt').strip(),
expect_content,
)

def test_read_text(self):
top_level = [
path for path in files('egginfo-pkg') if path.name == 'top_level.txt'
][0]
self.assertEqual(top_level.read_text(), 'mod\n')
tests = [
('egginfo-pkg', 'mod\n'),
('egg_with_no_modules-pkg', '\n'),
]
for pkg_name, expect_content in tests:
with self.subTest(pkg_name):
top_level = [
path for path in files(pkg_name) if path.name == 'top_level.txt'
][0]
self.assertEqual(top_level.read_text(), expect_content)

def test_entry_points(self):
eps = entry_points()
Expand Down Expand Up @@ -184,6 +200,9 @@ def test_files_dist_info(self):

def test_files_egg_info(self):
self._test_files(files('egginfo-pkg'))
self._test_files(files('egg_with_module-pkg'))
self._test_files(files('egg_with_no_modules-pkg'))
self._test_files(files('sources_fallback-pkg'))

def test_version_egg_info_file(self):
self.assertEqual(version('egginfo-file'), '0.1')
Expand Down
49 changes: 48 additions & 1 deletion tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,21 @@ def test_metadata_loads_egg_info(self):
assert meta['Description'] == 'pôrˈtend'


class DiscoveryTests(fixtures.EggInfoPkg, fixtures.DistInfoPkg, unittest.TestCase):
class DiscoveryTests(
fixtures.EggInfoPkg,
fixtures.EggInfoPkgPipInstalledNoToplevel,
fixtures.EggInfoPkgPipInstalledNoModules,
fixtures.EggInfoPkgSourcesFallback,
fixtures.DistInfoPkg,
unittest.TestCase,
):
def test_package_discovery(self):
dists = list(distributions())
assert all(isinstance(dist, Distribution) for dist in dists)
assert any(dist.metadata['Name'] == 'egginfo-pkg' for dist in dists)
assert any(dist.metadata['Name'] == 'egg_with_module-pkg' for dist in dists)
assert any(dist.metadata['Name'] == 'egg_with_no_modules-pkg' for dist in dists)
assert any(dist.metadata['Name'] == 'sources_fallback-pkg' for dist in dists)
assert any(dist.metadata['Name'] == 'distinfo-pkg' for dist in dists)

def test_invalid_usage(self):
Expand Down Expand Up @@ -362,3 +372,40 @@ def test_packages_distributions_all_module_types(self):
assert distributions[f'in_package_{i}'] == ['all_distributions']

assert not any(name.endswith('.dist-info') for name in distributions)


class PackagesDistributionsEggTest(
    fixtures.EggInfoPkg,
    fixtures.EggInfoPkgPipInstalledNoToplevel,
    fixtures.EggInfoPkgPipInstalledNoModules,
    fixtures.EggInfoPkgSourcesFallback,
    unittest.TestCase,
):
    """Check packages_distributions() against the old-style egg fixtures."""

    def test_packages_distributions_on_eggs(self):
        """
        Exercise old-style egg packages that ship differing combinations
        of 'top_level.txt', 'SOURCES.txt' and 'installed-files.txt'.
        """
        import_map = packages_distributions()

        def import_names_from_package(package_name):
            # Collect every import name that the mapping attributes to the
            # given distribution name.
            found = set()
            for import_name, package_names in import_map.items():
                if package_name in package_names:
                    found.add(import_name)
            return found

        # egginfo-pkg declares one import ('mod') via top_level.txt
        assert import_names_from_package('egginfo-pkg') == {'mod'}

        # egg_with_module-pkg has one import ('egg_with_module') inferred
        # from installed-files.txt (top_level.txt is missing)
        assert import_names_from_package('egg_with_module-pkg') == {'egg_with_module'}

        # egg_with_no_modules-pkg should not be associated with any import
        # names (top_level.txt is empty, and installed-files.txt has no .py
        # files)
        assert import_names_from_package('egg_with_no_modules-pkg') == set()

        # sources_fallback-pkg has one import ('sources_fallback') inferred
        # from SOURCES.txt (top_level.txt and installed-files.txt are missing)
        assert import_names_from_package('sources_fallback-pkg') == {'sources_fallback'}

0 comments on commit 3112653

Please sign in to comment.