Skip to content

Commit

Permalink
fix: performance regression when parsing links from legacy repositori…
Browse files Browse the repository at this point in the history
…es (#6442)

Resolves: #6436

Measurements of `poetry lock` with warm cache with example
pyproject.toml from #6436:

|test case|time in s|peak memory usage in MB|
|---|---|---|
|legacy repository (before)|422|113|
|legacy repository (after)|3|118|
|pypi repository|1|92|

`backports.cached-property` is used in order to support
cached_property on Python 3.7. 

Co-authored-by: Jarrod Moore <jmo@jmo.name>
Co-authored-by: Bjorn Neergaard <bjorn@neersighted.com>
  • Loading branch information
3 people committed Sep 9, 2022
1 parent 692bdaa commit c4b2253
Show file tree
Hide file tree
Showing 8 changed files with 77 additions and 50 deletions.
14 changes: 13 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ python = "^3.7"

poetry-core = "^1.1.0"
poetry-plugin-export = "^1.0.6"
"backports.cached-property" = { version = "^1.0.2", python = "<3.8" }
cachecontrol = { version = "^0.12.9", extras = ["filecache"] }
cachy = "^0.3.0"
cleo = "^1.0.0a5"
Expand Down
33 changes: 15 additions & 18 deletions src/poetry/repositories/link_sources/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
import logging
import re

from abc import abstractmethod
from typing import TYPE_CHECKING
from typing import DefaultDict
from typing import List

from packaging.utils import canonicalize_name
from poetry.core.packages.package import Package
from poetry.core.semver.version import Version

from poetry.utils._compat import cached_property
from poetry.utils.patterns import sdist_file_re
from poetry.utils.patterns import wheel_file_re

Expand All @@ -20,6 +22,8 @@
from packaging.utils import NormalizedName
from poetry.core.packages.utils.link import Link

LinkCache = DefaultDict[NormalizedName, DefaultDict[Version, List[Link]]]


logger = logging.getLogger(__name__)

Expand All @@ -44,16 +48,8 @@ def __init__(self, url: str) -> None:
def url(self) -> str:
return self._url

def versions(self, name: str) -> Iterator[Version]:
name = canonicalize_name(name)
seen: set[Version] = set()

for link in self.links:
pkg = self.link_package_data(link)

if pkg and pkg.name == name and pkg.version not in seen:
seen.add(pkg.version)
yield pkg.version
def versions(self, name: NormalizedName) -> Iterator[Version]:
yield from self._link_cache[name]

@property
def packages(self) -> Iterator[Package]:
Expand All @@ -64,9 +60,10 @@ def packages(self) -> Iterator[Package]:
yield pkg

@property
@abstractmethod
def links(self) -> Iterator[Link]:
raise NotImplementedError()
for links_per_version in self._link_cache.values():
for links in links_per_version.values():
yield from links

@classmethod
def link_package_data(cls, link: Link) -> Package | None:
Expand Down Expand Up @@ -102,11 +99,7 @@ def link_package_data(cls, link: Link) -> Package | None:
def links_for_version(
self, name: NormalizedName, version: Version
) -> Iterator[Link]:
for link in self.links:
pkg = self.link_package_data(link)

if pkg and pkg.name == name and pkg.version == version:
yield link
yield from self._link_cache[name][version]

def clean_link(self, url: str) -> str:
"""Makes sure a link is fully encoded. That is, if a ' ' shows up in
Expand All @@ -127,3 +120,7 @@ def yanked(self, name: NormalizedName, version: Version) -> str | bool:
if reasons:
return "\n".join(sorted(reasons))
return True

@cached_property
def _link_cache(self) -> LinkCache:
raise NotImplementedError()
16 changes: 12 additions & 4 deletions src/poetry/repositories/link_sources/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,19 @@
import urllib.parse
import warnings

from collections import defaultdict
from html import unescape
from typing import TYPE_CHECKING

from poetry.core.packages.utils.link import Link

from poetry.repositories.link_sources.base import LinkSource
from poetry.utils._compat import cached_property


if TYPE_CHECKING:
from collections.abc import Iterator
from poetry.repositories.link_sources.base import LinkCache


with warnings.catch_warnings():
warnings.simplefilter("ignore")
Expand All @@ -25,8 +28,9 @@ def __init__(self, url: str, content: str) -> None:

self._parsed = html5lib.parse(content, namespaceHTMLElements=False)

@property
def links(self) -> Iterator[Link]:
@cached_property
def _link_cache(self) -> LinkCache:
links: LinkCache = defaultdict(lambda: defaultdict(list))
for anchor in self._parsed.findall(".//a"):
if anchor.get("href"):
href = anchor.get("href")
Expand All @@ -44,7 +48,11 @@ def links(self) -> Iterator[Link]:
if link.ext not in self.SUPPORTED_FORMATS:
continue

yield link
pkg = self.link_package_data(link)
if pkg:
links[pkg.name][pkg.version].append(link)

return links


class SimpleRepositoryPage(HTMLPage):
Expand Down
16 changes: 15 additions & 1 deletion src/poetry/utils/_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@
else:
from importlib import metadata

if sys.version_info < (3, 8):
# compatibility for python <3.8
from backports.cached_property import cached_property
else:
from functools import cached_property

WINDOWS = sys.platform == "win32"


Expand Down Expand Up @@ -53,4 +59,12 @@ def list_to_shell_command(cmd: list[str]) -> str:
)


__all__ = ["WINDOWS", "decode", "encode", "list_to_shell_command", "metadata", "to_str"]
__all__ = [
"WINDOWS",
"cached_property",
"decode",
"encode",
"list_to_shell_command",
"metadata",
"to_str",
]
27 changes: 17 additions & 10 deletions tests/repositories/link_sources/test_base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from collections import defaultdict
from typing import TYPE_CHECKING
from unittest.mock import PropertyMock

Expand All @@ -24,16 +25,22 @@ def link_source(mocker: MockerFixture) -> LinkSource:
url = "https://example.org"
link_source = LinkSource(url)
mocker.patch(
f"{LinkSource.__module__}.{LinkSource.__qualname__}.links",
f"{LinkSource.__module__}.{LinkSource.__qualname__}._link_cache",
new_callable=PropertyMock,
return_value=iter(
[
Link(f"{url}/demo-0.1.0.tar.gz"),
Link(f"{url}/demo-0.1.0_invalid.tar.gz"),
Link(f"{url}/invalid.tar.gz"),
Link(f"{url}/demo-0.1.0-py2.py3-none-any.whl"),
Link(f"{url}/demo-0.1.1.tar.gz"),
]
return_value=defaultdict(
lambda: defaultdict(list),
{
canonicalize_name("demo"): defaultdict(
list,
{
Version.parse("0.1.0"): [
Link(f"{url}/demo-0.1.0.tar.gz"),
Link(f"{url}/demo-0.1.0-py2.py3-none-any.whl"),
],
Version.parse("0.1.1"): [Link(f"{url}/demo-0.1.1.tar.gz")],
},
),
},
),
)
return link_source
Expand Down Expand Up @@ -63,7 +70,7 @@ def test_link_package_data(filename: str, expected: Package | None) -> None:
],
)
def test_versions(name: str, expected: set[Version], link_source: LinkSource) -> None:
assert set(link_source.versions(name)) == expected
assert set(link_source.versions(canonicalize_name(name))) == expected


def test_packages(link_source: LinkSource) -> None:
Expand Down
3 changes: 2 additions & 1 deletion tests/repositories/link_sources/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pytest

from packaging.utils import canonicalize_name
from poetry.core.packages.utils.link import Link
from poetry.core.semver.version import Version

Expand Down Expand Up @@ -90,4 +91,4 @@ def test_yanked(yanked_attrs: tuple[str, str], expected: bool | str) -> None:
content = DEMO_TEMPLATE.format(anchors)
page = HTMLPage("https://example.org", content)

assert page.yanked("demo", Version.parse("0.1")) == expected
assert page.yanked(canonicalize_name("demo"), Version.parse("0.1")) == expected
17 changes: 2 additions & 15 deletions tests/repositories/test_legacy_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,25 +102,12 @@ def test_page_invalid_version_link() -> None:
assert page is not None

links = list(page.links)
assert len(links) == 2
assert len(links) == 1

versions = list(page.versions("poetry"))
versions = list(page.versions(canonicalize_name("poetry")))
assert len(versions) == 1
assert versions[0].to_string() == "0.1.0"

invalid_link = None

for link in links:
if link.filename.startswith("poetry-21"):
invalid_link = link
break

links_010 = list(page.links_for_version(canonicalize_name("poetry"), versions[0]))
assert invalid_link not in links_010

assert invalid_link
assert not page.link_package_data(invalid_link)

packages = list(page.packages)
assert len(packages) == 1
assert packages[0].name == "poetry"
Expand Down

0 comments on commit c4b2253

Please sign in to comment.