repositories: add support for PEP 658 #5509

Merged: 7 commits, Feb 20, 2024
Changes from 1 commit
repositories: add support for PEP 658
This change allows Poetry to make use of PEP 503 "simple" API
repositories that implement PEP 658 for core metadata.

Co-authored-by: Bartosz Sokorski <[email protected]>
2 people authored and radoering committed Feb 20, 2024
commit 96f3f225f74d3136839bf76e6992e2b31ea69216
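For context: PEP 658 extends the PEP 503 "simple" API so that an index can publish a distribution's core metadata as a standalone file at the file URL plus ".metadata", advertised via a data-dist-info-metadata attribute on the file's anchor. A minimal client-side sketch of that contract (illustrative names, not Poetry's API):

import hashlib

import requests


def fetch_pep_658_metadata(file_url: str, expected_hash: str | None = None) -> bytes:
    # Per PEP 658, the metadata for the file at `file_url` is served at
    # `file_url + ".metadata"`. `expected_hash` is the optional
    # "<name>=<hexdigest>" value of the data-dist-info-metadata attribute.
    response = requests.get(file_url + ".metadata")
    response.raise_for_status()
    if expected_hash is not None:
        name, _, digest = expected_hash.partition("=")
        if hashlib.new(name, response.content).hexdigest() != digest:
            raise ValueError("metadata file hash does not match the index")
    return response.content  # RFC 822 style core metadata (Name, Version, ...)

This lets a resolver read dependency information without downloading the whole wheel or sdist, which is what this commit wires into Poetry's HTTP repositories.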
84 changes: 72 additions & 12 deletions src/poetry/repositories/http_repository.py
@@ -10,6 +10,7 @@
from typing import Any
from typing import Iterator

import pkginfo
import requests
import requests.adapters

@@ -20,6 +21,7 @@
from poetry.core.version.markers import parse_marker

from poetry.config.config import Config
from poetry.inspection.info import PackageInfo
from poetry.inspection.lazy_wheel import HTTPRangeRequestUnsupported
from poetry.inspection.lazy_wheel import metadata_from_wheel_url
from poetry.repositories.cached_repository import CachedRepository
@@ -37,7 +39,6 @@
if TYPE_CHECKING:
from packaging.utils import NormalizedName

from poetry.inspection.info import PackageInfo
from poetry.repositories.link_sources.base import LinkSource
from poetry.utils.authenticator import RepositoryCertificateConfig

@@ -155,10 +156,29 @@ def _get_info_from_sdist(self, url: str) -> PackageInfo:
with self._cached_or_downloaded_file(Link(url)) as filepath:
return PackageInfo.from_sdist(filepath)

def _get_info_from_urls(self, urls: dict[str, list[str]]) -> PackageInfo:
@staticmethod
def _get_info_from_metadata(
url: str, metadata: dict[str, pkginfo.Distribution]
) -> PackageInfo | None:
if url in metadata:
dist = metadata[url]
return PackageInfo(
name=dist.name,
version=dist.version,
summary=dist.summary,
requires_dist=list(dist.requires_dist),
requires_python=dist.requires_python,
)
return None

def _get_info_from_urls(
self,
urls: dict[str, list[str]],
metadata: dict[str, pkginfo.Distribution] | None = None,
) -> PackageInfo:
metadata = metadata or {}
# Prefer to read data from wheels: this is faster and more reliable
wheels = urls.get("bdist_wheel")
if wheels:
if wheels := urls.get("bdist_wheel"):
Contributor:
As far as I understand it, we risk falling back to package inspection when:

  • A high priority package (wheel) does not have PEP 658 metadata
  • A low priority package (sdist) does have PEP 658 metadata

In this case we would get the sdist metadata from the repository but then we wouldn't use it.

I would prefer a more aggressive strategy that favours PEP 658 over package inspection. Something like (in this order):

  1. PEP 658 metadata of wheel
  2. PEP 658 metadata of sdist
  3. wheel inspection
  4. sdist inspection

While currently it feels more like:

  1. PEP 658 metadata of wheel
  2. wheel inspection
  3. PEP 658 metadata of sdist
  4. sdist inspection

One way you could achieve this is by reverting the changes to _get_info_from_urls (which would then still act on packages only) and, inside _links_to_data, doing something like:

if metadata:
    # We got PEP 658 metadata from at least one package.
    # Either take one randomly or use some fancier logic (wheels over sdist)
    info = _get_info_from_metadata(metadata)
else:
    info = _get_info_from_urls(urls)
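A slightly more concrete sketch of that ordering (hypothetical helper name; assumes metadata maps file URLs to parsed distributions, as in this PR):

import pkginfo


def _pick_pep_658_metadata(
    urls: dict[str, list[str]],
    metadata: dict[str, pkginfo.Distribution],
) -> pkginfo.Distribution | None:
    # Favour PEP 658 metadata of wheels over sdists, before any package inspection.
    for dist_type in ("bdist_wheel", "sdist"):
        for url in urls.get(dist_type, []):
            if url in metadata:
                return metadata[url]
    return None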

Contributor:
Possible network optimization in case we go for the "random" approach: as soon as we get PEP 658 metadata from one URL, stop querying the others, since we don't need anything else.
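Sketched against the link loop in _links_to_data, that could look like the fragment below (fetch_metadata stands in for the request-and-parse step; the rest of the loop body, which also collects file hashes, would be unchanged):

for link in links:
    # ... yanked filtering and files[] bookkeeping as before ...
    if link.has_metadata and not metadata:
        # Under the "random" approach one metadata file is enough,
        # so further .metadata requests are skipped once we have one.
        metadata[link.url] = fetch_metadata(link)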

Member:
I think we can keep it as is for now. The current approach has the advantage that it does not change our logic (which wheel's metadata we choose), so we get the same results whether we can download the metadata or have to download the wheel/sdist.

Further, I suppose that in 99% of use cases it's not relevant, because either all files of a release have PEP 658 metadata or none of them. (I may be wrong, but in that case we can adjust it later.)

# We ought just to be able to look at any of the available wheels to read
# metadata, they all should give the same answer.
#
@@ -194,13 +214,19 @@ def _get_info_from_urls(self, urls: dict[str, list[str]]) -> PackageInfo:
platform_specific_wheels.append(wheel)

if universal_wheel is not None:
return self._get_info_from_wheel(universal_wheel)
return self._get_info_from_metadata(
universal_wheel, metadata
) or self._get_info_from_wheel(universal_wheel)

info = None
if universal_python2_wheel and universal_python3_wheel:
info = self._get_info_from_wheel(universal_python2_wheel)
info = self._get_info_from_metadata(
universal_python2_wheel, metadata
) or self._get_info_from_wheel(universal_python2_wheel)

py3_info = self._get_info_from_wheel(universal_python3_wheel)
py3_info = self._get_info_from_metadata(
universal_python3_wheel, metadata
) or self._get_info_from_wheel(universal_python3_wheel)

if info.requires_python or py3_info.requires_python:
info.requires_python = str(
@@ -250,16 +276,24 @@ def _get_info_from_urls(self, urls: dict[str, list[str]]) -> PackageInfo:

# Prefer non platform specific wheels
if universal_python3_wheel:
return self._get_info_from_wheel(universal_python3_wheel)
return self._get_info_from_metadata(
universal_python3_wheel, metadata
) or self._get_info_from_wheel(universal_python3_wheel)

if universal_python2_wheel:
return self._get_info_from_wheel(universal_python2_wheel)
return self._get_info_from_metadata(
universal_python2_wheel, metadata
) or self._get_info_from_wheel(universal_python2_wheel)

if platform_specific_wheels:
first_wheel = platform_specific_wheels[0]
return self._get_info_from_wheel(first_wheel)
return self._get_info_from_metadata(
first_wheel, metadata
) or self._get_info_from_wheel(first_wheel)

return self._get_info_from_sdist(urls["sdist"][0])
return self._get_info_from_metadata(
urls["sdist"][0], metadata
) or self._get_info_from_sdist(urls["sdist"][0])

def _links_to_data(self, links: list[Link], data: PackageInfo) -> dict[str, Any]:
if not links:
@@ -268,11 +302,37 @@ def _links_to_data(self, links: list[Link], data: PackageInfo) -> dict[str, Any]:
f' "{data.version}"'
)
urls = defaultdict(list)
metadata = {}
files: list[dict[str, Any]] = []
for link in links:
if link.yanked and not data.yanked:
# drop yanked files unless the entire release is yanked
continue
if link.has_metadata:
try:
assert link.metadata_url is not None
response = self.session.get(link.metadata_url)
distribution = pkginfo.Distribution()
assert link.metadata_hash_name is not None
metadata_hash = getattr(hashlib, link.metadata_hash_name)(
response.text.encode()
).hexdigest()

if metadata_hash != link.metadata_hash:
self._log(
f"Metadata file hash ({metadata_hash}) does not match"
f" expected hash ({link.metadata_hash}).",
level="warning",
)

distribution.parse(response.content)
metadata[link.url] = distribution
except requests.HTTPError:
self._log(
f"Failed to retrieve metadata at {link.metadata_url}",
level="debug",
)

if link.is_wheel:
urls["bdist_wheel"].append(link.url)
elif link.filename.endswith(
@@ -299,7 +359,7 @@ def _links_to_data(self, links: list[Link], data: PackageInfo) -> dict[str, Any]:

data.files = files

info = self._get_info_from_urls(urls)
info = self._get_info_from_urls(urls, metadata)

data.summary = info.summary
data.requires_dist = info.requires_dist
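One detail in the hunk above: the hash function is looked up with getattr(hashlib, link.metadata_hash_name), so an index advertising a hash name that is not a hashlib attribute would raise AttributeError rather than produce a warning. A more defensive variant might look like this (a sketch, not what the PR does):

import hashlib


def verify_metadata_hash(content: bytes, hash_name: str, expected: str) -> bool:
    # hashlib.new() accepts any algorithm the interpreter knows about and
    # raises ValueError, not AttributeError, for unknown names.
    if hash_name not in hashlib.algorithms_available:
        return False  # unknown algorithm: treat the hash as unverifiable
    return hashlib.new(hash_name, content).hexdigest() == expected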
5 changes: 4 additions & 1 deletion src/poetry/repositories/link_sources/html.py
@@ -42,7 +42,10 @@ def _link_cache(self) -> LinkCache:
yanked = unescape(yanked_value)
else:
yanked = "data-yanked" in anchor
link = Link(url, requires_python=pyrequire, yanked=yanked)
metadata = anchor.get("data-dist-info-metadata")
link = Link(
url, requires_python=pyrequire, yanked=yanked, metadata=metadata
)

if link.ext not in self.SUPPORTED_FORMATS:
continue
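The change above only forwards the raw data-dist-info-metadata attribute into Link; interpreting it is left to poetry-core. Per PEP 658 the attribute value is either "true" (metadata is available but no hash is given) or "<hashname>=<hexdigest>". A standalone sketch of that interpretation (illustrative, not poetry-core's implementation):

def parse_dist_info_metadata(value: str | None) -> tuple[bool, str | None, str | None]:
    # Returns (has_metadata, hash_name, hash_value) following PEP 658 semantics.
    if value is None:
        return False, None, None
    if value == "true":
        return True, None, None
    hash_name, _, hash_value = value.partition("=")
    return True, hash_name, hash_value or None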
12 changes: 12 additions & 0 deletions tests/repositories/fixtures/legacy/isort-metadata.html
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html>
<head>
<title>Links for isort</title>
</head>
<body>
<h1>Links for isort</h1>
<a href="https://files.pythonhosted.org/packages/1f/2c/non-existant/isort-metadata-4.3.4-py3-none-any.whl#sha256=1153601da39a25b14ddc54955dbbacbb6b2d19135386699e2ad58517953b34af"
data-dist-info-metadata="sha256=e360bf0ed8a06390513d50dd5b7e9d635c789853a93b84163f9de4ae0647580c">isort-metadata-4.3.4-py3-none-any.whl</a><br/>
</body>
</html>
<!--SERIAL 3575149-->
28 changes: 28 additions & 0 deletions tests/repositories/fixtures/legacy/metadata/isort-metadata-4.3.4-py3-none-any.whl.metadata
@@ -0,0 +1,28 @@
Metadata-Version: 2.0
Name: isort-metadata
Version: 4.3.4
Summary: A Python utility / library to sort Python imports.
Home-page: https://github.com/timothycrosley/isort
Author: Timothy Crosley
Author-email: [email protected]
License: MIT
Keywords: Refactor,Python,Python2,Python3,Refactoring,Imports,Sort,Clean
Platform: UNKNOWN
Classifier: Development Status :: 6 - Mature
Classifier: Intended Audience :: Developers
Classifier: Natural Language :: English
Classifier: Environment :: Console
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.4
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries
Classifier: Topic :: Utilities
Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*
Requires-Dist: futures; python_version=="2.7"
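This fixture is exactly what a PEP 658 metadata file looks like on the wire: the wheel's METADATA document served on its own. A short sketch of how such a blob maps onto the fields _get_info_from_metadata reads, using pkginfo as the PR does (the fixture path is assumed from the test's layout):

import pkginfo

raw = open("isort-metadata-4.3.4-py3-none-any.whl.metadata", "rb").read()
dist = pkginfo.Distribution()
dist.parse(raw)  # populates attributes from the core metadata fields
print(dist.name, dist.version)   # isort-metadata 4.3.4
print(dist.requires_python)      # >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*
print(list(dist.requires_dist))  # ['futures; python_version=="2.7"']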
35 changes: 35 additions & 0 deletions tests/repositories/test_legacy_repository.py
@@ -1,6 +1,7 @@
from __future__ import annotations

import base64
import posixpath
import re
import shutil

@@ -23,10 +24,13 @@


if TYPE_CHECKING:
from typing import Any

import httpretty

from _pytest.monkeypatch import MonkeyPatch
from packaging.utils import NormalizedName
from pytest_mock import MockerFixture

from poetry.config.config import Config

@@ -176,6 +180,37 @@ def test_get_package_information_fallback_read_setup() -> None:
)


def _get_mock(url: str, **__: Any) -> requests.Response:
if url.endswith(".metadata"):
response = requests.Response()
response.encoding = "application/text"
response._content = MockRepository.FIXTURES.joinpath(
"metadata", posixpath.basename(url)
).read_bytes()
return response
raise requests.HTTPError()


def test_get_package_information_pep_658(mocker: MockerFixture) -> None:
repo = MockRepository()

isort_package = repo.package("isort", Version.parse("4.3.4"))

mocker.patch.object(repo.session, "get", _get_mock)

try:
package = repo.package("isort-metadata", Version.parse("4.3.4"))
except FileNotFoundError:
pytest.fail("Metadata was not successfully retrieved")
else:
assert package.source_type == isort_package.source_type == "legacy"
assert package.source_reference == isort_package.source_reference == repo.name
assert package.source_url == isort_package.source_url == repo.url
assert package.name == "isort-metadata"
assert package.version.text == isort_package.version.text == "4.3.4"
assert package.description == isort_package.description


def test_get_package_information_skips_dependencies_with_invalid_constraints() -> None:
repo = MockRepository()
