repositories: add support for PEP 658 #5509

Merged: 7 commits, Feb 20, 2024
Changes from 1 commit
repositories: add support for PEP 658
This change allows Poetry to make use of PEP 503 "simple" API
repositories that implement PEP 658 for core metadata.

Co-authored-by: Bartosz Sokorski <[email protected]>
2 people authored and radoering committed Feb 20, 2024
commit 96f3f225f74d3136839bf76e6992e2b31ea69216
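For context: PEP 658 extends the PEP 503 "simple" API so that an index can publish a distribution's core metadata as a standalone file at the file URL plus ".metadata", advertised via a data-dist-info-metadata attribute on the file's anchor. A minimal client-side sketch of that contract (illustrative names, not Poetry's API):

import hashlib

import requests


def fetch_pep_658_metadata(file_url: str, expected_hash: str | None = None) -> bytes:
    # Per PEP 658, the metadata for the file at `file_url` is served at
    # `file_url + ".metadata"`. `expected_hash` is the optional
    # "<name>=<hexdigest>" value of the data-dist-info-metadata attribute.
    response = requests.get(file_url + ".metadata")
    response.raise_for_status()
    if expected_hash is not None:
        name, _, digest = expected_hash.partition("=")
        if hashlib.new(name, response.content).hexdigest() != digest:
            raise ValueError("metadata file hash does not match the index")
    return response.content  # RFC 822 style core metadata (Name, Version, ...)

This lets a resolver read dependency information without downloading the whole wheel or sdist, which is what this commit wires into Poetry's HTTP repositories.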
84 changes: 72 additions & 12 deletions src/poetry/repositories/http_repository.py
@@ -10,6 +10,7 @@
from typing import Any
from typing import Iterator

import pkginfo
import requests
import requests.adapters

@@ -20,6 +21,7 @@
from poetry.core.version.markers import parse_marker

from poetry.config.config import Config
from poetry.inspection.info import PackageInfo
from poetry.inspection.lazy_wheel import HTTPRangeRequestUnsupported
from poetry.inspection.lazy_wheel import metadata_from_wheel_url
from poetry.repositories.cached_repository import CachedRepository
@@ -37,7 +39,6 @@
if TYPE_CHECKING:
from packaging.utils import NormalizedName

from poetry.inspection.info import PackageInfo
from poetry.repositories.link_sources.base import LinkSource
from poetry.utils.authenticator import RepositoryCertificateConfig

@@ -155,10 +156,29 @@ def _get_info_from_sdist(self, url: str) -> PackageInfo:
with self._cached_or_downloaded_file(Link(url)) as filepath:
return PackageInfo.from_sdist(filepath)

def _get_info_from_urls(self, urls: dict[str, list[str]]) -> PackageInfo:
@staticmethod
def _get_info_from_metadata(
url: str, metadata: dict[str, pkginfo.Distribution]
) -> PackageInfo | None:
if url in metadata:
dist = metadata[url]
return PackageInfo(
name=dist.name,
version=dist.version,
summary=dist.summary,
requires_dist=list(dist.requires_dist),
requires_python=dist.requires_python,
)
return None

def _get_info_from_urls(
self,
urls: dict[str, list[str]],
metadata: dict[str, pkginfo.Distribution] | None = None,
) -> PackageInfo:
metadata = metadata or {}
# Prefer to read data from wheels: this is faster and more reliable
wheels = urls.get("bdist_wheel")
if wheels:
if wheels := urls.get("bdist_wheel"):
Contributor:
As far as I understand it, we risk falling back to package inspection when:

  • A high priority package (wheel) does not have PEP 658 metadata
  • A low priority package (sdist) does have PEP 658 metadata

In this case we would get the sdist metadata from the repository but then we wouldn't use it.

I would prefer a more aggressive strategy that favours PEP 658 over package inspection. Something like (in this order):

  1. PEP 658 metadata of wheel
  2. PEP 658 metadata of sdist
  3. wheel inspection
  4. sdist inspection

While currently it feels more like:

  1. PEP 658 metadata of wheel
  2. wheel inspection
  3. PEP 658 metadata of sdist
  4. sdist inspection

One way you could achieve this is by reverting the changes to _get_info_from_urls (which would then still act on packages only) and, inside _links_to_data, doing something like:

if metadata:
    # We got PEP 658 metadata from at least one package.
    # Either take one randomly or use some fancier logic (wheels over sdist)
    info = _get_info_from_metadata(metadata)
else:
    info = _get_info_from_urls(urls)
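A slightly more concrete sketch of that ordering (hypothetical helper name; assumes metadata maps file URLs to parsed distributions, as in this PR):

import pkginfo


def _pick_pep_658_metadata(
    urls: dict[str, list[str]],
    metadata: dict[str, pkginfo.Distribution],
) -> pkginfo.Distribution | None:
    # Favour PEP 658 metadata of wheels over sdists, before any package inspection.
    for dist_type in ("bdist_wheel", "sdist"):
        for url in urls.get(dist_type, []):
            if url in metadata:
                return metadata[url]
    return None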

Contributor:
Possible network optimization in case we go for the "random" approach: as soon as we get PEP 658 metadata from one URL, stop querying the others, since we don't need anything else.
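Sketched against the link loop in _links_to_data, that could look like the fragment below (fetch_metadata stands in for the request-and-parse step; the rest of the loop body, which also collects file hashes, would be unchanged):

for link in links:
    # ... yanked filtering and files[] bookkeeping as before ...
    if link.has_metadata and not metadata:
        # Under the "random" approach one metadata file is enough,
        # so further .metadata requests are skipped once we have one.
        metadata[link.url] = fetch_metadata(link)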

Member:
I think we can keep it as is for now. The current approach has the advantage that it does not change our logic (which wheel's metadata we choose), so we get the same results whether we can download the metadata or have to download the wheel/sdist.

Further, I suppose that in 99% of use cases it's not relevant, because either all files of a release have PEP 658 metadata or none of them. (I may be wrong, but in that case we can adjust it later.)

# We ought just to be able to look at any of the available wheels to read
# metadata, they all should give the same answer.
#
@@ -194,13 +214,19 @@ def _get_info_from_urls(self, urls: dict[str, list[str]]) -> PackageInfo:
platform_specific_wheels.append(wheel)

if universal_wheel is not None:
return self._get_info_from_wheel(universal_wheel)
return self._get_info_from_metadata(
universal_wheel, metadata
) or self._get_info_from_wheel(universal_wheel)

info = None
if universal_python2_wheel and universal_python3_wheel:
info = self._get_info_from_wheel(universal_python2_wheel)
info = self._get_info_from_metadata(
universal_python2_wheel, metadata
) or self._get_info_from_wheel(universal_python2_wheel)

py3_info = self._get_info_from_wheel(universal_python3_wheel)
py3_info = self._get_info_from_metadata(
universal_python3_wheel, metadata
) or self._get_info_from_wheel(universal_python3_wheel)

if info.requires_python or py3_info.requires_python:
info.requires_python = str(
@@ -250,16 +276,24 @@ def _get_info_from_urls(self, urls: dict[str, list[str]]) -> PackageInfo:

# Prefer non platform specific wheels
if universal_python3_wheel:
return self._get_info_from_wheel(universal_python3_wheel)
return self._get_info_from_metadata(
universal_python3_wheel, metadata
) or self._get_info_from_wheel(universal_python3_wheel)

if universal_python2_wheel:
return self._get_info_from_wheel(universal_python2_wheel)
return self._get_info_from_metadata(
universal_python2_wheel, metadata
) or self._get_info_from_wheel(universal_python2_wheel)

if platform_specific_wheels:
first_wheel = platform_specific_wheels[0]
return self._get_info_from_wheel(first_wheel)
return self._get_info_from_metadata(
first_wheel, metadata
) or self._get_info_from_wheel(first_wheel)

return self._get_info_from_sdist(urls["sdist"][0])
return self._get_info_from_metadata(
urls["sdist"][0], metadata
) or self._get_info_from_sdist(urls["sdist"][0])

def _links_to_data(self, links: list[Link], data: PackageInfo) -> dict[str, Any]:
if not links:
@@ -268,11 +302,37 @@ def _links_to_data(self, links: list[Link], data: PackageInfo) -> dict[str, Any]:
f' "{data.version}"'
)
urls = defaultdict(list)
metadata = {}
files: list[dict[str, Any]] = []
for link in links:
if link.yanked and not data.yanked:
# drop yanked files unless the entire release is yanked
continue
if link.has_metadata:
try:
assert link.metadata_url is not None
response = self.session.get(link.metadata_url)
distribution = pkginfo.Distribution()
assert link.metadata_hash_name is not None
metadata_hash = getattr(hashlib, link.metadata_hash_name)(
response.text.encode()
).hexdigest()

if metadata_hash != link.metadata_hash:
self._log(
f"Metadata file hash ({metadata_hash}) does not match"
f" expected hash ({link.metadata_hash}).",
level="warning",
)

distribution.parse(response.content)
metadata[link.url] = distribution
except requests.HTTPError:
self._log(
f"Failed to retrieve metadata at {link.metadata_url}",
level="debug",
)

if link.is_wheel:
urls["bdist_wheel"].append(link.url)
elif link.filename.endswith(
@@ -299,7 +359,7 @@ def _links_to_data(self, links: list[Link], data: PackageInfo) -> dict[str, Any]:

data.files = files

info = self._get_info_from_urls(urls)
info = self._get_info_from_urls(urls, metadata)

data.summary = info.summary
data.requires_dist = info.requires_dist
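One detail in the hunk above: the hash function is looked up with getattr(hashlib, link.metadata_hash_name), so an index advertising a hash name that is not a hashlib attribute would raise AttributeError rather than produce a warning. A more defensive variant might look like this (a sketch, not what the PR does):

import hashlib


def verify_metadata_hash(content: bytes, hash_name: str, expected: str) -> bool:
    # hashlib.new() accepts any algorithm the interpreter knows about and
    # raises ValueError, not AttributeError, for unknown names.
    if hash_name not in hashlib.algorithms_available:
        return False  # unknown algorithm: treat the hash as unverifiable
    return hashlib.new(hash_name, content).hexdigest() == expected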
5 changes: 4 additions & 1 deletion src/poetry/repositories/link_sources/html.py
@@ -42,7 +42,10 @@ def _link_cache(self) -> LinkCache:
yanked = unescape(yanked_value)
else:
yanked = "data-yanked" in anchor
link = Link(url, requires_python=pyrequire, yanked=yanked)
metadata = anchor.get("data-dist-info-metadata")
link = Link(
url, requires_python=pyrequire, yanked=yanked, metadata=metadata
)

if link.ext not in self.SUPPORTED_FORMATS:
continue
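The change above only forwards the raw data-dist-info-metadata attribute into Link; interpreting it is left to poetry-core. Per PEP 658 the attribute value is either "true" (metadata is available but no hash is given) or "<hashname>=<hexdigest>". A standalone sketch of that interpretation (illustrative, not poetry-core's implementation):

def parse_dist_info_metadata(value: str | None) -> tuple[bool, str | None, str | None]:
    # Returns (has_metadata, hash_name, hash_value) following PEP 658 semantics.
    if value is None:
        return False, None, None
    if value == "true":
        return True, None, None
    hash_name, _, hash_value = value.partition("=")
    return True, hash_name, hash_value or None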
12 changes: 12 additions & 0 deletions tests/repositories/fixtures/legacy/isort-metadata.html
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html>
<head>
<title>Links for isort</title>
</head>
<body>
<h1>Links for isort</h1>
<a href="https://files.pythonhosted.org/packages/1f/2c/non-existant/isort-metadata-4.3.4-py3-none-any.whl#sha256=1153601da39a25b14ddc54955dbbacbb6b2d19135386699e2ad58517953b34af"
data-dist-info-metadata="sha256=e360bf0ed8a06390513d50dd5b7e9d635c789853a93b84163f9de4ae0647580c">isort-metadata-4.3.4-py3-none-any.whl</a><br/>
</body>
</html>
<!--SERIAL 3575149-->
28 changes: 28 additions & 0 deletions tests/repositories/fixtures/legacy/metadata/isort-metadata-4.3.4-py3-none-any.whl.metadata
@@ -0,0 +1,28 @@
Metadata-Version: 2.0
Name: isort-metadata
Version: 4.3.4
Summary: A Python utility / library to sort Python imports.
Home-page: https://github.com/timothycrosley/isort
Author: Timothy Crosley
Author-email: [email protected]
License: MIT
Keywords: Refactor,Python,Python2,Python3,Refactoring,Imports,Sort,Clean
Platform: UNKNOWN
Classifier: Development Status :: 6 - Mature
Classifier: Intended Audience :: Developers
Classifier: Natural Language :: English
Classifier: Environment :: Console
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.4
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries
Classifier: Topic :: Utilities
Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*
Requires-Dist: futures; python_version=="2.7"
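This fixture is exactly what a PEP 658 metadata file looks like on the wire: the wheel's METADATA document served on its own. A short sketch of how such a blob maps onto the fields _get_info_from_metadata reads, using pkginfo as the PR does (the fixture path is assumed from the test's layout):

import pkginfo

raw = open("isort-metadata-4.3.4-py3-none-any.whl.metadata", "rb").read()
dist = pkginfo.Distribution()
dist.parse(raw)  # populates attributes from the core metadata fields
print(dist.name, dist.version)   # isort-metadata 4.3.4
print(dist.requires_python)      # >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*
print(list(dist.requires_dist))  # ['futures; python_version=="2.7"']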
35 changes: 35 additions & 0 deletions tests/repositories/test_legacy_repository.py
@@ -1,6 +1,7 @@
from __future__ import annotations

import base64
import posixpath
import re
import shutil

@@ -23,10 +24,13 @@


if TYPE_CHECKING:
from typing import Any

import httpretty

from _pytest.monkeypatch import MonkeyPatch
from packaging.utils import NormalizedName
from pytest_mock import MockerFixture

from poetry.config.config import Config

@@ -176,6 +180,37 @@ def test_get_package_information_fallback_read_setup() -> None:
)


def _get_mock(url: str, **__: Any) -> requests.Response:
if url.endswith(".metadata"):
response = requests.Response()
response.encoding = "application/text"
response._content = MockRepository.FIXTURES.joinpath(
"metadata", posixpath.basename(url)
).read_bytes()
return response
raise requests.HTTPError()


def test_get_package_information_pep_658(mocker: MockerFixture) -> None:
repo = MockRepository()

isort_package = repo.package("isort", Version.parse("4.3.4"))

mocker.patch.object(repo.session, "get", _get_mock)

try:
package = repo.package("isort-metadata", Version.parse("4.3.4"))
except FileNotFoundError:
pytest.fail("Metadata was not successfully retrieved")
else:
assert package.source_type == isort_package.source_type == "legacy"
assert package.source_reference == isort_package.source_reference == repo.name
assert package.source_url == isort_package.source_url == repo.url
assert package.name == "isort-metadata"
assert package.version.text == isort_package.version.text == "4.3.4"
assert package.description == isort_package.description


def test_get_package_information_skips_dependencies_with_invalid_constraints() -> None:
repo = MockRepository()
