Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lazily evaluate the candidate sequence to prevent unneeded distribution downloads #9289

Merged
merged 4 commits into from
Dec 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions news/9203.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
New resolver: Discard a faulty distribution, instead of quitting outright.
This implementation is taken from 20.2.2, with a fix that always makes the
resolver iterate through candidates from indexes lazily, to avoid downloading
candidates we do not need.
4 changes: 4 additions & 0 deletions news/9246.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
New resolver: Discard a source distribution if it fails to generate metadata,
instead of quitting outright. This implementation is taken from 20.2.2, with a
fix that always makes the resolver iterate through candidates from indexes
lazily, to avoid downloading candidates we do not need.
15 changes: 15 additions & 0 deletions src/pip/_internal/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,21 @@ def __str__(self):
)


class InstallationSubprocessError(InstallationError):
"""A subprocess call failed during installation."""
def __init__(self, returncode, description):
# type: (int, str) -> None
self.returncode = returncode
self.description = description

def __str__(self):
# type: () -> str
return (
"Command errored out with exit status {}: {} "
"Check the logs for full command output."
).format(self.returncode, self.description)


class HashErrors(InstallationError):
"""Multiple HashError instances rolled into one for reporting"""

Expand Down
23 changes: 6 additions & 17 deletions src/pip/_internal/resolution/resolvelib/candidates.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def __init__(
self._ireq = ireq
self._name = name
self._version = version
self._dist = None # type: Optional[Distribution]
self.dist = self._prepare()

def __str__(self):
# type: () -> str
Expand Down Expand Up @@ -209,8 +209,6 @@ def _prepare_distribution(self):
def _check_metadata_consistency(self, dist):
# type: (Distribution) -> None
"""Check for consistency of project name and version of dist."""
# TODO: (Longer term) Rather than abort, reject this candidate
# and backtrack. This would need resolvelib support.
name = canonicalize_name(dist.project_name)
if self._name is not None and self._name != name:
raise MetadataInconsistent(self._ireq, "name", dist.project_name)
Expand All @@ -219,25 +217,17 @@ def _check_metadata_consistency(self, dist):
raise MetadataInconsistent(self._ireq, "version", dist.version)

def _prepare(self):
# type: () -> None
if self._dist is not None:
return
# type: () -> Distribution
try:
dist = self._prepare_distribution()
except HashError as e:
# Provide HashError the underlying ireq that caused it. This
# provides context for the resulting error message to show the
# offending line to the user.
e.req = self._ireq
raise

assert dist is not None, "Distribution already installed"
self._check_metadata_consistency(dist)
self._dist = dist

@property
def dist(self):
# type: () -> Distribution
if self._dist is None:
self._prepare()
return self._dist
return dist

def _get_requires_python_dependency(self):
# type: () -> Optional[Requirement]
Expand All @@ -261,7 +251,6 @@ def iter_dependencies(self, with_requires):

def get_install_requirement(self):
# type: () -> Optional[InstallRequirement]
self._prepare()
return self._ireq


Expand Down
52 changes: 44 additions & 8 deletions src/pip/_internal/resolution/resolvelib/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from pip._internal.exceptions import (
DistributionNotFound,
InstallationError,
InstallationSubprocessError,
MetadataInconsistent,
UnsupportedPythonVersion,
UnsupportedWheel,
)
Expand Down Expand Up @@ -33,6 +35,7 @@
ExplicitRequirement,
RequiresPythonRequirement,
SpecifierRequirement,
UnsatisfiableRequirement,
)

if MYPY_CHECK_RUNNING:
Expand Down Expand Up @@ -94,6 +97,7 @@ def __init__(
self._force_reinstall = force_reinstall
self._ignore_requires_python = ignore_requires_python

self._build_failures = {} # type: Cache[InstallationError]
self._link_candidate_cache = {} # type: Cache[LinkCandidate]
self._editable_candidate_cache = {} # type: Cache[EditableCandidate]
self._installed_candidate_cache = {
Expand Down Expand Up @@ -136,21 +140,40 @@ def _make_candidate_from_link(
name, # type: Optional[str]
version, # type: Optional[_BaseVersion]
):
# type: (...) -> Candidate
# type: (...) -> Optional[Candidate]
# TODO: Check already installed candidate, and use it if the link and
# editable flag match.

if link in self._build_failures:
# We already tried this candidate before, and it does not build.
# Don't bother trying again.
return None

if template.editable:
if link not in self._editable_candidate_cache:
self._editable_candidate_cache[link] = EditableCandidate(
link, template, factory=self, name=name, version=version,
)
try:
self._editable_candidate_cache[link] = EditableCandidate(
link, template, factory=self,
name=name, version=version,
)
except (InstallationSubprocessError, MetadataInconsistent) as e:
logger.warning("Discarding %s. %s", link, e)
self._build_failures[link] = e
return None
base = self._editable_candidate_cache[link] # type: BaseCandidate
else:
if link not in self._link_candidate_cache:
self._link_candidate_cache[link] = LinkCandidate(
link, template, factory=self, name=name, version=version,
)
try:
self._link_candidate_cache[link] = LinkCandidate(
link, template, factory=self,
name=name, version=version,
)
except (InstallationSubprocessError, MetadataInconsistent) as e:
logger.warning("Discarding %s. %s", link, e)
self._build_failures[link] = e
return None
base = self._link_candidate_cache[link]

if extras:
return ExtrasCandidate(base, extras)
return base
Expand Down Expand Up @@ -210,13 +233,16 @@ def iter_index_candidates():
for ican in reversed(icans):
if not all_yanked and ican.link.is_yanked:
continue
yield self._make_candidate_from_link(
candidate = self._make_candidate_from_link(
link=ican.link,
extras=extras,
template=template,
name=name,
version=ican.version,
)
if candidate is None:
continue
yield candidate

return FoundCandidates(
iter_index_candidates,
Expand Down Expand Up @@ -280,6 +306,16 @@ def make_requirement_from_install_req(self, ireq, requested_extras):
name=canonicalize_name(ireq.name) if ireq.name else None,
version=None,
)
if cand is None:
# There's no way we can satisfy a URL requirement if the underlying
# candidate fails to build. An unnamed URL must be user-supplied, so
# we fail eagerly. If the URL is named, an unsatisfiable requirement
# can make the resolver do the right thing, either backtrack (and
# maybe find some other requirement that's buildable) or raise a
# ResolutionImpossible eventually.
if not ireq.name:
raise self._build_failures[ireq.link]
return UnsatisfiableRequirement(canonicalize_name(ireq.name))
return self.make_requirement_from_candidate(cand)

def make_requirement_from_candidate(self, candidate):
Expand Down
36 changes: 24 additions & 12 deletions src/pip/_internal/resolution/resolvelib/found_candidates.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
"""Utilities to lazily create and visit candidates found.

Creating and visiting a candidate is a *very* costly operation. It involves
fetching, extracting, potentially building modules from source, and verifying
distribution metadata. It is therefore crucial for performance to keep
everything here lazy all the way down, so we only touch candidates that we
absolutely need, and not "download the world" when we only need one version of
something.
"""

import functools
import itertools
import operator

from pip._vendor.six.moves import collections_abc # type: ignore

Expand Down Expand Up @@ -32,18 +41,21 @@ def _insert_installed(installed, others):
already-installed package. Candidates from index are returned in their
normal ordering, except replaced when the version is already installed.

Since candidates from index are already sorted by reverse version order,
`sorted()` here would keep the ordering mostly intact, only shuffling the
already-installed candidate into the correct position. We put the already-
installed candidate in front of those from the index, so it's put in front
after sorting due to Python sorting's stableness guarentee.
The implementation iterates through and yields other candidates, inserting
the installed candidate exactly once before we start yielding older or
equivalent candidates, or after all other candidates if they are all newer.
"""
candidates = sorted(
itertools.chain([installed], others),
key=operator.attrgetter("version"),
reverse=True,
)
return iter(candidates)
installed_yielded = False
for candidate in others:
# If the installed candidate is better, yield it first.
if not installed_yielded and installed.version >= candidate.version:
yield installed
installed_yielded = True
yield candidate

uranusjr marked this conversation as resolved.
Show resolved Hide resolved
# If the installed candidate is older than all other candidates.
if not installed_yielded:
yield installed


class FoundCandidates(collections_abc.Sequence):
Expand Down
41 changes: 41 additions & 0 deletions src/pip/_internal/resolution/resolvelib/requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,44 @@ def is_satisfied_by(self, candidate):
# already implements the prerelease logic, and would have filtered out
# prerelease candidates if the user does not expect them.
return self.specifier.contains(candidate.version, prereleases=True)


class UnsatisfiableRequirement(Requirement):
"""A requirement that cannot be satisfied.
"""
def __init__(self, name):
# type: (str) -> None
self._name = name

def __str__(self):
# type: () -> str
return "{} (unavailable)".format(self._name)

def __repr__(self):
# type: () -> str
return "{class_name}({name!r})".format(
class_name=self.__class__.__name__,
name=str(self._name),
)

@property
def project_name(self):
# type: () -> str
return self._name

@property
def name(self):
# type: () -> str
return self._name

def format_for_error(self):
# type: () -> str
return str(self)

def get_candidate_lookup(self):
# type: () -> CandidateLookup
return None, None

def is_satisfied_by(self, candidate):
# type: (Candidate) -> bool
return False
8 changes: 2 additions & 6 deletions src/pip/_internal/utils/subprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import subprocess

from pip._internal.cli.spinners import SpinnerInterface, open_spinner
from pip._internal.exceptions import InstallationError
from pip._internal.exceptions import InstallationSubprocessError
from pip._internal.utils.compat import console_to_str, str_to_display
from pip._internal.utils.logging import subprocess_logger
from pip._internal.utils.misc import HiddenText, path_to_display
Expand Down Expand Up @@ -230,11 +230,7 @@ def call_subprocess(
exit_status=proc.returncode,
)
subprocess_logger.error(msg)
exc_msg = (
'Command errored out with exit status {}: {} '
'Check the logs for full command output.'
).format(proc.returncode, command_desc)
raise InstallationError(exc_msg)
raise InstallationSubprocessError(proc.returncode, command_desc)
elif on_returncode == 'warn':
subprocess_logger.warning(
'Command "%s" had error code %s in %s',
Expand Down
61 changes: 61 additions & 0 deletions tests/functional/test_new_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -1216,3 +1216,64 @@ def test_new_resolver_does_not_reinstall_when_from_a_local_index(script):
assert "Installing collected packages: simple" not in result.stdout, str(result)
assert "Requirement already satisfied: simple" in result.stdout, str(result)
assert_installed(script, simple="0.1.0")


def test_new_resolver_skip_inconsistent_metadata(script):
create_basic_wheel_for_package(script, "A", "1")

a_2 = create_basic_wheel_for_package(script, "A", "2")
a_2.rename(a_2.parent.joinpath("a-3-py2.py3-none-any.whl"))

result = script.pip(
"install",
"--no-cache-dir", "--no-index",
"--find-links", script.scratch_path,
"--verbose",
"A",
allow_stderr_warning=True,
)

assert " different version in metadata: '2'" in result.stderr, str(result)
assert_installed(script, a="1")


@pytest.mark.parametrize(
"upgrade",
[True, False],
ids=["upgrade", "no-upgrade"],
)
def test_new_resolver_lazy_fetch_candidates(script, upgrade):
create_basic_wheel_for_package(script, "myuberpkg", "1")
create_basic_wheel_for_package(script, "myuberpkg", "2")
create_basic_wheel_for_package(script, "myuberpkg", "3")

# Install an old version first.
script.pip(
"install",
"--no-cache-dir", "--no-index",
"--find-links", script.scratch_path,
"myuberpkg==1",
)

# Now install the same package again, maybe with the upgrade flag.
if upgrade:
pip_upgrade_args = ["--upgrade"]
else:
pip_upgrade_args = []
result = script.pip(
"install",
"--no-cache-dir", "--no-index",
"--find-links", script.scratch_path,
"myuberpkg",
*pip_upgrade_args # Trailing comma fails on Python 2.
)

# pip should install the version preferred by the strategy...
if upgrade:
assert_installed(script, myuberpkg="3")
else:
assert_installed(script, myuberpkg="1")

# But should reach there in the best route possible, without trying
# candidates it does not need to.
assert "myuberpkg-2" not in result.stdout, str(result)
Loading