From 8979c6ce1157f213cc224e79501184525626a6e0 Mon Sep 17 00:00:00 2001 From: x-hgg-x <39058530+x-hgg-x@users.noreply.github.com> Date: Thu, 10 Oct 2024 19:08:26 +0200 Subject: [PATCH] fix: use nohash in the resolver for the activation keys --- Cargo.lock | 7 ++++ Cargo.toml | 2 + crates/resolver-tests/src/sat.rs | 2 +- src/cargo/core/activation_key.rs | 60 ++++++++++++++++++++++++++---- src/cargo/core/mod.rs | 2 +- src/cargo/core/package_id.rs | 28 ++++++++++++-- src/cargo/core/resolver/context.rs | 36 ++++++++++++------ src/cargo/core/resolver/mod.rs | 5 +-- 8 files changed, 114 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 41453bf13f7..edaa2b3b946 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -334,6 +334,7 @@ dependencies = [ "libc", "libgit2-sys", "memchr", + "nohash-hasher", "opener", "openssl", "os_info", @@ -2386,6 +2387,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "nohash-hasher" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" + [[package]] name = "nom" version = "7.1.3" diff --git a/Cargo.toml b/Cargo.toml index aa92c6518c3..781b40c5338 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,6 +67,7 @@ libgit2-sys = "0.17.0" libloading = "0.8.5" memchr = "2.7.4" miow = "0.6.0" +nohash-hasher = "0.2.0" opener = "0.7.1" openssl = "=0.10.57" # See rust-lang/cargo#13546 and openssl/openssl#23376 for pinning openssl-sys = "=0.9.92" # See rust-lang/cargo#13546 and openssl/openssl#23376 for pinning @@ -182,6 +183,7 @@ jobserver.workspace = true lazycell.workspace = true libgit2-sys.workspace = true memchr.workspace = true +nohash-hasher.workspace = true opener.workspace = true os_info.workspace = true pasetors.workspace = true diff --git a/crates/resolver-tests/src/sat.rs b/crates/resolver-tests/src/sat.rs index f04a585b296..f36a2612921 100644 --- a/crates/resolver-tests/src/sat.rs +++ b/crates/resolver-tests/src/sat.rs @@ -395,7 +395,7 @@ impl SatResolver { &mut solver, var_for_is_packages_used .iter() - .map(|(p, &v)| (p.as_activations_key(), v)), + .map(|(p, &v)| (p.activation_key(), v)), ); for pkg in by_name.values().flatten() { diff --git a/src/cargo/core/activation_key.rs b/src/cargo/core/activation_key.rs index 50d995ed60c..f53850c69e5 100644 --- a/src/cargo/core/activation_key.rs +++ b/src/cargo/core/activation_key.rs @@ -1,17 +1,61 @@ +use std::collections::HashSet; +use std::hash::{Hash, Hasher}; use std::num::NonZeroU64; +use std::sync::{Mutex, OnceLock}; use crate::core::SourceId; use crate::util::interning::InternedString; -/// Find the activated version of a crate based on the name, source, and semver compatibility. -/// By storing this in a hash map we ensure that there is only one -/// semver compatible version of each crate. -/// This all so stores the `ContextAge`. -pub type ActivationsKey = (InternedString, SourceId, SemverCompatibility); +static ACTIVATION_KEY_CACHE: OnceLock>> = + OnceLock::new(); -/// A type that represents when cargo treats two Versions as compatible. -/// Versions `a` and `b` are compatible if their left-most nonzero digit is the -/// same. +type ActivationKeyInner = (InternedString, SourceId, SemverCompatibility); + +/// The activated version of a crate is based on the name, source, and semver compatibility. +#[derive(Clone, Copy, Eq)] +pub struct ActivationKey { + inner: &'static ActivationKeyInner, +} + +impl From for ActivationKey { + fn from(inner: ActivationKeyInner) -> Self { + let mut cache = ACTIVATION_KEY_CACHE + .get_or_init(|| Default::default()) + .lock() + .unwrap(); + let inner = cache.get(&inner).cloned().unwrap_or_else(|| { + let inner = Box::leak(Box::new(inner)); + cache.insert(inner); + inner + }); + Self { inner } + } +} + +impl ActivationKey { + /// This function is used for the `Eq` and `Hash` impls to implement a "no hash" hashable value. + /// This is possible since all `ActivationKey` are already interned in a `HashSet`. + fn key(&self) -> u64 { + std::ptr::from_ref(self.inner) as u64 + } +} + +impl PartialEq for ActivationKey { + fn eq(&self, other: &Self) -> bool { + self.key() == other.key() + } +} + +impl nohash_hasher::IsEnabled for ActivationKey {} + +impl Hash for ActivationKey { + fn hash(&self, state: &mut H) { + state.write_u64(self.key()); + } +} + +/// A type that represents when cargo treats two versions as compatible. +/// Versions `a` and `b` are compatible if their left-most nonzero digit is the same. #[derive(Clone, Copy, Eq, PartialEq, Hash, Debug, PartialOrd, Ord)] pub enum SemverCompatibility { Major(NonZeroU64), diff --git a/src/cargo/core/mod.rs b/src/cargo/core/mod.rs index b7520a8e146..4577943c713 100644 --- a/src/cargo/core/mod.rs +++ b/src/cargo/core/mod.rs @@ -1,4 +1,4 @@ -pub use self::activation_key::ActivationsKey; +pub use self::activation_key::ActivationKey; pub use self::dependency::{Dependency, SerializedDependency}; pub use self::features::{CliUnstable, Edition, Feature, Features}; pub use self::manifest::{EitherManifest, VirtualManifest}; diff --git a/src/cargo/core/package_id.rs b/src/cargo/core/package_id.rs index e079ca2a8ca..a7f99d68846 100644 --- a/src/cargo/core/package_id.rs +++ b/src/cargo/core/package_id.rs @@ -1,3 +1,4 @@ +use std::cmp::Ordering; use std::collections::HashSet; use std::fmt::{self, Formatter}; use std::hash; @@ -10,7 +11,7 @@ use std::sync::OnceLock; use serde::de; use serde::ser; -use crate::core::ActivationsKey; +use crate::core::ActivationKey; use crate::core::PackageIdSpec; use crate::core::SourceId; use crate::util::interning::InternedString; @@ -24,13 +25,31 @@ pub struct PackageId { inner: &'static PackageIdInner, } -#[derive(PartialOrd, Eq, Ord)] struct PackageIdInner { name: InternedString, version: semver::Version, source_id: SourceId, + // This field is used as a cache to improve the resolver speed, + // and is not included in the `Eq`, `Hash` and `Ord` impls. + activation_key: ActivationKey, } +impl Ord for PackageIdInner { + fn cmp(&self, other: &Self) -> Ordering { + let self_key = (self.name, &self.version, self.source_id); + let other_key = (other.name, &other.version, other.source_id); + self_key.cmp(&other_key) + } +} + +impl PartialOrd for PackageIdInner { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(&other)) + } +} + +impl Eq for PackageIdInner {} + // Custom equality that uses full equality of SourceId, rather than its custom equality. // // The `build` part of the version is usually ignored (like a "comment"). @@ -136,6 +155,7 @@ impl PackageId { pub fn new(name: InternedString, version: semver::Version, source_id: SourceId) -> PackageId { let inner = PackageIdInner { + activation_key: (name, source_id, (&version).into()).into(), name, version, source_id, @@ -161,8 +181,8 @@ impl PackageId { pub fn source_id(self) -> SourceId { self.inner.source_id } - pub fn as_activations_key(self) -> ActivationsKey { - (self.name(), self.source_id(), self.version().into()) + pub fn activation_key(self) -> ActivationKey { + self.inner.activation_key } pub fn with_source_id(self, source: SourceId) -> PackageId { diff --git a/src/cargo/core/resolver/context.rs b/src/cargo/core/resolver/context.rs index b537b87a94d..081b665bc85 100644 --- a/src/cargo/core/resolver/context.rs +++ b/src/cargo/core/resolver/context.rs @@ -2,7 +2,7 @@ use super::dep_cache::RegistryQueryer; use super::errors::ActivateResult; use super::types::{ConflictMap, ConflictReason, FeaturesSet, ResolveOpts}; use super::RequestedFeatures; -use crate::core::{ActivationsKey, Dependency, PackageId, Summary}; +use crate::core::{ActivationKey, Dependency, PackageId, Summary}; use crate::util::interning::InternedString; use crate::util::Graph; use anyhow::format_err; @@ -26,8 +26,13 @@ pub struct ResolverContext { pub parents: Graph>, } -pub type Activations = - im_rc::HashMap; +/// By storing activation keys in a `HashMap` we ensure that there is only one +/// semver compatible version of each crate. +type Activations = im_rc::HashMap< + ActivationKey, + (Summary, ContextAge), + nohash_hasher::BuildNoHashHasher, +>; /// When backtracking it can be useful to know how far back to go. /// The `ContextAge` of a `Context` is a monotonically increasing counter of the number @@ -62,7 +67,7 @@ impl ResolverContext { ) -> ActivateResult { let id = summary.package_id(); let age: ContextAge = self.age; - match self.activations.entry(id.as_activations_key()) { + match self.activations.entry(id.activation_key()) { im_rc::hashmap::Entry::Occupied(o) => { debug_assert_eq!( &o.get().0, @@ -101,8 +106,13 @@ impl ResolverContext { // versions came from a `[patch]` source. if let Some((_, dep)) = parent { if dep.source_id() != id.source_id() { - let key = (id.name(), dep.source_id(), id.version().into()); - let prev = self.activations.insert(key, (summary.clone(), age)); + let new_id = + PackageId::new(id.name(), id.version().clone(), dep.source_id()); + + let prev = self + .activations + .insert(new_id.activation_key(), (summary.clone(), age)); + if let Some((previous_summary, _)) = prev { return Err( (previous_summary.package_id(), ConflictReason::Semver).into() @@ -145,9 +155,13 @@ impl ResolverContext { /// If the package is active returns the `ContextAge` when it was added pub fn is_active(&self, id: PackageId) -> Option { - self.activations - .get(&id.as_activations_key()) - .and_then(|(s, l)| if s.package_id() == id { Some(*l) } else { None }) + let (summary, age) = self.activations.get(&id.activation_key())?; + + if summary.package_id() == id { + Some(*age) + } else { + None + } } /// Checks whether all of `parent` and the keys of `conflicting activations` @@ -163,8 +177,8 @@ impl ResolverContext { max = std::cmp::max(max, self.is_active(parent)?); } - for id in conflicting_activations.keys() { - max = std::cmp::max(max, self.is_active(*id)?); + for &id in conflicting_activations.keys() { + max = std::cmp::max(max, self.is_active(id)?); } Some(max) } diff --git a/src/cargo/core/resolver/mod.rs b/src/cargo/core/resolver/mod.rs index e78b2c0d9b3..f547adda4a5 100644 --- a/src/cargo/core/resolver/mod.rs +++ b/src/cargo/core/resolver/mod.rs @@ -198,8 +198,7 @@ fn activate_deps_loop( let mut backtrack_stack = Vec::new(); let mut remaining_deps = RemainingDeps::new(); - // `past_conflicting_activations` is a cache of the reasons for each time we - // backtrack. + // `past_conflicting_activations` is a cache of the reasons for each time we backtrack. let mut past_conflicting_activations = conflict_cache::ConflictCache::new(); // Activate all the initial summaries to kick off some work. @@ -775,7 +774,7 @@ impl RemainingCandidates { // // Here we throw out our candidate if it's *compatible*, yet not // equal, to all previously activated versions. - if let Some((a, _)) = cx.activations.get(&b_id.as_activations_key()) { + if let Some((a, _)) = cx.activations.get(&b_id.activation_key()) { if *a != b { conflicting_prev_active .entry(a.package_id())