Skip to content

Commit

Permalink
Optimize frame stats collection (EmbarkStudios#151)
Browse files Browse the repository at this point in the history
### Checklist

* [x] I have read the [Contributor Guide](../CONTRIBUTING.md)
* [x] I have read and agree to the [Code of
Conduct](../CODE_OF_CONDUCT.md)
* [x] I have added a description of my changes and why I'd like them
included in the section below

### Description of Changes

Optimize UI performance by calculating frame collection statistics
dynamically. Rather than repeatedly traversing each frame, we now update
the stats upon adding or removing frames. This change mitigates the
significant overhead caused by constantly accessing frame data protected
by `RwLock`.

Also, this update modifies the data containers used for storing frames
to adopt binary tree structures. As a result, all frames are sorted at
the time of insertion, eliminating the need for subsequent sorting
during vector collection.

Moreover, instead of returning vectors, the binary tree structure
enables iteration over elements in a sorted order, providing iterators
rather than vectors. This functionality gives the API caller the
flexibility to either clone frames or simply inspect them in an
efficient manner.

Before:

![before](https://github.com/EmbarkStudios/puffin/assets/7009786/e1472c1e-77e8-4845-beff-2d5a9bca0e1a)

After:

![after](https://github.com/EmbarkStudios/puffin/assets/7009786/ea100b5a-ad21-4b7d-a0bd-45d918c656f5)
  • Loading branch information
andreiltd authored Jul 31, 2024
1 parent cf078c0 commit 40f2579
Show file tree
Hide file tree
Showing 9 changed files with 424 additions and 168 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions puffin/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
<!-- next-header -->
## [Unreleased] - ReleaseDate

- [PR#151](https://github.com/EmbarkStudios/puffin/pull/151) Optimize frame statistics collection.

## [0.19.0] - 2024-01-17

- [PR#169](https://github.com/EmbarkStudios/puffin/pull/169) Stream scope information only once, drastically reduce bandwidth and increased performance. Allow better usage of static strings in profile scopes. Breaking change! See PR for migration guide.
Expand Down
1 change: 1 addition & 0 deletions puffin/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ web = ["dep:js-sys", "dep:web-time"]
[dependencies]
byteorder = { version = "1.0" }
cfg-if = "1.0"
itertools = "0.10"
once_cell = "1.0"
parking_lot = { version = "0.12"}

Expand Down
9 changes: 9 additions & 0 deletions puffin/benches/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,15 @@ pub fn criterion_benchmark(c: &mut Criterion) {
puffin::profile_scope!("my longish scope name", "my_mesh.obj");
})
});
c.bench_function("flush_frames", |b| {
puffin::GlobalProfiler::lock().new_frame();
let _fv = puffin::GlobalFrameView::default();

b.iter(|| {
puffin::profile_function!();
puffin::GlobalProfiler::lock().new_frame();
})
});

puffin::set_scopes_on(false);
c.bench_function("profile_function_off", |b| {
Expand Down
226 changes: 156 additions & 70 deletions puffin/src/frame_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ pub type ThreadStreams = BTreeMap<ThreadInfo, Arc<StreamInfo>>;

/// Meta-information about a frame.
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
#[derive(Clone, Debug)]
#[derive(Clone, Copy, Debug)]
pub struct FrameMeta {
/// What frame this is (counting from 0 at application startup).
pub frame_index: FrameIndex,
Expand Down Expand Up @@ -160,6 +160,13 @@ impl FrameData {
self.unpacked_frame.meta.num_bytes
}

/// Returns the packing information for the frame.
pub fn packing_info(&self) -> PackingInfo {
PackingInfo {
unpacked_size: Some(self.unpacked_frame.meta.num_bytes),
packed_size: None,
}
}
/// Always returns `false`.
pub fn has_packed(&self) -> bool {
false
Expand Down Expand Up @@ -320,14 +327,10 @@ impl PackedStreams {
#[cfg(feature = "packing")]
pub struct FrameData {
meta: FrameMeta,
/// * [`None`] if still compressed.
/// * `Some(Err(…))` if there was a problem during unpacking.
/// * `Some(Ok(…))` if unpacked.
unpacked_frame: RwLock<Option<anyhow::Result<Arc<UnpackedFrameData>>>>,

/// [`UnpackedFrameData::thread_streams`], compressed.
/// [`None`] if not yet compressed.
packed_streams: RwLock<Option<PackedStreams>>,
/// Encapsulates the frame data in its current state, which can be
/// uncompressed, compressed, or a combination of both
data: RwLock<FrameDataState>,

/// Scopes that were registered during this frame.
pub scope_delta: Vec<Arc<ScopeDetails>>,
Expand All @@ -337,6 +340,113 @@ pub struct FrameData {
pub full_delta: bool,
}

#[derive(Clone, Copy, Debug)]
pub struct PackingInfo {
/// Number of bytes used when unpacked, if has unpacked.
pub unpacked_size: Option<usize>,
/// Number of bytes used by the packed data, if has packed.
pub packed_size: Option<usize>,
}

#[cfg(feature = "packing")]
enum FrameDataState {
/// Unpacked data.
Unpacked(Arc<UnpackedFrameData>),

/// [`UnpackedFrameData::thread_streams`], compressed.
Packed(PackedStreams),

/// Both compressed and uncompressed.
Both(Arc<UnpackedFrameData>, PackedStreams),
}

#[cfg(feature = "packing")]
impl FrameDataState {
fn unpacked_size(&self) -> Option<usize> {
match self {
FrameDataState::Packed(_) => None,
FrameDataState::Unpacked(unpacked) | FrameDataState::Both(unpacked, _) => {
Some(unpacked.meta.num_bytes)
}
}
}

fn unpacked(&self) -> Option<Arc<UnpackedFrameData>> {
match self {
FrameDataState::Packed(_) => None,
FrameDataState::Unpacked(unpacked) | FrameDataState::Both(unpacked, _) => {
Some(unpacked.clone())
}
}
}

fn unpack(&mut self, unpacked: Arc<UnpackedFrameData>) {
let temp = std::mem::replace(
self,
FrameDataState::Packed(PackedStreams::new(CompressionKind::Uncompressed, vec![])),
);

if let FrameDataState::Packed(packed) = temp {
// Transform only if we don't have unpacked already
*self = FrameDataState::Both(unpacked, packed);
} else {
// Restore the original value if it was not Inner::Packed
*self = temp;
}
}

fn packed_size(&self) -> Option<usize> {
match self {
FrameDataState::Unpacked(_) => None,
FrameDataState::Packed(packed) | FrameDataState::Both(_, packed) => {
Some(packed.num_bytes())
}
}
}

fn packed(&self) -> Option<&PackedStreams> {
match self {
FrameDataState::Unpacked(_) => None,
FrameDataState::Packed(packed) | FrameDataState::Both(_, packed) => Some(packed),
}
}

fn pack_and_remove(&mut self) {
if let FrameDataState::Unpacked(ref unpacked) | FrameDataState::Both(ref unpacked, _) =
*self
{
let packed = PackedStreams::pack(&unpacked.thread_streams);
*self = Self::Packed(packed);
}
}

fn pack_and_keep(&mut self) {
if let FrameDataState::Unpacked(ref unpacked) = *self {
let packed = PackedStreams::pack(&unpacked.thread_streams);
*self = Self::Packed(packed);
}
}

fn bytes_of_ram_used(&self) -> usize {
self.unpacked_size().unwrap_or(0) + self.packed_size().unwrap_or(0)
}

fn has_packed(&self) -> bool {
matches!(self, FrameDataState::Packed(_) | FrameDataState::Both(..))
}

fn has_unpacked(&self) -> bool {
matches!(self, FrameDataState::Unpacked(_) | FrameDataState::Both(..))
}

fn packing_info(&self) -> PackingInfo {
PackingInfo {
unpacked_size: self.unpacked_size(),
packed_size: self.packed_size(),
}
}
}

#[cfg(feature = "packing")]
impl FrameData {
/// Create a new [`FrameData`].
Expand All @@ -359,9 +469,8 @@ impl FrameData {
full_delta: bool,
) -> Self {
Self {
meta: unpacked_frame.meta.clone(),
unpacked_frame: RwLock::new(Some(Ok(unpacked_frame))),
packed_streams: RwLock::new(None),
meta: unpacked_frame.meta,
data: RwLock::new(FrameDataState::Unpacked(unpacked_frame)),
scope_delta,
full_delta,
}
Expand All @@ -375,31 +484,37 @@ impl FrameData {

/// Number of bytes used by the packed data, if packed.
pub fn packed_size(&self) -> Option<usize> {
self.packed_streams.read().as_ref().map(|c| c.num_bytes())
self.data.read().packed_size()
}

/// Number of bytes used when unpacked, if known.
pub fn unpacked_size(&self) -> Option<usize> {
if self.has_unpacked() {
Some(self.meta.num_bytes)
} else {
None
}
self.data.read().unpacked_size()
}

/// bytes currently used by the unpacked and packed data.
pub fn bytes_of_ram_used(&self) -> usize {
self.unpacked_size().unwrap_or(0) + self.packed_size().unwrap_or(0)
self.data.read().bytes_of_ram_used()
}

/// Do we have a packed version stored internally?
pub fn has_packed(&self) -> bool {
self.packed_streams.read().is_some()
self.data.read().has_packed()
}

/// Do we have a unpacked version stored internally?
pub fn has_unpacked(&self) -> bool {
self.unpacked_frame.read().is_some()
self.data.read().has_unpacked()
}

/// Provides an overview of the frame's packing status.
///
/// The function retrieves both the sizes of the unpacked and packed frames, as well as whether
/// packed/unpacked versions of the frame exist internally. The goal of this function is to
/// minimize the number of lock accesses by consolidating information retrieval into a single
/// operation.
pub fn packing_info(&self) -> PackingInfo {
self.data.read().packing_info()
}

/// Return the unpacked data.
Expand All @@ -408,60 +523,34 @@ impl FrameData {
///
/// Returns `Err` if failing to decode the packed data.
pub fn unpacked(&self) -> anyhow::Result<Arc<UnpackedFrameData>> {
fn unpack_frame_data(
meta: FrameMeta,
packed: &PackedStreams,
) -> anyhow::Result<UnpackedFrameData> {
Ok(UnpackedFrameData {
meta,
thread_streams: packed.unpack()?,
})
}
let unpacked = {
let inner_guard = self.data.read();
let FrameDataState::Packed(ref packed) = *inner_guard else {
// Safe to unwrap, variant has to contain unpacked if NOT `Packed`
return Ok(self.data.read().unpacked().unwrap());
};

let has_unpacked = self.unpacked_frame.read().is_some();
if !has_unpacked {
crate::profile_scope!("unpack_puffin_frame");
let packed_lock = self.packed_streams.read();
let packed = packed_lock
.as_ref()
.expect("FrameData is neither packed or unpacked");

let frame_data_result = unpack_frame_data(self.meta.clone(), packed);
let frame_data_result = frame_data_result.map(Arc::new);
*self.unpacked_frame.write() = Some(frame_data_result);
}

match self.unpacked_frame.read().as_ref().unwrap() {
Ok(frame) => Ok(frame.clone()),
Err(err) => Err(anyhow::format_err!("{}", err)), // can't clone `anyhow::Error`
}
Arc::new(UnpackedFrameData {
meta: self.meta,
thread_streams: packed.unpack()?,
})
};

self.data.write().unpack(unpacked.clone());
Ok(unpacked)
}

/// Make the [`FrameData`] use up less memory.
/// Idempotent.
pub fn pack(&self) {
self.create_packed();
*self.unpacked_frame.write() = None;
self.data.write().pack_and_remove();
}

/// Create a packed storage without freeing the unpacked storage.
fn create_packed(&self) {
let has_packed = self.packed_streams.read().is_some();
if !has_packed {
// crate::profile_scope!("pack_puffin_frame"); // we get called from `GlobalProfiler::new_frame`, so avoid recursiveness!
let unpacked_frame = self
.unpacked_frame
.read()
.as_ref()
.expect("We should have an unpacked frame if we don't have a packed one")
.as_ref()
.expect("The unpacked frame should be error free, since it doesn't come from packed source")
.clone();

let packed = PackedStreams::pack(&unpacked_frame.thread_streams);

*self.packed_streams.write() = Some(packed);
}
self.data.write().pack_and_keep();
}

/// Writes one [`FrameData`] into a stream, prefixed by its length ([`u32`] le).
Expand All @@ -483,8 +572,8 @@ impl FrameData {
write.write_all(&meta_serialized)?;

self.create_packed();
let packed_streams_lock = self.packed_streams.read();
let packed_streams = packed_streams_lock.as_ref().unwrap(); // We just called create_packed
let packed_streams_lock = self.data.read();
let packed_streams = packed_streams_lock.packed().unwrap(); // We just called create_packed

write.write_all(&(packed_streams.num_bytes() as u32).to_le_bytes())?;
write.write_u8(packed_streams.compression_kind as u8)?;
Expand Down Expand Up @@ -613,8 +702,7 @@ impl FrameData {

Ok(Some(Self {
meta,
unpacked_frame: RwLock::new(None),
packed_streams: RwLock::new(Some(packed_streams)),
data: RwLock::new(FrameDataState::Packed(packed_streams)),
scope_delta: Default::default(),
full_delta: false,
}))
Expand Down Expand Up @@ -645,8 +733,7 @@ impl FrameData {

Ok(Some(Self {
meta,
unpacked_frame: RwLock::new(None),
packed_streams: RwLock::new(Some(packed_streams)),
data: RwLock::new(FrameDataState::Packed(packed_streams)),
scope_delta: Default::default(),
full_delta: false,
}))
Expand Down Expand Up @@ -685,8 +772,7 @@ impl FrameData {

Ok(Some(Self {
meta,
unpacked_frame: RwLock::new(None),
packed_streams: RwLock::new(Some(streams_compressed)),
data: RwLock::new(FrameDataState::Packed(streams_compressed)),
scope_delta: new_scopes,
full_delta: false,
}))
Expand Down
2 changes: 1 addition & 1 deletion puffin/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub use data::{Error, Reader, Result, Scope, ScopeRecord, Stream, StreamInfo, St
pub use frame_data::{FrameData, FrameMeta, UnpackedFrameData};
pub use global_profiler::{FrameSink, GlobalProfiler};
pub use merge::{merge_scopes_for_thread, MergeScope};
pub use profile_view::{select_slowest, FrameView, GlobalFrameView};
pub use profile_view::{select_slowest, FrameStats, FrameView, GlobalFrameView};
pub use scope_details::{ScopeCollection, ScopeDetails, ScopeType};
pub use thread_profiler::{ThreadInfo, ThreadProfiler};
pub use utils::{clean_function_name, short_file_name, shorten_rust_function_name, type_name_of};
Expand Down
Loading

0 comments on commit 40f2579

Please sign in to comment.