12 changes: 0 additions & 12 deletions crates/exec-harness/preload/codspeed_preload.c
@@ -7,8 +7,6 @@
//
// Environment variables:
// CODSPEED_BENCH_URI - The benchmark URI to report (required)
// CODSPEED_PRELOAD_LOCK - Set by the first process to prevent child processes
// from re-initializing instrumentation

#include <stdlib.h>
#include <unistd.h>
@@ -22,8 +20,6 @@
#define RUNNING_ON_VALGRIND 0
#endif

static const char *LOCK_ENV = "CODSPEED_PRELOAD_LOCK";

// These constants are defined by the build script (build.rs) via -D flags
#ifndef CODSPEED_URI_ENV
#error "CODSPEED_URI_ENV must be defined by the build system"
@@ -54,14 +50,6 @@ __attribute__((constructor)) static void codspeed_preload_init(void) {
return;
}

// Check if another process already owns the instrumentation
if (getenv(LOCK_ENV)) {
return;
}

// Set the lock to prevent child processes from initializing
setenv(LOCK_ENV, "1", 1);

g_bench_uri = getenv(URI_ENV);
if (!g_bench_uri) {
return;
40 changes: 39 additions & 1 deletion crates/exec-harness/src/analysis/mod.rs
@@ -6,6 +6,7 @@ use crate::BenchmarkCommand;
use crate::constants;
use crate::uri;
use instrument_hooks_bindings::InstrumentHooks;
use std::path::PathBuf;
use std::process::Command;

mod ld_preload_check;
@@ -54,9 +55,14 @@ pub fn perform_with_valgrind(commands: Vec<BenchmarkCommand>) -> Result<()> {
cmd.args(&benchmark_cmd.command[1..]);
// Use LD_PRELOAD to inject instrumentation into the child process
cmd.env("LD_PRELOAD", preload_lib_path);
cmd.env("PYTHONPERFSUPPORT", "1");
Member: nit: maybe add a quick comment that we need this so that python generates perf maps (technically it's in the commit message, but comments make it a bit more obvious)
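A sketch of such a comment (illustrative wording; PYTHONPERFSUPPORT is the switch, added in CPython 3.12, that makes the interpreter write /tmp/perf-<pid>.map files):

// PYTHONPERFSUPPORT=1 makes CPython (3.12+) emit /tmp/perf-<pid>.map files
// for interpreted frames; we harvest those maps later so Python symbols resolve.
cmd.env("PYTHONPERFSUPPORT", "1");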

cmd.env(constants::URI_ENV, &name_and_uri.uri);

let status = cmd.status().context("Failed to execute command")?;
let mut child = cmd.spawn().context("Failed to spawn command")?;

let status = child.wait().context("Failed to execute command")?;

bail_if_command_spawned_subprocesses_under_valgrind(child.id())?;

if !status.success() {
bail!("Command exited with non-zero status: {status}");
Expand All @@ -65,3 +71,35 @@ pub fn perform_with_valgrind(commands: Vec<BenchmarkCommand>) -> Result<()> {

Ok(())
}

fn bail_if_command_spawned_subprocesses_under_valgrind(pid: u32) -> Result<()> {
Member: can we have a comment here that explains a bit why and how this works (basically what we discussed in the DMs); don't spend too much time on it, just want to make sure we have an explanation when we come back to this code

let Some(profile_folder) = std::env::var_os("CODSPEED_PROFILE_FOLDER") else {
debug!("CODSPEED_PROFILE_FOLDER is not set, skipping subprocess detection");
return Ok(());
};

let profile_folder = PathBuf::from(profile_folder);

// Bail if any <pid>.out where <pid> > pid of the benchmark process exists in the profile
// folder, which indicates that the benchmark process spawned subprocesses.
for entry in std::fs::read_dir(profile_folder)? {
let entry = entry?;
let file_name = entry.file_name();
let file_name = file_name.to_string_lossy();

if let Some(stripped) = file_name.strip_suffix(".out") {
if let Ok(subprocess_pid) = stripped.parse::<u32>() {
if subprocess_pid > pid {
bail!(
"The codspeed CLI in CPU Simulation mode does not support measuring processes that spawn other processes yet.\n\n\
Please either:\n\
- Use the walltime measurement mode, or\n\
- Benchmark a process that does not create subprocesses"
)
}
}
}
}

Ok(())
}
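A sketch of the explanation the reviewer asks for, phrased as a comment that could sit on bail_if_command_spawned_subprocesses_under_valgrind (it assumes the valgrind run writes one <pid>.out output file per traced process into CODSPEED_PROFILE_FOLDER, which is what the file-name check above relies on):

// Why this works (sketch): the valgrind run writes one `<pid>.out` file per
// traced process into CODSPEED_PROFILE_FOLDER. Linux hands out PIDs in
// increasing order (ignoring wraparound), so a `<pid>.out` whose pid is
// greater than the benchmark process's own pid can only belong to a process
// started after it, i.e. a subprocess spawned by the benchmark.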
11 changes: 5 additions & 6 deletions src/executor/helpers/harvest_perf_maps_for_pids.rs
@@ -1,14 +1,13 @@
use crate::prelude::*;
use std::collections::HashSet;
use std::fs;
Member: Since this function is async, and the BenchmarkData::save_to function is now also async, we could switch to tokio::fs instead of std::fs here.
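If that switch is made, a minimal sketch of the async counterpart (copy_perf_map, src, and dst are illustrative names; the real body may do more than a plain copy):

// tokio::fs::copy is the async counterpart of std::fs::copy and does not
// block the runtime thread while the file is copied.
async fn copy_perf_map(src: &std::path::Path, dst: &std::path::Path) -> std::io::Result<u64> {
    tokio::fs::copy(src, dst).await
}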

use std::path::{Path, PathBuf};

pub async fn harvest_perf_maps_for_pids(
profile_folder: &Path,
pids: &HashSet<libc::pid_t>,
) -> Result<()> {
pub async fn harvest_perf_maps_for_pids<I>(profile_folder: &Path, pids: I) -> Result<()>
where
I: IntoIterator<Item = libc::pid_t>,
{
let perf_maps = pids
.iter()
.into_iter()
Member: Don't forget to dedup the pids here, that's why we used HashSet.

.map(|pid| format!("perf-{pid}.map"))
.map(|file_name| {
(
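One way to address the dedup concern above while keeping the IntoIterator signature is to collect into a HashSet at the top of both harvest_perf_maps_for_pids and harvest_perf_jit_for_pids, so duplicates are dropped before any file is touched (a sketch, not part of this diff; the helper name is illustrative):

use std::collections::HashSet;

// Collect the incoming pids into a set so a pid that appears twice can never
// process the same perf-<pid>.map / jit-<pid>.dump twice or overwrite its output.
fn dedup_pids<I>(pids: I) -> HashSet<libc::pid_t>
where
    I: IntoIterator<Item = libc::pid_t>,
{
    pids.into_iter().collect()
}

The new call sites in mod.rs pass symbols_by_pid.keys().copied(), where map keys are already unique, so the dedup mainly protects other or future callers.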
6 changes: 1 addition & 5 deletions src/executor/shared/fifo.rs
@@ -69,7 +69,6 @@ impl GenericFifo {
pub struct FifoBenchmarkData {
/// Name and version of the integration
pub integration: Option<(String, String)>,
pub bench_pids: HashSet<pid_t>,
}

pub struct RunnerFifo {
@@ -255,10 +254,7 @@ impl RunnerFifo {
);
let marker_result =
ExecutionTimestamps::new(&bench_order_by_timestamp, &markers);
let fifo_data = FifoBenchmarkData {
integration,
bench_pids,
};
let fifo_data = FifoBenchmarkData { integration };
return Ok((marker_result, fifo_data, exit_status));
}
Err(e) => return Err(anyhow::Error::from(e)),
2 changes: 1 addition & 1 deletion src/executor/valgrind/helpers/perf_maps.rs
@@ -19,5 +19,5 @@ pub async fn harvest_perf_maps(profile_folder: &Path) -> Result<()> {
.filter_map(|pid| pid.parse().ok())
.collect::<HashSet<_>>();

harvest_perf_maps_for_pids(profile_folder, &pids).await
harvest_perf_maps_for_pids(profile_folder, pids.iter().copied()).await
}
12 changes: 6 additions & 6 deletions src/executor/wall_time/perf/jit_dump.rs
@@ -4,10 +4,7 @@ use crate::{
};
use linux_perf_data::jitdump::{JitDumpReader, JitDumpRecord};
use runner_shared::unwind_data::UnwindData;
use std::{
collections::HashSet,
path::{Path, PathBuf},
};
use std::path::{Path, PathBuf};

struct JitDump {
path: PathBuf,
@@ -102,7 +99,10 @@ impl JitDump {
}

/// Converts all the `jit-<pid>.dump` into unwind data and copies it to the profile folder.
pub async fn harvest_perf_jit_for_pids(profile_folder: &Path, pids: &HashSet<i32>) -> Result<()> {
pub async fn harvest_perf_jit_for_pids<I>(profile_folder: &Path, pids: I) -> Result<()>
where
I: IntoIterator<Item = libc::pid_t>,
{
for pid in pids {
Member: same comment here, make sure we're iterating only over the dedup/unique pids. Otherwise we can easily run into issues where we duplicate work and/or overwrite files.

let name = format!("jit-{pid}.dump");
let path = PathBuf::from("/tmp").join(&name);
@@ -131,7 +131,7 @@ pub async fn harvest_perf_jit_for_pids(profile_folder: &Path, pids: &HashSet<i32
};

for module in unwind_data {
module.save_to(profile_folder, *pid as _)?;
module.save_to(profile_folder, pid)?;
}
}

40 changes: 27 additions & 13 deletions src/executor/wall_time/perf/mod.rs
@@ -105,9 +105,9 @@ impl PerfRunner {
|| config.command.contains("uv")
|| config.command.contains("python")
{
// Max supported stack size is 64KiB, but this will increase the file size by a lot. In
// order to allow uploads and maintain accuracy, we limit this to 8KiB.
(UnwindingMode::Dwarf, Some(8 * 1024))
// Note that the higher the stack size, the larger the file, although it is mitigated
// by zstd compression
(UnwindingMode::Dwarf, Some(32 * 1024))
Member (on lines +108 to +110): Can you test this on pytest-codspeed and check the final report sizes? Last time we tried it, we had some issues there. It would also be useful to know when we could run into those limits since we're saving 4x more data now.
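For a rough sense of scale (illustrative numbers, not measurements from this PR): the raw stack payload grows as sample_rate × dump_size, so at e.g. 1,000 samples/s the 32 KiB dumps produce roughly 32 MiB/s of stack data before compression, versus roughly 8 MiB/s at the previous 8 KiB cap (the 4x noted above); how much zstd recovers depends on how repetitive the captured stacks are.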

} else {
// Default to dwarf unwinding since it works well with most binaries.
debug!("No call graph mode detected, defaulting to dwarf");
@@ -195,15 +195,10 @@ impl PerfRunner {
.get()
.expect("Benchmark order is not available");

// Harvest the perf maps generated by python. This will copy the perf
// maps from /tmp to the profile folder. We have to write our own perf
// maps to these files AFTERWARDS, otherwise it'll be overwritten!
harvest_perf_maps_for_pids(profile_folder, &bench_data.fifo_data.bench_pids).await?;
harvest_perf_jit_for_pids(profile_folder, &bench_data.fifo_data.bench_pids).await?;

// Append perf maps, unwind info and other metadata
if let Err(BenchmarkDataSaveError::MissingIntegration) =
bench_data.save_to(profile_folder, &self.get_perf_file_path(profile_folder))
if let Err(BenchmarkDataSaveError::MissingIntegration) = bench_data
.save_to(profile_folder, &self.get_perf_file_path(profile_folder))
.await
{
warn!(
"Perf is enabled, but failed to detect benchmarks. If you wish to disable this warning, set CODSPEED_PERF_ENABLED=false"
@@ -273,16 +268,20 @@ pub struct BenchmarkData {
pub enum BenchmarkDataSaveError {
MissingIntegration,
FailedToParsePerfFile,
FailedToHarvestPerfMaps,
FailedToHarvestJitDumps,
}

impl BenchmarkData {
pub fn save_to<P: AsRef<std::path::Path>>(
pub async fn save_to<P: AsRef<std::path::Path>>(
&self,
path: P,
perf_file_path: P,
) -> Result<(), BenchmarkDataSaveError> {
self.marker_result.save_to(&path).unwrap();

let path_ref = path.as_ref();

debug!("Reading perf data from file for mmap extraction");
let MemmapRecordsOutput {
symbols_by_pid,
})?
};

let path_ref = path.as_ref();
// Harvest the perf maps generated by python. This will copy the perf
// maps from /tmp to the profile folder. We have to write our own perf
// maps to these files AFTERWARDS, otherwise it'll be overwritten!
harvest_perf_maps_for_pids(path_ref, symbols_by_pid.keys().copied())
.await
.map_err(|e| {
error!("Failed to harvest perf maps: {e}");
BenchmarkDataSaveError::FailedToHarvestPerfMaps
})?;
harvest_perf_jit_for_pids(path_ref, symbols_by_pid.keys().copied())
.await
.map_err(|e| {
error!("Failed to harvest jit dumps: {e}");
BenchmarkDataSaveError::FailedToHarvestJitDumps
})?;

debug!("Saving symbols addresses");
symbols_by_pid.par_iter().for_each(|(_, proc_sym)| {
proc_sym.save_to(path_ref).unwrap();