12 changes: 0 additions & 12 deletions crates/exec-harness/preload/codspeed_preload.c
@@ -7,8 +7,6 @@
//
// Environment variables:
// CODSPEED_BENCH_URI - The benchmark URI to report (required)
// CODSPEED_PRELOAD_LOCK - Set by the first process to prevent child processes
// from re-initializing instrumentation

#include <stdlib.h>
#include <unistd.h>
@@ -22,8 +20,6 @@
#define RUNNING_ON_VALGRIND 0
#endif

static const char *LOCK_ENV = "CODSPEED_PRELOAD_LOCK";

// These constants are defined by the build script (build.rs) via -D flags
#ifndef CODSPEED_URI_ENV
#error "CODSPEED_URI_ENV must be defined by the build system"
@@ -54,14 +50,6 @@ __attribute__((constructor)) static void codspeed_preload_init(void) {
return;
}

// Check if another process already owns the instrumentation
if (getenv(LOCK_ENV)) {
return;
}

// Set the lock to prevent child processes from initializing
setenv(LOCK_ENV, "1", 1);

g_bench_uri = getenv(URI_ENV);
if (!g_bench_uri) {
return;
40 changes: 39 additions & 1 deletion crates/exec-harness/src/analysis/mod.rs
@@ -6,6 +6,7 @@ use crate::BenchmarkCommand;
use crate::constants;
use crate::uri;
use instrument_hooks_bindings::InstrumentHooks;
use std::path::PathBuf;
use std::process::Command;

mod ld_preload_check;
@@ -54,9 +55,14 @@ pub fn perform_with_valgrind(commands: Vec<BenchmarkCommand>) -> Result<()> {
cmd.args(&benchmark_cmd.command[1..]);
// Use LD_PRELOAD to inject instrumentation into the child process
cmd.env("LD_PRELOAD", preload_lib_path);
cmd.env("PYTHONPERFSUPPORT", "1");
Member: nit: maybe add a quick comment that we need this so that python generates perf maps (technically it's in the commit message, but comments make it a bit more obvious)
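A sketch of such a comment (illustrative wording; PYTHONPERFSUPPORT is the switch, added in CPython 3.12, that makes the interpreter write /tmp/perf-<pid>.map files):

// PYTHONPERFSUPPORT=1 makes CPython (3.12+) emit /tmp/perf-<pid>.map files
// for interpreted frames; we harvest those maps later so Python symbols resolve.
cmd.env("PYTHONPERFSUPPORT", "1");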

cmd.env(constants::URI_ENV, &name_and_uri.uri);

let status = cmd.status().context("Failed to execute command")?;
let mut child = cmd.spawn().context("Failed to spawn command")?;

let status = child.wait().context("Failed to execute command")?;

bail_if_command_spawned_subprocesses_under_valgrind(child.id())?;

if !status.success() {
bail!("Command exited with non-zero status: {status}");
Expand All @@ -65,3 +71,35 @@ pub fn perform_with_valgrind(commands: Vec<BenchmarkCommand>) -> Result<()> {

Ok(())
}

fn bail_if_command_spawned_subprocesses_under_valgrind(pid: u32) -> Result<()> {
Member: can we have a comment here that explains a bit why and how this works (basically what we discussed in the DMs); don't spend too much time on it, just want to make sure we have an explanation when we come back to this code

let Some(profile_folder) = std::env::var_os("CODSPEED_PROFILE_FOLDER") else {
debug!("CODSPEED_PROFILE_FOLDER is not set, skipping subprocess detection");
return Ok(());
};

let profile_folder = PathBuf::from(profile_folder);

// Bail if any <pid>.out where <pid> > pid of the benchmark process exists in the profile
// folder, which indicates that the benchmark process spawned subprocesses.
for entry in std::fs::read_dir(profile_folder)? {
let entry = entry?;
let file_name = entry.file_name();
let file_name = file_name.to_string_lossy();

if let Some(stripped) = file_name.strip_suffix(".out") {
if let Ok(subprocess_pid) = stripped.parse::<u32>() {
if subprocess_pid > pid {
bail!(
"The codspeed CLI in CPU Simulation mode does not support measuring processes that spawn other processes yet.\n\n\
Please either:\n\
- Use the walltime measurement mode, or\n\
- Benchmark a process that does not create subprocesses"
)
}
}
}
}

Ok(())
}
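A sketch of the explanation the reviewer asks for, phrased as a comment that could sit on bail_if_command_spawned_subprocesses_under_valgrind (it assumes the valgrind run writes one <pid>.out output file per traced process into CODSPEED_PROFILE_FOLDER, which is what the file-name check above relies on):

// Why this works (sketch): the valgrind run writes one `<pid>.out` file per
// traced process into CODSPEED_PROFILE_FOLDER. Linux hands out PIDs in
// increasing order (ignoring wraparound), so a `<pid>.out` whose pid is
// greater than the benchmark process's own pid can only belong to a process
// started after it, i.e. a subprocess spawned by the benchmark.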
11 changes: 5 additions & 6 deletions src/executor/helpers/harvest_perf_maps_for_pids.rs
@@ -1,14 +1,13 @@
use crate::prelude::*;
use std::collections::HashSet;
use std::fs;
Member: Since this function is async, and the BenchmarkData::save_to function is now also async, we could switch to tokio::fs instead of std::fs here.
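If that switch is made, a minimal sketch of the async counterpart (copy_perf_map, src, and dst are illustrative names; the real body may do more than a plain copy):

// tokio::fs::copy is the async counterpart of std::fs::copy and does not
// block the runtime thread while the file is copied.
async fn copy_perf_map(src: &std::path::Path, dst: &std::path::Path) -> std::io::Result<u64> {
    tokio::fs::copy(src, dst).await
}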

use std::path::{Path, PathBuf};

pub async fn harvest_perf_maps_for_pids(
profile_folder: &Path,
pids: &HashSet<libc::pid_t>,
) -> Result<()> {
pub async fn harvest_perf_maps_for_pids<I>(profile_folder: &Path, pids: I) -> Result<()>
where
I: IntoIterator<Item = libc::pid_t>,
{
let perf_maps = pids
.iter()
.into_iter()
Member: Don't forget to dedup the pids here, that's why we used HashSet.

.map(|pid| format!("perf-{pid}.map"))
.map(|file_name| {
(
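One way to address the dedup concern above while keeping the IntoIterator signature is to collect into a HashSet at the top of both harvest_perf_maps_for_pids and harvest_perf_jit_for_pids, so duplicates are dropped before any file is touched (a sketch, not part of this diff; the helper name is illustrative):

use std::collections::HashSet;

// Collect the incoming pids into a set so a pid that appears twice can never
// process the same perf-<pid>.map / jit-<pid>.dump twice or overwrite its output.
fn dedup_pids<I>(pids: I) -> HashSet<libc::pid_t>
where
    I: IntoIterator<Item = libc::pid_t>,
{
    pids.into_iter().collect()
}

The new call sites in mod.rs pass symbols_by_pid.keys().copied(), where map keys are already unique, so the dedup mainly protects other or future callers.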
6 changes: 1 addition & 5 deletions src/executor/shared/fifo.rs
@@ -69,7 +69,6 @@ impl GenericFifo {
pub struct FifoBenchmarkData {
/// Name and version of the integration
pub integration: Option<(String, String)>,
pub bench_pids: HashSet<pid_t>,
}

pub struct RunnerFifo {
@@ -255,10 +254,7 @@ impl RunnerFifo {
);
let marker_result =
ExecutionTimestamps::new(&bench_order_by_timestamp, &markers);
let fifo_data = FifoBenchmarkData {
integration,
bench_pids,
};
let fifo_data = FifoBenchmarkData { integration };
return Ok((marker_result, fifo_data, exit_status));
}
Err(e) => return Err(anyhow::Error::from(e)),
2 changes: 1 addition & 1 deletion src/executor/valgrind/helpers/perf_maps.rs
@@ -19,5 +19,5 @@ pub async fn harvest_perf_maps(profile_folder: &Path) -> Result<()> {
.filter_map(|pid| pid.parse().ok())
.collect::<HashSet<_>>();

harvest_perf_maps_for_pids(profile_folder, &pids).await
harvest_perf_maps_for_pids(profile_folder, pids.iter().copied()).await
}
12 changes: 6 additions & 6 deletions src/executor/wall_time/perf/jit_dump.rs
@@ -4,10 +4,7 @@ use crate::{
};
use linux_perf_data::jitdump::{JitDumpReader, JitDumpRecord};
use runner_shared::unwind_data::UnwindData;
use std::{
collections::HashSet,
path::{Path, PathBuf},
};
use std::path::{Path, PathBuf};

struct JitDump {
path: PathBuf,
@@ -102,7 +99,10 @@ impl JitDump {
}

/// Converts all the `jit-<pid>.dump` into unwind data and copies it to the profile folder.
pub async fn harvest_perf_jit_for_pids(profile_folder: &Path, pids: &HashSet<i32>) -> Result<()> {
pub async fn harvest_perf_jit_for_pids<I>(profile_folder: &Path, pids: I) -> Result<()>
where
I: IntoIterator<Item = libc::pid_t>,
{
for pid in pids {
Member: same comment here, make sure we're iterating only over the dedup/unique pids. Otherwise we can easily run into issues where we duplicate work and/or overwrite files.

let name = format!("jit-{pid}.dump");
let path = PathBuf::from("/tmp").join(&name);
@@ -131,7 +131,7 @@ pub async fn harvest_perf_jit_for_pids(profile_folder: &Path, pids: &HashSet<i32
};

for module in unwind_data {
module.save_to(profile_folder, *pid as _)?;
module.save_to(profile_folder, pid)?;
}
}

40 changes: 27 additions & 13 deletions src/executor/wall_time/perf/mod.rs
@@ -105,9 +105,9 @@ impl PerfRunner {
|| config.command.contains("uv")
|| config.command.contains("python")
{
// Max supported stack size is 64KiB, but this will increase the file size by a lot. In
// order to allow uploads and maintain accuracy, we limit this to 8KiB.
(UnwindingMode::Dwarf, Some(8 * 1024))
// Note that the higher the stack size, the larger the file, although it is mitigated
// by zstd compression
(UnwindingMode::Dwarf, Some(32 * 1024))
Member (on lines +108 to +110): Can you test this on pytest-codspeed and check the final report sizes? Last time we tried it, we had some issues there. It would also be useful to know when we could run into those limits since we're saving 4x more data now.
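For a rough sense of scale (illustrative numbers, not measurements from this PR): the raw stack payload grows as sample_rate × dump_size, so at e.g. 1,000 samples/s the 32 KiB dumps produce roughly 32 MiB/s of stack data before compression, versus roughly 8 MiB/s at the previous 8 KiB cap (the 4x noted above); how much zstd recovers depends on how repetitive the captured stacks are.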

} else {
// Default to dwarf unwinding since it works well with most binaries.
debug!("No call graph mode detected, defaulting to dwarf");
@@ -195,15 +195,10 @@ impl PerfRunner {
.get()
.expect("Benchmark order is not available");

// Harvest the perf maps generated by python. This will copy the perf
// maps from /tmp to the profile folder. We have to write our own perf
// maps to these files AFTERWARDS, otherwise it'll be overwritten!
harvest_perf_maps_for_pids(profile_folder, &bench_data.fifo_data.bench_pids).await?;
harvest_perf_jit_for_pids(profile_folder, &bench_data.fifo_data.bench_pids).await?;

// Append perf maps, unwind info and other metadata
if let Err(BenchmarkDataSaveError::MissingIntegration) =
bench_data.save_to(profile_folder, &self.get_perf_file_path(profile_folder))
if let Err(BenchmarkDataSaveError::MissingIntegration) = bench_data
.save_to(profile_folder, &self.get_perf_file_path(profile_folder))
.await
{
warn!(
"Perf is enabled, but failed to detect benchmarks. If you wish to disable this warning, set CODSPEED_PERF_ENABLED=false"
@@ -273,16 +268,20 @@ pub struct BenchmarkData {
pub enum BenchmarkDataSaveError {
MissingIntegration,
FailedToParsePerfFile,
FailedToHarvestPerfMaps,
FailedToHarvestJitDumps,
}

impl BenchmarkData {
pub fn save_to<P: AsRef<std::path::Path>>(
pub async fn save_to<P: AsRef<std::path::Path>>(
&self,
path: P,
perf_file_path: P,
) -> Result<(), BenchmarkDataSaveError> {
self.marker_result.save_to(&path).unwrap();

let path_ref = path.as_ref();

debug!("Reading perf data from file for mmap extraction");
let MemmapRecordsOutput {
symbols_by_pid,
})?
};

let path_ref = path.as_ref();
// Harvest the perf maps generated by python. This will copy the perf
// maps from /tmp to the profile folder. We have to write our own perf
// maps to these files AFTERWARDS, otherwise it'll be overwritten!
harvest_perf_maps_for_pids(path_ref, symbols_by_pid.keys().copied())
.await
.map_err(|e| {
error!("Failed to harvest perf maps: {e}");
BenchmarkDataSaveError::FailedToHarvestPerfMaps
})?;
harvest_perf_jit_for_pids(path_ref, symbols_by_pid.keys().copied())
.await
.map_err(|e| {
error!("Failed to harvest jit dumps: {e}");
BenchmarkDataSaveError::FailedToHarvestJitDumps
})?;

debug!("Saving symbols addresses");
symbols_by_pid.par_iter().for_each(|(_, proc_sym)| {
proc_sym.save_to(path_ref).unwrap();