ruby · eightbitraptor · Jun 10, 2026 · Jun 10, 2026
diff --git a/README.md b/README.md
@@ -284,12 +284,22 @@ after each iteration with the default harness.
 
 ## Measuring memory usage
 
-`--rss` option of `run_benchmarks.rb` allows you to measure RSS after benchmark iterations.
+`--rss` option of `run_benchmarks.rb` allows you to measure RSS (resident set size).
 
 ```
 ./run_benchmarks.rb --rss
 ```
 
+The harness samples RSS once per iteration across the benchmarking window (after
+warmup), so the `RSS (MiB)` column reports the mean working set during measurement
+along with its iteration-to-iteration variability (`mean ± stddev%`), and the `RSS`
+ratio is computed from those means. The raw per-iteration samples are stored in the
+JSON output under `rss_samples` (bytes).
+
+For reference, the JSON output also keeps `rss`, a single snapshot taken after a
+full GC at the end of the run (the retained set, a lower bound), and `maxrss`, the
+process's lifetime peak from `getrusage`.
+
 ## Rendering a graph
 
 `--graph` option of `run_benchmarks.rb` allows you to render benchmark results as a graph.

diff --git a/harness-gc/harness.rb b/harness-gc/harness.rb
@@ -33,6 +33,7 @@ def gc_stat_heap_delta(before, after)
 
 def run_benchmark(_num_itrs_hint, **, &block)
   times = []
+  rss_samples = []
   marking_times = []
   sweeping_times = []
   gc_counts = []
@@ -82,6 +83,7 @@ def run_benchmark(_num_itrs_hint, **, &block)
     puts itr_str
 
     times << time
+    rss_samples << get_rss
     marking_times << mark_delta
     sweeping_times << sweep_delta
     gc_counts << count_delta
@@ -95,6 +97,8 @@ def run_benchmark(_num_itrs_hint, **, &block)
   bench_range = WARMUP_ITRS..-1
 
   extra = {}
+  rss_bench = rss_samples[bench_range] || []
+  extra["rss_samples"] = rss_bench unless rss_bench.empty?
   extra["gc_marking_time_warmup"] = marking_times[warmup_range]
   extra["gc_marking_time_bench"] = marking_times[bench_range]
   extra["gc_sweeping_time_warmup"] = sweeping_times[warmup_range]

diff --git a/harness-warmup/harness.rb b/harness-warmup/harness.rb
@@ -36,10 +36,12 @@ def print_stats(bench, elapsed)
 def run_benchmark(num_itrs_hint, **)
   start = monotonic_time
   times = []
+  rss_samples = []
 
   begin
     time = Benchmark.realtime { yield }
     times << time
+    rss_samples << get_rss
 
     stats = Stats.new(times)
     median = stats.median
@@ -63,7 +65,9 @@ def run_benchmark(num_itrs_hint, **)
   end until times.size >= MIN_ITERS and elapsed >= MIN_TIME and mad <= threshold
 
   warmup, bench = times[0...times.size/2], times[times.size/2..-1]
-  return_results(warmup, bench)
+  rss_bench = rss_samples[times.size/2..-1] || []
+  extra = rss_bench.empty? ? {} : { "rss_samples" => rss_bench }
+  return_results(warmup, bench, **extra)
 
   print_stats(bench, elapsed)
 end
diff --git a/harness/harness-common.rb b/harness/harness-common.rb
@@ -1,4 +1,5 @@
 require 'rbconfig'
+require_relative '../misc/stats'
 
 # Ensure the ruby in PATH is the ruby running this, so we can safely shell out to other commands
 ruby_in_path = `ruby -e 'print RbConfig.ruby'`
@@ -214,6 +215,17 @@ def return_results(warmup_iterations, bench_iterations, **extra)
     puts "MAXRSS: %.1fMiB" % (maxrss / 1024.0 / 1024.0)
   end
 
+  rss_samples = ruby_bench_results["rss_samples"]
+  if rss_samples.is_a?(Array) && !rss_samples.empty?
+    mib = rss_samples.map { |bytes| bytes / 1024.0 / 1024.0 }
+    stats = Stats.new(mib)
+    median = stats.median
+    mad = stats.median_absolute_deviation(median)
+    puts "RSS sampled (n=%d): median %.1fMiB \u00b1 %.1fMiB (MAD), range [%.1f, %.1f]MiB" % [
+      mib.size, median, mad, stats.min, stats.max
+    ]
+  end
+
   write_json_file(ruby_bench_results)
 end
 

diff --git a/harness/harness.rb b/harness/harness.rb
@@ -34,6 +34,7 @@ def realtime
 # Takes a block as input
 def run_benchmark(_num_itrs_hint, **, &block)
   times = []
+  rss_samples = []
   total_time = 0
   num_itrs = 0
   header = "itr:   time"
@@ -75,10 +76,15 @@ def run_benchmark(_num_itrs_hint, **, &block)
     # We internally save the time in seconds to avoid loss of precision
     times << time
     total_time += time
+    # Sample current RSS between iterations (outside the timed block) so we can
+    # report the working set across the window with variance.
+    rss_samples << get_rss
   end until num_itrs >= WARMUP_ITRS + MIN_BENCH_ITRS and total_time >= MIN_BENCH_TIME
 
   warmup, bench = times[0...WARMUP_ITRS], times[WARMUP_ITRS..-1]
-  return_results(warmup, bench)
+  rss_bench = rss_samples[WARMUP_ITRS..-1] || []
+  extra = rss_bench.empty? ? {} : { "rss_samples" => rss_bench }
+  return_results(warmup, bench, **extra)
 
   non_warmups = times[WARMUP_ITRS..-1]
   if non_warmups.size > 1

diff --git a/lib/cpu_config.rb b/lib/cpu_config.rb
@@ -52,6 +52,24 @@ def maximize_frequency
   def check_pstate(turbo:)
     # Override in subclasses
   end
+
+  def keep_sudo_alive
+    @sudo_keepalive ||= Thread.new { sudo_keepalive_loop }
+  end
+
+  def sudo_keepalive_loop
+    loop do
+      sleep 50 # well under sudo's timestamp_timeout (upstream default is 5 min; some distros ship 15)
+      break unless system("sudo", "-n", "-v", out: File::NULL, err: File::NULL)
+    end
+  end
+
+  def sudo_restore(manual_hint, shell_cmd)
+    return if system("sudo", "-n", "sh", "-c", shell_cmd, err: File::NULL)
+
+    warn "\nCould not restore CPU setting automatically (sudo credentials expired)."
+    warn "Run this manually:\n  #{manual_hint}"
+  end
 end
 
 # Intel CPU configuration
@@ -67,7 +85,9 @@ class IntelCPUConfig < CPUConfig
   def disable_turbo_boost
     # sudo requires the flag '-S' in order to take input from stdin
     BenchmarkRunner.check_call("sudo -S sh -c 'echo #{TURBO_DISABLED_VALUE} > #{NO_TURBO_PATH}'")
-    at_exit { BenchmarkRunner.check_call("sudo -S sh -c 'echo 0 > #{NO_TURBO_PATH}'", quiet: true) }
+    keep_sudo_alive
+    restore = "sudo sh -c 'echo 0 > #{NO_TURBO_PATH}'"
+    at_exit { sudo_restore(restore, "echo 0 > #{NO_TURBO_PATH}") }
   end
 
   def maximize_frequency
@@ -114,7 +134,9 @@ class AMDCPUConfig < CPUConfig
   def disable_turbo_boost
     # sudo requires the flag '-S' in order to take input from stdin
     BenchmarkRunner.check_call("sudo -S sh -c 'echo #{TURBO_DISABLED_VALUE} > #{BOOST_PATH}'")
-    at_exit { BenchmarkRunner.check_call("sudo -S sh -c 'echo #{TURBO_ENABLED_VALUE} > #{BOOST_PATH}'", quiet: true) }
+    keep_sudo_alive
+    restore = "sudo sh -c 'echo #{TURBO_ENABLED_VALUE} > #{BOOST_PATH}'"
+    at_exit { sudo_restore(restore, "echo #{TURBO_ENABLED_VALUE} > #{BOOST_PATH}") }
   end
 
   def maximize_frequency

diff --git a/lib/results_table_builder.rb b/lib/results_table_builder.rb
@@ -12,6 +12,7 @@ def initialize(executable_names:, bench_data:, include_rss: false, include_pvalu
     @include_pvalue = include_pvalue
     @zjit_stats = zjit_stats || []
     @include_gc = detect_gc_data(bench_data)
+    @rss_has_samples = @include_rss && detect_rss_samples(bench_data)
     @base_name = executable_names.first
     @other_names = executable_names[1..]
     @bench_names = compute_bench_names
@@ -86,7 +87,7 @@ def build_format
 
     @executable_names.each do |_name|
       format << "%s"
-      format << "%.1f" if @include_rss
+      format << (@rss_has_samples ? "%s" : "%.1f") if @include_rss
       @zjit_stats.each { format << "%s" }
       if @include_gc
         format << "%s"
@@ -125,11 +126,15 @@ def build_row(bench_name)
     t0s = extract_first_iteration_times(bench_name)
     times_no_warmup = extract_benchmark_times(bench_name)
     rsss = extract_rss_values(bench_name)
+    rss_series = @rss_has_samples ? extract_rss_series(bench_name) : nil
 
     base_t0, *other_t0s = t0s
     base_t, *other_ts = times_no_warmup
     base_rss, *other_rsss = rsss
 
+    base_rss_cell = rss_cell(base_rss, rss_series && rss_series[0])
+    other_rss_cells = other_rsss.each_index.map { |i| rss_cell(other_rsss[i], rss_series && rss_series[i + 1]) }
+
     # Extract zjit stats: { stat_name => [base_val, other1_val, ...] }
     zjit_stat_values = @zjit_stats.map do |stat|
       [stat, extract_zjit_stat(bench_name, stat)]
@@ -143,8 +148,8 @@ def build_row(bench_name)
     end
 
     row = [bench_name]
-    build_base_columns(row, base_t, base_rss, zjit_stat_values, 0, base_mark, base_sweep)
-    build_comparison_columns(row, other_ts, other_rsss, zjit_stat_values, other_marks, other_sweeps)
+    build_base_columns(row, base_t, base_rss_cell, zjit_stat_values, 0, base_mark, base_sweep)
+    build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values, other_marks, other_sweeps)
     build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts)
     build_rss_ratio_columns(row, base_rss, other_rsss)
     build_gc_ratio_columns(row, base_mark, other_marks, base_sweep, other_sweeps)
@@ -162,10 +167,10 @@ def build_base_columns(row, base_t, base_rss, zjit_stat_values, exe_index, base_
     end
   end
 
-  def build_comparison_columns(row, other_ts, other_rsss, zjit_stat_values, other_marks, other_sweeps)
+  def build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values, other_marks, other_sweeps)
     other_ts.each_with_index do |other_t, i|
       row << format_time_with_stddev(other_t)
-      row << other_rsss[i] if @include_rss
+      row << other_rss_cells[i] if @include_rss
       zjit_stat_values.each { |_stat, values| row << format_stat(values[i + 1]) }
       if @include_gc
         row << format_time_with_stddev(other_marks[i])
@@ -283,9 +288,38 @@ def extract_benchmark_times(bench_name)
     end
   end
 
+  # Numeric RSS (MiB) per executable, used for the RSS ratio. When per-iteration
+  # samples are present we use their mean so the ratio matches the displayed value.
   def extract_rss_values(bench_name)
     @executable_names.map do |name|
-      bench_data_for(name, bench_name)['rss'] / BYTES_TO_MIB
+      data = bench_data_for(name, bench_name)
+      samples = data['rss_samples']
+      if samples.is_a?(Array) && !samples.empty?
+        mean(samples) / BYTES_TO_MIB
+      else
+        data['rss'] / BYTES_TO_MIB
+      end
+    end
+  end
+
+  # Per-iteration RSS samples (MiB) per executable, or nil when a run lacks them.
+  def extract_rss_series(bench_name)
+    @executable_names.map do |name|
+      samples = bench_data_for(name, bench_name)['rss_samples']
+      next nil unless samples.is_a?(Array) && !samples.empty?
+      samples.map { |bytes| bytes / BYTES_TO_MIB }
+    end
+  end
+
+  # Display value for an RSS column: mean ± stddev% when samples exist (matching
+  # the timing columns), otherwise a plain MiB value. Returns a Float when no run
+  # in the suite has samples, preserving the legacy "%.1f" formatting.
+  def rss_cell(mean_value, series)
+    return mean_value unless @rss_has_samples
+    if series && !series.empty?
+      format_time_with_stddev(series)
+    else
+      "%.1f" % mean_value
     end
   end
 
@@ -305,6 +339,12 @@ def detect_gc_data(bench_data)
     bench_data.values.any? { |benchmarks| benchmarks.values.any? { |d| d.is_a?(Hash) && d.key?('gc_marking_time_bench') } }
   end
 
+  def detect_rss_samples(bench_data)
+    bench_data.values.any? do |benchmarks|
+      benchmarks.values.any? { |d| d.is_a?(Hash) && d['rss_samples'].is_a?(Array) && !d['rss_samples'].empty? }
+    end
+  end
+
   def bench_data_for(name, bench_name)
     @bench_data[name][bench_name]
   end