Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -284,12 +284,22 @@ after each iteration with the default harness.

## Measuring memory usage

`--rss` option of `run_benchmarks.rb` allows you to measure RSS after benchmark iterations.
The `--rss` option of `run_benchmarks.rb` lets you measure RSS (resident set size).

```
./run_benchmarks.rb --rss
```

The harness samples RSS once per iteration across the benchmarking window (after
warmup), so the `RSS (MiB)` column reports the mean working set during measurement
along with its run-to-run variability (`mean ± stddev%`), and the `RSS` ratio is
computed from those means. The raw per-iteration samples are stored in the JSON
output under `rss_samples` (bytes).

For reference, the JSON output also keeps `rss`, a single snapshot taken after a
full GC at the end of the run (the retained set, a lower bound), and `maxrss`, the
process's lifetime peak from `getrusage`.

## Rendering a graph

The `--graph` option of `run_benchmarks.rb` lets you render benchmark results as a graph.
Expand Down
4 changes: 4 additions & 0 deletions harness-gc/harness.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def gc_stat_heap_delta(before, after)

def run_benchmark(_num_itrs_hint, **, &block)
times = []
rss_samples = []
marking_times = []
sweeping_times = []
gc_counts = []
Expand Down Expand Up @@ -82,6 +83,7 @@ def run_benchmark(_num_itrs_hint, **, &block)
puts itr_str

times << time
rss_samples << get_rss
marking_times << mark_delta
sweeping_times << sweep_delta
gc_counts << count_delta
Expand All @@ -95,6 +97,8 @@ def run_benchmark(_num_itrs_hint, **, &block)
bench_range = WARMUP_ITRS..-1

extra = {}
rss_bench = rss_samples[bench_range] || []
extra["rss_samples"] = rss_bench unless rss_bench.empty?
extra["gc_marking_time_warmup"] = marking_times[warmup_range]
extra["gc_marking_time_bench"] = marking_times[bench_range]
extra["gc_sweeping_time_warmup"] = sweeping_times[warmup_range]
Expand Down
6 changes: 5 additions & 1 deletion harness-warmup/harness.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@ def print_stats(bench, elapsed)
def run_benchmark(num_itrs_hint, **)
start = monotonic_time
times = []
rss_samples = []

begin
time = Benchmark.realtime { yield }
times << time
rss_samples << get_rss

stats = Stats.new(times)
median = stats.median
Expand All @@ -63,7 +65,9 @@ def run_benchmark(num_itrs_hint, **)
end until times.size >= MIN_ITERS and elapsed >= MIN_TIME and mad <= threshold

warmup, bench = times[0...times.size/2], times[times.size/2..-1]
return_results(warmup, bench)
rss_bench = rss_samples[times.size/2..-1] || []
extra = rss_bench.empty? ? {} : { "rss_samples" => rss_bench }
return_results(warmup, bench, **extra)

print_stats(bench, elapsed)
end
12 changes: 12 additions & 0 deletions harness/harness-common.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
require 'rbconfig'
require_relative '../misc/stats'

# Ensure the ruby in PATH is the ruby running this, so we can safely shell out to other commands
ruby_in_path = `ruby -e 'print RbConfig.ruby'`
Expand Down Expand Up @@ -214,6 +215,17 @@ def return_results(warmup_iterations, bench_iterations, **extra)
puts "MAXRSS: %.1fMiB" % (maxrss / 1024.0 / 1024.0)
end

rss_samples = ruby_bench_results["rss_samples"]
if rss_samples.is_a?(Array) && !rss_samples.empty?
mib = rss_samples.map { |bytes| bytes / 1024.0 / 1024.0 }
stats = Stats.new(mib)
median = stats.median
mad = stats.median_absolute_deviation(median)
puts "RSS sampled (n=%d): median %.1fMiB \u00b1 %.1fMiB (MAD), range [%.1f, %.1f]MiB" % [
mib.size, median, mad, stats.min, stats.max
]
end

write_json_file(ruby_bench_results)
end

Expand Down
8 changes: 7 additions & 1 deletion harness/harness.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def realtime
# Takes a block as input
def run_benchmark(_num_itrs_hint, **, &block)
times = []
rss_samples = []
total_time = 0
num_itrs = 0
header = "itr: time"
Expand Down Expand Up @@ -75,10 +76,15 @@ def run_benchmark(_num_itrs_hint, **, &block)
# We internally save the time in seconds to avoid loss of precision
times << time
total_time += time
# Sample current RSS between iterations (outside the timed block) so we can
# report the working set across the window with variance.
rss_samples << get_rss
end until num_itrs >= WARMUP_ITRS + MIN_BENCH_ITRS and total_time >= MIN_BENCH_TIME

warmup, bench = times[0...WARMUP_ITRS], times[WARMUP_ITRS..-1]
return_results(warmup, bench)
rss_bench = rss_samples[WARMUP_ITRS..-1] || []
extra = rss_bench.empty? ? {} : { "rss_samples" => rss_bench }
return_results(warmup, bench, **extra)

non_warmups = times[WARMUP_ITRS..-1]
if non_warmups.size > 1
Expand Down
26 changes: 24 additions & 2 deletions lib/cpu_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,24 @@ def maximize_frequency
# P-state check hook. This implementation does nothing and returns nil;
# it exists so that subclasses can override it.
#
# turbo - required keyword; unused here.
def check_pstate(turbo:)
  nil # Override in subclasses
end

# Starts the sudo keep-alive background thread the first time it is called
# and returns that same Thread on every later call (memoized in
# @sudo_keepalive), so at most one keep-alive thread runs per object.
def keep_sudo_alive
  return @sudo_keepalive if @sudo_keepalive

  @sudo_keepalive = Thread.new { sudo_keepalive_loop }
end

# Periodically refreshes sudo's cached credentials until a refresh fails.
#
# Each pass sleeps `interval` seconds and then runs `sudo -n -v` with stdout
# and stderr discarded. `-n` makes sudo fail instead of prompting, so once the
# credentials can no longer be refreshed the loop simply ends.
#
# interval - seconds to wait between refreshes (default 50). It must stay
#            below sudoers' `timestamp_timeout`, which is 5 minutes in
#            upstream sudo and 15 minutes on some distributions.
def sudo_keepalive_loop(interval: 50)
  loop do
    sleep interval
    break unless system("sudo", "-n", "-v", out: File::NULL, err: File::NULL)
  end
end

# Runs `shell_cmd` through `sudo -n sh -c` (stderr discarded) to put a CPU
# setting back. If that command does not succeed, prints a warning that
# includes `manual_hint`, the command the user can run by hand.
#
# manual_hint - String shown to the user when the automatic restore fails.
# shell_cmd   - String passed to `sh -c` under sudo.
def sudo_restore(manual_hint, shell_cmd)
  restored = system("sudo", "-n", "sh", "-c", shell_cmd, err: File::NULL)
  unless restored
    warn "\nCould not restore CPU setting automatically (sudo credentials expired)."
    warn "Run this manually:\n #{manual_hint}"
  end
end
end

# Intel CPU configuration
Expand All @@ -67,7 +85,9 @@ class IntelCPUConfig < CPUConfig
# Disables turbo boost by writing TURBO_DISABLED_VALUE to NO_TURBO_PATH via
# sudo, then arranges for the setting to be written back to 0 at process exit.
#
# Only one exit hook is registered. It goes through sudo_restore, and the
# keep-alive thread is started so sudo's cached credentials are refreshed
# while the process runs. The earlier `sudo -S` exit hook is intentionally
# not registered as well: it would run a second restore at exit.
def disable_turbo_boost
  # sudo requires the flag '-S' in order to take input from stdin
  BenchmarkRunner.check_call("sudo -S sh -c 'echo #{TURBO_DISABLED_VALUE} > #{NO_TURBO_PATH}'")
  keep_sudo_alive

  restore_cmd = "echo 0 > #{NO_TURBO_PATH}"
  manual_hint = "sudo sh -c '#{restore_cmd}'"
  at_exit { sudo_restore(manual_hint, restore_cmd) }
end

def maximize_frequency
Expand Down Expand Up @@ -114,7 +134,9 @@ class AMDCPUConfig < CPUConfig
# Disables turbo boost by writing TURBO_DISABLED_VALUE to BOOST_PATH via sudo,
# then arranges for TURBO_ENABLED_VALUE to be written back at process exit.
#
# Only one exit hook is registered. It goes through sudo_restore, and the
# keep-alive thread is started so sudo's cached credentials are refreshed
# while the process runs. The earlier `sudo -S` exit hook is intentionally
# not registered as well: it would run a second restore at exit.
def disable_turbo_boost
  # sudo requires the flag '-S' in order to take input from stdin
  BenchmarkRunner.check_call("sudo -S sh -c 'echo #{TURBO_DISABLED_VALUE} > #{BOOST_PATH}'")
  keep_sudo_alive

  restore_cmd = "echo #{TURBO_ENABLED_VALUE} > #{BOOST_PATH}"
  manual_hint = "sudo sh -c '#{restore_cmd}'"
  at_exit { sudo_restore(manual_hint, restore_cmd) }
end

def maximize_frequency
Expand Down
52 changes: 46 additions & 6 deletions lib/results_table_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def initialize(executable_names:, bench_data:, include_rss: false, include_pvalu
@include_pvalue = include_pvalue
@zjit_stats = zjit_stats || []
@include_gc = detect_gc_data(bench_data)
@rss_has_samples = @include_rss && detect_rss_samples(bench_data)
@base_name = executable_names.first
@other_names = executable_names[1..]
@bench_names = compute_bench_names
Expand Down Expand Up @@ -86,7 +87,7 @@ def build_format

@executable_names.each do |_name|
format << "%s"
format << "%.1f" if @include_rss
format << (@rss_has_samples ? "%s" : "%.1f") if @include_rss
@zjit_stats.each { format << "%s" }
if @include_gc
format << "%s"
Expand Down Expand Up @@ -125,11 +126,15 @@ def build_row(bench_name)
t0s = extract_first_iteration_times(bench_name)
times_no_warmup = extract_benchmark_times(bench_name)
rsss = extract_rss_values(bench_name)
rss_series = @rss_has_samples ? extract_rss_series(bench_name) : nil

base_t0, *other_t0s = t0s
base_t, *other_ts = times_no_warmup
base_rss, *other_rsss = rsss

base_rss_cell = rss_cell(base_rss, rss_series && rss_series[0])
other_rss_cells = other_rsss.each_index.map { |i| rss_cell(other_rsss[i], rss_series && rss_series[i + 1]) }

# Extract zjit stats: { stat_name => [base_val, other1_val, ...] }
zjit_stat_values = @zjit_stats.map do |stat|
[stat, extract_zjit_stat(bench_name, stat)]
Expand All @@ -143,8 +148,8 @@ def build_row(bench_name)
end

row = [bench_name]
build_base_columns(row, base_t, base_rss, zjit_stat_values, 0, base_mark, base_sweep)
build_comparison_columns(row, other_ts, other_rsss, zjit_stat_values, other_marks, other_sweeps)
build_base_columns(row, base_t, base_rss_cell, zjit_stat_values, 0, base_mark, base_sweep)
build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values, other_marks, other_sweeps)
build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts)
build_rss_ratio_columns(row, base_rss, other_rsss)
build_gc_ratio_columns(row, base_mark, other_marks, base_sweep, other_sweeps)
Expand All @@ -162,10 +167,10 @@ def build_base_columns(row, base_t, base_rss, zjit_stat_values, exe_index, base_
end
end

def build_comparison_columns(row, other_ts, other_rsss, zjit_stat_values, other_marks, other_sweeps)
def build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values, other_marks, other_sweeps)
other_ts.each_with_index do |other_t, i|
row << format_time_with_stddev(other_t)
row << other_rsss[i] if @include_rss
row << other_rss_cells[i] if @include_rss
zjit_stat_values.each { |_stat, values| row << format_stat(values[i + 1]) }
if @include_gc
row << format_time_with_stddev(other_marks[i])
Expand Down Expand Up @@ -283,9 +288,38 @@ def extract_benchmark_times(bench_name)
end
end

# Numeric RSS (MiB) for each executable, in @executable_names order; these are
# the values the RSS ratio is computed from.
#
# When a run has a non-empty 'rss_samples' array, the mean of those samples is
# used so the ratio matches the displayed value. Otherwise the single 'rss'
# value is used. 'rss' is only read in that fallback case, so a run that has
# samples does not also need an 'rss' entry.
#
# bench_name - name of the benchmark to look up for every executable.
#
# Returns an Array of Floats (MiB).
def extract_rss_values(bench_name)
  @executable_names.map do |name|
    data = bench_data_for(name, bench_name)
    samples = data['rss_samples']
    bytes = samples.is_a?(Array) && !samples.empty? ? mean(samples) : data['rss']
    bytes / BYTES_TO_MIB
  end
end

# Builds one entry per executable (in @executable_names order) for the given
# benchmark: the run's 'rss_samples' converted from bytes to MiB, or nil when
# the run has no non-empty 'rss_samples' array.
def extract_rss_series(bench_name)
  @executable_names.map do |exe|
    raw = bench_data_for(exe, bench_name)['rss_samples']
    if raw.is_a?(Array) && !raw.empty?
      raw.map { |bytes| bytes / BYTES_TO_MIB }
    end
  end
end

# Chooses what goes in an RSS table cell.
#
# - If no run in the suite has samples (@rss_has_samples is falsy), the
#   numeric value is returned untouched so the legacy "%.1f" column format
#   still applies.
# - If this run has a non-empty sample series, it is rendered by
#   format_time_with_stddev, the same formatter the timing columns use.
# - Otherwise the value is rendered as a one-decimal string.
#
# mean_value - numeric RSS in MiB.
# series     - Array of per-iteration RSS values in MiB, or nil.
def rss_cell(mean_value, series)
  if !@rss_has_samples
    mean_value
  elsif !series || series.empty?
    "%.1f" % mean_value
  else
    format_time_with_stddev(series)
  end
end

Expand All @@ -305,6 +339,12 @@ def detect_gc_data(bench_data)
bench_data.values.any? { |benchmarks| benchmarks.values.any? { |d| d.is_a?(Hash) && d.key?('gc_marking_time_bench') } }
end

# Reports whether any benchmark entry in `bench_data` carries a non-empty
# 'rss_samples' array.
#
# bench_data - Hash whose values are Hashes of benchmark entries; entries that
#              are not Hashes are ignored.
#
# Returns true or false.
def detect_rss_samples(bench_data)
  bench_data.each_value do |benchmarks|
    benchmarks.each_value do |entry|
      next unless entry.is_a?(Hash)

      samples = entry['rss_samples']
      return true if samples.is_a?(Array) && !samples.empty?
    end
  end
  false
end

# Looks up the stored entry for one benchmark of one executable:
# @bench_data is indexed first by executable name, then by benchmark name.
def bench_data_for(name, bench_name)
  per_executable = @bench_data[name]
  per_executable[bench_name]
end
Expand Down
Loading
Loading