diff --git a/README.md b/README.md index bba0d1ef..a36d0955 100644 --- a/README.md +++ b/README.md @@ -284,12 +284,22 @@ after each iteration with the default harness. ## Measuring memory usage -`--rss` option of `run_benchmarks.rb` allows you to measure RSS after benchmark iterations. +`--rss` option of `run_benchmarks.rb` allows you to measure RSS (resident set size). ``` ./run_benchmarks.rb --rss ``` +The harness samples RSS once per iteration across the benchmarking window (after +warmup), so the `RSS (MiB)` column reports the mean working set during measurement +along with its iteration-to-iteration variability (`mean ± stddev%`), and the `RSS` ratio is +computed from those means. The raw per-iteration samples are stored in the JSON +output under `rss_samples` (bytes). + +For reference, the JSON output also keeps `rss`, a single snapshot taken after a +full GC at the end of the run (the retained set, a lower bound), and `maxrss`, the +process's lifetime peak from `getrusage`. + ## Rendering a graph `--graph` option of `run_benchmarks.rb` allows you to render benchmark results as a graph. diff --git a/harness-gc/harness.rb b/harness-gc/harness.rb index 70c113a9..9398fdcf 100644 --- a/harness-gc/harness.rb +++ b/harness-gc/harness.rb @@ -33,6 +33,7 @@ def gc_stat_heap_delta(before, after) def run_benchmark(_num_itrs_hint, **, &block) times = [] + rss_samples = [] marking_times = [] sweeping_times = [] gc_counts = [] @@ -82,6 +83,7 @@ def run_benchmark(_num_itrs_hint, **, &block) puts itr_str times << time + rss_samples << get_rss marking_times << mark_delta sweeping_times << sweep_delta gc_counts << count_delta @@ -95,6 +97,8 @@ def run_benchmark(_num_itrs_hint, **, &block) bench_range = WARMUP_ITRS..-1 extra = {} + rss_bench = rss_samples[bench_range] || [] + extra["rss_samples"] = rss_bench unless rss_bench.empty?
extra["gc_marking_time_warmup"] = marking_times[warmup_range] extra["gc_marking_time_bench"] = marking_times[bench_range] extra["gc_sweeping_time_warmup"] = sweeping_times[warmup_range] diff --git a/harness-warmup/harness.rb b/harness-warmup/harness.rb index 21045227..5f1050cc 100644 --- a/harness-warmup/harness.rb +++ b/harness-warmup/harness.rb @@ -36,10 +36,12 @@ def print_stats(bench, elapsed) def run_benchmark(num_itrs_hint, **) start = monotonic_time times = [] + rss_samples = [] begin time = Benchmark.realtime { yield } times << time + rss_samples << get_rss stats = Stats.new(times) median = stats.median @@ -63,7 +65,9 @@ def run_benchmark(num_itrs_hint, **) end until times.size >= MIN_ITERS and elapsed >= MIN_TIME and mad <= threshold warmup, bench = times[0...times.size/2], times[times.size/2..-1] - return_results(warmup, bench) + rss_bench = rss_samples[times.size/2..-1] || [] + extra = rss_bench.empty? ? {} : { "rss_samples" => rss_bench } + return_results(warmup, bench, **extra) print_stats(bench, elapsed) end diff --git a/harness/harness-common.rb b/harness/harness-common.rb index 096036d0..31035310 100644 --- a/harness/harness-common.rb +++ b/harness/harness-common.rb @@ -1,4 +1,5 @@ require 'rbconfig' +require_relative '../misc/stats' # Ensure the ruby in PATH is the ruby running this, so we can safely shell out to other commands ruby_in_path = `ruby -e 'print RbConfig.ruby'` @@ -214,6 +215,17 @@ def return_results(warmup_iterations, bench_iterations, **extra) puts "MAXRSS: %.1fMiB" % (maxrss / 1024.0 / 1024.0) end + rss_samples = ruby_bench_results["rss_samples"] + if rss_samples.is_a?(Array) && !rss_samples.empty? 
+ mib = rss_samples.map { |bytes| bytes / 1024.0 / 1024.0 } + stats = Stats.new(mib) + median = stats.median + mad = stats.median_absolute_deviation(median) + puts "RSS sampled (n=%d): median %.1fMiB \u00b1 %.1fMiB (MAD), range [%.1f, %.1f]MiB" % [ + mib.size, median, mad, stats.min, stats.max + ] + end + write_json_file(ruby_bench_results) end diff --git a/harness/harness.rb b/harness/harness.rb index 7c3b79a4..2fac8c3f 100644 --- a/harness/harness.rb +++ b/harness/harness.rb @@ -34,6 +34,7 @@ def realtime # Takes a block as input def run_benchmark(_num_itrs_hint, **, &block) times = [] + rss_samples = [] total_time = 0 num_itrs = 0 header = "itr: time" @@ -75,10 +76,15 @@ def run_benchmark(_num_itrs_hint, **, &block) # We internally save the time in seconds to avoid loss of precision times << time total_time += time + # Sample current RSS between iterations (outside the timed block) so we can + # report the working set across the window with variance. + rss_samples << get_rss end until num_itrs >= WARMUP_ITRS + MIN_BENCH_ITRS and total_time >= MIN_BENCH_TIME warmup, bench = times[0...WARMUP_ITRS], times[WARMUP_ITRS..-1] - return_results(warmup, bench) + rss_bench = rss_samples[WARMUP_ITRS..-1] || [] + extra = rss_bench.empty? ? {} : { "rss_samples" => rss_bench } + return_results(warmup, bench, **extra) non_warmups = times[WARMUP_ITRS..-1] if non_warmups.size > 1 diff --git a/lib/results_table_builder.rb b/lib/results_table_builder.rb index 32556016..d7ac004d 100644 --- a/lib/results_table_builder.rb +++ b/lib/results_table_builder.rb @@ -12,6 +12,7 @@ def initialize(executable_names:, bench_data:, include_rss: false, include_pvalu @include_pvalue = include_pvalue @zjit_stats = zjit_stats || [] @include_gc = detect_gc_data(bench_data) + @rss_has_samples = @include_rss && detect_rss_samples(bench_data) @base_name = executable_names.first @other_names = executable_names[1..] 
@bench_names = compute_bench_names @@ -86,7 +87,7 @@ def build_format @executable_names.each do |_name| format << "%s" - format << "%.1f" if @include_rss + format << (@rss_has_samples ? "%s" : "%.1f") if @include_rss @zjit_stats.each { format << "%s" } if @include_gc format << "%s" @@ -125,11 +126,15 @@ def build_row(bench_name) t0s = extract_first_iteration_times(bench_name) times_no_warmup = extract_benchmark_times(bench_name) rsss = extract_rss_values(bench_name) + rss_series = @rss_has_samples ? extract_rss_series(bench_name) : nil base_t0, *other_t0s = t0s base_t, *other_ts = times_no_warmup base_rss, *other_rsss = rsss + base_rss_cell = rss_cell(base_rss, rss_series && rss_series[0]) + other_rss_cells = other_rsss.each_index.map { |i| rss_cell(other_rsss[i], rss_series && rss_series[i + 1]) } + # Extract zjit stats: { stat_name => [base_val, other1_val, ...] } zjit_stat_values = @zjit_stats.map do |stat| [stat, extract_zjit_stat(bench_name, stat)] @@ -143,8 +148,8 @@ def build_row(bench_name) end row = [bench_name] - build_base_columns(row, base_t, base_rss, zjit_stat_values, 0, base_mark, base_sweep) - build_comparison_columns(row, other_ts, other_rsss, zjit_stat_values, other_marks, other_sweeps) + build_base_columns(row, base_t, base_rss_cell, zjit_stat_values, 0, base_mark, base_sweep) + build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values, other_marks, other_sweeps) build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts) build_rss_ratio_columns(row, base_rss, other_rsss) build_gc_ratio_columns(row, base_mark, other_marks, base_sweep, other_sweeps) @@ -162,10 +167,10 @@ def build_base_columns(row, base_t, base_rss, zjit_stat_values, exe_index, base_ end end - def build_comparison_columns(row, other_ts, other_rsss, zjit_stat_values, other_marks, other_sweeps) + def build_comparison_columns(row, other_ts, other_rss_cells, zjit_stat_values, other_marks, other_sweeps) other_ts.each_with_index do |other_t, i| row << 
format_time_with_stddev(other_t) - row << other_rsss[i] if @include_rss + row << other_rss_cells[i] if @include_rss zjit_stat_values.each { |_stat, values| row << format_stat(values[i + 1]) } if @include_gc row << format_time_with_stddev(other_marks[i]) @@ -283,9 +288,38 @@ def extract_benchmark_times(bench_name) end end + # Numeric RSS (MiB) per executable, used for the RSS ratio. When per-iteration + # samples are present we use their mean so the ratio matches the displayed value. def extract_rss_values(bench_name) @executable_names.map do |name| - bench_data_for(name, bench_name)['rss'] / BYTES_TO_MIB + data = bench_data_for(name, bench_name) + samples = data['rss_samples'] + if samples.is_a?(Array) && !samples.empty? + mean(samples) / BYTES_TO_MIB + else + data['rss'] / BYTES_TO_MIB + end + end + end + + # Per-iteration RSS samples (MiB) per executable, or nil when a run lacks them. + def extract_rss_series(bench_name) + @executable_names.map do |name| + samples = bench_data_for(name, bench_name)['rss_samples'] + next nil unless samples.is_a?(Array) && !samples.empty? + samples.map { |bytes| bytes / BYTES_TO_MIB } + end + end + + # Display value for an RSS column: mean ± stddev% when samples exist (matching + # the timing columns), otherwise a plain MiB value. Returns a Float when no run + # in the suite has samples, preserving the legacy "%.1f" formatting. + def rss_cell(mean_value, series) + return mean_value unless @rss_has_samples + if series && !series.empty? + format_time_with_stddev(series) + else + "%.1f" % mean_value end end @@ -305,6 +339,12 @@ def detect_gc_data(bench_data) bench_data.values.any? { |benchmarks| benchmarks.values.any? { |d| d.is_a?(Hash) && d.key?('gc_marking_time_bench') } } end + def detect_rss_samples(bench_data) + bench_data.values.any? do |benchmarks| + benchmarks.values.any? { |d| d.is_a?(Hash) && d['rss_samples'].is_a?(Array) && !d['rss_samples'].empty? 
} + end + end + def bench_data_for(name, bench_name) @bench_data[name][bench_name] end diff --git a/test/results_table_builder_test.rb b/test/results_table_builder_test.rb index a79d5288..303b84f5 100644 --- a/test/results_table_builder_test.rb +++ b/test/results_table_builder_test.rb @@ -549,4 +549,125 @@ assert_equal 'fib', bench_names[4] end end + + describe 'RSS sampling (rss_samples)' do + MIB = 1024 * 1024 + + it 'shows mean ± stddev% and uses %s format when samples are present' do + bench_data = { + 'ruby' => { + 'fib' => { + 'warmup' => [0.1], + 'bench' => [0.1, 0.1, 0.1], + 'rss' => 10 * MIB, + 'rss_samples' => [9 * MIB, 10 * MIB, 11 * MIB] + } + } + } + + builder = ResultsTableBuilder.new( + executable_names: ['ruby'], + bench_data: bench_data, + include_rss: true + ) + + table, format = builder.build + + assert_equal ['bench', 'ruby (ms)', 'RSS (MiB)'], table[0] + assert_equal ['%s', '%s', '%s'], format + + m = table[1][2].match(/\A(\d+\.\d) ± (\d+\.\d)%\z/) + assert m, "expected mean ± stddev%, got #{table[1][2].inspect}" + assert_in_delta 10.0, m[1].to_f, 0.1 + assert_operator m[2].to_f, :>, 0.0 + end + + it 'computes the RSS ratio from the mean of samples' do + bench_data = { + 'ruby' => { + 'fib' => { + 'warmup' => [0.1], + 'bench' => [0.1, 0.1, 0.1], + 'rss' => 99 * MIB, # should be ignored in favour of samples + 'rss_samples' => [10 * MIB, 10 * MIB, 10 * MIB] + } + }, + 'ruby-yjit' => { + 'fib' => { + 'warmup' => [0.05], + 'bench' => [0.05, 0.05, 0.05], + 'rss' => 1 * MIB, + 'rss_samples' => [18 * MIB, 20 * MIB, 22 * MIB] + } + } + } + + builder = ResultsTableBuilder.new( + executable_names: ['ruby', 'ruby-yjit'], + bench_data: bench_data, + include_rss: true + ) + + table, _format = builder.build + + # ratio = mean(ruby samples) / mean(yjit samples) = 10 / 20 = 0.5 + assert_in_delta 0.5, table[1].last, 0.001 + end + + it 'falls back to a plain MiB value for runs without samples in a mixed suite' do + bench_data = { + 'ruby' => { + 'fib' => { + 
'warmup' => [0.1], + 'bench' => [0.1, 0.1], + 'rss' => 10 * MIB, + 'rss_samples' => [10 * MIB, 10 * MIB] + }, + 'loop' => { + 'warmup' => [0.2], + 'bench' => [0.2, 0.2], + 'rss' => 15 * MIB + # no rss_samples for this benchmark + } + } + } + + builder = ResultsTableBuilder.new( + executable_names: ['ruby'], + bench_data: bench_data, + include_rss: true + ) + + table, format = builder.build + + # Suite has samples somewhere, so the RSS column is string-formatted. + assert_equal ['%s', '%s', '%s'], format + + rows = table[1..].each_with_object({}) { |row, h| h[row[0]] = row } + assert_match(/\A\d+\.\d ± \d+\.\d%\z/, rows['fib'][2]) + # The sample-less benchmark still renders as a bare MiB value. + assert_equal '15.0', rows['loop'][2] + end + + it 'keeps %.1f formatting when no run in the suite has samples' do + bench_data = { + 'ruby' => { + 'fib' => { + 'warmup' => [0.1], + 'bench' => [0.1], + 'rss' => 10 * MIB + } + } + } + + builder = ResultsTableBuilder.new( + executable_names: ['ruby'], + bench_data: bench_data, + include_rss: true + ) + + _table, format = builder.build + assert_equal ['%s', '%s', '%.1f'], format + end + end end