From 511d34a079eab2a2ae53e7a3908bae9165f16a3d Mon Sep 17 00:00:00 2001 From: Jonas Rembser Date: Wed, 24 Jun 2026 08:48:33 +0200 Subject: [PATCH] [RF] Fix RooHistPdf sub-range integrals with interpolation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The analytical integral of a RooHistPdf sums the bin contents of the underlying RooDataHist, which integrates the piecewise-constant histogram. For interpolated histograms (intOrder > 0) this only matches the actual curve over the full range; over a sub-range it does not, so the integral was wrong (it returned the intOrder == 0 step-function area regardless of the interpolation order). Fall back to numerical integration for sub-range integrals of interpolated histograms, but only when integrating over the histogram's own fundamental observable. A derived (non-fundamental) observable has already passed the constant-Jacobian check in okayForAnalytical and is a genuine transform of the histogram variable, e.g. the RooLinearVar that RooMomentMorphFuncND uses to renormalize its components; those keep the analytical path they rely on. Full-range integrals and non-interpolated histograms are unaffected. Closes #20116. 🤖 Done with the help of AI. --- roofit/roofitcore/src/RooHistPdf.cxx | 14 ++- roofit/roofitcore/test/testRooHistPdf.cxx | 110 ++++++++++++++++++++++ 2 files changed, 120 insertions(+), 4 deletions(-) diff --git a/roofit/roofitcore/src/RooHistPdf.cxx b/roofit/roofitcore/src/RooHistPdf.cxx index 4879f782596b3..94c2164a5e791 100644 --- a/roofit/roofitcore/src/RooHistPdf.cxx +++ b/roofit/roofitcore/src/RooHistPdf.cxx @@ -300,6 +300,7 @@ Int_t RooHistPdf::getAnalyticalIntegral(RooArgSet& allVars, Int_t code = 0; Int_t frcode = 0; + bool directSubRange = false; for (unsigned int n=0; n < pdfObsList.size() && n < histObsList.size(); ++n) { const auto pa = pdfObsList[n]; const auto ha = histObsList[n]; @@ -309,6 +310,9 @@ Int_t RooHistPdf::getAnalyticalIntegral(RooArgSet& allVars, analVars.add(*pa); if (fullRange(*pa, *ha, rangeName)) { frcode |= 2 << n; + } else if (pa->isFundamental()) { + // Sub-range integral over the histogram observable itself (no transform). + directSubRange = true; } } } @@ -319,10 +323,12 @@ Int_t RooHistPdf::getAnalyticalIntegral(RooArgSet& allVars, code |= 1; } - // Disable partial analytical integrals if interpolation is used, and we - // integrate over sub-ranges, but leave them enabled when we integrate over - // the full range of one or several variables - if (intOrder > 1 && !(code & 1)) { + // the full range. For interpolated histograms (intOrder > 0), fall back to + // numerical integration over a direct sub-range of the histogram observable. + // A derived (non-fundamental) observable, such as a linear transform used to + // renormalize the components of a RooMomentMorphFuncND, keeps the analytical + // path it relies on. + if (intOrder > 0 && directSubRange) { analVars.removeAll(); return 0; } diff --git a/roofit/roofitcore/test/testRooHistPdf.cxx b/roofit/roofitcore/test/testRooHistPdf.cxx index b8ec66e6a2250..ae4c32edea56a 100644 --- a/roofit/roofitcore/test/testRooHistPdf.cxx +++ b/roofit/roofitcore/test/testRooHistPdf.cxx @@ -12,6 +12,9 @@ #include +#include +#include + // Verify that RooFit correctly uses analytic integration when having a // RooLinearVar as the observable of a RooHistPdf. TEST(RooHistPdf, AnalyticIntWithRooLinearVar) @@ -36,6 +39,113 @@ TEST(RooHistPdf, AnalyticIntWithRooLinearVar) EXPECT_EQ(integ.anaIntVars().size(), 1); } +namespace { + +// Narrow Gaussian peak (sigma smaller than the bin width) on a steeply falling +// exponential, so the sub-range area genuinely depends on the interpolation +// order (a shape well sampled by the binning would let the bug below slip +// through). +void fillPeakedHist(RooDataHist &dataHist, RooRealVar &xHist) +{ + double const mean = 100.; + double const sigma = 8.; + for (int i = 0; i < xHist.numBins(); ++i) { + xHist.setBin(i); + double const c = xHist.getVal(); + double const d = (c - mean) / sigma; + dataHist.set(i, 5000. * std::exp(-0.5 * d * d) + 3000. * std::exp(-0.05 * c) + 10., 0.0); + } +} + +// Integral of the raw RooHistPdf curve over the full range of "x", sampling +// pdf.getVal() directly. Independent of the integration machinery under test and +// correct for any interpolation order. +double trapezoidalReference(RooHistPdf &pdf, RooRealVar &x) +{ + int const n = 200000; + double const lo = x.getMin(); + double const hi = x.getMax(); + double const h = (hi - lo) / n; + double sum = 0.0; + for (int i = 0; i <= n; ++i) { + x.setVal(lo + i * h); + double const w = (i == 0 || i == n) ? 0.5 : 1.0; + sum += w * pdf.getVal(); + } + return sum * h; +} + +} // namespace + +// The analytical integral (RooDataHist::sum()) integrates the piecewise-constant +// histogram, which only matches the interpolated curve over the full range. Over +// a sub-range with intOrder > 0 RooHistPdf used to take that shortcut anyway, +// returning the intOrder == 0 area regardless of the interpolation order. +// +// This checks the integral *value* (against an independent reference) rather +// than which strategy is used, so it stays valid both for the current numerical +// fallback and for a possible future analytical sub-range integral. +TEST(RooHistPdf, SubRangeIntegralWithInterpolation) +{ + // Histogram observable, wider than the pdf observable "x" below. + RooRealVar xHist{"xHist", "xHist", -30., 270.}; + xHist.setBins(20); + + RooDataHist dataHist{"dataHist", "dataHist", xHist}; + fillPeakedHist(dataHist, xHist); + + // The pdf observable "x" spans only a sub-range of the histogram. + RooRealVar x{"x", "x", 0., 200.}; + + double refByOrder[2] = {0., 0.}; + + for (int intOrder : {0, 1}) { + RooHistPdf pdf{"pdf", "pdf", x, xHist, dataHist, intOrder}; + double const ref = trapezoidalReference(pdf, x); + refByOrder[intOrder] = ref; + + std::unique_ptr integ{pdf.createIntegral(x)}; + EXPECT_NEAR(integ->getVal(), ref, 1e-3 * ref) << "Wrong sub-range integral for intOrder=" << intOrder; + } + + // Guard against the histogram shape becoming too smooth to tell the + // interpolation orders apart, which would make this test insensitive. + EXPECT_GT(std::abs(refByOrder[1] - refByOrder[0]), 0.02 * refByOrder[1]) + << "Test histogram is too smooth to distinguish interpolation orders"; +} + +// Analytic integration is used in the cases where RooDataHist::sum() is exact: +// non-interpolated histograms (any range) and full-range integrals (any order). +TEST(RooHistPdf, AnalyticIntForStepFunctionAndFullRange) +{ + RooRealVar xHist{"xHist", "xHist", -30., 270.}; + xHist.setBins(20); + + RooDataHist dataHist{"dataHist", "dataHist", xHist}; + fillPeakedHist(dataHist, xHist); + + // Sub-range observable (narrower than the histogram). + RooRealVar xSub{"xSub", "xSub", 0., 200.}; + // Full-range observable (matches the histogram range). + RooRealVar xFull{"xFull", "xFull", -30., 270.}; + + // Sub-range, no interpolation: analytical (step function is exact). + { + RooHistPdf pdf{"pdf", "pdf", xSub, xHist, dataHist, 0}; + RooRealIntegral integ{"integ", "integ", pdf, xSub}; + integ.getVal(); + EXPECT_EQ(integ.anaIntVars().size(), 1) << "intOrder=0 sub-range should be analytical"; + } + + // Full-range, with interpolation: analytical (area is conserved over the full range). + { + RooHistPdf pdf{"pdf", "pdf", xFull, xHist, dataHist, 1}; + RooRealIntegral integ{"integ", "integ", pdf, xFull}; + integ.getVal(); + EXPECT_EQ(integ.anaIntVars().size(), 1) << "intOrder=1 full-range should be analytical"; + } +} + // Regression test for https://github.com/root-project/root/issues/21159, which // uncovered that the values were not clipped to be positive when evaluating a // RooHistPdf with the new vectorizing evaluation backend.