diff --git a/roofit/roofitcore/src/RooHistPdf.cxx b/roofit/roofitcore/src/RooHistPdf.cxx index 4879f782596b3..94c2164a5e791 100644 --- a/roofit/roofitcore/src/RooHistPdf.cxx +++ b/roofit/roofitcore/src/RooHistPdf.cxx @@ -300,6 +300,7 @@ Int_t RooHistPdf::getAnalyticalIntegral(RooArgSet& allVars, Int_t code = 0; Int_t frcode = 0; + bool directSubRange = false; for (unsigned int n=0; n < pdfObsList.size() && n < histObsList.size(); ++n) { const auto pa = pdfObsList[n]; const auto ha = histObsList[n]; @@ -309,6 +310,9 @@ Int_t RooHistPdf::getAnalyticalIntegral(RooArgSet& allVars, analVars.add(*pa); if (fullRange(*pa, *ha, rangeName)) { frcode |= 2 << n; + } else if (pa->isFundamental()) { + // Sub-range integral over the histogram observable itself (no transform). + directSubRange = true; } } } @@ -319,10 +323,12 @@ Int_t RooHistPdf::getAnalyticalIntegral(RooArgSet& allVars, code |= 1; } - // Disable partial analytical integrals if interpolation is used, and we - // integrate over sub-ranges, but leave them enabled when we integrate over - // the full range of one or several variables - if (intOrder > 1 && !(code & 1)) { + // the full range. For interpolated histograms (intOrder > 0), fall back to + // numerical integration over a direct sub-range of the histogram observable. + // A derived (non-fundamental) observable, such as a linear transform used to + // renormalize the components of a RooMomentMorphFuncND, keeps the analytical + // path it relies on. + if (intOrder > 0 && directSubRange) { analVars.removeAll(); return 0; } diff --git a/roofit/roofitcore/test/testRooHistPdf.cxx b/roofit/roofitcore/test/testRooHistPdf.cxx index b8ec66e6a2250..ae4c32edea56a 100644 --- a/roofit/roofitcore/test/testRooHistPdf.cxx +++ b/roofit/roofitcore/test/testRooHistPdf.cxx @@ -12,6 +12,9 @@ #include +#include +#include + // Verify that RooFit correctly uses analytic integration when having a // RooLinearVar as the observable of a RooHistPdf. TEST(RooHistPdf, AnalyticIntWithRooLinearVar) @@ -36,6 +39,113 @@ TEST(RooHistPdf, AnalyticIntWithRooLinearVar) EXPECT_EQ(integ.anaIntVars().size(), 1); } +namespace { + +// Narrow Gaussian peak (sigma smaller than the bin width) on a steeply falling +// exponential, so the sub-range area genuinely depends on the interpolation +// order (a shape well sampled by the binning would let the bug below slip +// through). +void fillPeakedHist(RooDataHist &dataHist, RooRealVar &xHist) +{ + double const mean = 100.; + double const sigma = 8.; + for (int i = 0; i < xHist.numBins(); ++i) { + xHist.setBin(i); + double const c = xHist.getVal(); + double const d = (c - mean) / sigma; + dataHist.set(i, 5000. * std::exp(-0.5 * d * d) + 3000. * std::exp(-0.05 * c) + 10., 0.0); + } +} + +// Integral of the raw RooHistPdf curve over the full range of "x", sampling +// pdf.getVal() directly. Independent of the integration machinery under test and +// correct for any interpolation order. +double trapezoidalReference(RooHistPdf &pdf, RooRealVar &x) +{ + int const n = 200000; + double const lo = x.getMin(); + double const hi = x.getMax(); + double const h = (hi - lo) / n; + double sum = 0.0; + for (int i = 0; i <= n; ++i) { + x.setVal(lo + i * h); + double const w = (i == 0 || i == n) ? 0.5 : 1.0; + sum += w * pdf.getVal(); + } + return sum * h; +} + +} // namespace + +// The analytical integral (RooDataHist::sum()) integrates the piecewise-constant +// histogram, which only matches the interpolated curve over the full range. Over +// a sub-range with intOrder > 0 RooHistPdf used to take that shortcut anyway, +// returning the intOrder == 0 area regardless of the interpolation order. +// +// This checks the integral *value* (against an independent reference) rather +// than which strategy is used, so it stays valid both for the current numerical +// fallback and for a possible future analytical sub-range integral. +TEST(RooHistPdf, SubRangeIntegralWithInterpolation) +{ + // Histogram observable, wider than the pdf observable "x" below. + RooRealVar xHist{"xHist", "xHist", -30., 270.}; + xHist.setBins(20); + + RooDataHist dataHist{"dataHist", "dataHist", xHist}; + fillPeakedHist(dataHist, xHist); + + // The pdf observable "x" spans only a sub-range of the histogram. + RooRealVar x{"x", "x", 0., 200.}; + + double refByOrder[2] = {0., 0.}; + + for (int intOrder : {0, 1}) { + RooHistPdf pdf{"pdf", "pdf", x, xHist, dataHist, intOrder}; + double const ref = trapezoidalReference(pdf, x); + refByOrder[intOrder] = ref; + + std::unique_ptr integ{pdf.createIntegral(x)}; + EXPECT_NEAR(integ->getVal(), ref, 1e-3 * ref) << "Wrong sub-range integral for intOrder=" << intOrder; + } + + // Guard against the histogram shape becoming too smooth to tell the + // interpolation orders apart, which would make this test insensitive. + EXPECT_GT(std::abs(refByOrder[1] - refByOrder[0]), 0.02 * refByOrder[1]) + << "Test histogram is too smooth to distinguish interpolation orders"; +} + +// Analytic integration is used in the cases where RooDataHist::sum() is exact: +// non-interpolated histograms (any range) and full-range integrals (any order). +TEST(RooHistPdf, AnalyticIntForStepFunctionAndFullRange) +{ + RooRealVar xHist{"xHist", "xHist", -30., 270.}; + xHist.setBins(20); + + RooDataHist dataHist{"dataHist", "dataHist", xHist}; + fillPeakedHist(dataHist, xHist); + + // Sub-range observable (narrower than the histogram). + RooRealVar xSub{"xSub", "xSub", 0., 200.}; + // Full-range observable (matches the histogram range). + RooRealVar xFull{"xFull", "xFull", -30., 270.}; + + // Sub-range, no interpolation: analytical (step function is exact). + { + RooHistPdf pdf{"pdf", "pdf", xSub, xHist, dataHist, 0}; + RooRealIntegral integ{"integ", "integ", pdf, xSub}; + integ.getVal(); + EXPECT_EQ(integ.anaIntVars().size(), 1) << "intOrder=0 sub-range should be analytical"; + } + + // Full-range, with interpolation: analytical (area is conserved over the full range). + { + RooHistPdf pdf{"pdf", "pdf", xFull, xHist, dataHist, 1}; + RooRealIntegral integ{"integ", "integ", pdf, xFull}; + integ.getVal(); + EXPECT_EQ(integ.anaIntVars().size(), 1) << "intOrder=1 full-range should be analytical"; + } +} + // Regression test for https://github.com/root-project/root/issues/21159, which // uncovered that the values were not clipped to be positive when evaluating a // RooHistPdf with the new vectorizing evaluation backend.