Skip to content

Commit

Permalink
Merge pull request #834 from cms-analysis/anigamova-uniform_pdf_for_f…
Browse files Browse the repository at this point in the history
…latParam

uniform pdf for flat param
  • Loading branch information
anigamova committed Apr 21, 2023
2 parents 617a7f9 + 3976c43 commit 5acf6b6
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 16 deletions.
3 changes: 3 additions & 0 deletions docs/part3/commonstatsmethods.md
Expand Up @@ -346,6 +346,9 @@ The choice of test-statistic can be made via the option `--testStat` and differe
While the above shortcuts are the common variants, you can also try others. The treatment of the nuisances can be changed to the so-called "Hybrid-Bayesian" method which effectively integrates over the nuisance parameters. This is especially relevant when you have very few expected events in your data and you are using those events to constrain background processes. This can be achieved by setting `--generateNuisances=1 --generateExternalMeasurements=0`. You might also want to avoid first fitting to the data to choose the nominal values in this case, which can be done by also setting `--fitNuisances=0`.
!!! warning
If you have unconstrained parameters in your model (`rateParam` or if using a `_norm` variable for a pdf) and you want to use the "Hybrid-Bayesian" method, you **must** declare these as `flatParam` in your datacard and when running text2workspace you must add the option `--X-assign-flatParam-prior` in the command line. This will create uniform priors for these parameters, which is needed for this method and which otherwise would not get created.
!!! info
Note that (observed and toy) values of the test statistic stored in the instances of `RooStats::HypoTestResult` when the option `--saveHybridResult` has been specified, are defined without the factor 2 and therefore are twice as small as the values given by the formulas above. This factor is however included automatically by all plotting script supplied within the Combine package.
Expand Down
2 changes: 1 addition & 1 deletion docs/part3/runningthetool.md
Expand Up @@ -186,7 +186,7 @@ You can turn off the internal logic by setting `--X-rtd TMCSO_AdaptivePseudoAsim

#### Nuisance parameter generation

The default method of dealing with systematics is to generate random values (around their nominal values, see above) for the nuisance parameters, according to their prior pdfs centred around their default values, *before* generating the data. The *unconstrained* nuisance parameters (eg `flatParam` or `rateParam`) or those with *flat* priors are **not** randomised before the data generation.
The default method of dealing with systematics is to generate random values (around their nominal values, see above) for the nuisance parameters, according to their prior pdfs centred around their default values, *before* generating the data. The *unconstrained* nuisance parameters (eg `flatParam` or `rateParam`) or those with *flat* priors are **not** randomised before the data generation. If you wish to also randomise these parameters, you **must** declare these as `flatParam` in your datacard and when running text2workspace you must add the option `--X-assign-flatParam-prior` in the command line.

The following are options which define how the toys will be generated,

Expand Down
7 changes: 7 additions & 0 deletions python/Datacard.py
Expand Up @@ -63,6 +63,12 @@ def __init__(self):
self.groups = {}
self.discretes = []

# list of parameters called _norm in user input workspace
self.pdfnorms = {}

# collection of nuisances to auto-produce flat priors for
self.toCreateFlatParam = {}

def print_structure(self):
"""
Print the contents of the -> should allow for direct text2workspace on python config
Expand Down Expand Up @@ -140,6 +146,7 @@ def print_structure(self):
print("DC.binParFlags = ", self.binParFlags, "#", type(self.binParFlags))
print("DC.groups = ", self.groups, "#", type(self.groups))
print("DC.discretes = ", self.discretes, "#", type(self.discretes))
print("DC.pdfnorms = ", self.pdfnorms, "#", type(self.pdfnorms))

print(
"""
Expand Down
15 changes: 14 additions & 1 deletion python/DatacardParser.py
Expand Up @@ -279,6 +279,13 @@ def addDatacardParserOptions(parser):
default=None,
help="Simplify MH dependent objects: 'fixed', 'pol<N>' with N=0..4",
)
parser.add_option(
"--X-assign-flatParam-prior",
dest="flatParamPrior",
default=False,
action="store_true",
help="Assign RooUniform pdf for flatParam NPs",
)


def strip(l):
Expand Down Expand Up @@ -359,6 +366,9 @@ def parseCard(file, options):
if not hasattr(options, "evaluateEdits"):
setattr(options, "evaluateEdits", True)

if not hasattr(options, "flatParamPrior"):
setattr(options, "flatParamPrior", False)

try:
for lineNumber, l in enumerate(file):
f = l.split()
Expand Down Expand Up @@ -522,6 +532,9 @@ def parseCard(file, options):
elif pdf == "flatParam":
ret.flatParamNuisances[lsyst] = True
# for flat parametric uncertainties, code already does the right thing as long as they are non-constant RooRealVars linked to the model
if options.flatParamPrior:
ret.systs.append([lsyst, nofloat, pdf, args, []])
ret.add_syst_id(lsyst)
continue
elif pdf == "extArg":
# look for additional parameters in workspaces
Expand Down Expand Up @@ -668,7 +681,7 @@ def parseCard(file, options):
syst2 = []
for lsyst, nofloat, pdf, args, errline in ret.systs:
nonNullEntries = 0
if pdf == "param" or pdf == "constr" or pdf == "discrete" or pdf == "rateParam": # this doesn't have an errline
if pdf == "param" or pdf == "constr" or pdf == "discrete" or pdf == "rateParam" or pdf == "flatParam": # this doesn't have an errline
syst2.append((lsyst, nofloat, pdf, args, errline))
continue
for b, p, s in ret.keyline:
Expand Down
74 changes: 60 additions & 14 deletions python/ModelTools.py
Expand Up @@ -154,6 +154,14 @@ def __init__(self, datacard, options):
self.selfNormBins = []
self.extraNuisances = []
self.extraGlobalObservables = []
self.globalobs = []

def getSafeNormName(self, n):
# need to be careful in case user has _norm name and wants to auto-create flatPrior
if self.options.flatParamPrior:
if n in self.DC.pdfnorms.keys():
return self.DC.pdfnorms[n]
return n

def setPhysics(self, physicsModel):
self.physics = physicsModel
Expand All @@ -174,6 +182,8 @@ def doModel(self, justCheckPhysicsModel=False):
self.doNuisances()
self.doExtArgs()
self.doRateParams()
self.doAutoFlatNuisancePriors()
self.doFillNuisPdfsAndSets()
self.doExpectedEvents()
if justCheckPhysicsModel:
self.physics.done()
Expand Down Expand Up @@ -316,6 +326,12 @@ def doRateParams(self):
v = float(argv)
removeRange = len(param_range) == 0
if param_range == "":
if self.options.flatParamPrior:
raise ValueError(
"Cannot create flat Prior for rateParam nuisance parameter '"
+ argu
+ "' without specifying a range [a,b]. Please fix in the datacard"
)
## check range. The parameter needs to be created in range. Then we will remove it
param_range = "%g,%g" % (-2.0 * abs(v), 2.0 * abs(v))
# additional check for range requested
Expand Down Expand Up @@ -382,7 +398,7 @@ def doNuisances(self):
if len(self.DC.systs) == 0:
return
self.doComment(" ----- nuisances -----")
globalobs = []
# globalobs = []

for n, nofloat, pdf, args, errline in self.DC.systs:
is_func_scaled = False
Expand Down Expand Up @@ -433,7 +449,7 @@ def doNuisances(self):
# Use existing constraint since it could be a param
self.out.var(n).setVal(0)
self.out.var(n).setError(1)
globalobs.append("%s_In" % n)
self.globalobs.append("%s_In" % n)
if self.options.bin:
self.out.var("%s_In" % n).setConstant(True)
if self.options.optimizeBoundNuisances and not is_func_scaled:
Expand Down Expand Up @@ -466,7 +482,7 @@ def doNuisances(self):
theta,
),
)
globalobs.append("%s_In" % n)
self.globalobs.append("%s_In" % n)
if self.options.bin:
self.out.var("%s_In" % n).setConstant(True)
elif pdf == "gmN":
Expand Down Expand Up @@ -501,7 +517,7 @@ def doNuisances(self):
"Poisson",
"%s_In[%d,%f,%f], %s[%f,%f,%f], 1" % (n, args[0], minObs, maxObs, n, args[0] + 1, minExp, maxExp),
)
globalobs.append("%s_In" % n)
self.globalobs.append("%s_In" % n)
if self.options.bin:
self.out.var("%s_In" % n).setConstant(True)
elif pdf == "trG":
Expand All @@ -515,13 +531,21 @@ def doNuisances(self):
trG_max = -1.0 / v
r = "%f,%f" % (trG_min, trG_max)
self.doObj("%s_Pdf" % n, "Gaussian", "%s[0,%s], %s_In[0,%s], 1" % (n, r, n, r))
globalobs.append("%s_In" % n)
self.globalobs.append("%s_In" % n)
if self.options.bin:
self.out.var("%s_In" % n).setConstant(True)
elif pdf == "lnU" or pdf == "shapeU":
self.doObj("%s_Pdf" % n, "Uniform", "%s[-1,1]" % n)
elif pdf == "unif":
self.doObj("%s_Pdf" % n, "Uniform", "%s[%f,%f]" % (n, args[0], args[1]))
elif pdf == "flatParam" and self.options.flatParamPrior:
c_param_name = self.getSafeNormName(n)
if self.out.var(c_param_name):
v, x1, x2 = self.out.var(c_param_name).getVal(), self.out.var(c_param_name).getMin(), self.out.var(c_param_name).getMax()
self.DC.toCreateFlatParam[c_param_name] = [v, x1, x2]
else:
self.DC.toCreateFlatParam[c_param_name] = []

elif pdf == "dFD" or pdf == "dFD2":
dFD_min = -(1 + 8 / args[0])
dFD_max = +(1 + 8 / args[0])
Expand All @@ -546,7 +570,7 @@ def doNuisances(self):
ROOFIT_EXPR_PDF,
"'1/(2*(1+exp(%f*(@0-1)))*(1+exp(-%f*(@0+1))))', %s[0,%s], %s_In[0,%s]" % (args[0], args[0], n, r, n, r),
)
globalobs.append("%s_In" % n)
self.globalobs.append("%s_In" % n)
if self.options.bin:
self.out.var("%s_In" % n).setConstant(True)
elif pdf == "constr":
Expand Down Expand Up @@ -760,7 +784,7 @@ def doNuisances(self):
self.out.function("%s_BoundLo" % n),
self.out.function("%s_BoundHi" % n),
)
globalobs.append("%s_In" % n)
self.globalobs.append("%s_In" % n)
# if self.options.optimizeBoundNuisances: self.out.var(n).setAttribute("optimizeBounds")
elif pdf == "extArg":
continue
Expand All @@ -772,15 +796,18 @@ def doNuisances(self):
# self.out.var(n).Print('V')
if n in self.DC.frozenNuisances:
self.out.var(n).setConstant(True)

def doFillNuisPdfsAndSets(self):
if self.options.bin:
# avoid duplicating _Pdf in list
setNuisPdf = []
nuisPdfs = ROOT.RooArgList()
nuisVars = ROOT.RooArgSet()
for n, nf, p, a, e in self.DC.systs:
c_param_name = self.getSafeNormName(n)
if p != "constr":
nuisVars.add(self.out.var(n))
setNuisPdf.append(n)
nuisVars.add(self.out.var(c_param_name))
setNuisPdf.append(c_param_name)
setNuisPdf = set(setNuisPdf)
for n in setNuisPdf:
nuisPdfs.add(self.out.pdf(n + "_Pdf"))
Expand All @@ -789,15 +816,33 @@ def doNuisances(self):
self.out.safe_import(self.out.nuisPdf)
self.out.nuisPdfs = nuisPdfs
gobsVars = ROOT.RooArgSet()
for g in globalobs:
for g in self.globalobs:
gobsVars.add(self.out.var(g))
self.out.defineSet("globalObservables", gobsVars)
else: # doesn't work for too many nuisances :-(
# avoid duplicating _Pdf in list
setNuisPdf = set([n for (n, nf, p, a, e) in self.DC.systs])
self.doSet("nuisances", ",".join(["%s" % n for (n, nf, p, a, e) in self.DC.systs]))
setNuisPdf = set([self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs])
self.doSet("nuisances", ",".join(["%s" % self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs]))
self.doObj("nuisancePdf", "PROD", ",".join(["%s_Pdf" % n for n in setNuisPdf]))
self.doSet("globalObservables", ",".join(globalobs))
self.doSet("globalObservables", ",".join(self.globalobs))

def doAutoFlatNuisancePriors(self):
if len(self.DC.toCreateFlatParam.keys()) > 0:
for flatNP in self.DC.toCreateFlatParam.items():
c_param_name = flatNP[0]
c_param_details = flatNP[1]
if len(c_param_details):
v, x1, x2 = c_param_details
else:
v, x1, x2 = self.out.var(c_param_name).getVal(), self.out.var(c_param_name).getMin(), self.out.var(c_param_name).getMax()
if self.options.verbose > 2:
print("Will create flat prior for parameter ", c_param_name, " with range [", x1, x2, "]")
self.doExp(
"%s_diff_expr" % c_param_name, "%s-%s_In" % (c_param_name, c_param_name), "%s,%s_In[%g,%g,%g]" % (c_param_name, c_param_name, v, x1, x2)
)
self.doObj("%s_Pdf" % c_param_name, "Uniform", "%s_diff_expr" % c_param_name)
self.out.var("%s_In" % c_param_name).setConstant(True)
self.globalobs.append("%s_In" % c_param_name)

def doNuisancesGroups(self):
# Prepare a dictionary of which group a certain nuisance belongs to
Expand Down Expand Up @@ -878,7 +923,7 @@ def doExpectedEvents(self):
continue
if pdf == "constr":
continue
if pdf == "rateParam":
if pdf == "rateParam" or pdf == "flatParam":
continue
if p not in errline[b]:
continue
Expand All @@ -899,6 +944,7 @@ def doExpectedEvents(self):
logNorms.append((errline[b][p], n))
elif pdf == "gmM":
factors.append(n)
# elif pdf == "trG" or pdf == "unif" or pdf == "flatParam" or pdf == "dFD" or pdf == "dFD2":
elif pdf == "trG" or pdf == "unif" or pdf == "dFD" or pdf == "dFD2":
myname = "n_exp_shift_bin%s_proc_%s_%s" % (b, p, n)
self.doObj(myname, ROOFIT_EXPR, "'1+%f*@0', %s" % (errline[b][p], n))
Expand Down
2 changes: 2 additions & 0 deletions python/ShapeTools.py
Expand Up @@ -615,6 +615,8 @@ def prepareAllShapes(self):
raise RuntimeError("There's more than once choice of observables: %s\n" % str(list(shapeObs.keys())))
self.out.binVars = list(shapeObs.values())[0]
self.out.safe_import(self.out.binVars)
# keep hold of pdf_norm renaming
self.DC.pdfnorms = self.norm_rename_map.copy()

def doCombinedDataset(self):
if len(self.DC.bins) == 1 and self.options.forceNonSimPdf:
Expand Down

0 comments on commit 5acf6b6

Please sign in to comment.