Matthew Feickert
@matthewfeickert

mass_hists[0] + mass_hists[1]

results in

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-4-7bab2e7a86b3> in <module>
     12 # mass_hists[0]
     13 # help(stack_hist)
---> 14 mass_hists[0] + mass_hists[1]

    223         result = self.copy(deep=False)
...
    227         if isinstance(other, (int, float)) and other == 0:
    228             return self
...
    231         # Addition may change the axes if they can grow

/srv/conda/envs/notebook/lib/python3.7/site-packages/boost_histogram/_internal/hist.py in _compute_inplace_op(self, name, other)
    261     def _compute_inplace_op(self, name, other):
    262         if isinstance(other, Histogram):
--> 263             getattr(self._hist, name)(other._hist)
    264         elif isinstance(other, _histograms):
    265             getattr(self._hist, name)(other)

ValueError: axes not mergable
Jim Pivarski
@jpivarski
I'd read that error message as saying that the binning is not the same.
Henry Schreiner
@henryiii
If the axes are the same, yes, that works. If the axes don’t match, it won’t.
Matthew Feickert
@matthewfeickert
Hm.
>>> bins = [hist.to_numpy()[1] for hist in mass_hists]
>>> all(np.array_equal(x, y) for x, y in zip(bins, bins[::-1]))
True
Henry Schreiner
@henryiii
I’d use reduce to sum up a list of histograms, functools.reduce(operator.add, mass_hists), but you should be able to do sum(mass_hists), if you’d rather.
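Both spellings rely on the same thing: histogram addition special-cases `other == 0` (the `if isinstance(other, (int, float)) and other == 0: return self` lines visible in the traceback above), which is exactly what lets the builtin `sum`, whose accumulation starts at the integer `0`, work. A minimal pure-Python stand-in sketching both (the `Counts` class here is a toy, not boost-histogram):

```python
import functools
import operator

class Counts:
    """Toy stand-in for a histogram; only bin counts, no axes."""

    def __init__(self, data):
        self.data = list(data)

    def __add__(self, other):
        # Mirror boost-histogram's special case that makes builtin sum() work:
        # sum() starts its accumulation from the integer 0.
        if isinstance(other, (int, float)) and other == 0:
            return self
        return Counts(a + b for a, b in zip(self.data, other.data))

    __radd__ = __add__

hists = [Counts([1, 2]), Counts([3, 4]), Counts([5, 6])]

total_reduce = functools.reduce(operator.add, hists)
total_sum = sum(hists)  # 0 + Counts(...) hits the special case above

print(total_reduce.data)  # [9, 12]
print(total_sum.data)     # [9, 12]
```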
Matthew Feickert
@matthewfeickert
oh
so things like the axis name matter?
Jim Pivarski
@jpivarski
(Maybe there ought to be an easy way to strip metadata? And it should be recommended in the "axes not mergable" error message?)
Matthew Feickert
@matthewfeickert

Hm. Though

for hist in mass_hists:
    assert hist.metadata is None

passes

okay, I might be doing something stupid. I'll poke at this more after my next meeting
Henry Schreiner
@henryiii
What would you do with the axes metadata? c = a + b, what’s the metadata for c? .metadata is only one item in the metadata, any valid attribute is also metadata, like .name, etc.
Jim Pivarski
@jpivarski
One option is to use the first, though that clearly has downsides. Forcing the user to make a choice by wrapping the function call with a metadata stripper/overwriter that gets advertised in the error message might be the best way to go...
Matthew Feickert
@matthewfeickert

This is all helpful and makes me appreciate the concept of metadata more. I guess the good news is that, once some issues have been resolved, uproot will handle things in a more intelligent manner than I have been. What I have been (hackily) doing is the following: https://github.com/matthewfeickert/heputils/blob/3dd0e858f002041a7ae15fc310d92ddb0ea4fe26/src/heputils/convert.py#L7-L32

If I just don't even attempt to handle the name then things work with the following:

import numpy as np
import uproot4 as uproot
import mplhep
import hist
from hist import Hist
import functools
import operator

mplhep.set_style("ATLAS")

def uproot_to_hist(uproot_hist):
    values, edges = uproot_hist.to_numpy()
    _hist = hist.Hist(
        hist.axis.Regular(len(edges) - 1, edges[0], edges[-1]),
        storage=hist.storage.Double(),
    )
    _hist[:] = values
    return _hist

root_file = uproot.open("example.root")
mass_hists = [uproot_to_hist(root_file[key]) for key in root_file.keys()]

# Sum the histograms into a stack before plotting
stack_hist = functools.reduce(operator.add, mass_hists)
stack_hist.plot1d()
for h in mass_hists:
    h.plot1d()
Matthew Feickert
@matthewfeickert
Though I guess I still want to know, is there a more intelligent way to deal with the name metadata?
Henry Schreiner
@henryiii
As long as it matches, it should be fine. There used to be a way to get the metadata all at once (.metadata), but now there’s not a public location like that. It would be nice to be able to just do ax1.__dict__ = ax2.__dict__ and then merge.
Matthew Feickert
@matthewfeickert
yeah, I also just looked at the docs for hist and was reminded that name is the name of the axis and not the histogram
Henry Schreiner
@henryiii
(There is a private way to do exactly that - I think it’s ax._ax.metadata)
You can name your histogram if you want to.
Matthew Feickert
@matthewfeickert
so what I was doing should have been setting the label and not the name
ah okay, thanks for the info on ax._ax.metadata!
Henry Schreiner
@henryiii
“name” is something that can be used to refer to the axis and should be unique. “label” is anything.
Matthew Feickert
@matthewfeickert
:+1:
Thanks Henry!
Henry Schreiner
@henryiii
And you can use __dict__ on a histogram, just not on an axis. I wonder if @property; def __dict__(self): return self._ax.metadata would break anything...
It’s a slots class so normal __dict__ is not used.
Hans Dembinski
@HDembinski
@matthewfeickert The idea of metadata is to distinguish between otherwise identical axes, e.g. to distinguish axis.Regular(10, 0, 10, metadata="time in seconds") from axis.Regular(10, 0, 10, metadata="length in meters"). That's why adding histograms in boost-histogram (and hist) only works when the metadata matches on all axes.
The assumption here is that you can only add histograms with identical layout: not only the physical layout in terms of the axis grid, but also the logical layout in terms of what those axes mean.
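Hans's point can be sketched with a plain dataclass (a hypothetical illustration, not the boost-histogram implementation): axes compare equal only when both the bin layout and the metadata agree, and that equality is the precondition for `h1 + h2`.

```python
from dataclasses import dataclass

# Hypothetical sketch, not boost-histogram itself: dataclass equality
# compares every field, including metadata, just as axis comparison does.
@dataclass
class RegularAxis:
    bins: int
    start: float
    stop: float
    metadata: object = None

a = RegularAxis(10, 0, 10, metadata="time in seconds")
b = RegularAxis(10, 0, 10, metadata="length in meters")
c = RegularAxis(10, 0, 10, metadata="time in seconds")

print(a == b)  # False: same grid, different meaning, so not mergeable
print(a == c)  # True: identical layout and metadata
```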
Matthew Feickert
@matthewfeickert
This makes sense now. Before, I was assuming that anything called "metadata" couldn't actually have any impact on operations.
Peter Fackeldey
@pfackeldey

Dear Hist/boost-histogram developers,

thank you for this great histogramming library, it is a pleasure to work with it :)

I have a question for you regarding fancy indexing on a StrCategory axis.
My (Hist) histogram looks as follows:

In [38]: h
Out[38]:
Hist(
  StrCategory(['GluGluToHHTo2B2VTo2L2Nu_node_cHHH2p45'], growth=True),
  StrCategory(['ee', 'mumu', 'emu'], growth=True),
  StrCategory(['nominal'], growth=True),
  Regular(40, 0, 200, name='MET', label='$p_{T}^{miss}$'),
  storage=Weight())  # Sum: WeightedSum(value=2608.44, variance=47.3505) (WeightedSum(value=2775.4, variance=50.5706) with flow)

Now I would like to "group" e.g. the "ee" and "emu" category together, which means that I'd like to do something like:

h[{"dataset": "GluGluToHHTo2B2VTo2L2Nu_node_cHHH2p45", "category": ["ee", "emu"], "systematic": "nominal"}]

Unfortunately this does not work, as ["ee", "emu"] is not a valid indexing operation.

Is there a way to do such a fancy indexing on StrCategory axes? In case this is not supported, is there a nice workaround?
(I am basically looking for something, which works similar to coffea.Hist.group method: https://github.com/CoffeaTeam/coffea/blob/master/coffea/hist/hist_tools.py#L1115)

Best, Peter

Peter Fackeldey
@pfackeldey
Small update: I found that the following works, but I think it would be a very nice feature for Hist if this could be handled a bit more conveniently. What do you think?
np.sum(h[{"dataset": "GluGluToHHTo2B2VTo2L2Nu_node_cHHH2p45", "systematic": "nominal"}].view()[h.axes["category"].index(["ee", "emu"]), ...], axis=0)
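Peter's workaround amounts to selecting the wanted category rows of the counts array and summing them along the category axis; the same idea on a plain NumPy array (toy values, not the histogram above):

```python
import numpy as np

# Toy counts: a 3-entry string-category axis times a 4-bin regular axis.
categories = ["ee", "mumu", "emu"]
counts = np.array([[1, 2, 3, 4],
                   [5, 6, 7, 8],
                   [9, 10, 11, 12]])

# "Group" 'ee' and 'emu': pick those rows and sum along the category axis.
rows = [categories.index(c) for c in ("ee", "emu")]
grouped = counts[rows].sum(axis=0)
print(grouped.tolist())  # [10, 12, 14, 16]
```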
Henry Schreiner
@henryiii
@pfackeldey The issue is here: scikit-hep/boost-histogram#296. There could be a shortcut in Hist if this is not solved upstream, but ideally indexing by ["ee", "emu"] would not cause the histogram to convert into a view.
Peter Fackeldey
@pfackeldey
@henryiii Thank you for your reply! Alright, I'll go for now with the reduce option, so that I don't have to convert into a view and can keep the hist :) I'd love to see a nice indexing solution in the future, such as the syntax you proposed in the issue.
Peter Fackeldey
@pfackeldey
I hope you don't mind another question:
I would like to fill a histogram with a very fine (regular) binning, e.g. 1000 bins from 0 to 1. After filling I would like to rebin into a variable binning of e.g. [0, 0.5, 0.7, 0.8, 0.95, 1] (only 5 bins). As far as I can see, there is the rebin action, but unfortunately it only accepts a single value/factor. Is it possible to rebin a Regular/Variable axis to any arbitrary binning, as long as the binning is "valid"?
Henry Schreiner
@henryiii

I think this is still a feature that hasn’t been added to Boost.Histogram yet (@HDembinski can comment), but one catch is the bin edges: the new edges have to line up exactly with existing ones or the rebinning doesn’t make sense. You can write this manually by copying from one histogram into another, though; maybe something Hist could simplify as well. Something like this (rough draft, might not be correct):

(ax,) = new_hist.axes
for bin in range(5):
    new_hist[bin] = old_hist[bh.loc(ax[bin]) : bh.loc(ax[bin + 1]) : sum]

(Didn’t include flow bins, but you can do that with bh.tag.at and going from -1 to N)
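When the coarse edges lie exactly on the fine ones, the copy loop Henry sketches reduces to summing runs of fine-bin counts. A NumPy-only sketch of that idea (toy counts; ignores flow bins and weighted storages):

```python
import numpy as np

# Toy fine binning: 10 bins on [0, 1] with counts 0..9.
fine_edges = np.arange(11) / 10.0
fine_counts = np.arange(10)

# Coarse edges must coincide exactly with fine edges for rebinning to make sense.
coarse_edges = np.array([0.0, 0.2, 0.5, 1.0])
idx = np.searchsorted(fine_edges, coarse_edges)
assert np.array_equal(fine_edges[idx], coarse_edges), "edges must line up exactly"

# Sum each run of fine bins between consecutive coarse edges.
coarse_counts = np.add.reduceat(fine_counts, idx[:-1])
print(coarse_counts.tolist())  # [1, 9, 35]
```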

Matthew Feickert
@matthewfeickert
@henryiii @HDembinski Is there an API in Boost.Histogram for uncertainties? That is, if you wanted to calculate the weighted uncertainty on each bin in a histogram formed from the sum of histograms (a stack hist) is there a way to ask a Boost.Histogram object for the uncertainties (and weights?) associated with each bin (as a NumPy array or list)?
Matthew Feickert
@matthewfeickert
Or would you use the Accumulators here?
Matthew Feickert
@matthewfeickert

Also, a somewhat unrelated question: What am I not understanding about how Boost.Histogram does the variance? Or am I making a stupid mistake?

import boost_histogram as bh
import numpy as np

# From the Boost.Histogram examples
values = [1] * 9 + [91]
weight = 2
weights = weight * np.ones(len(values))
wm = bh.accumulators.WeightedMean().fill(values, weight=weight)

print("\n# By hand")
print(f"values: {values}")
print(f"weights: {weights}")
_sum_of_weights = sum(weights)
print(f"sum of weights: {_sum_of_weights}")
_sum_of_weights_squared = sum(np.square(weights))
print(f"sum of weights squared: {_sum_of_weights_squared}")
print(f"weighted mean: {np.average(values, weights=weights)}")
print(f"np variance: {np.var(values)}")
_weighted_variance = np.average((values-np.average(values, weights=weights))**2, weights=weights) # Better ways to do this
print(f"weighted variance: {_weighted_variance}")
print("\n# Boost.Histogram accumulator")
print(wm) # Why is the variance different here?

gives

# By hand
values: [1, 1, 1, 1, 1, 1, 1, 1, 1, 91]
weights: [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
sum of weights: 20.0
sum of weights squared: 40.0
weighted mean: 10.0
np variance: 729.0
weighted variance: 729.0

# Boost.Histogram accumulator
WeightedMean(sum_of_weights=20, sum_of_weights_squared=40, value=10, variance=810)
Matthew Feickert
@matthewfeickert
Hm. Is Boost.Histogram using a particular approximation scheme for the variance that is highly efficient for larger arrays? I see that if I increase the length of values, this deviation drops to under a percent.
Alexander Held
@alexander-held
The difference between 810 and 729 is sample variance vs population variance https://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance.
(The n-1 vs n difference goes away as n -> inf)
Matthew Feickert
@matthewfeickert
Ah yes, that should have been my first thought
thank you @alexander-held! I appreciate you pointing that out for me. :)
Matthew Feickert
@matthewfeickert
For those playing along with my goof,
import boost_histogram as bh
import numpy as np

# From the Boost.Histogram examples
values = [1] * 9 + [91]
weight = 2
weights = weight * np.ones(len(values))
wm = bh.accumulators.WeightedMean().fill(values, weight=weight)

print("\n# By hand")
print(f"values: {values}")
print(f"weights: {weights}")
_sum_of_weights = sum(weights)
print(f"sum of weights: {_sum_of_weights}")
_sum_of_weights_squared = sum(np.square(weights))
print(f"sum of weights squared: {_sum_of_weights_squared}")
print(f"weighted mean: {np.average(values, weights=weights)}")
print(f"np variance: {np.var(values)}")
_weighted_variance = sum(np.square(values-np.average(values, weights=weights)))/(len(values)-1) # Better ways to do this
print(f"weighted variance: {_weighted_variance}")
print("\n# Boost.Histogram accumulator")
print(wm) # Difference _was_ that Boost.Histogram nicely gives an unbiased estimator! :)
# By hand
values: [1, 1, 1, 1, 1, 1, 1, 1, 1, 91]
weights: [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
sum of weights: 20.0
sum of weights squared: 40.0
weighted mean: 10.0
np variance: 729.0
weighted variance: 810.0

# Boost.Histogram accumulator
WeightedMean(sum_of_weights=20, sum_of_weights_squared=40, value=10, variance=810)
Matthew Feickert
@matthewfeickert
Though if there is an API in Boost.Histogram or hist for properly handling the uncertainties per bin for summing histograms that would be great. Hopefully Hans and Henry can set me straight on that next week.
Hans Dembinski
@HDembinski
@matthewfeickert Looks like the variance computation issue has been cleared up. Now to your other question: you can make a histogram with a storage that uses the WeightedMean accumulator.
import boost_histogram as bh
h = bh.Histogram(bh.axis.Regular(10, 0, 1), storage=bh.storage.WeightedMean())
h.fill([1], sample=[3], weight=[2])
The first argument is used to look up the cell, then the second argument is fed to the accumulator in that cell with the weight given by the third argument.
After filling, you can query the view() as usual to get values and variances etc.
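For reference, the variance the accumulator reported earlier (810, with sum_of_weights=20 and sum_of_weights_squared=40) can be reproduced with a pure-Python sketch of the same bookkeeping. This is a reconstruction of the formulas, not the library's actual code, and the "effective n" denominator is an assumption that happens to reduce to n - 1 for unit weights:

```python
class ToyWeightedMean:
    """Pure-Python sketch of a weighted-mean accumulator (not boost-histogram)."""

    def __init__(self):
        self.sum_of_weights = 0.0
        self.sum_of_weights_squared = 0.0
        self.value = 0.0   # running weighted mean
        self._swd2 = 0.0   # weighted sum of squared deviations (West's algorithm)

    def fill(self, x, weight=1.0):
        self.sum_of_weights += weight
        self.sum_of_weights_squared += weight ** 2
        delta = x - self.value
        self.value += (weight / self.sum_of_weights) * delta
        self._swd2 += weight * delta * (x - self.value)
        return self

    @property
    def variance(self):
        # Assumed "effective n" correction; equals n - 1 for unit weights.
        n_eff = self.sum_of_weights - self.sum_of_weights_squared / self.sum_of_weights
        return self._swd2 / n_eff

wm = ToyWeightedMean()
for v in [1] * 9 + [91]:
    wm.fill(v, weight=2)

print(wm.sum_of_weights, wm.value, wm.variance)  # 20.0 10.0 810.0
```

With unit weights this reproduces the usual n - 1 sample variance, which is why the accumulator reported 810 rather than 729 above.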