-
Notifications
You must be signed in to change notification settings - Fork 0
/
check-file
executable file
·77 lines (64 loc) · 2.59 KB
/
check-file
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env python
"""
Check all trees in one or more ROOT files for duplicate events.

Every TTree found in each input file is tested for duplicated
(RunNumber, EventNumber) pairs, and the fraction of entries selected by
each of two MET_phi_original-based cuts (reported as "even" and "odd")
is printed.

A helper that looks for invalid values (NaN, inf) is defined as well, but
the call to it in the main loop is currently commented out.
"""
from rootpy.extern.argparse import ArgumentParser
# NOTE(review): the command line is parsed here, before the remaining
# imports run -- presumably deliberate; confirm before reordering.
parser = ArgumentParser()
# One or more paths of the files to check
parser.add_argument('files', nargs='+')
args = parser.parse_args()
import sys
import os
from rootpy.io import root_open
import numpy as np
from root_numpy import RootNumpyUnconvertibleWarning
import warnings
# Ignore all conversion warnings from root_numpy
warnings.simplefilter("ignore",
RootNumpyUnconvertibleWarning)
def dup_idx(l):
    """
    Return the indices of all duplicated array elements.

    An element counts as duplicated when its value occurs more than once
    in ``l``. The index of every such occurrence (the first one included)
    is returned, in ascending order.

    Parameters
    ----------
    l : array_like
        Values to examine; anything accepted by ``np.unique``. The input
        is treated as flattened, so the indices refer to the flat array.

    Returns
    -------
    numpy.ndarray
        1-D integer array with the positions of all non-unique elements
        (empty if every element is unique).
    """
    # inverse[i] is the position of element i in the array of unique values
    _, inverse = np.unique(l, return_inverse=True)
    # Recent NumPy releases may shape the inverse like the input; bincount
    # and the flat indices returned below need it one-dimensional.
    inverse = inverse.ravel()
    # occurrences[k] is the number of times the k-th unique value appears
    occurrences = np.bincount(inverse)
    # Look the count up per element: O(n), instead of comparing every
    # element against every duplicated value in an (n x n_dup) table.
    return np.nonzero(occurrences[inverse] > 1)[0]
def check_nan_inf(rec):
    """
    Report every field of a record array that contains a non-finite value.

    For each field name in ``rec.dtype.names`` a line
    ``invalid values for field <name>`` is printed if at least one element
    of that column is NaN, +inf or -inf.

    Parameters
    ----------
    rec : numpy.ndarray
        Structured (record) array; ``rec.dtype.names`` must list its fields.

    Returns
    -------
    None
    """
    for field in rec.dtype.names:
        # Test the elements themselves. Summing the column first only gives
        # NaN when a NaN, or both +inf and -inf, are present, so a column
        # holding infinities of a single sign would go unreported.
        if not np.all(np.isfinite(rec[field])):
            # Single-argument print(...) prints the same on Python 2 and 3
            print("invalid values for field %s" % field)
# Check every file given on the command line in turn. All print calls use
# the single-argument print(...) form, which prints the same on Python 2
# and Python 3.
for i, filename in enumerate(args.files):
    print(filename)
    with root_open(filename) as rfile:
        # Walk through all trees in each ROOT file
        for dirpath, dirs, treenames in rfile.walk(class_pattern='TTree'):
            for treename in treenames:
                print(treename)
                tree = rfile.Get(os.path.join(dirpath, treename))
                events = len(tree)
                if not events:
                    # An empty tree has nothing to check
                    continue
                # Use RunNumber and EventNumber to uniquely define each event
                keys = tree.to_array(['RunNumber', 'EventNumber'])
                if keys.shape != np.unique(keys).shape:
                    # This tree contains duplicate events; list the indices
                    # of all entries whose key occurs more than once
                    print("BAD %s" % treename)
                    print(dup_idx(keys))
                else:
                    print("GOOD %s" % treename)
                ## Check the tree for invalid values (currently disabled)
                #check_nan_inf(tree.to_array())
                # Count the entries passing each of two selections on
                # MET_phi_original and report them as fractions of the tree.
                # NOTE(review): the count named `even` uses the selection
                # whose remainder lies in [1, 2) and `odd` the one in [0, 1)
                # -- confirm the labels are the intended way round.
                even = tree.GetEntries('((abs(MET_phi_original*100)%2)>=1)&&((abs(MET_phi_original*100)%2)<2)')
                odd = tree.GetEntries('((abs(MET_phi_original*100)%2)>=0)&&((abs(MET_phi_original*100)%2)<1)')
                # Every entry must pass exactly one of the two selections.
                # Raised explicitly so the check also runs under `python -O`
                # (which strips assert statements) and says what went wrong.
                if even + odd != events:
                    raise AssertionError(
                        "even (%d) + odd (%d) != entries (%d) in tree %s"
                        % (even, odd, events, treename))
                print("even: %f" % (even / float(events)))
                print("odd: %f" % (odd / float(events)))
    if i < len(args.files) - 1:
        # not the last file, print a blank line
        print("")