Project

General

Profile

Sample code

from future import print_function
import uproot as up
import numpy as np
import random
import numba as nb
import os.path

#This macro uses upROOT to read in TTrees from ROOT files as numpy arrays. This method is much faster than C++ ROOT or PyROOT. Surely, there are more advanced implementations of upROOT, but this is a quick and dirty method for reading in the information. The TTrees are slice level and shower level.

def TreeOpen():
  1. outfile = 'tree_arrays_nopresel.npz'
    max_shw = 7
    offset = 3
    nfeature = 17
    ninput = max_shw*nfeature
#Just return the arrays if the array file already exists
if os.path.exists(outfile):
npzfile = np.load(outfile)
X = npzfile['X']
H = npzfile['H']
Y = npzfile['Y']
Osc = npzfile['Osc']
POT = npzfile['POT']
LID = npzfile['LID']
return X,H,Y,Osc,POT,LID
else:
  1. SlcSig = up.open('~/LSTM_Train/files/fardet_genie_fluxswap_genierw_fhc_v08_1000_r00025410_s63_c000_S17-11-22_v1_20170330_145648_sim.FluxSwap_EventTrain.root')['eventtrain/fSlice']
  2. ShwSig = up.open('~/LSTM_Train/files/fardet_genie_fluxswap_genierw_fhc_v08_1000_r00025410_s63_c000_S17-11-22_v1_20170330_145648_sim.FluxSwap_EventTrain.root')['eventtrain/fShw']
  3. SlcBg = up.open('~/LSTM_Train/files/fardet_genie_nonswap_genierw_fhc_v08_1000_r00025412_s51_c001_S17-11-22_v1_20170330_113817_sim.NonSwap_EventTrain.root')['eventtrain/fSlice']
  4. ShwBg = up.open('~/LSTM_Train/files/fardet_genie_nonswap_genierw_fhc_v08_1000_r00025412_s51_c001_S17-11-22_v1_20170330_113817_sim.NonSwap_EventTrain.root')['eventtrain/fShw']

    #Read in ROOT files
    SlcSig = up.open('../LSTM_Train/files/FluxSwap.root')['eventtrain/fSlice'] #Slice level
    ShwSig = up.open('../LSTM_Train/files/FluxSwap.root')['eventtrain/fShw'] #Shw level
    SlcBg = up.open('../LSTM_Train/files/NonSwap.root')['eventtrain/fSlice']
    ShwBg = up.open('../LSTM_Train/files/NonSwap.root')['eventtrain/fShw']

    #Create numpy arrays from TTree branches
    SigPOT = SlcSig.array("FilePOT")
    SigE = SlcSig.array("SliceEnergy")
    SigLong = SlcSig.array("SliceLongestShwLength")
    SigNHit = SlcSig.array("SliceNHits")
    SigReMId = SlcSig.array("SliceBestReMId")
    SigNShw = SlcSig.array("SliceNShw")
    SigCCNC = SlcSig.array("SliceTrueNuCCNC")
    SigPdg = SlcSig.array("SliceTrueNuPdg")
    SigEMuLLL = ShwSig.array("ShwLIDEMuLLL")
    SigEPi0LLL = ShwSig.array("ShwLIDEPi0LLL")
    SigEPLLL = ShwSig.array("ShwLIDEPLLL")
    SigENLLL = ShwSig.array("ShwLIDENLLL")
    SigEPiLLL = ShwSig.array("ShwLIDEPiLLL")
    SigEGLLL = ShwSig.array("ShwLIDEGLLL")
    SigEMuLLT = ShwSig.array("ShwLIDEMuLLT")
    SigEPi0LLT = ShwSig.array("ShwLIDEPi0LLT")
    SigEPLLT = ShwSig.array("ShwLIDEPLLT")
    SigENLLT = ShwSig.array("ShwLIDENLLT")
    SigEPiLLT = ShwSig.array("ShwLIDEPiLLT")
    SigEGLLT = ShwSig.array("ShwLIDEGLLT")
    SigShwE = ShwSig.array("ShwEnergy")
    SigShwLen = ShwSig.array("ShwLength")
    SigShwHitX = ShwSig.array("ShwNHitsX")
    SigShwHitY = ShwSig.array("ShwNHitsY")
    SigShwCos = ShwSig.array("ShwCosTheta")
    SigShwPi0 = ShwSig.array("ShwPi0Mgg")
    SigShwGap = ShwSig.array("ShwGap")
    SigShwMiss = ShwSig.array("ShwMissSpan")
    SigLID = ShwSig.array("ShwLID")
    SigNue = ShwSig.array("nue_app")
    SigContain = ShwSig.array("contain")
    SigDQ = ShwSig.array("dq")
    BgPOT      = SlcBg.array("FilePOT")
    BgE = SlcBg.array("SliceEnergy")
    BgLong = SlcBg.array("SliceLongestShwLength")
    BgNHit = SlcBg.array("SliceNHits")
    BgReMId = SlcBg.array("SliceBestReMId")
    BgNShw = SlcBg.array("SliceNShw")
    BgCCNC = SlcBg.array("SliceTrueNuCCNC")
    BgPdg = SlcBg.array("SliceTrueNuPdg")
    BgEMuLLL = ShwBg.array("ShwLIDEMuLLL")
    BgEPi0LLL = ShwBg.array("ShwLIDEPi0LLL")
    BgEPLLL = ShwBg.array("ShwLIDEPLLL")
    BgENLLL = ShwBg.array("ShwLIDENLLL")
    BgEPiLLL = ShwBg.array("ShwLIDEPiLLL")
    BgEGLLL = ShwBg.array("ShwLIDEGLLL")
    BgEMuLLT = ShwBg.array("ShwLIDEMuLLT")
    BgEPi0LLT = ShwBg.array("ShwLIDEPi0LLT")
    BgEPLLT = ShwBg.array("ShwLIDEPLLT")
    BgENLLT = ShwBg.array("ShwLIDENLLT")
    BgEPiLLT = ShwBg.array("ShwLIDEPiLLT")
    BgEGLLT = ShwBg.array("ShwLIDEGLLT")
    BgShwE = ShwBg.array("ShwEnergy")
    BgShwLen = ShwBg.array("ShwLength")
    BgShwHitX = ShwBg.array("ShwNHitsX")
    BgShwHitY = ShwBg.array("ShwNHitsY")
    BgShwCos = ShwBg.array("ShwCosTheta")
    BgShwPi0 = ShwBg.array("ShwPi0Mgg")
    BgShwGap = ShwBg.array("ShwGap")
    BgShwMiss = ShwBg.array("ShwMissSpan")
    BgLID = ShwBg.array("ShwLID")
    BgNue = ShwBg.array("nue_dis")
    BgNumu = ShwBg.array("numu_dis")
    BgContain = ShwBg.array("contain")
    BgDQ = ShwBg.array("dq")
    nshw = 0
    #Initialize output arrays with zeros
    SX = np.zeros((SigNShw.shape[0],max_shw,nfeature))
    BX = np.zeros((BgNShw.shape[0],max_shw,nfeature))
    SH = np.zeros((SigNShw.shape[0],offset))
    BH = np.zeros((BgNShw.shape[0],offset))
    SY = np.zeros((SigNShw.shape[0],1))
    BY = np.zeros((BgNShw.shape[0],1))
    SLID = np.zeros((SigNShw.shape[0],1))
    BLID = np.zeros((BgNShw.shape[0],1))
    SOsc = np.zeros((SigNShw.shape[0],1))
    BOsc = np.zeros((BgNShw.shape[0],1))
    #Get the net POT from the files. Since this is filled slice by slice, this will be scaled to match test set POT
    SPOT = np.sum(SigPOT)
    BPOT = np.sum(BgPOT)
    POT = np.append(SPOT,BPOT)
    nsig = 0
    for i in range(SigNShw.shape[0]):
    #Cuts which remove virtually no oscillated nue CC events
    presel = (SigNShw[i]>7 or SigE[i]>4.0 or SigLong[i]<80 or SigLong[i]>700 or SigReMId[i]>0.7 or SigNHit[i]>700)
    if presel == False: continue
    lllcut = (np.abs(SigEMuLLL[i])>2 or np.abs(SigEPi0LLL[i])>0.6 or np.abs(SigEPLLL[i])>0.7)
    lltcut = (SigEMuLLT[i]<-0.5 or SigEMuLLT[i]>2.0 or SigEPi0LLT[i]<-0.5 or SigEPi0LLT[i]>0.8 or SigEPLLT[i]<-0.7 or SigEPLLT[i]>1.0)
    if lllcut or lltcut: continue
    #Ensure NueCC
    if SigPdg[i]!=12 or SigCCNC[i]==1: continue
    #Ensure containment
    if np.sum(SigContain[nshw:nshw+SigNShw[i]])==0: continue
    #Arrange the slice specific vectors according to shower energy
    shwEvec = np.sort(SigShwE[nshw:nshw+SigNShw[i]])[::-1]
    p = shwEvec.argsort()
    shwlen = SigShwLen[nshw:nshw+SigNShw[i]]
    shwlen = shwlen[p]
    shwhitx = SigShwHitX[nshw:nshw+SigNShw[i]]
    shwhitx = shwhitx[p]
    shwhity = SigShwHitY[nshw:nshw+SigNShw[i]]
    shwhity = shwhity[p]
    hitdif = np.divide(shwhitx,shwhity)
    emulll = SigEMuLLL[nshw:nshw+SigNShw[i]]
    emulll = emulll[p]
    epi0lll = SigEPi0LLL[nshw:nshw+SigNShw[i]]
    epi0lll = epi0lll[p]
    eplll = SigEPLLL[nshw:nshw+SigNShw[i]]
    eplll = eplll[p]
    enlll = SigENLLL[nshw:nshw+SigNShw[i]]
    enlll = enlll[p]
    epilll = SigEPiLLL[nshw:nshw+SigNShw[i]]
    epilll = epilll[p]
    eglll = SigEGLLL[nshw:nshw+SigNShw[i]]
    eglll = eglll[p]
    emullt = SigEMuLLT[nshw:nshw+SigNShw[i]]
    emullt = emullt[p]
    epi0llt = SigEPi0LLT[nshw:nshw+SigNShw[i]]
    epi0llt = epi0llt[p]
    epllt = SigEPLLT[nshw:nshw+SigNShw[i]]
    epllt = epllt[p]
    enllt = SigENLLT[nshw:nshw+SigNShw[i]]
    enllt = enllt[p]
    epillt = SigEPiLLT[nshw:nshw+SigNShw[i]]
    epillt = epillt[p]
    egllt = SigEGLLT[nshw:nshw+SigNShw[i]]
    egllt = egllt[p]
    cos = SigShwCos[nshw:nshw+SigNShw[i]]
    cos = cos[0]
    pi0 = SigShwPi0[nshw:nshw+SigNShw[i]]
    pi0 = pi0[0]
    gap = SigShwGap[nshw:nshw+SigNShw[i]]
    gap = gap[p]
    miss = SigShwMiss[nshw:nshw+SigNShw[i]]
    miss = miss[p]
    osc = SigNue[nshw:nshw+SigNShw[i]]
    SOsc[i,0] = np.amax(osc)
    lid = SigLID[nshw:nshw+SigNShw[i]]
    lid = lid[p]
    SLID[i,0] = lid[0] #Fill slice level arrays
    SH[i,0] = SigE[i]
    SH[i,1] = cos
    SH[i,2] = pi0
    for j in range(max_shw):
    if j >= shwEvec.shape[0]: break
    if j%nfeature==0: SX[i,j,:] = shwEvec[j] #Fill shower level arrays
    elif j%nfeature==1: SX[i,j,:] = shwlen[j]
    elif j%nfeature==2: SX[i,j,:] = hitdif[j]
    elif j%nfeature==3: SX[i,j,:] = emulll[j]
    elif j%nfeature==4: SX[i,j,:] = epi0lll[j]
    elif j%nfeature==5: SX[i,j,:] = eplll[j]
    elif j%nfeature==6: SX[i,j,:] = enlll[j]
    elif j%nfeature==7: SX[i,j,:] = epilll[j]
    elif j%nfeature==8: SX[i,j,:] = elll[j]
    elif j%nfeature==9: SX[i,j,:] = emullt[j]
    elif j%nfeature==10: SX[i,j,:] = epi0llt[j]
    elif j%nfeature==11: SX[i,j,:] = epllt[j]
    elif j%nfeature==12: SX[i,j,:] = enllt[j]
    elif j%nfeature==13: SX[i,j,:] = epillt[j]
    elif j%nfeature==14: SX[i,j,:] = ellt[j]
    elif j%nfeature==15: SX[i,j,:] = miss[j]
    elif j%nfeature==16: SX[i,j,:] = gap[j]
    nshw = SigNShw[i]
    nsig
    =1
    nshw = 0
    nbg = 0
    #Same procdure as signal
    for i in range(BgNShw.shape[0]):
  5. presel = (BgNShw[i]>7)
    presel = (BgNShw[i]>7 or BgE[i]>4.0 or BgLong[i]<80 or BgLong[i]>700 or BgReMId[i]>0.7 or BgNHit[i]>700)
    if presel == False: continue
    lllcut = (np.abs(BgEMuLLL[i])>2 or np.abs(BgEPi0LLL[i])>0.6 or np.abs(BgEPLLL[i])>0.7)
    lltcut = (BgEMuLLT[i]<-0.5 or BgEMuLLT[i]>2.0 or BgEPi0LLT[i]<-0.5 or BgEPi0LLT[i]>0.8 or BgEPLLT[i]<-0.7 or BgEPLLT[i]>1.0)
    if lllcut or lltcut: continue
    if np.sum(BgContain[nshw:nshw+BgNShw[i]])==0: continue
    shwEvec = np.sort(BgShwE[nshw:nshw+BgNShw[i]])[::-1]
    tmpcontain = BgContain[nshw:nshw+BgNShw[i]]
    tmpdq = BgDQ[nshw:nshw+BgDQ[i]]
    p = shwEvec.argsort()
    shwlen = BgShwLen[nshw:nshw+BgNShw[i]]
    shwlen = shwlen[p]
    shwhitx = BgShwHitX[nshw:nshw+BgNShw[i]]
    shwhitx = shwhitx[p]
    shwhity = BgShwHitY[nshw:nshw+BgNShw[i]]
    shwhity = shwhity[p]
    hitdif = np.divide(shwhitx,shwhity)
    emulll = BgEMuLLL[nshw:nshw+BgNShw[i]]
    emulll = emulll[p]
    epi0lll = BgEPi0LLL[nshw:nshw+BgNShw[i]]
    epi0lll = epi0lll[p]
    eplll = BgEPLLL[nshw:nshw+BgNShw[i]]
    eplll = eplll[p]
    enlll = BgENLLL[nshw:nshw+BgNShw[i]]
    enlll = enlll[p]
    epilll = BgEPiLLL[nshw:nshw+BgNShw[i]]
    epilll = epilll[p]
    eglll = BgEGLLL[nshw:nshw+BgNShw[i]]
    eglll = eglll[p]
    emullt = BgEMuLLT[nshw:nshw+BgNShw[i]]
    emullt = emullt[p]
    epi0llt = BgEPi0LLT[nshw:nshw+BgNShw[i]]
    epi0llt = epi0llt[p]
    epllt = BgEPLLT[nshw:nshw+BgNShw[i]]
    epllt = epllt[p]
    enllt = BgENLLT[nshw:nshw+BgNShw[i]]
    enllt = enllt[p]
    epillt = BgEPiLLT[nshw:nshw+BgNShw[i]]
    epillt = epillt[p]
    egllt = BgEGLLT[nshw:nshw+BgNShw[i]]
    egllt = egllt[p]
    cos = BgShwCos[nshw:nshw+BgNShw[i]]
    cos = cos0
    pi0 = BgShwPi0[nshw:nshw+BgNShw[i]]
    pi0 = pi00
    gap = BgShwGap[nshw:nshw+BgNShw[i]]
    gap = gap[p]
    miss = BgShwMiss[nshw:nshw+BgNShw[i]]
    miss = miss[p]
    osc = 0
    if BgPdg[i]==14: osc = BgNumu[i]
    else: osc = BgNue[i]
    osc = BgNue[nshw:nshw+BgNShw[i]]
    BOsc[i,0] = np.amax(osc)
    lid = BgLID[nshw:nshw+BgNShw[i]]
    lid = lid[p]
    BLID[i,0] = lid0
    BH[i,0] = BgE[i]
    BH[i,1] = cos
    BH[i,2] = pi0
    for j in range(max_shw):
    if j >= shwEvec.shape0: break
    if j%nfeature==0: BX[i,j,:] = shwEvec[j]
    elif j%nfeature==1: BX[i,j,:] = shwlen[j]
    elif j%nfeature==2: BX[i,j,:] = hitdif[j]
    elif j%nfeature==3: BX[i,j,:] = emulll[j]
    elif j%nfeature==4: BX[i,j,:] = epi0lll[j]
    elif j%nfeature==5: BX[i,j,:] = eplll[j]
    elif j%nfeature==6: BX[i,j,:] = enlll[j]
    elif j%nfeature==7: BX[i,j,:] = epilll[j]
    elif j%nfeature==8: BX[i,j,:] = elll[j]
    elif j%nfeature==9: BX[i,j,:] = emullt[j]
    elif j%nfeature==10: BX[i,j,:] = epi0llt[j]
    elif j%nfeature==11: BX[i,j,:] = epllt[j]
    elif j%nfeature==12: BX[i,j,:] = enllt[j]
    elif j%nfeature==13: BX[i,j,:] = epillt[j]
    elif j%nfeature==14: BX[i,j,:] = ellt[j]
    elif j%nfeature==15: BX[i,j,:] = miss[j]
    elif j%nfeature==16: BX[i,j,:] = gap[j]
    nshw = BgNShw[i]
    nbg
    =1

    #Remove slices which failed cuts
    SX = SX[:nsig,:,:]
    BX = BX[:nbg,:,:]
    #Create labels
    SY = np.ones((SX.shape0,1))
    BY = np.zeros((BX.shape0,1))
    SOsc = SOsc[:nsig,:]
    BOsc = BOsc[:nbg,:]
    SLID = SLID[:nsig,:]
    BLID = BLID[:nbg,:]
    #Append and shuffle
    X = np.append(SX,BX,axis=0)
    H = np.append(SH,BH,axis=0)
    Y = np.append(SY,BY,axis=0)
    Osc = np.append(SOsc,BOsc,axis=0)
    LID = np.append(SLID,BLID,axis=0)
    permutation = list(np.random.permutation(X.shape0))
    X = X[permutation,:,:]
    H = H[permutation,:]
    Y = Y[permutation,:]
    Osc = Osc[permutation,:]
    LID = LID[permutation,:]

    np.savez(outfile,X=X,H=H,Y=Y,Osc=Osc,LID=LID,POT=POT)
    return X,H,Y,Osc,LID,POT

X,H,Y,Osc,LID,POT = TreeOpen()