justIN           Dashboard       Workflows       Jobs       AWT       Sites       Storages       Docs       Login

Workflow 1470, Stage 1

Priority50
Processors1
Wall seconds3600
Image/cvmfs/singularity.opensciencegrid.org/fermilab/fnal-wn-sl7:latest
RSS bytes2097152000 (2000 MiB)
Max distance for inputs102.0
Enabled input RSEs CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, MONTECARLO, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC
Enabled output RSEs CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC
Enabled sites US_NERSC-CPU
Scopetestpro
Events for this stage

Output patterns

 DestinationPatternLifetimeFor next stageRSE expression
1Rucio testpro:T3_US_NERSC-fnal-w1470s1p1*.log2592000False
2Rucio testpro:T3_US_NERSC-fnal-w1470s1p2*.root2592000False

Environment variables

NameValue
DATA_STREAMcalibrated
DATA_TIERreco_pandora
DATA_TYPEdata
DEBUG_SUBMISSION_SCRIPT0
DETECTOR_CONFIGproto_nd
END_POSITIONNone
JOBSCRIPT_TEST0
MX2_WORKFLOW_ID1
NEVENTS-1
RUN_CAF_MX20
RUN_CAF_PANDORA0
RUN_CAF_PANDORA_MX20
RUN_CAF_PANDORA_SPINE0
RUN_CAF_PANDORA_SPINE_MX20
RUN_CAF_SPINE0
RUN_CAF_SPINE_MX20
RUN_PERIODrun1
SPINE_WORKFLOW_ID1
START_POSITIONNone
TWOBYTWO_RELEASEv1.5.0
USERduneproshift

File states

Total filesFindingUnallocatedAllocatedOutputtingProcessedNot foundFailed
1300001300

Job states

TotalSubmittedStartedProcessingOutputtingFinishedNotusedAbortedStalledJobscript errorOutputting failedNone processed
25000022030000
Files processed001122334455667788Aug-18 20:00Aug-18 21:00Aug-18 22:00Aug-18 23:00Files processedBin start timesNumber per binUS_NERSC-CPU
Replicas per RSE13380.00057375369.7499999985656Replicas per RSET3_US_NERSC (100%)

RSEs used

NameInputsOutputs
T3_US_NERSC1657

Stats of processed input files as CSV or JSON, and of uploaded output files as CSV or JSON (up to 10000 files included)

File reset events, by site

SiteAllocatedOutputting
US_NERSC-CPU30

Jobscript

#!/bin/bash

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#
# 	This script for running the pandora workflow is based on the data production
# 	development by Matt Kramer (https://github.com/DUNE/2x2_sim/blob/feature_spine_on_data/run-pandora)
# 
#	Starting on July 1, 2025, please use the software deployed on dune cvmfs repository
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


#++++++++++++++++++++++++++++++++++++++++
# the script with common functions
#++++++++++++++++++++++++++++++++++++++++
# Sources the shared justIN helper library for the 2x2 production release.
# Presumably provides justin_begin_of_job_commands, justin_end_of_job_commands,
# create_metadata_file and the CVMFS_*/GCC_VERSION/ROOT_* variables used
# below — TODO confirm against NDUtilsForJustin.sh.
# TWOBYTWO_RELEASE is injected by the justIN stage environment (e.g. v1.5.0).
source /cvmfs/dune.opensciencegrid.org/dunend/2x2/releases/${TWOBYTWO_RELEASE}/ndlar_prod_scripts/ND_Production/toolbox/scripts/NDUtilsForJustin.sh


#++++++++++++++++++++++++++++++++++++++++++
# sanity check
#++++++++++++++++++++++++++++++++++++++++++
# DATA_TIER comes from the justIN stage environment; this jobscript only
# implements the "reco_pandora" tier.
if [[ "${DATA_TIER}" != "reco_pandora" ]]; then
   echo -e "This script [$(basename $BASH_SOURCE)] submits the Pandora reconstruction jobs. Please see the help menu. The data tier is not defined correctly." 
   # NOTE(review): exits 0 (success) on a misconfiguration — presumably
   # deliberate so justIN does not retry the job; confirm before changing.
   exit 0
fi
 

#++++++++++++++++++++++++++++++++++++++++
# Begin JustIN
#++++++++++++++++++++++++++++++++++++++++
# Helper from NDUtilsForJustin.sh. Presumably allocates the next input file
# and defines ${did}, ${INPUT_FILE}, ${WORKSPACE} and ${envlog} used by the
# rest of this script — TODO confirm.
justin_begin_of_job_commands


#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# containers to store the parent and child files
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# ${did} is the DID of the allocated input file; CREATED_FILES collects the
# output file names appended by the workflow functions (used for metadata).
PARENT_FILES=("${did}")
CREATED_FILES=()


#+++++++++++++++++++++++++++++++++++++++++
# Run the hdf5 to root workflow
#+++++++++++++++++++++++++++++++++++++++++
# Converts the allocated hdf5 input (${INPUT_FILE}) to a ROOT file with the
# LArRecoND h5_to_root converter (run inside a conda environment), then runs
# the rootToRootConversion.C macro to produce the final *.FLOW.ASSOC.root
# file, which is moved to ${OUTFILES_DIR}.
# Globals read : INPUT_FILE, DATA_STREAM, DATA_TYPE, CVMFS_TWOBYTWO_DIR,
#                CVMFS_WORKING_DIR, EXTERNAL_RELEASE, TWOBYTWO_RELEASE,
#                GCC_VERSION, ROOT_VERSION, ROOT_QUALIFIER, OUTFILES_DIR,
#                WORKSPACE, envlog
# Globals set  : CONVERT_DATA_WORKFLOW, FLOW_OUTPUT_FILE (consumed by
#                execute_pandora_workflow)
execute_hdf5_root_workflow() {
   echo -e "Enter executing the hdf5 to root  workflow for data stream [${DATA_STREAM}] and input file [${INPUT_FILE}]" 2>&1 | tee -a $envlog

   # Output name = first two '-'-separated fields of the input file name,
   # plus a CDT timestamp; the ".0.root" intermediate is later collapsed to
   # the final ".root" name by the macro step.
   IFS='-' read -a flist <<< "${INPUT_FILE}"
   TMP_OUTPUT_FILE="${flist[0]}-${flist[1]}-$(date +'%Y_%m_%d_%H_%M_%S')_CDT.FLOW.ASSOC.0.root"
   ROOT_OUTPUT_FILE="${TMP_OUTPUT_FILE/.ASSOC.0.root/.ASSOC.root}" 

   # Bring conda itself and the release's pre-built environments into scope.
   source ${CVMFS_TWOBYTWO_DIR}/miniforge/${EXTERNAL_RELEASE}/miniforge3/etc/profile.d/conda.sh
   source ${CVMFS_TWOBYTWO_DIR}/miniforge/${EXTERNAL_RELEASE}/conda_envs/conda.envs.sh

   export CONVERT_DATA_WORKFLOW=${CVMFS_WORKING_DIR}/pandora/LArRecoND/ndlarflow/h5_to_root_ndlarflow.py
   if [ ! -f "${CONVERT_DATA_WORKFLOW}" ]; then
      echo -e "\tCannot run the convert raw data to packet data. The file [${CONVERT_DATA_WORKFLOW}] does not exist." 2>&1 | tee -a $envlog
      # NOTE(review): exits 0 on a fatal condition — presumably so justIN
      # handles the failure via file states rather than a job abort; confirm.
      exit 0
   fi   

   # Converter flags: data vs MC are mutually exclusive; default is real
   # data, flipped when DATA_TYPE=mc. isFinal=1 requests the final pass.
   isMC=0 
   isFinal=1
   isData=1
   if [[ "${DATA_TYPE}" == "mc" ]]; then 
      isData=0
      isMC=1
   fi

   source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
   setup gcc ${GCC_VERSION} 

   conda activate ndlar_flow_${TWOBYTWO_RELEASE}
   # CONDA_DEFAULT_ENV is only set when activation succeeded.
   if [ -z "${CONDA_DEFAULT_ENV}" ]; then
       echo -e "The conda virtual environment is not activated [ ndlar_flow_${TWOBYTWO_RELEASE} ]. exiting." 2>&1 | tee -a $envlog
       exit 0
   fi

   echo -e "\tThe current conda virtual environment is activated: [${CONDA_DEFAULT_ENV}]" 2>&1 | tee -a $envlog
   echo -e "\tRunning the pandora workflow in converting hdf5 to root." 2>&1 | tee -a $envlog 
   echo -e "\t[ python3 ${CONVERT_DATA_WORKFLOW} ${INPUT_FILE} ${isData} ${isFinal} ${TMP_OUTPUT_FILE} ]" 2>&1 | tee -a $envlog
   python3 ${CONVERT_DATA_WORKFLOW} ${INPUT_FILE} ${isData} ${isFinal} ${TMP_OUTPUT_FILE}

   echo -e "\nExit the conda environment [${CONDA_DEFAULT_ENV}]" 2>&1 | tee -a $envlog
   conda deactivate
   
   if [ ! -f ${TMP_OUTPUT_FILE} ]; then
      echo -e "\tCannot continue. The file [${TMP_OUTPUT_FILE}] was not created." 2>&1 | tee -a $envlog
      exit 0
   fi

   if [ ! -f "${CVMFS_WORKING_DIR}/pandora/LArRecoND/ndlarflow/rootToRootConversion.C" ]; then
      echo -e "\tCannnot run the final step in the root conversion workflow. The file [${CVMFS_WORKING_DIR}/pandora/LArRecoND/ndlarflow/rootToRootConversion.C] does not exist." 2>&1 | tee -a $envlog
      exit 0
   else
      # Run the ROOT macro step in a subshell so the 'setup root'
      # environment changes do not leak into the rest of the job.
      (
        setup root ${ROOT_VERSION} -q ${ROOT_QUALIFIER}
        cp ${CVMFS_WORKING_DIR}/pandora/LArRecoND/ndlarflow/rootToRootConversion.C .
        echo -e "\t[ root -l -q rootToRootConversion.C+\(${isMC},\"${TMP_OUTPUT_FILE}\",\"${ROOT_OUTPUT_FILE}\"\) ]" 2>&1 | tee -a $envlog  
        root -l -q rootToRootConversion.C+\(${isMC},\"${TMP_OUTPUT_FILE}\",\"${ROOT_OUTPUT_FILE}\"\)
      )
   fi

   # Clean up the intermediate file and ACLiC build artifacts.
   rm -f ${TMP_OUTPUT_FILE}
   rm -f rootToRootConversion*
   rm -f AutoDict*
   
   # Hand the final FLOW file to execute_pandora_workflow via the env.
   export FLOW_OUTPUT_FILE="${ROOT_OUTPUT_FILE}"
   if [ -f ${FLOW_OUTPUT_FILE} ]; then
      mv ${FLOW_OUTPUT_FILE} ${OUTFILES_DIR}/
   else 
      echo -e "FATAL::The file [${FLOW_OUTPUT_FILE}] does not exist! Will not continue." 2>&1 | tee -a $envlog
      exit 1 
   fi

   cd ${WORKSPACE}
   echo -e "Exit executing the hdf5 to root  workflow for data stream [${DATA_STREAM}]\n" 2>&1 | tee -a $envlog
}


#+++++++++++++++++++++++++++++++++++++++++++++++++++
# Run the pandora workflow for ndlar root files
#+++++++++++++++++++++++++++++++++++++++++++++++++++
# Runs the LArRecoND PandoraInterface reconstruction on the FLOW ROOT file
# produced by execute_hdf5_root_workflow and moves the resulting
# *.LAR_RECO_ND.root file to ${OUTFILES_DIR}.
# Globals read : FLOW_OUTPUT_FILE, DATA_STREAM, DATA_TYPE, DETECTOR_CONFIG,
#                CVMFS_WORKING_DIR, GCC_VERSION, TBB_VERSION, TBB_QUALIFIER,
#                OUTFILES_DIR, WORKSPACE, envlog
# Globals set  : PANDORA_OUTPUT_DATAFILE, PANDORA_DET_GEOM, PANDORA_SETTINGS,
#                LD_LIBRARY_PATH; appends the output file to CREATED_FILES
execute_pandora_workflow() {
   echo -e "Enter executing the pandora workflow for data stream [${DATA_STREAM}]" 2>&1 | tee -a $envlog

   # Derive the reconstruction output name from the FLOW file name.
   export PANDORA_OUTPUT_DATAFILE="${FLOW_OUTPUT_FILE/FLOW.ASSOC.root/LAR_RECO_ND.root}"
   echo -e "\tThe pandora output file name is [${PANDORA_OUTPUT_DATAFILE}]"  2>&1 | tee -a $envlog

   source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
   setup gcc ${GCC_VERSION}
   setup tbb ${TBB_VERSION} -q ${TBB_QUALIFIER}

   export LD_LIBRARY_PATH=${CVMFS_WORKING_DIR}/pandora/PandoraSDK/lib:${CVMFS_WORKING_DIR}/pandora/LArContent/lib:${CVMFS_WORKING_DIR}/pandora/PandoraMonitoring/lib:${CVMFS_WORKING_DIR}/pandora/LArRecoND/lib:${LD_LIBRARY_PATH}

   # Input format passed to PandoraInterface: SP for data, SPMC for MC.
   PANDORA_INPUT_FORMAT=SP
   if [[ "${DATA_TYPE}" == "mc" ]]; then
      PANDORA_INPUT_FORMAT=SPMC
   fi

   # Only the proto_nd (2x2 + MINERvA) geometry is supported here.
   export PANDORA_DET_GEOM=""
   if [[ "${DETECTOR_CONFIG}" == "proto_nd" ]]; then
      export PANDORA_DET_GEOM=${CVMFS_WORKING_DIR}/pandora/LArRecoND/Merged2x2MINERvA_v4_withRock.root
   else
      echo -e "FATAL::The detector [${DETECTOR_CONFIG}] root file does not exist. Cannot continue with executing the Pandora reconstruction.\n" 2>&1 | tee -a $envlog
      # NOTE(review): exits 0 on a fatal condition — presumably so justIN
      # tracks the failure via file states rather than a job abort; confirm.
      exit 0
   fi

   export PANDORA_SETTINGS=${CVMFS_WORKING_DIR}/pandora/LArRecoND/settings/PandoraSettings_LArRecoND_ThreeD.xml

   echo -e "\tSetup the build area [ source ${CVMFS_WORKING_DIR}/pandora/LArRecoND/scripts/tags.sh ${CVMFS_WORKING_DIR}/pandora ]" 2>&1 | tee -a $envlog 
   source ${CVMFS_WORKING_DIR}/pandora/LArRecoND/scripts/tags.sh ${CVMFS_WORKING_DIR}/pandora

   echo -e "\tRun the pandora workflow:" 2>&1 | tee -a $envlog
   # BUGFIX: the logged command previously showed "-e ${IN_FILE} ... -M -N",
   # which did not match the command actually executed below
   # ("-e ${FLOW_OUTPUT_FILE} ... -M"); the log now mirrors the execution.
   echo -e "\t\t[ ${CVMFS_WORKING_DIR}/pandora/LArRecoND/bin/PandoraInterface -i ${PANDORA_SETTINGS} -r AllHitsSliceNu -f ${PANDORA_INPUT_FORMAT} -g ${PANDORA_DET_GEOM} -e ${FLOW_OUTPUT_FILE} -j both -M ]" 2>&1 | tee -a $envlog
   ${CVMFS_WORKING_DIR}/pandora/LArRecoND/bin/PandoraInterface -i ${PANDORA_SETTINGS} -r AllHitsSliceNu -f ${PANDORA_INPUT_FORMAT} -g ${PANDORA_DET_GEOM} -e ${FLOW_OUTPUT_FILE} -j both -M 

   if [ -f "LArRecoND.root" ]; then
      mv LArRecoND.root ${PANDORA_OUTPUT_DATAFILE}
      # BUGFIX: was "mv ${PANDORA_OUTFILE} ..." — an undefined variable, so
      # the renamed output was never actually moved to ${OUTFILES_DIR}.
      mv ${PANDORA_OUTPUT_DATAFILE} ${OUTFILES_DIR}/
   else 
      echo -e "FATAL::The file [${PANDORA_OUTPUT_DATAFILE}] does not exist! Will not continue." 2>&1 | tee -a $envlog
      exit 1
   fi
   
   # Remove Pandora by-products and the consumed FLOW input file.
   rm -f MCHierarchy.root
   rm -f EventHierarchy.root
   rm -f ${FLOW_OUTPUT_FILE}

   CREATED_FILES+=("${PANDORA_OUTPUT_DATAFILE}")

   cd ${WORKSPACE}
   echo -e "Exit executing the pandora workflow for data stream [${DATA_STREAM}]\n" 2>&1 | tee -a $envlog
}


#+++++++++++++++++++++++++++++++++++++++++
# create an output directory
#+++++++++++++++++++++++++++++++++++++++++
# The justIN workspace doubles as the output directory the workflow
# functions move their products into.
cd ${WORKSPACE}
export OUTFILES_DIR=${WORKSPACE}
echo -e "The output files are placed in the directory [$OUTFILES_DIR]\n" 2>&1 | tee -a $envlog
# BUGFIX: quote and default to 0 so an unset/empty DEBUG_SUBMISSION_SCRIPT
# no longer breaks the numeric test ("[: -eq: unary operator expected").
if [ "${DEBUG_SUBMISSION_SCRIPT:-0}" -eq 1 ]; then
   ls -lhrt ${OUTFILES_DIR} 2>&1 | tee -a $envlog
fi


#++++++++++++++++++++++++++++++++++++++
# execute the jobs
#+++++++++++++++++++++++++++++++++++++
echo -e "\n\n" 2>&1 | tee -a $envlog
# Stage 1: hdf5 -> *.FLOW.ASSOC.root; stage 2: Pandora reconstruction.
execute_hdf5_root_workflow
execute_pandora_workflow
WORKFLOW+=("pandora")
# Metadata fields presumably consumed by create_metadata_file — confirm
# against NDUtilsForJustin.sh.
export NAMESPACE="neardet-2x2-lar"
export APPLICATION_DATA_TIER="pandora-reconstruction"


#++++++++++++++++++++++++++++++++++++++++
# create metadata json file
#++++++++++++++++++++++++++++++++++++++++
create_metadata_file


#+++++++++++++++++++++++++++++++++++++++
# End of justin job running
#+++++++++++++++++++++++++++++++++++++++
justin_end_of_job_commands



######################################
#
# END OF RUNNING NDLAr PANDORA JOBS
#
######################################

exit 0
justIN time: 2025-09-19 08:27:15 UTC       justIN version: 01.05.00