Stats of processed input files as CSV or JSON, and of uploaded output files as CSV or JSON (up to 10000 files included)
Jobscript
#!/bin/bash
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#
# This script for running the pandora workflow is based on the data production
# development by Matt Kramer (https://github.com/DUNE/2x2_sim/blob/feature_spine_on_data/run-pandora)
#
# Starting on July 1, 2025, please use the software deployed on the DUNE CVMFS repository.
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#++++++++++++++++++++++++++++++++++++++++++
# sanity check
#++++++++++++++++++++++++++++++++++++++++++
# Guard clause: this jobscript only handles the "reco" data tier.
# For any other value of DATA_TIER it prints a hint and stops with status 0.
[[ "${DATA_TIER}" == "reco" ]] || {
  echo -e "This script [$(basename $BASH_SOURCE)] submits the Pandora reconstruction jobs. Please see the help menu. The data tier is not defined correctly."
  exit 0
}
#++++++++++++++++++++++++++++++++++++++++
# the script with common functions
#
# The helper is read from the release area selected by TWOBYTWO_RELEASE.
# NOTE(review): create_metadata_file and justin_end_of_job_commands are called
# later in this file but are not defined here -- presumably they (and variables
# such as envlog and WORKSPACE) come from this helper; confirm.
#++++++++++++++++++++++++++++++++++++++++
source /cvmfs/dune.opensciencegrid.org/dunend/2x2/releases/${TWOBYTWO_RELEASE}/ndlar_prod_scripts/ND_Production/toolbox/scripts/NDUtilsForJustin.sh
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# containers to store the parent and child files
#
# PARENT_FILES starts with the single value of ${did}.
# NOTE(review): ${did} is not set in this file -- presumably the identifier of
# the input file provided by the job environment or the helper above; confirm.
# CREATED_FILES starts empty; execute_pandora_workflow appends the name of the
# reconstruction output file to it.
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
PARENT_FILES=("${did}")
CREATED_FILES=()
#+++++++++++++++++++++++++++++++++++++++++
# Run the hdf5 to root workflow
#
# Converts ${INPUT_FILE} into a ROOT file in two steps:
#   1. h5_to_root_ndlarflow.py, run with python3 inside the conda environment
#      ndlar_flow_${TWOBYTWO_RELEASE}, writes an intermediate *.FLOW.ASSOC.0.root file;
#   2. the ROOT macro rootToRootConversion.C turns it into *.FLOW.ASSOC.root.
# The intermediate file and the macro build products are removed afterwards and
# the final file is placed in ${OUTFILES_DIR}.
#
# Globals read:    INPUT_FILE, DATA_STREAM, DATA_TYPE, TWOBYTWO_RELEASE,
#                  EXTERNAL_RELEASE, CVMFS_TWOBYTWO_DIR, CVMFS_WORKING_DIR,
#                  GCC_VERSION, ROOT_VERSION, ROOT_QUALIFIER, OUTFILES_DIR,
#                  WORKSPACE, envlog
# Globals written: flist, TMP_OUTPUT_FILE, ROOT_OUTPUT_FILE, isMC, isFinal,
#                  isData, CONVERT_DATA_WORKFLOW (exported),
#                  FLOW_OUTPUT_FILE (exported, bare file name)
# Arguments:       none
# Exit status:     terminates the script with 0 when a prerequisite file, the
#                  conda environment or the intermediate file is missing, and
#                  with 1 when the final ROOT file is missing.
#                  NOTE(review): exiting 0 on those error paths reports success
#                  to whatever launched the job -- confirm this is intended.
#+++++++++++++++++++++++++++++++++++++++++
execute_hdf5_root_workflow() {
  echo -e "Enter executing the hdf5 to root workflow for data stream [${DATA_STREAM}] and input file [${INPUT_FILE}]" 2>&1 | tee -a "${envlog}"

  # The output names reuse the first two '-'-separated fields of the input
  # file name followed by a time stamp. -r keeps backslashes literal.
  # NOTE(review): the stamp is labelled CDT but `date` reports the time zone
  # of the machine it runs on -- confirm the label is correct for the workers.
  IFS='-' read -r -a flist <<< "${INPUT_FILE}"
  TMP_OUTPUT_FILE="${flist[0]}-${flist[1]}-$(date +'%Y_%m_%d_%H_%M_%S')_CDT.FLOW.ASSOC.0.root"
  ROOT_OUTPUT_FILE="${TMP_OUTPUT_FILE/.ASSOC.0.root/.ASSOC.root}"

  source "${CVMFS_TWOBYTWO_DIR}/miniforge/${EXTERNAL_RELEASE}/miniforge3/etc/profile.d/conda.sh"
  source "${CVMFS_TWOBYTWO_DIR}/miniforge/${EXTERNAL_RELEASE}/conda_envs/conda.envs.sh"

  export CONVERT_DATA_WORKFLOW="${CVMFS_WORKING_DIR}/pandora/LArRecoND/ndlarflow/h5_to_root_ndlarflow.py"
  if [[ ! -f "${CONVERT_DATA_WORKFLOW}" ]]; then
    echo -e "\tCannot run the hdf5 to root conversion. The file [${CONVERT_DATA_WORKFLOW}] does not exist." 2>&1 | tee -a "${envlog}"
    exit 0
  fi

  # Flags handed to the converters: data is the default, "mc" flips both.
  isMC=0
  isFinal=1
  isData=1
  if [[ "${DATA_TYPE}" == "mc" ]]; then
    isData=0
    isMC=1
  fi

  source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
  # Version/qualifier values are passed to `setup` exactly as before (unquoted).
  setup gcc ${GCC_VERSION}
  conda activate "ndlar_flow_${TWOBYTWO_RELEASE}"
  if [[ -z "${CONDA_DEFAULT_ENV}" ]]; then
    echo -e "The conda virtual environment is not activated [ ndlar_flow_${TWOBYTWO_RELEASE} ]. exiting." 2>&1 | tee -a "${envlog}"
    exit 0
  fi

  echo -e "\tThe current conda virtual environment is activated: [${CONDA_DEFAULT_ENV}]" 2>&1 | tee -a "${envlog}"
  echo -e "\tRunning the pandora workflow in converting hdf5 to root." 2>&1 | tee -a "${envlog}"
  echo -e "\t[ python3 ${CONVERT_DATA_WORKFLOW} ${INPUT_FILE} ${isData} ${isFinal} ${TMP_OUTPUT_FILE} ]" 2>&1 | tee -a "${envlog}"
  python3 "${CONVERT_DATA_WORKFLOW}" "${INPUT_FILE}" "${isData}" "${isFinal}" "${TMP_OUTPUT_FILE}"

  echo -e "\nExit the conda environment [${CONDA_DEFAULT_ENV}]" 2>&1 | tee -a "${envlog}"
  conda deactivate

  # The converter's exit status is not inspected; success is judged by the file.
  if [[ ! -f "${TMP_OUTPUT_FILE}" ]]; then
    echo -e "\tCannot continue. The file [${TMP_OUTPUT_FILE}] was not created." 2>&1 | tee -a "${envlog}"
    exit 0
  fi

  local root_macro="${CVMFS_WORKING_DIR}/pandora/LArRecoND/ndlarflow/rootToRootConversion.C"
  if [[ ! -f "${root_macro}" ]]; then
    echo -e "\tCannot run the final step in the root conversion workflow. The file [${root_macro}] does not exist." 2>&1 | tee -a "${envlog}"
    exit 0
  fi

  # Runs in a subshell: whatever `setup root` changes in the environment is
  # discarded when the subshell ends.
  (
    setup root ${ROOT_VERSION} -q ${ROOT_QUALIFIER}
    cp "${root_macro}" .
    echo -e "\t[ root -l -q rootToRootConversion.C+\(${isMC},\"${TMP_OUTPUT_FILE}\",\"${ROOT_OUTPUT_FILE}\"\) ]" 2>&1 | tee -a "${envlog}"
    root -l -q "rootToRootConversion.C+(${isMC},\"${TMP_OUTPUT_FILE}\",\"${ROOT_OUTPUT_FILE}\")"
  )

  # Remove the intermediate file and the local macro copy / build products.
  rm -f -- "${TMP_OUTPUT_FILE}"
  rm -f rootToRootConversion*
  rm -f AutoDict*

  export FLOW_OUTPUT_FILE="${ROOT_OUTPUT_FILE}"
  if [[ ! -f "${FLOW_OUTPUT_FILE}" ]]; then
    echo -e "FATAL::The file [${FLOW_OUTPUT_FILE}] does not exist! Will not continue." 2>&1 | tee -a "${envlog}"
    exit 1
  fi

  # Only move when the file is not already in OUTFILES_DIR: at the top level of
  # this script OUTFILES_DIR is set to WORKSPACE, and moving a file onto itself
  # makes mv fail with a "same file" error.
  if [[ ! "${FLOW_OUTPUT_FILE}" -ef "${OUTFILES_DIR}/${FLOW_OUTPUT_FILE}" ]]; then
    mv -- "${FLOW_OUTPUT_FILE}" "${OUTFILES_DIR}/"
  fi

  cd "${WORKSPACE}"
  echo -e "Exit executing the hdf5 to root workflow for data stream [${DATA_STREAM}]\n" 2>&1 | tee -a "${envlog}"
}
#+++++++++++++++++++++++++++++++++++++++++++++++++++
# Run the pandora workflow for ndlar root files
#
# Runs PandoraInterface on ${FLOW_OUTPUT_FILE}, renames the LArRecoND.root file
# it leaves in the current directory to *.LAR_RECO_ND.root, places that file in
# ${OUTFILES_DIR}, removes the hierarchy files and the input ROOT file, and
# appends the output name to CREATED_FILES.
#
# Globals read:    FLOW_OUTPUT_FILE, DATA_STREAM, DATA_TYPE, DETECTOR_CONFIG,
#                  CVMFS_WORKING_DIR, GCC_VERSION, TBB_VERSION, TBB_QUALIFIER,
#                  OUTFILES_DIR, WORKSPACE, envlog
# Globals written: PANDORA_OUTPUT_DATAFILE, LD_LIBRARY_PATH, PANDORA_DET_GEOM,
#                  PANDORA_SETTINGS (all exported), PANDORA_INPUT_FORMAT,
#                  CREATED_FILES
# Arguments:       none
# Exit status:     terminates the script with 0 when DETECTOR_CONFIG is not
#                  "proto_nd" and with 1 when LArRecoND.root was not produced.
#                  NOTE(review): the first case is logged as FATAL but exits 0
#                  -- confirm this is intended.
#+++++++++++++++++++++++++++++++++++++++++++++++++++
execute_pandora_workflow() {
  echo -e "Enter executing the pandora workflow for data stream [${DATA_STREAM}]" 2>&1 | tee -a "${envlog}"

  export PANDORA_OUTPUT_DATAFILE="${FLOW_OUTPUT_FILE/FLOW.ASSOC.root/LAR_RECO_ND.root}"
  echo -e "\tThe pandora output file name is [${PANDORA_OUTPUT_DATAFILE}]" 2>&1 | tee -a "${envlog}"

  source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
  # Version/qualifier values are passed to `setup` exactly as before (unquoted).
  setup gcc ${GCC_VERSION}
  setup tbb ${TBB_VERSION} -q ${TBB_QUALIFIER}
  export LD_LIBRARY_PATH="${CVMFS_WORKING_DIR}/pandora/PandoraSDK/lib:${CVMFS_WORKING_DIR}/pandora/LArContent/lib:${CVMFS_WORKING_DIR}/pandora/PandoraMonitoring/lib:${CVMFS_WORKING_DIR}/pandora/LArRecoND/lib:${LD_LIBRARY_PATH}"

  # Input format handed to PandoraInterface via -f: SP by default, SPMC for "mc".
  PANDORA_INPUT_FORMAT=SP
  if [[ "${DATA_TYPE}" == "mc" ]]; then
    PANDORA_INPUT_FORMAT=SPMC
  fi

  # Only the proto_nd detector configuration has a geometry file wired in here.
  export PANDORA_DET_GEOM=""
  if [[ "${DETECTOR_CONFIG}" == "proto_nd" ]]; then
    export PANDORA_DET_GEOM="${CVMFS_WORKING_DIR}/pandora/LArRecoND/Merged2x2MINERvA_v4_withRock.root"
  else
    echo -e "FATAL::The detector [${DETECTOR_CONFIG}] root file does not exist. Cannot continue with executing the Pandora reconstruction.\n" 2>&1 | tee -a "${envlog}"
    exit 0
  fi

  export PANDORA_SETTINGS="${CVMFS_WORKING_DIR}/pandora/LArRecoND/settings/PandoraSettings_LArRecoND_ThreeD.xml"

  echo -e "\tSetup the build area [ source ${CVMFS_WORKING_DIR}/pandora/LArRecoND/scripts/tags.sh ${CVMFS_WORKING_DIR}/pandora ]" 2>&1 | tee -a "${envlog}"
  source "${CVMFS_WORKING_DIR}/pandora/LArRecoND/scripts/tags.sh" "${CVMFS_WORKING_DIR}/pandora"

  # The command is built once so the logged line is exactly what is executed
  # (the log used to show an unset IN_FILE and an extra -N flag).
  local -a pandora_cmd=(
    "${CVMFS_WORKING_DIR}/pandora/LArRecoND/bin/PandoraInterface"
    -i "${PANDORA_SETTINGS}"
    -r AllHitsSliceNu
    -f "${PANDORA_INPUT_FORMAT}"
    -g "${PANDORA_DET_GEOM}"
    -e "${FLOW_OUTPUT_FILE}"
    -j both
    -M
  )
  echo -e "\tRun the pandora workflow:" 2>&1 | tee -a "${envlog}"
  echo -e "\t\t[ ${pandora_cmd[*]} ]" 2>&1 | tee -a "${envlog}"
  "${pandora_cmd[@]}"

  if [[ ! -f "LArRecoND.root" ]]; then
    echo -e "FATAL::The file [${PANDORA_OUTPUT_DATAFILE}] does not exist! Will not continue." 2>&1 | tee -a "${envlog}"
    exit 1
  fi

  mv -- LArRecoND.root "${PANDORA_OUTPUT_DATAFILE}"
  # The move used to reference PANDORA_OUTFILE, which is never defined.
  # Only move when the file is not already in OUTFILES_DIR: at the top level of
  # this script OUTFILES_DIR is set to WORKSPACE, and moving a file onto itself
  # makes mv fail with a "same file" error.
  if [[ ! "${PANDORA_OUTPUT_DATAFILE}" -ef "${OUTFILES_DIR}/${PANDORA_OUTPUT_DATAFILE}" ]]; then
    mv -- "${PANDORA_OUTPUT_DATAFILE}" "${OUTFILES_DIR}/"
  fi

  # Remove the hierarchy files and the input ROOT file.
  rm -f MCHierarchy.root
  rm -f EventHierarchy.root
  rm -f -- "${FLOW_OUTPUT_FILE}"

  CREATED_FILES+=("${PANDORA_OUTPUT_DATAFILE}")

  cd "${WORKSPACE}"
  echo -e "Exit executing the pandora workflow for data stream [${DATA_STREAM}]\n" 2>&1 | tee -a "${envlog}"
}
#+++++++++++++++++++++++++++++++++++++++++
# create an output directory
#
# The workspace itself is used as the output directory. The script stops here
# when WORKSPACE is unset or cannot be entered; a bare `cd` with an empty
# argument would silently switch to the home directory instead.
#+++++++++++++++++++++++++++++++++++++++++
cd "${WORKSPACE:?WORKSPACE is not set}" || exit 1
export OUTFILES_DIR="${WORKSPACE}"
echo -e "The output files are placed in the directory [$OUTFILES_DIR]\n" 2>&1 | tee -a "${envlog}"
# An unset debug flag is treated as 0 (no listing).
if [[ "${DEBUG_SUBMISSION_SCRIPT:-0}" -eq 1 ]]; then
  ls -lhrt "${OUTFILES_DIR}" 2>&1 | tee -a "${envlog}"
fi
#++++++++++++++++++++++++++++++++++++++
# execute the jobs
#
# The hdf5 to root conversion runs first; it exports FLOW_OUTPUT_FILE, which
# the pandora step reads as its input.
#+++++++++++++++++++++++++++++++++++++
echo -e "\n\n" 2>&1 | tee -a "${envlog}"
execute_hdf5_root_workflow
execute_pandora_workflow
# NOTE(review): WORKFLOW, NAMESPACE and APPLICATION_DATA_TIER are not read in
# this file -- presumably consumed by create_metadata_file; confirm.
WORKFLOW+=("pandora")
export NAMESPACE="neardet-2x2-lar"
export APPLICATION_DATA_TIER="pandora-reconstruction"
#++++++++++++++++++++++++++++++++++++++++
# create metadata json file
#++++++++++++++++++++++++++++++++++++++++
create_metadata_file
#+++++++++++++++++++++++++++++++++++++++
# End of justin job running
#+++++++++++++++++++++++++++++++++++++++
justin_end_of_job_commands
######################################
#
# END OF RUNNING NDLAr PANDORA JOBS
#
######################################
exit 0
justIN time: 2025-08-04 14:07:45 UTC justIN version: 01.04.00