Workflow 115, Stage 1

Priority	50
Processors	1
Wall seconds	3600
Image	/cvmfs/singularity.opensciencegrid.org/fermilab/fnal-wn-sl7:latest
RSS bytes	2097152000 (2000 MiB)
Max distance for inputs	102.0
Enabled input RSEs	CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, MONTECARLO, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC
Enabled output RSEs	CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC
Enabled sites	US_NERSC-CPU
Scope	testpro
Events for this stage

Output patterns

	Destination	Pattern	Lifetime	For next stage	RSE expression
1	Rucio testpro:T3_US_NERSC-fnal-w115s1p1	*.log	1209600	False
2	Rucio testpro:T3_US_NERSC-fnal-w115s1p2	*.root	1209600	False

Environment variables

Name	Value
DATA_STREAM	pandora
DATA_TIER	reco
DATA_TYPE	data
DEBUG_SUBMISSION_SCRIPT	0
DETECTOR_CONFIG	proto_nd
END_POSITION	None
JOBSCRIPT_TEST	0
NEVENTS	-1
RUN_PERIOD	run1
START_POSITION	None
TWOBYTWO_RELEASE	v1.5.0
USER	duneproshift

File states

Total files	Finding	Unallocated	Allocated	Outputting	Processed	Not found	Failed
1	0	0	0	0	1	0	0

Job states

Total	Submitted	Started	Processing	Outputting	Finished	Notused	Aborted	Stalled	Jobscript error	Outputting failed	None processed
2	0	0	0	0	2	0	0	0	0	0	0

RSEs used

Name	Inputs	Outputs
RAL_ECHO	1	0
T3_US_NERSC	0	5

Stats of processed input files as CSV or JSON, and of uploaded output files as CSV or JSON (up to 10000 files included)

Jobscript

#!/bin/bash

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#
# 	This script for running the pandora workflow is based on the data production
# 	development by Matt Kramer (https://github.com/DUNE/2x2_sim/blob/feature_spine_on_data/run-pandora)
# 
#	Starting on July 1, 2025, please use the software deployed on the DUNE CVMFS repository.
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


#++++++++++++++++++++++++++++++++++++++++++
# sanity check
#
# Only the "reco" data tier is handled by this jobscript. For any other value
# of DATA_TIER the notice below is printed and the script stops with status 0.
#++++++++++++++++++++++++++++++++++++++++++
case "${DATA_TIER}" in
   reco)
      # expected tier: carry on with the rest of the jobscript
      ;;
   *)
      echo -e "This script [$(basename $BASH_SOURCE)] submits the Pandora reconstruction jobs. Please see the help menu. The data tier is not defined correctly."
      exit 0
      ;;
esac
 

#++++++++++++++++++++++++++++++++++++++++
# the script with common functions
#
# Sourced from the 2x2 release area on CVMFS selected by TWOBYTWO_RELEASE.
# NOTE(review): several names used further down are not defined anywhere in
# this jobscript (create_metadata_file, justin_end_of_job_commands, envlog,
# WORKSPACE, INPUT_FILE, did, CVMFS_WORKING_DIR, ...); presumably they all come
# from this file -- confirm against NDUtilsForJustin.sh.
# NOTE(review): the status of `source` is not checked, so a missing release
# directory does not stop the script here.
#++++++++++++++++++++++++++++++++++++++++
source /cvmfs/dune.opensciencegrid.org/dunend/2x2/releases/${TWOBYTWO_RELEASE}/ndlar_prod_scripts/ND_Production/toolbox/scripts/NDUtilsForJustin.sh


#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# containers to store the parent and child files
#
# PARENT_FILES starts with the single entry ${did}; CREATED_FILES starts empty
# and receives the Pandora output name in execute_pandora_workflow.
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
PARENT_FILES=("${did}")
CREATED_FILES=()


#+++++++++++++++++++++++++++++++++++++++++
# Run the hdf5 to root workflow
#
# Converts ${INPUT_FILE} into a ROOT file in two steps:
#   1. python3 h5_to_root_ndlarflow.py, run inside the
#      ndlar_flow_${TWOBYTWO_RELEASE} conda environment, writes the
#      intermediate <f0>-<f1>-<timestamp>_CDT.FLOW.ASSOC.0.root file;
#   2. the ROOT macro rootToRootConversion.C writes the final
#      <f0>-<f1>-<timestamp>_CDT.FLOW.ASSOC.root file.
# <f0> and <f1> are the first two '-'-separated fields of ${INPUT_FILE}.
# The intermediate file, the local macro copy and any rootToRootConversion* /
# AutoDict* files in the working directory are removed afterwards, and the
# final file is placed in ${OUTFILES_DIR}.
#
# Globals read:    INPUT_FILE, DATA_STREAM, DATA_TYPE, TWOBYTWO_RELEASE,
#                  EXTERNAL_RELEASE, CVMFS_TWOBYTWO_DIR, CVMFS_WORKING_DIR,
#                  GCC_VERSION, ROOT_VERSION, ROOT_QUALIFIER, OUTFILES_DIR,
#                  WORKSPACE, envlog
# Globals written: flist, TMP_OUTPUT_FILE, ROOT_OUTPUT_FILE, isMC, isFinal,
#                  isData, CONVERT_DATA_WORKFLOW (exported),
#                  FLOW_OUTPUT_FILE (exported)
# Exit status:     the function exits the script on failure: status 0 when a
#                  prerequisite or the intermediate file is missing, status 1
#                  when the final ROOT file is missing.
# NOTE(review): the exit status of python3 and of root is not inspected; only
# the existence of their output files is checked.
#+++++++++++++++++++++++++++++++++++++++++
execute_hdf5_root_workflow() {
   # Log target; falls back to /dev/null so tee still echoes to stdout when
   # envlog is not set.
   local logfile="${envlog:-/dev/null}"
   local root_macro="${CVMFS_WORKING_DIR}/pandora/LArRecoND/ndlarflow/rootToRootConversion.C"

   echo -e "Enter executing the hdf5 to root  workflow for data stream [${DATA_STREAM}] and input file [${INPUT_FILE}]" 2>&1 | tee -a "${logfile}"

   # NOTE(review): the timestamp comes from `date` in whatever time zone the
   # job runs in, yet the name is tagged _CDT -- confirm this is intended.
   IFS='-' read -r -a flist <<< "${INPUT_FILE}"
   TMP_OUTPUT_FILE="${flist[0]}-${flist[1]}-$(date +'%Y_%m_%d_%H_%M_%S')_CDT.FLOW.ASSOC.0.root"
   ROOT_OUTPUT_FILE="${TMP_OUTPUT_FILE/.ASSOC.0.root/.ASSOC.root}"

   source "${CVMFS_TWOBYTWO_DIR}/miniforge/${EXTERNAL_RELEASE}/miniforge3/etc/profile.d/conda.sh"
   source "${CVMFS_TWOBYTWO_DIR}/miniforge/${EXTERNAL_RELEASE}/conda_envs/conda.envs.sh"

   export CONVERT_DATA_WORKFLOW="${CVMFS_WORKING_DIR}/pandora/LArRecoND/ndlarflow/h5_to_root_ndlarflow.py"
   if [[ ! -f "${CONVERT_DATA_WORKFLOW}" ]]; then
      echo -e "\tCannot run the hdf5 to root conversion. The file [${CONVERT_DATA_WORKFLOW}] does not exist." 2>&1 | tee -a "${logfile}"
      exit 0
   fi

   # Flags handed to the converter and to the ROOT macro; data is the default.
   isMC=0
   isFinal=1
   isData=1
   if [[ "${DATA_TYPE}" == "mc" ]]; then
      isData=0
      isMC=1
   fi

   source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
   # Deliberately unquoted: an empty version must not become an empty argument.
   setup gcc ${GCC_VERSION}

   conda activate "ndlar_flow_${TWOBYTWO_RELEASE}"
   if [[ -z "${CONDA_DEFAULT_ENV}" ]]; then
       echo -e "The conda virtual environment is not activated [ ndlar_flow_${TWOBYTWO_RELEASE} ]. exiting." 2>&1 | tee -a "${logfile}"
       exit 0
   fi

   echo -e "\tThe current conda virtual environment is activated: [${CONDA_DEFAULT_ENV}]" 2>&1 | tee -a "${logfile}"
   echo -e "\tRunning the pandora workflow in converting hdf5 to root." 2>&1 | tee -a "${logfile}"
   echo -e "\t[ python3 ${CONVERT_DATA_WORKFLOW} ${INPUT_FILE} ${isData} ${isFinal} ${TMP_OUTPUT_FILE} ]" 2>&1 | tee -a "${logfile}"
   python3 "${CONVERT_DATA_WORKFLOW}" "${INPUT_FILE}" "${isData}" "${isFinal}" "${TMP_OUTPUT_FILE}"

   echo -e "\nExit the conda environment [${CONDA_DEFAULT_ENV}]" 2>&1 | tee -a "${logfile}"
   conda deactivate

   if [[ ! -f "${TMP_OUTPUT_FILE}" ]]; then
      echo -e "\tCannot continue. The file [${TMP_OUTPUT_FILE}] was not created." 2>&1 | tee -a "${logfile}"
      exit 0
   fi

   if [[ ! -f "${root_macro}" ]]; then
      echo -e "\tCannot run the final step in the root conversion workflow. The file [${root_macro}] does not exist." 2>&1 | tee -a "${logfile}"
      exit 0
   fi

   # Subshell: the ROOT product setup stays out of the caller's environment.
   (
     setup root ${ROOT_VERSION} -q ${ROOT_QUALIFIER}
     cp "${root_macro}" .
     echo -e "\t[ root -l -q rootToRootConversion.C+\(${isMC},\"${TMP_OUTPUT_FILE}\",\"${ROOT_OUTPUT_FILE}\"\) ]" 2>&1 | tee -a "${logfile}"
     root -l -q "rootToRootConversion.C+(${isMC},\"${TMP_OUTPUT_FILE}\",\"${ROOT_OUTPUT_FILE}\")"
   )

   rm -f "${TMP_OUTPUT_FILE}"
   rm -f rootToRootConversion*
   rm -f AutoDict*

   export FLOW_OUTPUT_FILE="${ROOT_OUTPUT_FILE}"
   if [[ -f "${FLOW_OUTPUT_FILE}" ]]; then
      # Skip the move when the file already sits in OUTFILES_DIR (the working
      # directory is the output directory in this jobscript); mv would only
      # report "are the same file".
      if [[ ! "${FLOW_OUTPUT_FILE}" -ef "${OUTFILES_DIR}/${FLOW_OUTPUT_FILE##*/}" ]]; then
         mv "${FLOW_OUTPUT_FILE}" "${OUTFILES_DIR}/"
      fi
   else
      echo -e "FATAL::The file [${FLOW_OUTPUT_FILE}] does not exist! Will not continue." 2>&1 | tee -a "${logfile}"
      exit 1
   fi

   cd "${WORKSPACE}"
   echo -e "Exit executing the hdf5 to root  workflow for data stream [${DATA_STREAM}]\n" 2>&1 | tee -a "${logfile}"
}


#+++++++++++++++++++++++++++++++++++++++++++++++++++
# Run the pandora workflow for ndlar root files
#
# Runs PandoraInterface from ${CVMFS_WORKING_DIR}/pandora/LArRecoND on
# ${FLOW_OUTPUT_FILE}. The LArRecoND.root file it leaves in the working
# directory is renamed to ${PANDORA_OUTPUT_DATAFILE} (the input name with
# FLOW.ASSOC.root replaced by LAR_RECO_ND.root), placed in ${OUTFILES_DIR},
# and its name is appended to CREATED_FILES. MCHierarchy.root,
# EventHierarchy.root and ${FLOW_OUTPUT_FILE} are removed from the working
# directory afterwards.
#
# Globals read:    FLOW_OUTPUT_FILE, DATA_STREAM, DATA_TYPE, DETECTOR_CONFIG,
#                  CVMFS_WORKING_DIR, GCC_VERSION, TBB_VERSION, TBB_QUALIFIER,
#                  OUTFILES_DIR, WORKSPACE, envlog
# Globals written: PANDORA_OUTPUT_DATAFILE, PANDORA_DET_GEOM, PANDORA_SETTINGS,
#                  LD_LIBRARY_PATH (all exported), PANDORA_INPUT_FORMAT,
#                  CREATED_FILES
# Exit status:     the function exits the script on failure: status 0 for an
#                  unsupported DETECTOR_CONFIG, status 1 when LArRecoND.root
#                  was not produced.
# NOTE(review): the unsupported-detector branch is labelled FATAL but exits
# with status 0, unlike the other FATAL branch -- confirm this is intended.
# NOTE(review): the exit status of PandoraInterface is not inspected; only the
# existence of LArRecoND.root is checked.
#+++++++++++++++++++++++++++++++++++++++++++++++++++
execute_pandora_workflow() {
   # Log target; falls back to /dev/null so tee still echoes to stdout when
   # envlog is not set.
   local logfile="${envlog:-/dev/null}"
   local pandora_dir="${CVMFS_WORKING_DIR}/pandora"
   local -a pandora_cmd

   echo -e "Enter executing the pandora workflow for data stream [${DATA_STREAM}]" 2>&1 | tee -a "${logfile}"

   export PANDORA_OUTPUT_DATAFILE="${FLOW_OUTPUT_FILE/FLOW.ASSOC.root/LAR_RECO_ND.root}"
   echo -e "\tThe pandora output file name is [${PANDORA_OUTPUT_DATAFILE}]"  2>&1 | tee -a "${logfile}"

   source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
   # Deliberately unquoted: empty values must not become empty arguments.
   setup gcc ${GCC_VERSION}
   setup tbb ${TBB_VERSION} -q ${TBB_QUALIFIER}

   export LD_LIBRARY_PATH=${pandora_dir}/PandoraSDK/lib:${pandora_dir}/LArContent/lib:${pandora_dir}/PandoraMonitoring/lib:${pandora_dir}/LArRecoND/lib:${LD_LIBRARY_PATH}

   # Input format passed with -f: SP by default, SPMC for DATA_TYPE "mc".
   PANDORA_INPUT_FORMAT=SP
   if [[ "${DATA_TYPE}" == "mc" ]]; then
      PANDORA_INPUT_FORMAT=SPMC
   fi

   # Only the proto_nd detector configuration has a geometry file here.
   export PANDORA_DET_GEOM=""
   if [[ "${DETECTOR_CONFIG}" == "proto_nd" ]]; then
      export PANDORA_DET_GEOM="${pandora_dir}/LArRecoND/Merged2x2MINERvA_v4_withRock.root"
   else
      echo -e "FATAL::The detector [${DETECTOR_CONFIG}] root file does not exist. Cannot continue with executing the Pandora reconstruction.\n" 2>&1 | tee -a "${logfile}"
      exit 0
   fi

   export PANDORA_SETTINGS="${pandora_dir}/LArRecoND/settings/PandoraSettings_LArRecoND_ThreeD.xml"

   echo -e "\tSetup the build area [ source ${pandora_dir}/LArRecoND/scripts/tags.sh ${pandora_dir} ]" 2>&1 | tee -a "${logfile}"
   source "${pandora_dir}/LArRecoND/scripts/tags.sh" "${pandora_dir}"

   # Build the command once so the logged line is exactly what gets executed.
   pandora_cmd=(
      "${pandora_dir}/LArRecoND/bin/PandoraInterface"
      -i "${PANDORA_SETTINGS}"
      -r AllHitsSliceNu
      -f "${PANDORA_INPUT_FORMAT}"
      -g "${PANDORA_DET_GEOM}"
      -e "${FLOW_OUTPUT_FILE}"
      -j both
      -M
   )
   echo -e "\tRun the pandora workflow:" 2>&1 | tee -a "${logfile}"
   echo -e "\t\t[ ${pandora_cmd[*]} ]" 2>&1 | tee -a "${logfile}"
   "${pandora_cmd[@]}"

   if [[ -f "LArRecoND.root" ]]; then
      mv LArRecoND.root "${PANDORA_OUTPUT_DATAFILE}"
      # Skip the move when the renamed file already sits in OUTFILES_DIR (the
      # working directory is the output directory in this jobscript); mv would
      # only report "are the same file".
      if [[ ! "${PANDORA_OUTPUT_DATAFILE}" -ef "${OUTFILES_DIR}/${PANDORA_OUTPUT_DATAFILE##*/}" ]]; then
         mv "${PANDORA_OUTPUT_DATAFILE}" "${OUTFILES_DIR}/"
      fi
   else
      echo -e "FATAL::The file [${PANDORA_OUTPUT_DATAFILE}] does not exist! Will not continue." 2>&1 | tee -a "${logfile}"
      exit 1
   fi

   rm -f MCHierarchy.root
   rm -f EventHierarchy.root
   rm -f "${FLOW_OUTPUT_FILE}"

   CREATED_FILES+=("${PANDORA_OUTPUT_DATAFILE}")

   cd "${WORKSPACE}"
   echo -e "Exit executing the pandora workflow for data stream [${DATA_STREAM}]\n" 2>&1 | tee -a "${logfile}"
}


#+++++++++++++++++++++++++++++++++++++++++
# create an output directory
#
# The job works inside ${WORKSPACE}, and the same directory is exported as
# OUTFILES_DIR, the place where the workflow functions put their outputs.
# With DEBUG_SUBMISSION_SCRIPT equal to 1 the directory listing is logged too.
#+++++++++++++++++++++++++++++++++++++++++
cd ${WORKSPACE}
OUTFILES_DIR=${WORKSPACE}
export OUTFILES_DIR
echo -e "The output files are placed in the directory [$OUTFILES_DIR]\n" 2>&1 | tee -a $envlog
if test ${DEBUG_SUBMISSION_SCRIPT} -eq 1
then
   ls -lhrt ${OUTFILES_DIR} 2>&1 | tee -a $envlog
fi


#++++++++++++++++++++++++++++++++++++++
# execute the jobs
#
# Order matters: the hdf5 to root conversion exports FLOW_OUTPUT_FILE, which
# the pandora step reads. Afterwards "pandora" is appended to WORKFLOW and the
# namespace / application data tier are exported.
#+++++++++++++++++++++++++++++++++++++
echo -e "\n\n" 2>&1 | tee -a $envlog

execute_hdf5_root_workflow
execute_pandora_workflow

WORKFLOW+=("pandora")
NAMESPACE="neardet-2x2-lar"
APPLICATION_DATA_TIER="pandora-reconstruction"
export NAMESPACE APPLICATION_DATA_TIER


#++++++++++++++++++++++++++++++++++++++++
# create metadata json file
#
# NOTE(review): create_metadata_file is not defined in this jobscript; it is
# presumably provided by the sourced NDUtilsForJustin.sh and presumably uses
# PARENT_FILES, CREATED_FILES, WORKFLOW, NAMESPACE and APPLICATION_DATA_TIER
# set earlier in this script -- confirm.
#++++++++++++++++++++++++++++++++++++++++
create_metadata_file


#+++++++++++++++++++++++++++++++++++++++
# End of justin job running
#
# NOTE(review): justin_end_of_job_commands is not defined in this jobscript
# either; presumably it also comes from NDUtilsForJustin.sh -- confirm.
#+++++++++++++++++++++++++++++++++++++++
justin_end_of_job_commands



######################################
#
# END OF RUNNING NDLAr PANDORA JOBS
#
######################################

# The script always finishes with status 0 when it reaches this point; the
# status of the two calls above is not propagated.
exit 0