justIN           Dashboard       Workflows       Jobs       AWT       Sites       Storages       Docs       Login

Workflow 1470, Stage 1

Priority50
Processors1
Wall seconds3600
Image/cvmfs/singularity.opensciencegrid.org/fermilab/fnal-wn-sl7:latest
RSS bytes2097152000 (2000 MiB)
Max distance for inputs102.0
Enabled input RSEs CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, MONTECARLO, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC
Enabled output RSEs CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC
Enabled sites US_NERSC-CPU
Scopetestpro
Events for this stage

Output patterns

 DestinationPatternLifetimeFor next stageRSE expression
1Rucio testpro:T3_US_NERSC-fnal-w1470s1p1*.log2592000False
2Rucio testpro:T3_US_NERSC-fnal-w1470s1p2*.root2592000False

Environment variables

NameValue
DATA_STREAMcalibrated
DATA_TIERreco_pandora
DATA_TYPEdata
DEBUG_SUBMISSION_SCRIPT0
DETECTOR_CONFIGproto_nd
END_POSITIONNone
JOBSCRIPT_TEST0
MX2_WORKFLOW_ID1
NEVENTS-1
RUN_CAF_MX20
RUN_CAF_PANDORA0
RUN_CAF_PANDORA_MX20
RUN_CAF_PANDORA_SPINE0
RUN_CAF_PANDORA_SPINE_MX20
RUN_CAF_SPINE0
RUN_CAF_SPINE_MX20
RUN_PERIODrun1
SPINE_WORKFLOW_ID1
START_POSITIONNone
TWOBYTWO_RELEASEv1.5.0
USERduneproshift

File states

Total filesFindingUnallocatedAllocatedOutputtingProcessedNot foundFailed
1300001300

Job states

TotalSubmittedStartedProcessingOutputtingFinishedNotusedAbortedStalledJobscript errorOutputting failedNone processed
25000022030000
Files processed001122334455667788Aug-18 20:00Aug-18 21:00Aug-18 22:00Aug-18 23:00Files processedBin start timesNumber per binUS_NERSC-CPU
Replicas per RSE13380.00057375369.7499999985656Replicas per RSET3_US_NERSC (100%)

RSEs used

NameInputsOutputs
T3_US_NERSC1657

Stats of processed input files as CSV or JSON, and of uploaded output files as CSV or JSON (up to 10000 files included)

File reset events, by site

SiteAllocatedOutputting
US_NERSC-CPU30

Jobscript

#!/bin/bash

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#
# 	This script for running the pandora workflow is based on the data production
# 	development by Matt Kramer (https://github.com/DUNE/2x2_sim/blob/feature_spine_on_data/run-pandora)
# 
#	Starting on July 1, 2025, please use the software deployed on dune cvmfs repository
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


#++++++++++++++++++++++++++++++++++++++++
# the script with common functions
#++++++++++++++++++++++++++++++++++++++++
# Sources the shared justIN helper library for the 2x2 production release.
# Presumably provides justin_begin_of_job_commands, justin_end_of_job_commands,
# create_metadata_file and the CVMFS_*/GCC_VERSION/ROOT_* variables used
# below — TODO confirm against NDUtilsForJustin.sh.
# TWOBYTWO_RELEASE is injected by the justIN stage environment (e.g. v1.5.0).
source /cvmfs/dune.opensciencegrid.org/dunend/2x2/releases/${TWOBYTWO_RELEASE}/ndlar_prod_scripts/ND_Production/toolbox/scripts/NDUtilsForJustin.sh


#++++++++++++++++++++++++++++++++++++++++++
# sanity check
#++++++++++++++++++++++++++++++++++++++++++
# DATA_TIER comes from the justIN stage environment; this jobscript only
# implements the "reco_pandora" tier.
if [[ "${DATA_TIER}" != "reco_pandora" ]]; then
   echo -e "This script [$(basename $BASH_SOURCE)] submits the Pandora reconstruction jobs. Please see the help menu. The data tier is not defined correctly." 
   # NOTE(review): exits 0 (success) on a misconfiguration — presumably
   # deliberate so justIN does not retry the job; confirm before changing.
   exit 0
fi
 

#++++++++++++++++++++++++++++++++++++++++
# Begin JustIN
#++++++++++++++++++++++++++++++++++++++++
# Helper from NDUtilsForJustin.sh. Presumably allocates the next input file
# and defines ${did}, ${INPUT_FILE}, ${WORKSPACE} and ${envlog} used by the
# rest of this script — TODO confirm.
justin_begin_of_job_commands


#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# containers to store the parent and child files
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# ${did} is the DID of the allocated input file; CREATED_FILES collects the
# output file names appended by the workflow functions (used for metadata).
PARENT_FILES=("${did}")
CREATED_FILES=()


#+++++++++++++++++++++++++++++++++++++++++
# Run the hdf5 to root workflow
#+++++++++++++++++++++++++++++++++++++++++
# Converts the allocated hdf5 input (${INPUT_FILE}) to a ROOT file with the
# LArRecoND h5_to_root converter (run inside a conda environment), then runs
# the rootToRootConversion.C macro to produce the final *.FLOW.ASSOC.root
# file, which is moved to ${OUTFILES_DIR}.
# Globals read : INPUT_FILE, DATA_STREAM, DATA_TYPE, CVMFS_TWOBYTWO_DIR,
#                CVMFS_WORKING_DIR, EXTERNAL_RELEASE, TWOBYTWO_RELEASE,
#                GCC_VERSION, ROOT_VERSION, ROOT_QUALIFIER, OUTFILES_DIR,
#                WORKSPACE, envlog
# Globals set  : CONVERT_DATA_WORKFLOW, FLOW_OUTPUT_FILE (consumed by
#                execute_pandora_workflow)
execute_hdf5_root_workflow() {
   echo -e "Enter executing the hdf5 to root  workflow for data stream [${DATA_STREAM}] and input file [${INPUT_FILE}]" 2>&1 | tee -a $envlog

   # Output name = first two '-'-separated fields of the input file name,
   # plus a CDT timestamp; the ".0.root" intermediate is later collapsed to
   # the final ".root" name by the macro step.
   IFS='-' read -a flist <<< "${INPUT_FILE}"
   TMP_OUTPUT_FILE="${flist[0]}-${flist[1]}-$(date +'%Y_%m_%d_%H_%M_%S')_CDT.FLOW.ASSOC.0.root"
   ROOT_OUTPUT_FILE="${TMP_OUTPUT_FILE/.ASSOC.0.root/.ASSOC.root}" 

   # Bring conda itself and the release's pre-built environments into scope.
   source ${CVMFS_TWOBYTWO_DIR}/miniforge/${EXTERNAL_RELEASE}/miniforge3/etc/profile.d/conda.sh
   source ${CVMFS_TWOBYTWO_DIR}/miniforge/${EXTERNAL_RELEASE}/conda_envs/conda.envs.sh

   export CONVERT_DATA_WORKFLOW=${CVMFS_WORKING_DIR}/pandora/LArRecoND/ndlarflow/h5_to_root_ndlarflow.py
   if [ ! -f "${CONVERT_DATA_WORKFLOW}" ]; then
      echo -e "\tCannot run the convert raw data to packet data. The file [${CONVERT_DATA_WORKFLOW}] does not exist." 2>&1 | tee -a $envlog
      # NOTE(review): exits 0 on a fatal condition — presumably so justIN
      # handles the failure via file states rather than a job abort; confirm.
      exit 0
   fi   

   # Converter flags: data vs MC are mutually exclusive; default is real
   # data, flipped when DATA_TYPE=mc. isFinal=1 requests the final pass.
   isMC=0 
   isFinal=1
   isData=1
   if [[ "${DATA_TYPE}" == "mc" ]]; then 
      isData=0
      isMC=1
   fi

   source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
   setup gcc ${GCC_VERSION} 

   conda activate ndlar_flow_${TWOBYTWO_RELEASE}
   # CONDA_DEFAULT_ENV is only set when activation succeeded.
   if [ -z "${CONDA_DEFAULT_ENV}" ]; then
       echo -e "The conda virtual environment is not activated [ ndlar_flow_${TWOBYTWO_RELEASE} ]. exiting." 2>&1 | tee -a $envlog
       exit 0
   fi

   echo -e "\tThe current conda virtual environment is activated: [${CONDA_DEFAULT_ENV}]" 2>&1 | tee -a $envlog
   echo -e "\tRunning the pandora workflow in converting hdf5 to root." 2>&1 | tee -a $envlog 
   echo -e "\t[ python3 ${CONVERT_DATA_WORKFLOW} ${INPUT_FILE} ${isData} ${isFinal} ${TMP_OUTPUT_FILE} ]" 2>&1 | tee -a $envlog
   python3 ${CONVERT_DATA_WORKFLOW} ${INPUT_FILE} ${isData} ${isFinal} ${TMP_OUTPUT_FILE}

   echo -e "\nExit the conda environment [${CONDA_DEFAULT_ENV}]" 2>&1 | tee -a $envlog
   conda deactivate
   
   if [ ! -f ${TMP_OUTPUT_FILE} ]; then
      echo -e "\tCannot continue. The file [${TMP_OUTPUT_FILE}] was not created." 2>&1 | tee -a $envlog
      exit 0
   fi

   if [ ! -f "${CVMFS_WORKING_DIR}/pandora/LArRecoND/ndlarflow/rootToRootConversion.C" ]; then
      echo -e "\tCannnot run the final step in the root conversion workflow. The file [${CVMFS_WORKING_DIR}/pandora/LArRecoND/ndlarflow/rootToRootConversion.C] does not exist." 2>&1 | tee -a $envlog
      exit 0
   else
      # Run the ROOT macro step in a subshell so the 'setup root'
      # environment changes do not leak into the rest of the job.
      (
        setup root ${ROOT_VERSION} -q ${ROOT_QUALIFIER}
        cp ${CVMFS_WORKING_DIR}/pandora/LArRecoND/ndlarflow/rootToRootConversion.C .
        echo -e "\t[ root -l -q rootToRootConversion.C+\(${isMC},\"${TMP_OUTPUT_FILE}\",\"${ROOT_OUTPUT_FILE}\"\) ]" 2>&1 | tee -a $envlog  
        root -l -q rootToRootConversion.C+\(${isMC},\"${TMP_OUTPUT_FILE}\",\"${ROOT_OUTPUT_FILE}\"\)
      )
   fi

   # Clean up the intermediate file and ACLiC build artifacts.
   rm -f ${TMP_OUTPUT_FILE}
   rm -f rootToRootConversion*
   rm -f AutoDict*
   
   # Hand the final FLOW file to execute_pandora_workflow via the env.
   export FLOW_OUTPUT_FILE="${ROOT_OUTPUT_FILE}"
   if [ -f ${FLOW_OUTPUT_FILE} ]; then
      mv ${FLOW_OUTPUT_FILE} ${OUTFILES_DIR}/
   else 
      echo -e "FATAL::The file [${FLOW_OUTPUT_FILE}] does not exist! Will not continue." 2>&1 | tee -a $envlog
      exit 1 
   fi

   cd ${WORKSPACE}
   echo -e "Exit executing the hdf5 to root  workflow for data stream [${DATA_STREAM}]\n" 2>&1 | tee -a $envlog
}


#+++++++++++++++++++++++++++++++++++++++++++++++++++
# Run the pandora workflow for ndlar root files
#+++++++++++++++++++++++++++++++++++++++++++++++++++
# Runs the LArRecoND PandoraInterface reconstruction on the FLOW ROOT file
# produced by execute_hdf5_root_workflow and moves the resulting
# *.LAR_RECO_ND.root file to ${OUTFILES_DIR}.
# Globals read : FLOW_OUTPUT_FILE, DATA_STREAM, DATA_TYPE, DETECTOR_CONFIG,
#                CVMFS_WORKING_DIR, GCC_VERSION, TBB_VERSION, TBB_QUALIFIER,
#                OUTFILES_DIR, WORKSPACE, envlog
# Globals set  : PANDORA_OUTPUT_DATAFILE, PANDORA_DET_GEOM, PANDORA_SETTINGS,
#                LD_LIBRARY_PATH; appends the output file to CREATED_FILES
execute_pandora_workflow() {
   echo -e "Enter executing the pandora workflow for data stream [${DATA_STREAM}]" 2>&1 | tee -a $envlog

   # Derive the reconstruction output name from the FLOW file name.
   export PANDORA_OUTPUT_DATAFILE="${FLOW_OUTPUT_FILE/FLOW.ASSOC.root/LAR_RECO_ND.root}"
   echo -e "\tThe pandora output file name is [${PANDORA_OUTPUT_DATAFILE}]"  2>&1 | tee -a $envlog

   source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
   setup gcc ${GCC_VERSION}
   setup tbb ${TBB_VERSION} -q ${TBB_QUALIFIER}

   export LD_LIBRARY_PATH=${CVMFS_WORKING_DIR}/pandora/PandoraSDK/lib:${CVMFS_WORKING_DIR}/pandora/LArContent/lib:${CVMFS_WORKING_DIR}/pandora/PandoraMonitoring/lib:${CVMFS_WORKING_DIR}/pandora/LArRecoND/lib:${LD_LIBRARY_PATH}

   # Input format passed to PandoraInterface: SP for data, SPMC for MC.
   PANDORA_INPUT_FORMAT=SP
   if [[ "${DATA_TYPE}" == "mc" ]]; then
      PANDORA_INPUT_FORMAT=SPMC
   fi

   # Only the proto_nd (2x2 + MINERvA) geometry is supported here.
   export PANDORA_DET_GEOM=""
   if [[ "${DETECTOR_CONFIG}" == "proto_nd" ]]; then
      export PANDORA_DET_GEOM=${CVMFS_WORKING_DIR}/pandora/LArRecoND/Merged2x2MINERvA_v4_withRock.root
   else
      echo -e "FATAL::The detector [${DETECTOR_CONFIG}] root file does not exist. Cannot continue with executing the Pandora reconstruction.\n" 2>&1 | tee -a $envlog
      # NOTE(review): exits 0 on a fatal condition — presumably so justIN
      # tracks the failure via file states rather than a job abort; confirm.
      exit 0
   fi

   export PANDORA_SETTINGS=${CVMFS_WORKING_DIR}/pandora/LArRecoND/settings/PandoraSettings_LArRecoND_ThreeD.xml

   echo -e "\tSetup the build area [ source ${CVMFS_WORKING_DIR}/pandora/LArRecoND/scripts/tags.sh ${CVMFS_WORKING_DIR}/pandora ]" 2>&1 | tee -a $envlog 
   source ${CVMFS_WORKING_DIR}/pandora/LArRecoND/scripts/tags.sh ${CVMFS_WORKING_DIR}/pandora

   echo -e "\tRun the pandora workflow:" 2>&1 | tee -a $envlog
   # BUGFIX: the logged command previously showed "-e ${IN_FILE} ... -M -N",
   # which did not match the command actually executed below
   # ("-e ${FLOW_OUTPUT_FILE} ... -M"); the log now mirrors the execution.
   echo -e "\t\t[ ${CVMFS_WORKING_DIR}/pandora/LArRecoND/bin/PandoraInterface -i ${PANDORA_SETTINGS} -r AllHitsSliceNu -f ${PANDORA_INPUT_FORMAT} -g ${PANDORA_DET_GEOM} -e ${FLOW_OUTPUT_FILE} -j both -M ]" 2>&1 | tee -a $envlog
   ${CVMFS_WORKING_DIR}/pandora/LArRecoND/bin/PandoraInterface -i ${PANDORA_SETTINGS} -r AllHitsSliceNu -f ${PANDORA_INPUT_FORMAT} -g ${PANDORA_DET_GEOM} -e ${FLOW_OUTPUT_FILE} -j both -M 

   if [ -f "LArRecoND.root" ]; then
      mv LArRecoND.root ${PANDORA_OUTPUT_DATAFILE}
      # BUGFIX: was "mv ${PANDORA_OUTFILE} ..." — an undefined variable, so
      # the renamed output was never actually moved to ${OUTFILES_DIR}.
      mv ${PANDORA_OUTPUT_DATAFILE} ${OUTFILES_DIR}/
   else 
      echo -e "FATAL::The file [${PANDORA_OUTPUT_DATAFILE}] does not exist! Will not continue." 2>&1 | tee -a $envlog
      exit 1
   fi
   
   # Remove Pandora by-products and the consumed FLOW input file.
   rm -f MCHierarchy.root
   rm -f EventHierarchy.root
   rm -f ${FLOW_OUTPUT_FILE}

   CREATED_FILES+=("${PANDORA_OUTPUT_DATAFILE}")

   cd ${WORKSPACE}
   echo -e "Exit executing the pandora workflow for data stream [${DATA_STREAM}]\n" 2>&1 | tee -a $envlog
}


#+++++++++++++++++++++++++++++++++++++++++
# create an output directory
#+++++++++++++++++++++++++++++++++++++++++
# The justIN workspace doubles as the output directory the workflow
# functions move their products into.
cd ${WORKSPACE}
export OUTFILES_DIR=${WORKSPACE}
echo -e "The output files are placed in the directory [$OUTFILES_DIR]\n" 2>&1 | tee -a $envlog
# BUGFIX: quote and default to 0 so an unset/empty DEBUG_SUBMISSION_SCRIPT
# no longer breaks the numeric test ("[: -eq: unary operator expected").
if [ "${DEBUG_SUBMISSION_SCRIPT:-0}" -eq 1 ]; then
   ls -lhrt ${OUTFILES_DIR} 2>&1 | tee -a $envlog
fi


#++++++++++++++++++++++++++++++++++++++
# execute the jobs
#+++++++++++++++++++++++++++++++++++++
echo -e "\n\n" 2>&1 | tee -a $envlog
# Stage 1: hdf5 -> *.FLOW.ASSOC.root; stage 2: Pandora reconstruction.
execute_hdf5_root_workflow
execute_pandora_workflow
WORKFLOW+=("pandora")
# Metadata fields presumably consumed by create_metadata_file — confirm
# against NDUtilsForJustin.sh.
export NAMESPACE="neardet-2x2-lar"
export APPLICATION_DATA_TIER="pandora-reconstruction"


#++++++++++++++++++++++++++++++++++++++++
# create metadata json file
#++++++++++++++++++++++++++++++++++++++++
create_metadata_file


#+++++++++++++++++++++++++++++++++++++++
# End of justin job running
#+++++++++++++++++++++++++++++++++++++++
justin_end_of_job_commands



######################################
#
# END OF RUNNING NDLAr PANDORA JOBS
#
######################################

exit 0
justIN time: 2025-09-19 08:27:15 UTC       justIN version: 01.05.00