Workflow 105, Stage 1
Priority | 50 |
Processors | 1 |
Wall seconds | 3600 |
Image | /cvmfs/singularity.opensciencegrid.org/fermilab/fnal-wn-sl7:latest |
RSS bytes | 2097152000 (2000 MiB) |
Max distance for inputs | 102.0 |
Enabled input RSEs |
CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, MONTECARLO, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC |
Enabled output RSEs |
CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC |
Enabled sites |
BR_CBPF, CA_SFU, CERN, CH_UNIBE-LHEP, CZ_FZU, ES_CIEMAT, ES_PIC, FR_CCIN2P3, IT_CNAF, NL_NIKHEF, NL_SURFsara, UK_Bristol, UK_Brunel, UK_Durham, UK_Edinburgh, UK_Glasgow, UK_Imperial, UK_Lancaster, UK_Liverpool, UK_Manchester, UK_Oxford, UK_QMUL, UK_RAL-PPD, UK_RAL-Tier1, UK_Sheffield, US_Colorado, US_FNAL-FermiGrid, US_FNAL-T1, US_Michigan, US_PuertoRico, US_SU-ITS, US_Swan, US_UChicago, US_UConn-HPC, US_UCSD, US_Wisconsin |
Scope | testpro |
Events for this stage |
Output patterns
| Destination | Pattern | Lifetime | For next stage | RSE expression |
---|
1 | Rucio testpro:DUNE_US_FNAL_DISK_STAGE-fnal-w105s1p1 | *.log | 1209600 | False | |
2 | Rucio testpro:DUNE_US_FNAL_DISK_STAGE-fnal-w105s1p2 | *.root | 1209600 | False | |
Environment variables
Name | Value |
---|
DATA_STREAM | pandora |
DATA_TIER | reco |
DATA_TYPE | data |
DEBUG_SUBMISSION_SCRIPT | 0 |
DETECTOR_CONFIG | proto_nd |
END_POSITION | None |
JOBSCRIPT_TEST | 0 |
NEVENTS | -1 |
RUN_PERIOD | run1 |
START_POSITION | None |
TWOBYTWO_RELEASE | v1.5.0 |
USER | duneproshift |
File states
Total files | Finding | Unallocated | Allocated | Outputting | Processed | Not found | Failed |
---|
|
1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
Job states
Total | Submitted | Started | Processing | Outputting | Finished | Notused | Aborted | Stalled | Jobscript error | Outputting failed | None processed |
---|
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Stats of processed input files as CSV or JSON, and of uploaded output files as CSV or JSON (up to 10000 files included)
Jobscript
#!/bin/bash
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#
# This script for running the pandora workflow is based on the data production
# development by Matt Kramer (https://github.com/DUNE/2x2_sim/blob/feature_spine_on_data/run-pandora)
#
# Starting on July 1, 2025, please use the software deployed on dune cvmfs repository
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#++++++++++++++++++++++++++++++++++++++++++
# sanity check
#++++++++++++++++++++++++++++++++++++++++++
# This jobscript only handles the "reco" data tier.  Exit 0 (not a jobscript
# error to justIN) for any other tier so the job is not flagged as failed.
if [[ "${DATA_TIER}" != "reco" ]]; then
  echo -e "This script [$(basename "${BASH_SOURCE}")] submits the Pandora reconstruction jobs. Please see the help menu. The data tier is not defined correctly."
  exit 0
fi
#++++++++++++++++++++++++++++++++++++++++
# the script with common functions
#++++++++++++++++++++++++++++++++++++++++
# Common helpers (create_metadata_file, justin_end_of_job_commands, ...) are
# deployed on cvmfs per 2x2 release.  Fail loudly if the file is missing:
# sourcing a non-existent file would only surface much later as
# "command not found" for the helper functions.
COMMON_FUNCTIONS=/cvmfs/dune.opensciencegrid.org/dunend/2x2/releases/${TWOBYTWO_RELEASE}/ndlar_prod_scripts/ND_Production/toolbox/scripts/NDUtilsForJustin.sh
if [ ! -f "${COMMON_FUNCTIONS}" ]; then
  echo -e "FATAL::The common functions script [${COMMON_FUNCTIONS}] does not exist. Cannot continue."
  exit 1
fi
source "${COMMON_FUNCTIONS}"
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# containers to store the parent and child files
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# ${did} is the input-file DID injected into the environment by justIN.
PARENT_FILES=("${did}")
CREATED_FILES=()
#+++++++++++++++++++++++++++++++++++++++++
# Run the hdf5 to root workflow
#+++++++++++++++++++++++++++++++++++++++++
#######################################
# Convert the ndlar-flow HDF5 input file into a LArRecoND-format ROOT file.
# Globals (read):  DATA_STREAM, DATA_TYPE, INPUT_FILE, CVMFS_TWOBYTWO_DIR,
#                  CVMFS_WORKING_DIR, EXTERNAL_RELEASE, TWOBYTWO_RELEASE,
#                  GCC_VERSION, ROOT_VERSION, ROOT_QUALIFIER, OUTFILES_DIR,
#                  WORKSPACE, envlog
# Globals (written): CONVERT_DATA_WORKFLOW, FLOW_OUTPUT_FILE
# Outputs: moves the final *.FLOW.ASSOC.root file into ${OUTFILES_DIR}
# NOTE(review): missing-tool/missing-intermediate conditions exit 0 (job ends
#   without outputs, presumably counted as "None processed" by justIN) while a
#   missing final output exits 1 (jobscript error) -- confirm this split is
#   intentional.
execute_hdf5_root_workflow() {
  echo -e "Enter executing the hdf5 to root workflow for data stream [${DATA_STREAM}] and input file [${INPUT_FILE}]" 2>&1 | tee -a $envlog
  # Output name: first two '-'-separated fields of the input name + timestamp.
  IFS='-' read -r -a flist <<< "${INPUT_FILE}"
  TMP_OUTPUT_FILE="${flist[0]}-${flist[1]}-$(date +'%Y_%m_%d_%H_%M_%S')_CDT.FLOW.ASSOC.0.root"
  ROOT_OUTPUT_FILE="${TMP_OUTPUT_FILE/.ASSOC.0.root/.ASSOC.root}"
  # conda installation deployed with the 2x2 release on cvmfs
  source ${CVMFS_TWOBYTWO_DIR}/miniforge/${EXTERNAL_RELEASE}/miniforge3/etc/profile.d/conda.sh
  source ${CVMFS_TWOBYTWO_DIR}/miniforge/${EXTERNAL_RELEASE}/conda_envs/conda.envs.sh
  export CONVERT_DATA_WORKFLOW=${CVMFS_WORKING_DIR}/pandora/LArRecoND/ndlarflow/h5_to_root_ndlarflow.py
  if [ ! -f "${CONVERT_DATA_WORKFLOW}" ]; then
    echo -e "\tCannot run the convert raw data to packet data. The file [${CONVERT_DATA_WORKFLOW}] does not exist." 2>&1 | tee -a $envlog
    exit 0
  fi
  # Converter flags: data vs MC input, and final-pass conversion.
  isMC=0
  isFinal=1
  isData=1
  if [[ "${DATA_TYPE}" == "mc" ]]; then
    isData=0
    isMC=1
  fi
  source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
  setup gcc ${GCC_VERSION}
  conda activate ndlar_flow_${TWOBYTWO_RELEASE}
  if [ -z "${CONDA_DEFAULT_ENV}" ]; then
    echo -e "The conda virtual environment is not activated [ ndlar_flow_${TWOBYTWO_RELEASE} ]. exiting." 2>&1 | tee -a $envlog
    exit 0
  fi
  echo -e "\tThe current conda virtual environment is activated: [${CONDA_DEFAULT_ENV}]" 2>&1 | tee -a $envlog
  echo -e "\tRunning the pandora workflow in converting hdf5 to root." 2>&1 | tee -a $envlog
  echo -e "\t[ python3 ${CONVERT_DATA_WORKFLOW} ${INPUT_FILE} ${isData} ${isFinal} ${TMP_OUTPUT_FILE} ]" 2>&1 | tee -a $envlog
  python3 "${CONVERT_DATA_WORKFLOW}" "${INPUT_FILE}" "${isData}" "${isFinal}" "${TMP_OUTPUT_FILE}"
  echo -e "\nExit the conda environment [${CONDA_DEFAULT_ENV}]" 2>&1 | tee -a $envlog
  conda deactivate
  if [ ! -f "${TMP_OUTPUT_FILE}" ]; then
    echo -e "\tCannot continue. The file [${TMP_OUTPUT_FILE}] was not created." 2>&1 | tee -a $envlog
    exit 0
  fi
  if [ ! -f "${CVMFS_WORKING_DIR}/pandora/LArRecoND/ndlarflow/rootToRootConversion.C" ]; then
    # (log-message typo fixed: "Cannnot" -> "Cannot")
    echo -e "\tCannot run the final step in the root conversion workflow. The file [${CVMFS_WORKING_DIR}/pandora/LArRecoND/ndlarflow/rootToRootConversion.C] does not exist." 2>&1 | tee -a $envlog
    exit 0
  else
    # Run ROOT in a subshell so the 'setup root' environment does not leak
    # into the rest of the job.
    (
      setup root ${ROOT_VERSION} -q ${ROOT_QUALIFIER}
      cp "${CVMFS_WORKING_DIR}/pandora/LArRecoND/ndlarflow/rootToRootConversion.C" .
      echo -e "\t[ root -l -q rootToRootConversion.C+\(${isMC},\"${TMP_OUTPUT_FILE}\",\"${ROOT_OUTPUT_FILE}\"\) ]" 2>&1 | tee -a $envlog
      root -l -q rootToRootConversion.C+\(${isMC},\"${TMP_OUTPUT_FILE}\",\"${ROOT_OUTPUT_FILE}\"\)
    )
  fi
  # Remove the intermediate file and the ROOT ACLiC build artefacts.
  rm -f "${TMP_OUTPUT_FILE}"
  rm -f rootToRootConversion*
  rm -f AutoDict*
  export FLOW_OUTPUT_FILE="${ROOT_OUTPUT_FILE}"
  if [ -f "${FLOW_OUTPUT_FILE}" ]; then
    mv "${FLOW_OUTPUT_FILE}" "${OUTFILES_DIR}/"
  else
    echo -e "FATAL::The file [${FLOW_OUTPUT_FILE}] does not exist! Will not continue." 2>&1 | tee -a $envlog
    exit 1
  fi
  cd "${WORKSPACE}"
  echo -e "Exit executing the hdf5 to root workflow for data stream [${DATA_STREAM}]\n" 2>&1 | tee -a $envlog
}
#+++++++++++++++++++++++++++++++++++++++++++++++++++
# Run the pandora workflow for ndlar root files
#+++++++++++++++++++++++++++++++++++++++++++++++++++
#######################################
# Run the Pandora LArRecoND reconstruction over the converted ROOT file.
# Globals (read):  DATA_STREAM, DATA_TYPE, DETECTOR_CONFIG, FLOW_OUTPUT_FILE,
#                  CVMFS_WORKING_DIR, GCC_VERSION, TBB_VERSION, TBB_QUALIFIER,
#                  OUTFILES_DIR, WORKSPACE, envlog
# Globals (written): PANDORA_OUTPUT_DATAFILE, PANDORA_INPUT_FORMAT,
#                    PANDORA_DET_GEOM, PANDORA_SETTINGS, CREATED_FILES,
#                    LD_LIBRARY_PATH
# NOTE(review): FLOW_OUTPUT_FILE is a bare file name that the previous stage
#   moved into ${OUTFILES_DIR}; the '-e' argument below only resolves because
#   OUTFILES_DIR == WORKSPACE (the current directory) -- confirm before ever
#   pointing OUTFILES_DIR elsewhere.
execute_pandora_workflow() {
  echo -e "Enter executing the pandora workflow for data stream [${DATA_STREAM}]" 2>&1 | tee -a $envlog
  export PANDORA_OUTPUT_DATAFILE="${FLOW_OUTPUT_FILE/FLOW.ASSOC.root/LAR_RECO_ND.root}"
  echo -e "\tThe pandora output file name is [${PANDORA_OUTPUT_DATAFILE}]" 2>&1 | tee -a $envlog
  source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
  setup gcc ${GCC_VERSION}
  setup tbb ${TBB_VERSION} -q ${TBB_QUALIFIER}
  export LD_LIBRARY_PATH=${CVMFS_WORKING_DIR}/pandora/PandoraSDK/lib:${CVMFS_WORKING_DIR}/pandora/LArContent/lib:${CVMFS_WORKING_DIR}/pandora/PandoraMonitoring/lib:${CVMFS_WORKING_DIR}/pandora/LArRecoND/lib:${LD_LIBRARY_PATH}
  # Input format flag for PandoraInterface: SP for data, SPMC for Monte Carlo.
  PANDORA_INPUT_FORMAT=SP
  if [[ "${DATA_TYPE}" == "mc" ]]; then
    PANDORA_INPUT_FORMAT=SPMC
  fi
  export PANDORA_DET_GEOM=""
  if [[ "${DETECTOR_CONFIG}" == "proto_nd" ]]; then
    export PANDORA_DET_GEOM=${CVMFS_WORKING_DIR}/pandora/LArRecoND/Merged2x2MINERvA_v4_withRock.root
  else
    echo -e "FATAL::The detector [${DETECTOR_CONFIG}] root file does not exist. Cannot continue with executing the Pandora reconstruction.\n" 2>&1 | tee -a $envlog
    exit 0
  fi
  export PANDORA_SETTINGS=${CVMFS_WORKING_DIR}/pandora/LArRecoND/settings/PandoraSettings_LArRecoND_ThreeD.xml
  echo -e "\tSetup the build area [ source ${CVMFS_WORKING_DIR}/pandora/LArRecoND/scripts/tags.sh ${CVMFS_WORKING_DIR}/pandora ]" 2>&1 | tee -a $envlog
  source ${CVMFS_WORKING_DIR}/pandora/LArRecoND/scripts/tags.sh ${CVMFS_WORKING_DIR}/pandora
  echo -e "\tRun the pandora workflow:" 2>&1 | tee -a $envlog
  # Log line fixed to match the command actually executed below (it previously
  # printed '-e ${IN_FILE} ... -M -N' while running '-e ${FLOW_OUTPUT_FILE} ... -M').
  echo -e "\t\t[ ${CVMFS_WORKING_DIR}/pandora/LArRecoND/bin/PandoraInterface -i ${PANDORA_SETTINGS} -r AllHitsSliceNu -f ${PANDORA_INPUT_FORMAT} -g ${PANDORA_DET_GEOM} -e ${FLOW_OUTPUT_FILE} -j both -M ]" 2>&1 | tee -a $envlog
  ${CVMFS_WORKING_DIR}/pandora/LArRecoND/bin/PandoraInterface -i ${PANDORA_SETTINGS} -r AllHitsSliceNu -f ${PANDORA_INPUT_FORMAT} -g ${PANDORA_DET_GEOM} -e ${FLOW_OUTPUT_FILE} -j both -M
  if [ -f "LArRecoND.root" ]; then
    mv LArRecoND.root "${PANDORA_OUTPUT_DATAFILE}"
    # BUG FIX: was 'mv ${PANDORA_OUTFILE} ...' with an undefined variable, so
    # mv errored out and the renamed output was never moved to ${OUTFILES_DIR}.
    mv "${PANDORA_OUTPUT_DATAFILE}" "${OUTFILES_DIR}/"
  else
    echo -e "FATAL::The file [${PANDORA_OUTPUT_DATAFILE}] does not exist! Will not continue." 2>&1 | tee -a $envlog
    exit 1
  fi
  # Auxiliary hierarchy files produced by PandoraInterface are not kept.
  rm -f MCHierarchy.root
  rm -f EventHierarchy.root
  CREATED_FILES+=("${PANDORA_OUTPUT_DATAFILE}")
  cd "${WORKSPACE}"
  echo -e "Exit executing the pandora workflow for data stream [${DATA_STREAM}]\n" 2>&1 | tee -a $envlog
}
#+++++++++++++++++++++++++++++++++++++++++
# create an output directory
#+++++++++++++++++++++++++++++++++++++++++
cd "${WORKSPACE}"
# Outputs are staged in the job workspace itself; justIN collects them from
# there according to the stage's output patterns.
export OUTFILES_DIR=${WORKSPACE}
echo -e "The output files are placed in the directory [$OUTFILES_DIR]\n" 2>&1 | tee -a $envlog
# Default to 0 so an unset DEBUG_SUBMISSION_SCRIPT does not make the integer
# comparison fail with a '[: -eq: unary operator expected' error.
if [ "${DEBUG_SUBMISSION_SCRIPT:-0}" -eq 1 ]; then
  ls -lhrt "${OUTFILES_DIR}" 2>&1 | tee -a $envlog
fi
#++++++++++++++++++++++++++++++++++++++
# execute the jobs
#+++++++++++++++++++++++++++++++++++++
echo -e "\n\n" 2>&1 | tee -a $envlog
# Step 1: convert the HDF5 flow file to ROOT; step 2: run Pandora on it.
execute_hdf5_root_workflow
execute_pandora_workflow
WORKFLOW+=("pandora")
# Metadata fields consumed by create_metadata_file for the output files.
export NAMESPACE="neardet-2x2-lar"
export APPLICATION_DATA_TIER="pandora-reconstruction"
#++++++++++++++++++++++++++++++++++++++++
# create metadata json file
#++++++++++++++++++++++++++++++++++++++++
create_metadata_file
#+++++++++++++++++++++++++++++++++++++++
# End of justin job running
#+++++++++++++++++++++++++++++++++++++++
justin_end_of_job_commands
######################################
#
# END OF RUNNING NDLAr PANDORA JOBS
#
######################################
exit 0