Workflow 1741, Stage 1

Workflow	1741
Priority	50
Processors	1
Wall seconds	3600
Image	/cvmfs/singularity.opensciencegrid.org/fermilab/fnal-wn-sl7:latest
RSS bytes	2097152000 (2000 MiB)
Max distance for inputs	102.0
Enabled input RSEs	CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, MONTECARLO, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC
Enabled output RSEs	CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC
Enabled sites	US_NERSC-CPU
Scope	testpro
Events for this stage

Output patterns

	Destination	Pattern	Lifetime	For next stage	RSE expression
1	Rucio testpro:T3_US_NERSC-fnal-w1741s1p1	*.log	2592000	False
2	Rucio testpro:T3_US_NERSC-fnal-w1741s1p2	*.root	2592000	False

Environment variables

Name	Value
DATA_STREAM	reco
DATA_TIER	caf
DATA_TYPE	data
DEBUG_SUBMISSION_SCRIPT	0
DETECTOR_CONFIG	proto_nd
END_POSITION	None
JOBSCRIPT_TEST	0
MX2_WORKFLOW_ID	772
NEVENTS	-1
RUN_CAF_MX2	0
RUN_CAF_PANDORA	0
RUN_CAF_PANDORA_MX2	1
RUN_CAF_PANDORA_SPINE	0
RUN_CAF_PANDORA_SPINE_MX2	0
RUN_CAF_SPINE	0
RUN_CAF_SPINE_MX2	0
RUN_PERIOD	run1
SPINE_WORKFLOW_ID	1
START_POSITION	None
TWOBYTWO_RELEASE	v1.5.0
USER	duneproshift

File states

Total files	Finding	Unallocated	Allocated	Outputting	Processed	Not found	Failed
13	0	0	0	0	12	0	1

Job states

Total	Submitted	Started	Processing	Outputting	Finished	Notused	Aborted	Stalled	Jobscript error	Outputting failed	None processed
47	0	0	0	0	30	0	11	6	0	0	0

RSEs used

Name	Inputs	Outputs
T3_US_NERSC	29	84

Stats of processed input files as CSV or JSON, and of uploaded output files as CSV or JSON (up to 10000 files included)

File reset events, by site

Site	Allocated	Outputting
US_NERSC-CPU	16	0

Jobscript

#!/bin/bash

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#
# 	This script, which runs the pandora workflow, is based on the data production
# 	development by Matt Kramer (https://github.com/DUNE/2x2_sim/blob/feature_spine_on_data/run-cafmaker).
#
# 	Starting on July 1, 2025, please use the software deployed on the DUNE CVMFS repository.
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


#++++++++++++++++++++++++++++++++++++++++
# the script with common functions
#
# The release directory is selected by ${TWOBYTWO_RELEASE}.
# NOTE(review): helpers and variables used below but not defined in this
# file (justin_begin_of_job_commands, create_metadata_file,
# justin_end_of_job_commands, $envlog, ...) are presumably provided by this
# sourced script - confirm.
#++++++++++++++++++++++++++++++++++++++++
source /cvmfs/dune.opensciencegrid.org/dunend/2x2/releases/${TWOBYTWO_RELEASE}/ndlar_prod_scripts/ND_Production/toolbox/scripts/NDUtilsForJustin.sh


#++++++++++++++++++++++++++++++++++++++++++
# sanity check
#
# This jobscript only handles the "caf" data tier. For any other value of
# DATA_TIER it prints a message and stops; note that it stops with exit
# status 0.
#++++++++++++++++++++++++++++++++++++++++++
if [[ "${DATA_TIER}" != "caf" ]]; then
   echo -e "This script [$(basename $BASH_SOURCE)] submits the CAF analysis jobs. Please see the help menu. The data tier is not defined correctly." 
   exit 0
fi


#+++++++++++++++++++++++++++++++++++++++++
# environment variables
#
# Record the JustIN workflow ids used to look up the matching SPINE and Mx2
# files, both on stdout and (appended) in the file named by $envlog.
#+++++++++++++++++++++++++++++++++++++++++
echo -e "\tThe SPINE JustIN workflow id is [ ${SPINE_WORKFLOW_ID} ]\n" 2>&1 | tee -a $envlog
echo -e "\tThe Mx2 JustIN workflow id is [ ${MX2_WORKFLOW_ID} ]\n" 2>&1 | tee -a $envlog


#++++++++++++++++++++++++++++++++++++++++
# Begin JustIN
#
# NOTE(review): ${did}, ${INPUT_FILE} and ${WORKSPACE} are used further down
# but never assigned in this file; presumably this call sets them - confirm.
#++++++++++++++++++++++++++++++++++++++++
justin_begin_of_job_commands


#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# containers to store the parent and child files
#
# PARENT_FILES  starts with the input did; matched files are appended as
#               "<namespace>:<filename>" by get_matching_files
# MATCHED_FILES file names of the downloaded matching files
# CREATED_FILES output files produced by the cafmaker step
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
PARENT_FILES=("${did}")
MATCHED_FILES=()
CREATED_FILES=()


#++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# get the file namespace
#
# Arguments: $1 - name of a matched input file
# Outputs:   on stdout, "neardet-2x2-minerva" when the name contains "dst",
#            otherwise "neardet-2x2-lar" when it contains "SPINE";
#            nothing at all for any other name
# Returns:   0 in every case
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++
get_namespace()
{
   # local: callers use "filename" as their own loop variable, so it must
   # not be overwritten when this function is called outside a subshell.
   local filename="$1"

   # Order matters: a name containing both markers is treated as minerva.
   case "${filename}" in
      *dst*)   echo "neardet-2x2-minerva" ;;
      *SPINE*) echo "neardet-2x2-lar" ;;
   esac
}


#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Get the matching files
#
# Runs GetInputList.py for the input did with ${MATCHING_OPTION} and then
# picks up whatever it left in ./downloads.
#
# Globals read:    did, MATCHING_OPTION, MATCHED_TYPE, PYTHON_VERSION,
#                  CVMFS_WORKING_DIR, WORKSPACE, envlog
# Globals written: PARENT_FILES  (+= "<namespace>:<filename>" per file)
#                  MATCHED_FILES (+= "<filename>" per file)
# Side effects:    moves the downloaded files into ${WORKSPACE}, removes
#                  ./downloads and leaves the shell in ${WORKSPACE}
# Exits:           the whole job, with status 0, when ./downloads is
#                  missing or contains no files
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
get_matching_files()
{
   local download_dir="downloads"
   local -a downloaded=()
   local path filename file_namespace

   echo -e "Downloading the matching files for the cafmaker workflow." 2>&1 | tee -a $envlog

   # Subshell: keep the UPS/python environment out of the rest of the job.
   (
      source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
      setup python ${PYTHON_VERSION}

      echo -e "\tRunning the command [ python ${CVMFS_WORKING_DIR}/ndlar_prod_scripts/ND_Production/toolbox/scripts/GetInputList.py --file=${did} ${MATCHING_OPTION} ].\n" 2>&1 | tee -a $envlog
      # MATCHING_OPTION is deliberately unquoted: it carries several options.
      python "${CVMFS_WORKING_DIR}/ndlar_prod_scripts/ND_Production/toolbox/scripts/GetInputList.py" --file="${did}" ${MATCHING_OPTION} 2>&1 | tee -a $envlog
   )

   # Collect the downloads without cd'ing into the directory. The -e test
   # drops the literal pattern that an empty directory would leave behind.
   if [[ -d "${download_dir}" ]]; then
      for path in "${download_dir}"/*; do
         if [[ -e "${path}" ]]; then
            downloaded+=("${path}")
         fi
      done
   fi

   # A missing directory and an empty one both mean nothing was matched.
   if (( ${#downloaded[@]} == 0 )); then
      echo -e "\tFailed to get the matching ${MATCHED_TYPE} files.\n" 2>&1 | tee -a $envlog
      exit 0
   fi

   for path in "${downloaded[@]}"; do
      filename="${path##*/}"
      file_namespace="$(get_namespace "${filename}")"
      PARENT_FILES+=("${file_namespace}:${filename}")
      MATCHED_FILES+=("${filename}")
   done

   echo -e "\tThe parent files are [${PARENT_FILES[*]}].\n" 2>&1 | tee -a $envlog
   echo -e "\t\tThe matching files are [${MATCHED_FILES[*]}].\n" 2>&1 | tee -a $envlog

   mv "${downloaded[@]}" "${WORKSPACE}/"
   rm -rf "${download_dir}"
   cd "${WORKSPACE}"
}


#+++++++++++++++++++++++++++++++++++++++++++++
# parse the matching minerva file
#
# Runs ParseMatchedMx2Data.py on the downloaded Mx2 file (the first
# *dst*root file in ${WORKSPACE}) and swaps that file's entry in
# MATCHED_FILES for the .root file(s) the parser leaves in ./matched_mx2.
#
# Globals read:    WORKSPACE, did, PYTHON_VERSION, ROOT_VERSION,
#                  ROOT_QUALIFIER, CVMFS_WORKING_DIR, envlog
# Globals written: MX2_FILENAME, UPDATED_MX2_FILE, MATCHED_FILES
# Side effects:    moves the content of ./matched_mx2 into ${WORKSPACE}
#                  and removes the directory
# Exits:           the whole job, with status 0, when ./matched_mx2 is
#                  missing or holds no .root file
#+++++++++++++++++++++++++++++++++++++++++++++
parse_matching_mx2_file()
{
   local parsed_dir="matched_mx2"
   local -a parsed=() kept=()
   local path filename

   echo -e "Parse the matching mx2 files using the input file metadata." 2>&1 | tee -a $envlog
   cd "${WORKSPACE}"

   # Glob rather than parse `ls`; stays empty when no dst file is present.
   MX2_FILENAME=""
   for path in *dst*root; do
      if [[ -e "${path}" ]]; then
         MX2_FILENAME="${path}"
         break
      fi
   done

   # Subshell: keep the UPS/python/root environment out of the rest of the job.
   (
       source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
       setup python ${PYTHON_VERSION}
       setup root ${ROOT_VERSION} -q ${ROOT_QUALIFIER}

       echo -e "\tRunning the command [ python ${CVMFS_WORKING_DIR}/ndlar_prod_scripts/ND_Production/toolbox/scripts/ParseMatchedMx2Data.py --input_file=${did} --minerva_file=${MX2_FILENAME} ].\n" 2>&1 | tee -a $envlog
       python "${CVMFS_WORKING_DIR}/ndlar_prod_scripts/ND_Production/toolbox/scripts/ParseMatchedMx2Data.py" --input_file="${did}" --minerva_file="${MX2_FILENAME}"
   )

   if [[ -d "${parsed_dir}" ]]; then
      for path in "${parsed_dir}"/*.root; do
         if [[ -e "${path}" ]]; then
            parsed+=("${path}")
         fi
      done
   fi

   # No parsed file (directory missing or empty) is a failure; never append
   # an empty name to MATCHED_FILES.
   if (( ${#parsed[@]} == 0 )); then
      echo -e "\tFailed to get the parsed mx2 matching file.\n" 2>&1 | tee -a $envlog
      exit 0
   fi

   # Drop the raw Mx2 file from the list, then add the parsed replacement(s).
   for filename in "${MATCHED_FILES[@]}"; do
      if [[ "${filename}" != "${MX2_FILENAME}" ]]; then
         kept+=("${filename}")
      fi
   done
   MATCHED_FILES=("${kept[@]}")
   for path in "${parsed[@]}"; do
      MATCHED_FILES+=("${path##*/}")
   done
   UPDATED_MX2_FILE="${parsed[0]##*/}"

   mv "${parsed_dir}"/* "${WORKSPACE}/"
   rm -rf "${parsed_dir}"

   echo -e "Completed parsing the matching mx2 files using the input file metadata." 2>&1 | tee -a $envlog
}


#+++++++++++++++++++++++++++++++++++++++++
# Run the cafmaker workflow
#
# Builds the comma-separated input list (matched files first, the job's
# input file last), derives the output file name from the input file name,
# generates the fcl with MakeCafFhiclFile.py and runs makeCAF on it.
#
# Globals read:    WORKSPACE, DATA_STREAM, INPUT_FILE, MATCHED_FILES,
#                  RUN_CAF_MX2, CVMFS_WORKING_DIR, envlog
# Globals written: DATA_FILES, CAF_OUTPUT_FILE, CAF_FLAT_OUTPUT_FILE,
#                  CREATED_FILES (+= both output files)
# Exits:           the whole job, with status 1, when either the CAF file
#                  or the flat CAF file is missing after makeCAF
#+++++++++++++++++++++++++++++++++++++++++
execute_cafmaker_workflow()
{
   local filename
   local -a flist=()

   echo -e "Enter executing the caf maker workflow for data stream [${DATA_STREAM}] and input file [${INPUT_FILE}]" 2>&1 | tee -a $envlog
   cd "${WORKSPACE}"

   DATA_FILES=""
   for filename in "${MATCHED_FILES[@]}"; do
      DATA_FILES+="${filename},"
   done
   DATA_FILES+="${INPUT_FILE}"
   echo -e "\tThe input files are [${DATA_FILES}]" 2>&1 | tee -a $envlog

   # The output name reuses the leading fields of the input name: five
   # "_"-separated fields for the Mx2-only mode, two "-"-separated fields
   # otherwise.
   # NOTE(review): the stamp is labelled "CDT" but `date` reports the worker
   # node's local time zone - confirm that this is intended.
   if [[ "${RUN_CAF_MX2}" == "1" ]]; then
      IFS='_' read -r -a flist <<< "${INPUT_FILE}"
      CAF_OUTPUT_FILE="${flist[0]}_${flist[1]}_${flist[2]}_${flist[3]}_${flist[4]}_$(date +'%y%m%d%H%M%S')_CDT.CAF.root"
   else
      IFS='-' read -r -a flist <<< "${INPUT_FILE}"
      CAF_OUTPUT_FILE="${flist[0]}-${flist[1]}-$(date +'%Y_%m_%d_%H_%M_%S')_CDT.CAF.root"
   fi
   echo -e "\tThe output caf file name is [${CAF_OUTPUT_FILE}]" 2>&1 | tee -a $envlog

   # Subshell: keep the cafmaker environment out of the rest of the job.
   (
       source "${CVMFS_WORKING_DIR}/cafmaker/ND_CAFMaker/ndcaf_setup.sh"

       echo -e "\tRunning the command [ python ${CVMFS_WORKING_DIR}/ndlar_prod_scripts/ND_Production/toolbox/scripts/MakeCafFhiclFile.py --infiles=${DATA_FILES} --outfile=${CAF_OUTPUT_FILE} ].\n" 2>&1 | tee -a $envlog
       python "${CVMFS_WORKING_DIR}/ndlar_prod_scripts/ND_Production/toolbox/scripts/MakeCafFhiclFile.py" --infiles="${DATA_FILES}" --outfile="${CAF_OUTPUT_FILE}"

       # Glob rather than parse `ls`. Exactly one fcl file is expected; the
       # value stays empty when none was generated, which makes makeCAF and
       # the output checks below fail.
       fcl_files=( *.fcl )
       CAFFCLFILE=""
       if [[ -e "${fcl_files[0]}" ]]; then
          CAFFCLFILE="${fcl_files[0]}"
       fi
       if (( ${#fcl_files[@]} > 1 )); then
          echo -e "\tWARNING::Found ${#fcl_files[@]} fcl files, using [${CAFFCLFILE}].\n" 2>&1 | tee -a $envlog
       fi

       echo -e "\tRunning the command [ export CAFFCLFILE=${CAFFCLFILE} ].\n" 2>&1 | tee -a $envlog
       export CAFFCLFILE

       echo -e "\tRunning the command [ makeCAF --fcl=${CAFFCLFILE} ].\n" 2>&1 | tee -a $envlog
       makeCAF --fcl="${CAFFCLFILE}"
   )

   if [[ ! -f "${CAF_OUTPUT_FILE}" ]]; then
      echo -e "FATAL::The file [${CAF_OUTPUT_FILE}] does not exist! Will not continue." 2>&1 | tee -a $envlog
      exit 1
   fi

   CAF_FLAT_OUTPUT_FILE="${CAF_OUTPUT_FILE/.CAF.root/.CAF.flat.root}"
   if [[ ! -f "${CAF_FLAT_OUTPUT_FILE}" ]]; then
      echo -e "FATAL::The file [${CAF_FLAT_OUTPUT_FILE}] does not exist! Will not continue." 2>&1 | tee -a $envlog
      exit 1
   fi

   CREATED_FILES+=("${CAF_OUTPUT_FILE}")
   CREATED_FILES+=("${CAF_FLAT_OUTPUT_FILE}")

   echo -e "Exit executing the caf maker workflow for data stream [${DATA_STREAM}]\n" 2>&1 | tee -a $envlog
}


#++++++++++++++++++++++++++++++++++++++
# execute the jobs
#
# Exactly one matching mode is applied; the first RUN_CAF_* flag equal to
# "1" in the order below wins. Modes involving Mx2 also re-parse the
# downloaded Mx2 file. When no flag is set, cafmaker runs on the input file
# alone.
#+++++++++++++++++++++++++++++++++++++
echo -e "\n\n" 2>&1 | tee -a $envlog

if [[ "${RUN_CAF_PANDORA_SPINE_MX2}" == "1" ]]; then
   export MATCHED_TYPE="spine and mx2"
   export MATCHING_OPTION="--spine --mx2 --spine_justin=${SPINE_WORKFLOW_ID} --mx2_justin=${MX2_WORKFLOW_ID}"
   get_matching_files
   parse_matching_mx2_file

elif [[ "${RUN_CAF_PANDORA_SPINE}" == "1" ]]; then
   export MATCHED_TYPE="spine"
   export MATCHING_OPTION="--spine --spine_justin=${SPINE_WORKFLOW_ID}"
   get_matching_files

elif [[ "${RUN_CAF_PANDORA_MX2}" == "1" || "${RUN_CAF_SPINE_MX2}" == "1" ]]; then
   export MATCHED_TYPE="mx2"
   export MATCHING_OPTION="--mx2  --mx2_justin=${MX2_WORKFLOW_ID}"
   get_matching_files
   parse_matching_mx2_file
fi

execute_cafmaker_workflow

# NOTE(review): WORKFLOW, NAMESPACE and APPLICATION_DATA_TIER are not used
# again in this file; presumably create_metadata_file consumes them - confirm.
WORKFLOW+=("cafmaker")
WORKFLOW+=("cafmaker_flat")

export NAMESPACE="neardet-2x2-lar"
export APPLICATION_DATA_TIER="caf-analysis"


#++++++++++++++++++++++++++++++++++++++++
# create metadata json file
#++++++++++++++++++++++++++++++++++++++++
create_metadata_file


#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# remove all downloaded matching files; they must not be transferred to the rucio storage element
#
# Every *.root file whose name does not contain "CAF" is deleted from the
# current directory.
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
for filename in *.root; do
    # An unmatched glob stays literal ("*.root"); skip it rather than rm it.
    [[ -e "${filename}" ]] || continue

    if [[ "${filename}" != *"CAF"* ]]; then
       echo -e "\tRemoving the filename [${filename}]\n" 2>&1 | tee -a $envlog
       rm -- "${filename}"
    fi
done


#+++++++++++++++++++++++++++++++++++++++
# End of justin job running
#+++++++++++++++++++++++++++++++++++++++
justin_end_of_job_commands



######################################
#
# END OF RUNNING NDLAr CAFMAKER JOBS
#
######################################

exit 0