justIN           Dashboard       Workflows       Jobs       AWT       Sites       Storages       Docs       Login

Workflow 1576, Stage 1

Priority 50
Processors 1
Wall seconds 3600
Image /cvmfs/singularity.opensciencegrid.org/fermilab/fnal-wn-sl7:latest
RSS bytes 2097152000 (2000 MiB)
Max distance for inputs 102.0
Enabled input RSEs CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, MONTECARLO, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC
Enabled output RSEs CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC
Enabled sites US_NERSC-CPU
Scope testpro
Events for this stage

Output patterns

 DestinationPatternLifetimeFor next stageRSE expression
1Rucio testpro:T3_US_NERSC-fnal-w1576s1p1*.log2592000False
2Rucio testpro:T3_US_NERSC-fnal-w1576s1p2*.root2592000False

Environment variables

NameValue
DATA_STREAMreco
DATA_TIERcaf
DATA_TYPEdata
DEBUG_SUBMISSION_SCRIPT0
DETECTOR_CONFIGproto_nd
END_POSITIONNone
JOBSCRIPT_TEST0
MX2_WORKFLOW_ID772
NEVENTS-1
RUN_CAF_MX20
RUN_CAF_PANDORA0
RUN_CAF_PANDORA_MX21
RUN_CAF_PANDORA_SPINE0
RUN_CAF_PANDORA_SPINE_MX20
RUN_CAF_SPINE0
RUN_CAF_SPINE_MX20
RUN_PERIODrun1
SPINE_WORKFLOW_ID1
START_POSITIONNone
TWOBYTWO_RELEASEv1.5.0
USERduneproshift

File states

Total filesFindingUnallocatedAllocatedOutputtingProcessedNot foundFailed
1300001201

Job states

TotalSubmittedStartedProcessingOutputtingFinishedNotusedAbortedStalledJobscript errorOutputting failedNone processed
480000300108000
Files processed00112233445566778899101011111212Aug-19 05:00Aug-19 06:00Aug-19 07:00Files processedBin start timesNumber per binUS_NERSC-CPU
Replicas per RSE13380.00057375369.7499999985656Replicas per RSET3_US_NERSC (100%)

RSEs used

NameInputsOutputs
T3_US_NERSC2884

Stats of processed input files as CSV or JSON, and of uploaded output files as CSV or JSON (up to 10000 files included)

File reset events, by site

SiteAllocatedOutputting
US_NERSC-CPU150

Jobscript

#!/bin/bash

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#
# 	This script for running the pandora workflow is based on the data production
# 	development by Matt Kramer (https://github.com/DUNE/2x2_sim/blob/feature_spine_on_data/run-cafmaker)
# 
#	Starting on July 1, 2025, please use the software deployed on dune cvmfs repository
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


#++++++++++++++++++++++++++++++++++++++++
# the script with common functions
#++++++++++++++++++++++++++++++++++++++++
# Provides the justin_* begin/end hooks and create_metadata_file used below.
# NOTE(review): presumably also defines $envlog, $WORKSPACE, $CVMFS_WORKING_DIR,
# $PYTHON_VERSION and the justIN-supplied $did used later -- confirm against
# NDUtilsForJustin.sh for the pinned ${TWOBYTWO_RELEASE}.
source /cvmfs/dune.opensciencegrid.org/dunend/2x2/releases/${TWOBYTWO_RELEASE}/ndlar_prod_scripts/ND_Production/toolbox/scripts/NDUtilsForJustin.sh


#++++++++++++++++++++++++++++++++++++++++++
# sanity check
#++++++++++++++++++++++++++++++++++++++++++
# This jobscript only handles the "caf" data tier; bail out otherwise.
# NOTE(review): exits with status 0 on this failure -- presumably deliberate so
# justIN does not count the job as failed; confirm the intended exit convention.
if [[ "${DATA_TIER}" != "caf" ]]; then
   echo -e "This script [$(basename $BASH_SOURCE)] submits the CAF analysis jobs. Please see the help menu. The data tier is not defined correctly." 
   exit 0
fi


#+++++++++++++++++++++++++++++++++++++++++
# environment variables
#+++++++++++++++++++++++++++++++++++++++++
# Log the upstream justIN workflow ids whose outputs this job matches against.
echo -e "\tThe SPINE JustIN workflow id is [ ${SPINE_WORKFLOW_ID} ]\n" 2>&1 | tee -a $envlog
echo -e "\tThe Mx2 JustIN workflow id is [ ${MX2_WORKFLOW_ID} ]\n" 2>&1 | tee -a $envlog


#++++++++++++++++++++++++++++++++++++++++
# Begin JustIN
#++++++++++++++++++++++++++++++++++++++++
# Framework hook: allocates the input file and sets up the job workspace
# (defined in the sourced NDUtilsForJustin.sh).
justin_begin_of_job_commands


#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# containers to store the parent and child files
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# PARENT_FILES  : "namespace:filename" DIDs of every input consumed, seeded
#                 with the justIN-supplied ${did}
# MATCHED_FILES : bare filenames of the downloaded companion files
# CREATED_FILES : output files produced by this job
PARENT_FILES=("${did}")
MATCHED_FILES=()
CREATED_FILES=()


#++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# get the file namespace
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++
get_namespace()
{
   # Map a matched filename to its Rucio namespace.
   # Arguments: $1 - filename of a matched companion file
   # Outputs  : namespace string on stdout; prints nothing (caller gets "")
   #            when the filename matches neither pattern
   local filename=$1

   # BUGFIX: the tests previously ran a nonexistent command via "$(unknown)",
   # which always compared an empty string; test the filename argument instead.
   if [[ "${filename}" == *"dst"* ]]; then
      # Minerva (Mx2) dst files
      echo "neardet-2x2-minerva"
   elif [[ "${filename}" == *SPINE* ]]; then
      # SPINE reconstruction files
      echo "neardet-2x2-lar"
   fi

}	


#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Get the matching files
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
get_matching_files()
{
   # Download the companion files matched to ${did} (selection controlled by
   # ${MATCHING_OPTION}) into ./downloads, record them in PARENT_FILES and
   # MATCHED_FILES, then move them into ${WORKSPACE}.
   # Reads : did, MATCHING_OPTION, MATCHED_TYPE, CVMFS_WORKING_DIR,
   #         PYTHON_VERSION, WORKSPACE, envlog
   # Exits : status 0 if the download directory was never created
   echo -e "Downloading the matching files for the cafmaker workflow." 2>&1 | tee -a $envlog
   (
      # subshell: keep the DUNE products setup out of the caller's environment
      source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh 
      setup python ${PYTHON_VERSION}

      echo -e "\tRunning the command [ python ${CVMFS_WORKING_DIR}/ndlar_prod_scripts/ND_Production/toolbox/scripts/GetInputList.py --file=${did} ${MATCHING_OPTION} ].\n" 2>&1 | tee -a $envlog 
      python ${CVMFS_WORKING_DIR}/ndlar_prod_scripts/ND_Production/toolbox/scripts/GetInputList.py --file=${did} ${MATCHING_OPTION} 2>&1 | tee -a $envlog
   )
  
   namespace="downloads" 
   if [ ! -d ${namespace} ]; then
      echo -e "\tFailed to get the matching ${MATCHED_TYPE} files.\n" 2>&1 | tee -a $envlog
      exit 0
   else 
      cd ${namespace} 
      for filename in * ;
      do
          # guard against an empty directory, where the glob stays literal
          [ -e "${filename}" ] || continue
          # BUGFIX: "$(unknown)" ran a nonexistent command; use the loop
          # variable so each file is recorded as "<namespace>:<filename>"
          file_namespace="$(get_namespace "${filename}")"
          PARENT_FILES+=("${file_namespace}:${filename}")
          MATCHED_FILES+=("${filename}")
      done 
   fi
 
   echo -e "\tThe parent files are [${PARENT_FILES[@]}].\n" 2>&1 | tee -a $envlog
   echo -e "\t\tThe matching files are [${MATCHED_FILES[@]}].\n" 2>&1 | tee -a $envlog

   # leave the downloads in the workspace and drop the scratch directory
   cd ${WORKSPACE}
   mv ${namespace}/* ${WORKSPACE}/
   rm -rf ${namespace}
}


#+++++++++++++++++++++++++++++++++++++++++++++
# parse the matching minerva file
#+++++++++++++++++++++++++++++++++++++++++++++
parse_matching_mx2_file()
{
   # Re-write the downloaded Mx2 (minerva dst) file so it contains only the
   # data matched to the input file ${did}, then replace the original entry
   # in MATCHED_FILES with the parsed copy.
   # Reads : did, CVMFS_WORKING_DIR, PYTHON_VERSION, ROOT_VERSION,
   #         ROOT_QUALIFIER, WORKSPACE, envlog
   # Exits : status 0 if the parser did not produce the matched_mx2 directory
   echo -e "Parse the matching mx2 files using the input file metadata." 2>&1 | tee -a $envlog
   cd ${WORKSPACE}
   # the downloaded minerva file is the only *dst*root file in the workspace
   MX2_FILENAME=`ls *dst*root`

   (
       # subshell: keep the DUNE products setup out of the caller's environment
       source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
       setup python ${PYTHON_VERSION}
       setup root ${ROOT_VERSION} -q ${ROOT_QUALIFIER}   
   
       echo -e "\tRunning the command [ python ${CVMFS_WORKING_DIR}/ndlar_prod_scripts/ND_Production/toolbox/scripts/ParseMatchedMx2Data.py --input_file=${did} --minerva_file=${MX2_FILENAME} ].\n" 2>&1 | tee -a $envlog
       python ${CVMFS_WORKING_DIR}/ndlar_prod_scripts/ND_Production/toolbox/scripts/ParseMatchedMx2Data.py --input_file=${did} --minerva_file=${MX2_FILENAME}
   ) 

   namespace="matched_mx2"
   if [ ! -d ${namespace} ]; then
      echo -e "\tFailed to get the parsed mx2 matching file.\n" 2>&1 | tee -a $envlog
      exit 0
   else
      # rebuild MATCHED_FILES without the original (unparsed) mx2 file ...
      TMP_ARRAY=("${MATCHED_FILES[@]}")
      unset MATCHED_FILES

      for filename in "${TMP_ARRAY[@]}" ; 
      do
         # BUGFIX: "$(unknown)" ran a nonexistent command; compare the loop
         # variable against the mx2 filename being replaced
         if [[ "${filename}" != "${MX2_FILENAME}" ]]; then
            MATCHED_FILES+=("${filename}")
         fi
      done
      
      # ... then append the parsed replacement produced by the parser
      cd ${namespace}
      UPDATED_MX2_FILE=`ls *.root`
      MATCHED_FILES+=("${UPDATED_MX2_FILE}")      

      cd ${WORKSPACE}
      mv ${namespace}/* ${WORKSPACE}/
      rm -rf ${namespace}
   fi

   echo -e "Completed parsing the matching mx2 files using the input file metadata." 2>&1 | tee -a $envlog
}


#+++++++++++++++++++++++++++++++++++++++++
# Run the cafmaker workflow
#+++++++++++++++++++++++++++++++++++++++++
execute_cafmaker_workflow() 
{
   # Run the ND_CAFMaker over the primary input file plus every matched
   # companion file collected in MATCHED_FILES, producing a CAF and a
   # flat CAF in ${WORKSPACE}.
   # Reads : MATCHED_FILES, INPUT_FILE, DATA_STREAM, RUN_CAF_MX2,
   #         CVMFS_WORKING_DIR, WORKSPACE, envlog
   # Writes: CREATED_FILES (appends the two CAF outputs)
   # Exits : 1 if either expected output file is missing
   echo -e "Enter executing the caf maker workflow for data stream [${DATA_STREAM}] and input file [${INPUT_FILE}]" 2>&1 | tee -a $envlog
   cd ${WORKSPACE}

   # build the comma-separated input list: matched files, then the primary input
   DATA_FILES=""
   for filename in "${MATCHED_FILES[@]}" ;
   do 
       # BUGFIX: "$(unknown)" ran a nonexistent command, producing empty list
       # entries; append the loop variable instead
       DATA_FILES+="${filename}"
       DATA_FILES+=","
   done
   DATA_FILES+="${INPUT_FILE}"
   echo -e "\tThe input files are [${DATA_FILES}]" 2>&1 | tee -a $envlog

   # derive the timestamped output name from the input name; the two schemes
   # differ because Mx2-matched inputs are '_'-delimited, others '-'-delimited
   if [[ "${RUN_CAF_MX2}" == "1" ]]; then
      IFS='_' read -a flist <<< "${INPUT_FILE}"
      CAF_OUTPUT_FILE="${flist[0]}_${flist[1]}_${flist[2]}_${flist[3]}_${flist[4]}_$(date +'%y%m%d%H%M%S')_CDT.CAF.root"
   else 
      IFS='-' read -a flist <<< "${INPUT_FILE}" 
      CAF_OUTPUT_FILE="${flist[0]}-${flist[1]}-$(date +'%Y_%m_%d_%H_%M_%S')_CDT.CAF.root"
   fi
   echo -e "\tThe output caf file name is [${CAF_OUTPUT_FILE}]" 2>&1 | tee -a $envlog

   (
       # subshell: keep the CAFMaker environment out of the caller's shell
       source ${CVMFS_WORKING_DIR}/cafmaker/ND_CAFMaker/ndcaf_setup.sh 

       echo -e "\tRunning the command [ python ${CVMFS_WORKING_DIR}/ndlar_prod_scripts/ND_Production/toolbox/scripts/MakeCafFhiclFile.py --infiles=${DATA_FILES} --outfile=${CAF_OUTPUT_FILE} ].\n" 2>&1 | tee -a $envlog
       python ${CVMFS_WORKING_DIR}/ndlar_prod_scripts/ND_Production/toolbox/scripts/MakeCafFhiclFile.py --infiles="${DATA_FILES}" --outfile=${CAF_OUTPUT_FILE}

       echo -e "\tRunning the command [ export CAFFCLFILE=`ls *.fcl` ].\n" 2>&1 | tee -a $envlog
       export CAFFCLFILE=`ls *.fcl`

       echo -e "\tRunning the command [ makeCAF --fcl=${CAFFCLFILE} ].\n" 2>&1 | tee -a $envlog
       makeCAF --fcl=${CAFFCLFILE}
   )

   # both outputs are mandatory; a missing file is a hard job failure
   if [ ! -f ${CAF_OUTPUT_FILE} ]; then
      echo -e "FATAL::The file [${CAF_OUTPUT_FILE}] does not exist! Will not continue." 2>&1 | tee -a $envlog
      exit 1
   fi

   CAF_FLAT_OUTPUT_FILE="${CAF_OUTPUT_FILE/.CAF.root/.CAF.flat.root}"
   if [ ! -f ${CAF_FLAT_OUTPUT_FILE} ]; then
      echo -e "FATAL::The file [${CAF_FLAT_OUTPUT_FILE}] does not exist! Will not continue." 2>&1 | tee -a $envlog
      exit 1
   fi

   CREATED_FILES+=("${CAF_OUTPUT_FILE}")
   CREATED_FILES+=("${CAF_FLAT_OUTPUT_FILE}")
 
   echo -e "Exit executing the caf maker workflow for data stream [${DATA_STREAM}]\n" 2>&1 | tee -a $envlog
}


#++++++++++++++++++++++++++++++++++++++
# execute the jobs
#+++++++++++++++++++++++++++++++++++++
#++++++++++++++++++++++++++++++++++++++
# execute the jobs
#+++++++++++++++++++++++++++++++++++++
echo -e "\n\n" 2>&1 | tee -a $envlog

# Choose which upstream products to fetch and match, driven by the workflow
# selection flags exported at submission time (exactly one is expected to be "1").
if [[ "${RUN_CAF_PANDORA_SPINE_MX2}" == "1" ]]; then 
   export MATCHED_TYPE="spine and mx2"
   export MATCHING_OPTION="--spine --mx2 --spine_justin=${SPINE_WORKFLOW_ID} --mx2_justin=${MX2_WORKFLOW_ID}"
   get_matching_files
   parse_matching_mx2_file

elif [[ "${RUN_CAF_PANDORA_SPINE}" == "1" ]]; then
     export MATCHED_TYPE="spine"
     export MATCHING_OPTION="--spine --spine_justin=${SPINE_WORKFLOW_ID}"
     get_matching_files

elif [[ "${RUN_CAF_PANDORA_MX2}" == "1" || "${RUN_CAF_SPINE_MX2}" == "1" ]]; then
     export MATCHED_TYPE="mx2"
     export MATCHING_OPTION="--mx2  --mx2_justin=${MX2_WORKFLOW_ID}"
     get_matching_files
     parse_matching_mx2_file
fi

execute_cafmaker_workflow

# workflow labels consumed by create_metadata_file for the two CAF outputs
WORKFLOW+=("cafmaker")
WORKFLOW+=("cafmaker_flat")

export NAMESPACE="neardet-2x2-lar"
export APPLICATION_DATA_TIER="caf-analysis"


#++++++++++++++++++++++++++++++++++++++++
# create metadata json file
#++++++++++++++++++++++++++++++++++++++++
create_metadata_file


#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# remove all downloaded matching files; we do not want them transferred to the rucio storage element
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
for filename in *.root ;
do 
    # guard against no *.root files, where the glob stays literal
    [ -e "${filename}" ] || continue
    # BUGFIX: "$(unknown)" ran a nonexistent command, so nothing was ever
    # matched or removed; operate on the loop variable instead
    if [[ "${filename}" != *"CAF"* ]]; then
       echo -e "\tRemoving the filename [${filename}]\n" 2>&1 | tee -a $envlog
       rm "${filename}"
    fi
done   


#+++++++++++++++++++++++++++++++++++++++
# End of justin job running
#+++++++++++++++++++++++++++++++++++++++
justin_end_of_job_commands



######################################
#
# END OF RUNNING NDLAr CAFMAKER JOBS
#
######################################

exit 0
justIN time: 2025-09-19 05:14:51 UTC       justIN version: 01.05.00