Workflow 11984, Stage 1
| Workflow | 11984 |
| Campaign | 382 |
| Priority | 50 |
| Processors | 1 |
| Wall seconds | 80000 |
| Image | /cvmfs/singularity.opensciencegrid.org/fermilab/fnal-wn-sl7:latest |
| RSS bytes | 5241831424 (4999 MiB) |
| Max distance for inputs | 100.0 |
| Enabled input RSEs | CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, MONTECARLO, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC |
| Enabled output RSEs | CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC |
| Enabled sites | BR_CBPF, CA_SFU, CA_Victoria, CERN, CH_UNIBE-LHEP, CZ_FZU, ES_CIEMAT, ES_PIC, FR_CCIN2P3, IT_CNAF, NL_NIKHEF, NL_SURFsara, UK_Bristol, UK_Brunel, UK_Durham, UK_Edinburgh, UK_Glasgow, UK_Imperial, UK_Lancaster, UK_Liverpool, UK_Manchester, UK_Oxford, UK_QMUL, UK_RAL-PPD, UK_RAL-Tier1, UK_Sheffield, US_BNL, US_Colorado, US_FNAL-FermiGrid, US_FNAL-T1, US_Michigan, US_NotreDame, US_PuertoRico, US_SU-ITS, US_Swan, US_UChicago, US_UConn-HPC, US_UCSD, US_Wisconsin |
| Scope | usertests |
| Events for this stage |
Output patterns
| | Destination | Pattern | Lifetime | For next stage | RSE expression |
|---|---|---|---|---|---|
| 1 | https://fndcadoor.fnal.gov:2880/dune/scratch/users/jjo/fnal/11984/1 | out_*.tgz | | | |
Environment variables
| Name | Value |
|---|---|
| INPUT_TAR_DIR_LOCAL | /cvmfs/fifeuser4.opensciencegrid.org/sw/dune/a4216cf97a94593d2b86b6ceaaa9de83d513111b |
| NUM_EVENTS | 10 |
File states
| Total files | Finding | Unallocated | Allocated | Outputting | Processed | Not found | Failed |
|---|---|---|---|---|---|---|---|
| 1000 | 0 | 0 | 0 | 0 | 1000 | 0 | 0 |
Job states
| Total | Submitted | Started | Processing | Outputting | Finished | Notused | Aborted | Stalled | Jobscript error | Outputting failed | None processed |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1385 | 0 | 0 | 0 | 0 | 1297 | 0 | 0 | 21 | 67 | 0 | 0 |
RSEs used
| Name | Inputs | Outputs |
|---|---|---|
| MONTECARLO | 1084 | 0 |
| None | 0 | 1000 |
Stats of processed input files as CSV or JSON, and of uploaded output files as CSV or JSON (up to 10000 files included)
File reset events, by site
| Site | Allocated | Outputting |
|---|---|---|
| CERN | 7 | 0 |
| CA_Victoria | 6 | 0 |
| ES_PIC | 3 | 0 |
| CZ_FZU | 1 | 0 |
Jobscript
#!/bin/bash
#
# fdhd_test.jobscript
# Simple testing jobscript to run a short FD-HD chain
#
# Environment read by this section:
#   INPUT_TAR_DIR_LOCAL - optional; when non-empty, its
#                         cffm-if/dune10kt-1x2x6 subdirectory is put at the
#                         front of the FHiCL and WireCell search paths
#   JUSTIN_PROCESSORS   - optional; value exported to the *_NUM_THREADS
#                         variables below (defaults to 1)
#
# NOTE(review): strict mode (set -euo pipefail) is deliberately not enabled;
# the sourced setup script lives outside this file and may not tolerate it —
# confirm before turning it on.

# ---- Setup environment ----
dune_setup=/cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
# Fail with a precise message when the setup script is unavailable, rather
# than letting the later `setup` call fail with a misleading dunesw error.
if [[ ! -r "${dune_setup}" ]]; then
  echo "Error: cannot read ${dune_setup}"
  exit 1
fi
# shellcheck source=/dev/null
source "${dune_setup}"

# Use requested version and qualifier
if ! setup dunesw v10_16_00d00 -q e26:prof; then
  echo "Error setting up dunesw v10_16_00d00"
  exit 1
fi

# Prefer uploaded FHiCLs and WireCell configs if provided
if [[ -n "${INPUT_TAR_DIR_LOCAL:-}" ]]; then
  cfg_dir="${INPUT_TAR_DIR_LOCAL}/cffm-if/dune10kt-1x2x6"
  # ${VAR:+:${VAR}} appends the previous value only when it is non-empty, so
  # an unset variable does not leave a trailing ':' (an empty path entry).
  # Tell art/lar where to find .fcl files
  export FW_SEARCH_PATH="${cfg_dir}${FW_SEARCH_PATH:+:${FW_SEARCH_PATH}}"
  export FCL_SEARCH_PATH="${cfg_dir}${FCL_SEARCH_PATH:+:${FCL_SEARCH_PATH}}"
  export FHICL_FILE_PATH="${cfg_dir}${FHICL_FILE_PATH:+:${FHICL_FILE_PATH}}"
  # WireCell lookup
  export WIRECELL_PATH="${cfg_dir}${WIRECELL_PATH:+:${WIRECELL_PATH}}"
fi

# Log the effective search paths so they can be checked in the job log.
echo "FW_SEARCH_PATH=${FW_SEARCH_PATH}"
echo "FCL_SEARCH_PATH=${FCL_SEARCH_PATH}"
echo "FHICL_FILE_PATH=${FHICL_FILE_PATH}"
echo "WIRECELL_PATH=${WIRECELL_PATH}"

# Parallelism: default to a single processor when unset or empty.
: "${JUSTIN_PROCESSORS:=1}"
export TF_NUM_THREADS="${JUSTIN_PROCESSORS}"
export OPENBLAS_NUM_THREADS="${JUSTIN_PROCESSORS}"
export JULIA_NUM_THREADS="${JUSTIN_PROCESSORS}"
export MKL_NUM_THREADS="${JUSTIN_PROCESSORS}"
export NUMEXPR_NUM_THREADS="${JUSTIN_PROCESSORS}"
export OMP_NUM_THREADS="${JUSTIN_PROCESSORS}"
# ---- Get one input file (MC counter or real file, depending on submission) ----
# The helper's output is consumed as "<did> <pfn> <rse>" separated by spaces.
# Empty output is treated as "nothing left to process" and ends the job
# successfully.
get_file_cmd="${JUSTIN_PATH}/justin-get-file"
# A missing helper would otherwise yield empty output and be reported as
# "No unprocessed files" with exit 0, hiding a broken environment.
if [[ ! -x "${get_file_cmd}" ]]; then
  echo "Error: ${get_file_cmd} not found or not executable (is JUSTIN_PATH set?)"
  exit 1
fi
did_pfn_rse="$("${get_file_cmd}")"
if [[ -z "${did_pfn_rse}" ]]; then
  echo "No unprocessed files available, exiting."
  exit 0
fi

# Split the first three space-separated fields; anything further is dropped.
IFS=' ' read -r did pfn rse _ <<<"${did_pfn_rse}"

# Second ':'-separated field of the DID (the whole DID when it has no ':').
name_only="${did#*:}"
name_only="${name_only%%:*}"

echo "Processing file: ${pfn}"
echo "RSE: ${rse}"
echo "DID: ${did}"

# ---- Set event count ----
nevents="${NUM_EVENTS:-10}"

# ---- Timestamp for unique output names ----
now=$(date -u +"%Y%m%dT%H%M%SZ")
# Keep the part of the jobsub ID before '@' and turn its first '.' into '_'.
jobid="${JUSTIN_JOBSUB_ID:-1}"
jobid="${jobid%%@*}"
jobid="${jobid/./_}"
stageid="${JUSTIN_STAGE_ID:-1}"
# ---- LArSoft chain ----
# Runs four lar steps in sequence (GEN -> G4 -> WireCell sim/SP -> truth
# labelling). Any failing step aborts the job with exit status 1.
# Reads:  nevents, name_only, jobid, stageid, now
# Writes: gen.root, g4.root and the per-job directory ${outdir}, which
#         receives the SP ROOT file and the renamed g4-tru-anode*.h5 files.

# 1. GEN (pure MC)
echo "Running: GEN stage ..."
ls -lh
if ! lar -n "${nevents}" -c gen_genie.fcl -o gen.root; then
  echo "GEN failed"
  exit 1
fi

# 2. G4
echo "Running: G4 stage ..."
ls -lh
if ! lar -c g4.fcl -s gen.root -o g4.root; then
  echo "G4 failed"
  exit 1
fi

# 3. WireCell Sim SP
echo "Running: WC SIM SP stage ..."
ls -lh
label_tag="${name_only}_${jobid}_${stageid}_${now}"
outdir="out_${label_tag}"
if ! mkdir -p "${outdir}"; then
  echo "Cannot create output directory ${outdir}"
  exit 1
fi
outFile="${label_tag}_sp.root"
if ! lar -c wcls_sim_sp.fcl -s g4.root -o "${outdir}/${outFile}"; then
  echo "WCLS failed"
  exit 1
fi

# 4. Truth labelling
echo "Running: True labelling stage ..."
ls -lh
truthdir="truth_${label_tag}"
# lar is run from inside ${truthdir}, so its g4-tru-anode*.h5 files can be
# collected from there; make sure we really are in that directory first.
if ! mkdir -p "${truthdir}" || ! pushd "${truthdir}"; then
  echo "Cannot enter truth directory ${truthdir}"
  exit 1
fi
if ! lar -c wcls-labelling2d_sep.fcl -s "../${outdir}/${outFile}"; then
  echo "Truth labeling failed"
  popd
  exit 1
fi
echo "Truth dir contents after lar:"
ls -lh

# Collect the truth files; nullglob makes a non-matching pattern expand to
# nothing instead of the literal pattern.
shopt -s nullglob
truth_files=(g4-tru-anode*.h5)
shopt -u nullglob
if (( ${#truth_files[@]} == 0 )); then
  echo "Warning: no g4-tru-anode*.h5 files found in ${truthdir}"
fi
for f in "${truth_files[@]}"; do
  # A failed move would silently drop a truth file from the tarball.
  if ! mv -v -- "${f}" "../${outdir}/${label_tag}_${f}"; then
    echo "Failed to move ${f} into ${outdir}"
    popd
    exit 1
  fi
done
popd

echo "Per-job output directory contents:"
# Listing is informational only; its status is deliberately ignored.
ls -lh "${outdir}" || true
# ---- Package per-job outputs into a tarball for stage-out ----
# Reads: outdir (per-job output directory), pfn (input being processed).
# The input is recorded as processed only after the tarball has been created
# and listed back successfully.
tarball="${outdir}.tgz"
echo "Creating tarball ${tarball} from ${outdir}"
if ! tar -czf "${tarball}" "${outdir}"; then
  echo "Failed to create tarball ${tarball}"
  # Remove any partial archive so a truncated file is not left behind.
  rm -f -- "${tarball}"
  exit 1
fi
echo "Tarball contents:"
# Listing the archive doubles as a readability check of the compressed file.
if ! tar -tzf "${tarball}"; then
  echo "Tarball ${tarball} could not be read back"
  rm -f -- "${tarball}"
  exit 1
fi
ls -lh "${tarball}"

# ---- Mark as processed ----
printf '%s\n' "${pfn}" > justin-processed-pfns.txt
echo "Job completed successfully."
exit 0