Workflow 3298, Stage 1
| Priority | 50 |
| Processors | 1 |
| Wall seconds | 18000 |
| Image | /cvmfs/singularity.opensciencegrid.org/fermilab/fnal-wn-sl7:latest |
| RSS bytes | 4194304000 (4000 MiB) |
| Max distance for inputs | 100.0 |
| Enabled input RSEs | CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, MONTECARLO, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC |
| Enabled output RSEs | CERN_PDUNE_EOS, DUNE_CA_SFU, DUNE_CERN_EOS, DUNE_ES_PIC, DUNE_FR_CCIN2P3_DISK, DUNE_IN_TIFR, DUNE_IT_INFN_CNAF, DUNE_UK_GLASGOW, DUNE_UK_LANCASTER_CEPH, DUNE_UK_MANCHESTER_CEPH, DUNE_US_BNL_SDCC, DUNE_US_FNAL_DISK_STAGE, FNAL_DCACHE, FNAL_DCACHE_STAGING, FNAL_DCACHE_TEST, NIKHEF, PRAGUE, QMUL, RAL-PP, RAL_ECHO, SURFSARA, T3_US_NERSC |
| Enabled sites | BR_CBPF, CA_SFU, CERN, CH_UNIBE-LHEP, CZ_FZU, ES_CIEMAT, ES_PIC, FR_CCIN2P3, IT_CNAF, NL_NIKHEF, NL_SURFsara, UK_Bristol, UK_Brunel, UK_Durham, UK_Edinburgh, UK_Glasgow, UK_Lancaster, UK_Liverpool, UK_Manchester, UK_Oxford, UK_QMUL, UK_RAL-PPD, UK_RAL-Tier1, UK_Sheffield, US_Colorado, US_FNAL-FermiGrid, US_FNAL-T1, US_Michigan, US_PuertoRico, US_SU-ITS, US_Swan, US_UChicago, US_UConn-HPC, US_UCSD, US_Wisconsin |
| Scope | usertests |
| Events for this stage |
Output patterns
| | Destination | Pattern | Lifetime | For next stage | RSE expression |
|---|---|---|---|---|---|
| 1 | https://fndcadoor.fnal.gov:2880/dune/scratch/users/pgranger/cafs/fnal/03298/1 | caf_fd_hd*.root | | | |
Environment variables
| Name | Value |
|---|---|
| DUNE_QUALIFIER | e26:prof |
| DUNE_VERSION | v10_10_00d00 |
| FCL_FILE | /cvmfs/fifeuser4.opensciencegrid.org/sw/dune/823ff6a419a8685db70967089497d4f07fb2d380/reco_caf.fcl |
| HAS_ART_OUTPUT | false |
File states
Job states
| Total | Submitted | Started | Processing | Outputting | Finished | Notused | Aborted | Stalled | Jobscript error | Outputting failed | None processed |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 16521 | 0 | 0 | 0 | 0 | 13246 | 40 | 276 | 840 | 2119 | 0 | 0 |
RSEs used
| Name | Inputs | Outputs |
|---|---|---|
| PRAGUE | 17298 | 0 |
| SURFSARA | 4551 | 0 |
| NIKHEF | 2753 | 0 |
| RAL_ECHO | 1715 | 0 |
| QMUL | 1434 | 0 |
| RAL-PP | 1352 | 0 |
| DUNE_FR_CCIN2P3_DISK | 1061 | 0 |
| DUNE_ES_PIC | 244 | 0 |
| None | 0 | 874 |
Stats of processed input files as CSV or JSON, and of uploaded output files as CSV or JSON (up to 10000 files included)
File reset events, by site
| Site | Allocated | Outputting |
|---|---|---|
| UK_RAL-PPD | 609 | 10 |
| UK_Manchester | 597 | 0 |
| CERN | 519 | 0 |
| CZ_FZU | 421 | 0 |
| US_UChicago | 392 | 20 |
| UK_RAL-Tier1 | 281 | 6 |
| ES_PIC | 246 | 1 |
| US_FNAL-FermiGrid | 240 | 10 |
| US_FNAL-T1 | 193 | 0 |
| US_UCSD | 146 | 0 |
| UK_QMUL | 107 | 9 |
| UK_Lancaster | 103 | 0 |
| UK_Oxford | 95 | 0 |
| UK_Durham | 86 | 0 |
| IT_CNAF | 81 | 7 |
| US_Wisconsin | 68 | 0 |
| NL_NIKHEF | 53 | 6 |
| UK_Bristol | 30 | 0 |
| UK_Glasgow | 27 | 0 |
| UK_Sheffield | 10 | 0 |
| BR_CBPF | 3 | 0 |
Jobscript
#!/bin/bash
#
# justIN jobscript: run lar over a batch of input files to produce a CAF file.
#
# Usage: takes no command-line arguments; everything is driven by the
# environment.
#
# Environment variables read by this script:
#   DUNE_VERSION       dunesw version to set up      (default: v09_91_01d00)
#   DUNE_QUALIFIER     qualifier passed to setup -q  (default: e26:prof)
#   FCL_FILE           fcl configuration handed to "lar -c"
#   FCL_TAR_DIR_LOCAL  optional directory, only listed for debugging
#   JUSTIN_PROCESSORS  thread count for the numeric libraries (default: 1)
#   JUSTIN_PATH        directory containing justin-get-file
#   JUSTIN_WORKFLOW_ID used in the name of the output ROOT file
#   JUSTIN_JOBSUB_ID   used in the name of the log tarball
#   (the JUSTIN_* variables are presumably provided by the justIN wrapper
#   job -- confirm against the justIN documentation)
#
# Outline: set up the software, request up to 10 input files with
# justin-get-file, run lar on them, copy caf.root to
# caf_fd_hd_atmo_<workflow id>_<timestamp>.root and tar up the *.log files.
#
source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh
setup metacat
export METACAT_SERVER_URL=https://metacat.fnal.gov:9443/dune_meta_prod/app
export METACAT_AUTH_SERVER_URL=https://metacat.fnal.gov:8143/auth/dune

# Disabled: earlier hard-coded fcl choices and custom-tarball setup.
#CAF_FCL="cafmaker_dunevd10kt_1x8x6_3view_30deg_runreco-nuenergy_geov3.fcl"
# RECO_FCL="reco2_atmos_dune10kt_1x2x6_geov5.fcl"
# CAF_FCL="cafmaker_atmos_dune10kt_1x2x6_runreco-nuenergy-nuangular_geov5.fcl"
# echo "Using CAF fcl: ${CAF_FCL}"
# export INPUT_TAR_DIR_LOCAL=${CODE_TAR_DIR_LOCAL}
# if [ ! -z "$FCL_TAR_DIR_LOCAL" ]; then
# echo "Using custom fcls from $FCL_TAR_DIR_LOCAL"
# source ${CODE_TAR_DIR_LOCAL}/*/localProducts*/setup-grid
# mrbslp
# fi

# Debug listing. When FCL_TAR_DIR_LOCAL is unset or empty the expansion
# vanishes and ls lists the current directory, as before; when it is set the
# value is passed as a single, quoted argument.
ls -lht ${FCL_TAR_DIR_LOCAL:+"${FCL_TAR_DIR_LOCAL}"}

#Setup recent lar software suite
setup dunesw \
  "${DUNE_VERSION:-v09_91_01d00}" \
  -q "${DUNE_QUALIFIER:-e26:prof}"

# Stop here if the setup did not put lar on PATH. Failing before any
# justin-get-file call means a broken environment never claims input files.
if ! command -v lar > /dev/null 2>&1; then
  echo "ERROR: lar not found after setting up dunesw ${DUNE_VERSION:-v09_91_01d00} -q ${DUNE_QUALIFIER:-e26:prof}" >&2
  exit 1
fi
#echo "printing env"
#env
# Fall back to a single processor when JUSTIN_PROCESSORS is unset or empty.
: "${JUSTIN_PROCESSORS:=1}"
echo "Justin processors: ${JUSTIN_PROCESSORS}"

# Export the same thread count under every <PREFIX>_NUM_THREADS name that the
# numeric libraries look at.
for thread_prefix in TF OPENBLAS JULIA MKL NUMEXPR OMP; do
  export "${thread_prefix}_NUM_THREADS=${JUSTIN_PROCESSORS}"
done
unset thread_prefix
#
# Request up to 10 input files. Each non-empty justin-get-file reply is read
# as whitespace-separated fields: the first is the DID, the second the PFN
# (a third field, presumably the RSE, is ignored). DIDs are collected in
# did.list and PFNs in file.list, one per line.
echo "Will use justin-get-file"

# Start from empty lists so a rerun in the same directory cannot pick up
# entries left behind by a previous run.
rm -f -- file.list did.list

for nf in {1..10}; do
  DID_PFN_RSE=$("${JUSTIN_PATH}/justin-get-file")

  ##Check that any file was returned
  if [ -z "${DID_PFN_RSE}" ]; then
    echo "Could not get file"
    # Keep trying on the remaining iterations rather than giving up.
    continue
  fi

  # Split with read instead of an unquoted echo piped to cut: the reply is
  # never word-split or glob-expanded, so a PFN containing ? or * is safe.
  read -r DID FILE _ <<< "${DID_PFN_RSE}"

  if [ -z "${FILE}" ]; then
    echo "Ignoring justin-get-file reply without a PFN: ${DID_PFN_RSE}" >&2
    continue
  fi

  printf '%s\n' "${DID}" >> did.list
  printf '%s\n' "${FILE}" >> file.list
done

#Exit if file.list does not exist (or is empty)
if [ ! -s file.list ]; then
  echo "Nothing to process - exit jobscript"
  exit 0
fi
# Timestamp shared by every file name produced below.
# NOTE(review): the timestamp has one-second resolution, so two jobs of the
# same workflow starting lar in the same second would produce identically
# named output files -- confirm whether the destination tolerates that.
now=$(date -u +"%Y%m%dT%H%M%SZ")
lar_log="caf_${now}.log"
caf_output="caf_fd_hd_atmo_${JUSTIN_WORKFLOW_ID}_${now}.root"

####Run cafmaker
echo "now run lar on these files"
cat file.list
cat did.list

# lar reads its input list from file.list; its stdout goes to the log file.
lar -c "${FCL_FILE}" -S file.list > "${lar_log}"
larExit=$?
echo "lar exit code $larExit"
echo '=== Start last 100 lines of lar log file ==='
tail -n 100 "${lar_log}"
echo '=== End last 100 lines of lar log file ==='

# Assume failure until every step has succeeded. The processed-PFN list starts
# empty and is only filled in once lar has succeeded and its output has been
# copied, so a failed run never reports its inputs as processed.
jobscriptExit=1
: > justin-processed-pfns.txt

if [ "${larExit}" -ne 0 ]; then
  # Oh :(
  echo "lar failed with exit code ${larExit}" >&2
elif [ ! -f caf.root ]; then
  echo "lar exited 0 but caf.root was not produced" >&2
elif ! cp -- caf.root "${caf_output}"; then
  echo "could not copy caf.root to ${caf_output}" >&2
else
  # Success !
  cp -- file.list justin-processed-pfns.txt
  jobscriptExit=0
fi

echo "processed files"
cat justin-processed-pfns.txt

# Bookkeeping copies of the processed PFNs, the DIDs and the input PFNs.
cp -- justin-processed-pfns.txt "caf_${now}.pfns"
cat did.list >> "caf_${now}.did"
cat file.list >> "caf_${now}.file"
# cp caf.root.json caf_$now.root.json
ls -lRS

# Create compressed tar file with all log files; every @ in the job id is
# replaced by _ in the archive name.
tar zcf "${JUSTIN_JOBSUB_ID//@/_}.logs.tgz" *.log

exit "${jobscriptExit}"