diff --git a/Common/Utils/include/CommonUtils/NameConf.h b/Common/Utils/include/CommonUtils/NameConf.h index fb10f929c9782..8d4c0a2c1c4f8 100644 --- a/Common/Utils/include/CommonUtils/NameConf.h +++ b/Common/Utils/include/CommonUtils/NameConf.h @@ -100,6 +100,9 @@ class NameConf : public o2::conf::ConfigurableParamHelper // CTF Dictionary static std::string getCTFDictFileName(); + // O2 Raw TF Filename + static std::string getRawTFFileName(uint32_t run, uint32_t orb, uint32_t id, const std::string& host, const std::string_view prefix = "o2_rawtf_dump"); + // Default CCDB server static std::string getCCDBServer(); diff --git a/Common/Utils/src/NameConf.cxx b/Common/Utils/src/NameConf.cxx index 45646284a878b..48cefacaf14c7 100644 --- a/Common/Utils/src/NameConf.cxx +++ b/Common/Utils/src/NameConf.cxx @@ -95,6 +95,11 @@ std::string NameConf::getCTFFileName(uint32_t run, uint32_t orb, uint32_t id, co return o2::utils::Str::concat_string(prefix, '_', fmt::format("run{:08d}_orbit{:010d}_tf{:010d}_{}", run, orb, id, host), ".root"); } +std::string NameConf::getRawTFFileName(uint32_t run, uint32_t orb, uint32_t id, const std::string& host, const std::string_view prefix) +{ + return o2::utils::Str::concat_string(prefix, '_', fmt::format("run{:08d}_orbit{:010d}_tf{:010d}_{}", run, orb, id, host), ".tf"); +} + std::string NameConf::getCTFDictFileName() { return o2::utils::Str::concat_string(CTFDICT, ".root"); diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/CMV.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/CMV.h index 109eff2654466..8195b3e39c689 100644 --- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/CMV.h +++ b/DataFormats/Detectors/TPC/include/DataFormatsTPC/CMV.h @@ -85,11 +85,13 @@ struct Data { return positive ? magnitude : -magnitude; } - // Encode from float: clamps magnitude to 15 bits, range ±255.992 + // Encode from float: truncates magnitude to 15 bits, range ±255.992 void setCMVFloat(float value) { const bool positive = (value >= 0.f); - const uint16_t magnitude = static_cast(std::abs(value) * 128.f + 0.5f) & 0x7FFF; + const uint16_t magnitude = static_cast( + std::lround(std::abs(value) * 128.f)) & + 0x7FFF; cmv = (positive ? 0x8000 : 0x0000) | magnitude; } }; @@ -119,4 +121,4 @@ struct Container { } // namespace o2::tpc::cmv -#endif \ No newline at end of file +#endif diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h index f3070d456afb1..7939387bc76a8 100644 --- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h +++ b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h @@ -15,6 +15,7 @@ #ifndef ALICEO2_DATAFORMATSTPC_CLUSTERNATIVE_H #define ALICEO2_DATAFORMATSTPC_CLUSTERNATIVE_H #ifndef GPUCA_GPUCODE_DEVICE +#include #include #include // for size_t #include @@ -62,6 +63,9 @@ struct ClusterNative { static constexpr int scalePadPacked = 64; //< ~60 is needed for 0.1mm precision, but power of two avoids rounding static constexpr int scaleSigmaTimePacked = 32; // 1/32nd of pad/timebin precision for cluster size static constexpr int scaleSigmaPadPacked = 32; + static constexpr int scaleSaturatedQtot = 8; + static constexpr int maxRegularQtot = 25 * 1024; + static constexpr int maxSaturatedQtot = (USHRT_MAX - maxRegularQtot) * scaleSaturatedQtot; uint32_t timeFlagsPacked; //< Contains the time in the lower 24 bits in a packed format, contains the flags in the // upper 8 bits @@ -83,7 +87,14 @@ struct ClusterNative { } GPUd() uint16_t getQmax() const { return qMax; } - GPUd() uint16_t getQtot() const { return qTot; } + GPUd() uint16_t getQtot() const + { + if (isSaturated()) [[unlikely]] { + auto sQtot = getSaturatedQtot(); + return sQtot < USHRT_MAX ? sQtot : USHRT_MAX; + } + return qTot; + } GPUd() uint8_t getFlags() const { return timeFlagsPacked >> 24; } GPUd() uint32_t getTimePacked() const { return timeFlagsPacked & 0xFFFFFF; } GPUd() void setTimePackedFlags(uint32_t timePacked, uint8_t flags) @@ -119,7 +130,13 @@ struct ClusterNative { /// Y = (12.4 - 0.5 * (66 - 1)) * 4.16mm = -83.616mm GPUd() float getPad() const { return unpackPad(padPacked); } GPUd() void setPad(float pad) { padPacked = packPad(pad); } - GPUd() float getSigmaTime() const { return float(sigmaTimePacked) * (1.f / scaleSigmaTimePacked); } + GPUd() float getSigmaTime() const + { + if (isSaturated()) [[unlikely]] { + return 0; + } + return float(sigmaTimePacked) * (1.f / scaleSigmaTimePacked); + } GPUd() void setSigmaTime(float sigmaTime) { uint32_t tmp = sigmaTime * scaleSigmaTimePacked + 0.5; @@ -138,6 +155,31 @@ struct ClusterNative { sigmaPadPacked = tmp; } + GPUd() bool isSaturated() const { return qTot > maxRegularQtot; } + + GPUd() void setSaturatedQtot(uint32_t qtot) + { + this->qTot = USHRT_MAX; + if (qtot < maxSaturatedQtot) { + this->qTot = ((qtot + scaleSaturatedQtot / 2) / scaleSaturatedQtot) + maxRegularQtot; + } + } + + GPUd() uint32_t getSaturatedQtot() const + { + return uint32_t(qTot - maxRegularQtot) * scaleSaturatedQtot; + } + + GPUd() void setSaturatedTailLength(uint32_t tail) + { + sigmaTimePacked = encodeTailLength(tail); + } + + GPUd() uint32_t getSaturatedTailLength() const + { + return decodeTailLength(sigmaTimePacked); + } + GPUd() bool operator<(const ClusterNative& rhs) const { if (this->getTimePacked() != rhs.getTimePacked()) { @@ -167,6 +209,93 @@ struct ClusterNative { this->qTot == rhs.qTot && this->getFlags() == rhs.getFlags(); } + + private: + static constexpr GPUd() uint32_t decodeTailLength(uint8_t code) + { + // Quantize tail length into 8bits. + // Max expected length is 1500 tbs. + // But allow outliers up to 8000 tbs. + // + // Full code layout is: + // + // | Code range | Decoded values | Step | Codes | + // | ---------: | -------------: | ----: | ----: | + // | `0..63` | `0..63` | `1` | `64` | + // | `64..95` | `64..126` | `2` | `32` | + // | `96..127` | `128..252` | `4` | `32` | + // | `128..159` | `256..504` | `8` | `32` | + // | `160..223` | `512..1520` | `16` | `64` | + // | `224..239` | `1552..2032` | `32` | `16` | + // | `240..255` | `2048..8048` | `400` | `16` | + // + + if (code < 64) { + return code; + } + + if (code < 160) { + uint32_t q = (uint32_t)code - 64u; + uint32_t exponent = (q >> 5) + 1u; // 1, 2, 3 + uint32_t mantissa = q & 31u; // 0..31 + + return (32u + mantissa) << exponent; + } + + if (code < 224) { + return 512u + 16u * ((uint32_t)code - 160u); + } + + if (code < 240) { + return 1552u + 32u * ((uint32_t)code - 224u); + } + + return 2048u + 400u * ((uint32_t)code - 240u); + } + + static constexpr GPUd() uint8_t encodeTailLength(uint32_t value) + { + // Saturate above representable range. + if (value >= decodeTailLength(255)) [[unlikely]] { + return 255; + } + + // Binary search for the first code whose decoded value >= value. + uint8_t lo = 0; + uint8_t hi = 255; + + while (lo < hi) { + uint8_t mid = lo + ((hi - lo) >> 1); + uint32_t decoded = decodeTailLength(mid); + + if (decoded < value) { + lo = mid + 1; + } else { + hi = mid; + } + } + + // lo is now the first code with decoded >= value. + if (lo == 0) [[unlikely]] { + return 0; + } + + uint8_t above_code = lo; + uint8_t below_code = lo - 1; + + uint32_t above_value = decodeTailLength(above_code); + uint32_t below_value = decodeTailLength(below_code); + + uint32_t above_error = above_value - value; + uint32_t below_error = value - below_value; + + // Tie-break downward. + if (below_error <= above_error) { + return below_code; + } else { + return above_code; + } + } }; // This is an index struct to access TPC clusters inside sectors and rows. It shall not own the data, but just point to diff --git a/Detectors/CTF/workflow/src/CTFWriterSpec.cxx b/Detectors/CTF/workflow/src/CTFWriterSpec.cxx index 5d6db7d613674..f175bf4c2e5d3 100644 --- a/Detectors/CTF/workflow/src/CTFWriterSpec.cxx +++ b/Detectors/CTF/workflow/src/CTFWriterSpec.cxx @@ -310,7 +310,7 @@ size_t CTFWriterSpec::processDet(o2::framework::ProcessingContext& pc, DetID det if (det == DetID::ITS) { nLayers = mInput.doITSStaggering ? o2::itsmft::DPLAlpideParam::getNLayers() : 1; } else if (det == DetID::MFT) { - nLayers = mInput.doMFTStaggering ? o2::itsmft::DPLAlpideParam::getNLayers() : 1; + nLayers = mInput.doMFTStaggering ? o2::itsmft::DPLAlpideParam::getNLayers() : 1; } for (uint32_t iLayer = 0; iLayer < nLayers; iLayer++) { auto binding = getBinding(det.getName(), iLayer); @@ -431,7 +431,7 @@ size_t CTFWriterSpec::estimateCTFSize(ProcessingContext& pc) if (det == DetID::ITS) { nLayers = mInput.doITSStaggering ? o2::itsmft::DPLAlpideParam::getNLayers() : 1; } else if (det == DetID::MFT) { - nLayers = mInput.doMFTStaggering ? o2::itsmft::DPLAlpideParam::getNLayers() : 1; + nLayers = mInput.doMFTStaggering ? o2::itsmft::DPLAlpideParam::getNLayers() : 1; } for (uint32_t iLayer = 0; iLayer < nLayers; iLayer++) { auto binding = getBinding(det.getName(), iLayer); @@ -818,7 +818,7 @@ DataProcessorSpec getCTFWriterSpec(const o2::ctf::CTFWriterInp& inp) if (det == DetID::ITS) { nLayers = inp.doITSStaggering ? o2::itsmft::DPLAlpideParam::getNLayers() : 1; } else if (det == DetID::MFT) { - nLayers = inp.doMFTStaggering ? o2::itsmft::DPLAlpideParam::getNLayers() : 1; + nLayers = inp.doMFTStaggering ? o2::itsmft::DPLAlpideParam::getNLayers() : 1; } for (uint32_t iLayer = 0; iLayer < nLayers; iLayer++) { inputs.emplace_back(CTFWriterSpec::getBinding(det.getName(), iLayer), det.getDataOrigin(), "CTFDATA", iLayer, Lifetime::Timeframe); diff --git a/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/CTFCoder.h b/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/CTFCoder.h index 4f9bc90c1c758..76ac8878562de 100644 --- a/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/CTFCoder.h +++ b/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/CTFCoder.h @@ -356,8 +356,10 @@ void CTFCoder::decompress(const CompressedClusters& compCl, VROF& rofRecVec, assert(chipCount == compCl.header.nChips); if (clCount != compCl.header.nClusters) { - LOG(error) << "expected " << compCl.header.nClusters << " but counted " << clCount << " in ROFRecords"; - throw std::runtime_error("mismatch between expected and counter number of clusters"); + LOGP(error, "expected {} but counted {} clusters in {} ROFRecords", compCl.header.nClusters, clCount, compCl.header.nROFs); + if (clCount > compCl.header.nClusters) { + throw std::runtime_error("mismatch between expected and counter number of clusters"); + } } } @@ -456,8 +458,10 @@ void CTFCoder::decompress(const CompressedClusters& compCl, VROF& rofRecVec, assert(chipCount == compCl.header.nChips); if (clCount != compCl.header.nClusters) { - LOG(error) << "expected " << compCl.header.nClusters << " but counted " << clCount << " in ROFRecords"; - throw std::runtime_error("mismatch between expected and counter number of clusters"); + LOGP(error, "expected {} but counted {} clusters in {} ROFRecords", compCl.header.nClusters, clCount, compCl.header.nROFs); + if (clCount > compCl.header.nClusters) { + throw std::runtime_error("mismatch between expected and counter number of clusters"); + } } } diff --git a/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/ChipMappingMFT.h b/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/ChipMappingMFT.h index eee9bdbb6a4dc..63d37a25ffbc9 100644 --- a/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/ChipMappingMFT.h +++ b/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/ChipMappingMFT.h @@ -73,16 +73,15 @@ class ChipMappingMFT ///< total number of RUs static constexpr Int_t getNRUs() { return NRUs; } - ///< get FEEId of the RU (software id of the RU), read via given link + ///< get software id of the RU, from first 8 bits of FEEID (HW id of RU) uint8_t FEEId2RUSW(uint16_t hw) const { return mFEEId2RUSW[hw & 0xff]; } - ///< get HW id of the RU (software id of the RU) + ///< get FEEID, from software id of the RU and link number uint16_t RUSW2FEEId(uint16_t sw, uint16_t linkID = 0) const { return ((linkID << 8) + mRUInfo[sw].idHW); } ///< compose FEEid for given stave (ru) relative to layer and link, see documentation in the constructor uint16_t composeFEEId(uint16_t layer, uint16_t ruOnLayer, uint16_t link) const { - // only one link is used // ruOnLayer is 0, 1, 2, 3 for half = 0 // 4, 5, 6, 7 1 auto dhalf = std::div(ruOnLayer, 4); @@ -114,7 +113,7 @@ class ChipMappingMFT face = (feeID >> 2) & 0x1; } - ///< get info on sw RU + ///< get info on sw RU corresponding to given FEEID const RUInfo* getRUInfoFEEId(Int_t feeID) const { return &mRUInfo[FEEId2RUSW(feeID)]; } ///< get number of chips served by single cable on given RU type @@ -123,13 +122,13 @@ class ChipMappingMFT return ((0x1 << 7) + (cableHW & 0x1f)); } - ///< convert HW cable ID to its position on the ActiveLanes word in the GBT.header for given RU type + ///< convert HW cable ID to its position on the ActiveLanes word in the GBT.header for given RU type (note: this position is equal to the HW cable ID) uint8_t cableHW2Pos(uint8_t ruType, uint8_t hwid) const { return mCableHW2Pos[ruType][hwid]; } ///< convert HW cable ID to SW ID for give RU type uint8_t cableHW2SW(uint8_t ruType, uint8_t hwid) const { return hwid < mCableHW2SW[ruType].size() ? mCableHW2SW[ruType][hwid] : 0xff; } - ///< convert cable iterator ID to its position on the ActiveLanes word in the GBT.header for given RU type + ///< convert cable iterator ID (i.e. chipOnModule) to its position on the ActiveLanes word in the GBT.header for given RU type (note: this position is equal to the HW cable ID) uint8_t cablePos(uint8_t ruType, uint8_t id) const { return mCablePos[ruType][id]; } ///< get chipID on module from chip global SW ID, cable SW ID and stave (RU) info @@ -139,7 +138,7 @@ class ChipMappingMFT return 0xffff; } - ///< get chip global SW ID from chipID on module, cable SW ID and stave (RU) info + ///< get chip global SW ID from cable HW ID and stave (RU) info (note: chOnModuleHW is unused) uint16_t getGlobalChipID(uint16_t chOnModuleHW, int cableHW, const RUInfo& ruInfo) const { auto chipOnRU = cableHW2SW(ruInfo.ruType, cableHW); @@ -393,11 +392,11 @@ class ChipMappingMFT private: Int_t invalid() const; - static constexpr Int_t NRUs = NLayers * NZonesPerLayer; + static constexpr Int_t NRUs = NLayers * NZonesPerLayer; // 10 layers * 8 zones per layer static constexpr Int_t NModules = 280; static constexpr Int_t NChipsInfo = 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 16 + 17 + 18 + 19 + 14; static constexpr Int_t NChipsPerCable = 1; - static constexpr Int_t NLinks = 1; + static constexpr Int_t NLinks = 3; static constexpr Int_t NConnectors = 5; static constexpr Int_t NMaxChipsPerLadder = 5; static constexpr Int_t NRUCables = 25; diff --git a/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/Clusterer.h b/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/Clusterer.h index 0bdbb701a9356..dd3052e2cc5bd 100644 --- a/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/Clusterer.h +++ b/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/Clusterer.h @@ -236,6 +236,8 @@ class Clusterer ///< load the dictionary of cluster topologies void loadDictionary(const std::string& fileName) { mPattIdConverter.loadDictionary(fileName); } void setDictionary(const TopologyDictionary* dict) { mPattIdConverter.setDictionary(dict); } + const TopologyDictionary& getDictionary() const { return mPattIdConverter.getDictionary(); } + auto& getPattIdConverter() const { return mPattIdConverter; } TStopwatch& getTimer() { return mTimer; } // cannot be const TStopwatch& getTimerMerge() { return mTimerMerge; } // cannot be const diff --git a/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/LookUp.h b/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/LookUp.h index 3537a1f408886..4f84a838efc70 100644 --- a/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/LookUp.h +++ b/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/LookUp.h @@ -43,7 +43,7 @@ class LookUp bool isGroup(int id) const { return mDictionary.isGroup(id); } int size() const { return mDictionary.getSize(); } auto getPattern(int id) const { return mDictionary.getPattern(id); } - auto getDictionaty() const { return mDictionary; } + auto& getDictionary() const { return mDictionary; } private: TopologyDictionary mDictionary; diff --git a/Detectors/ITSMFT/common/reconstruction/src/ChipMappingMFT.cxx b/Detectors/ITSMFT/common/reconstruction/src/ChipMappingMFT.cxx index de2358469e894..b79c529bef803 100644 --- a/Detectors/ITSMFT/common/reconstruction/src/ChipMappingMFT.cxx +++ b/Detectors/ITSMFT/common/reconstruction/src/ChipMappingMFT.cxx @@ -1624,7 +1624,7 @@ ChipMappingMFT::ChipMappingMFT() { // init chips info - uint32_t maxRUHW = composeFEEId(NLayers - 1, NZonesPerLayer - 1, NLinks - 1); // Max possible FEE ID + uint32_t maxRUHW = composeFEEId(NLayers - 1, NZonesPerLayer - 1, 0); // Max possible RU HW ID (first 8 bits of max FEEID, while link stored in 9th and 10th bit of FEEID) mFEEId2RUSW.resize(maxRUHW + 1, 0xff); int curLayer = -1, curZone = -1, curHalf = -1; @@ -1698,8 +1698,8 @@ ChipMappingMFT::ChipMappingMFT() auto& ruInfo = mRUInfo[ctrRU]; ruInfo.idSW = ctrRU++; - // map FEEIds (RU read out by at most 3 GBT links) to SW ID - ruInfo.idHW = composeFEEId(iLayer, iZone, 0); // FEEId for link 0 + // map RU HW ID (RU read out by at most 3 GBT links) to SW ID + ruInfo.idHW = composeFEEId(iLayer, iZone, 0); // RU HW ID (first 8 bits of FEEID) mFEEId2RUSW[ruInfo.idHW] = ruInfo.idSW; ruInfo.layer = iLayer; ruInfo.ruType = ZoneRUType[iZone % 4][iLayer / 2]; diff --git a/Detectors/ITSMFT/common/reconstruction/src/RawPixelDecoder.cxx b/Detectors/ITSMFT/common/reconstruction/src/RawPixelDecoder.cxx index 7158551e02e20..874c432b097ce 100644 --- a/Detectors/ITSMFT/common/reconstruction/src/RawPixelDecoder.cxx +++ b/Detectors/ITSMFT/common/reconstruction/src/RawPixelDecoder.cxx @@ -40,8 +40,7 @@ RawPixelDecoder::RawPixelDecoder() mTimerDecode.Stop(); mTimerFetchData.Stop(); mSelfName = o2::utils::Str::concat_string(Mapping::getName(), "Decoder"); - DPLRawParser<>::setCheckIncompleteHBF(false); // Disable incomplete HBF checking, see ErrPacketCounterJump check in GBTLink.cxx - mInputFilter = {InputSpec{"filter", ConcreteDataTypeMatcher{Mapping::getOrigin(), o2::header::gDataDescriptionRawData}}}; // by default take all raw data + DPLRawParser<>::setCheckIncompleteHBF(false); // Disable incomplete HBF checking, see ErrPacketCounterJump check in GBTLink.cxx } ///______________________________________________________________ @@ -235,13 +234,9 @@ void RawPixelDecoder::setupLinks(InputRecord& inputs) auto nLinks = mGBTLinks.size(); auto origin = (mUserDataOrigin == o2::header::gDataOriginInvalid) ? mMAP.getOrigin() : mUserDataOrigin; auto datadesc = (mUserDataDescription == o2::header::gDataDescriptionInvalid) ? o2::header::gDataDescriptionRawData : mUserDataDescription; - if (mUserDataDescription != o2::header::gDataDescriptionInvalid) { // overwrite data filter origin&descriptions with user defined ones if possible - for (auto& filt : mInputFilter) { - if (std::holds_alternative(filt.matcher)) { - std::get(filt.matcher).origin = origin; - std::get(filt.matcher).description = datadesc; - } - } + + if (mInputFilter.empty()) { // if no filter set externally, create a global one from imposed or default origin / description + mInputFilter.emplace_back("filter", ConcreteDataTypeMatcher{origin, datadesc}); } // if we see requested data type input with 0xDEADBEEF subspec and 0 payload this means that the "delayed message" diff --git a/Detectors/ITSMFT/common/workflow/include/ITSMFTWorkflow/STFDecoderSpec.h b/Detectors/ITSMFT/common/workflow/include/ITSMFTWorkflow/STFDecoderSpec.h index 29b9f75bcbc4e..8c16759e16726 100644 --- a/Detectors/ITSMFT/common/workflow/include/ITSMFTWorkflow/STFDecoderSpec.h +++ b/Detectors/ITSMFT/common/workflow/include/ITSMFTWorkflow/STFDecoderSpec.h @@ -75,7 +75,9 @@ class STFDecoder : public Task void finalize(); void reset(); std::unique_ptr setupClusterer(const std::string& dictName); - void ensureContinuousROF(const std::vector& in, std::vector& out, int lr, int nROFsTF, const char* name); + bool ensureContinuousROF(const std::vector& in, std::vector& out, int lr, int nROFsTF, const char* name); + void rectifyDigits(std::vector& rofVec, std::vector& digVec); + void rectifyClusters(std::vector& rofVec, std::vector& clusVec, std::vector& pattVec); TStopwatch mTimer; bool mDoClusters = false; @@ -90,6 +92,8 @@ class STFDecoder : public Task bool mUseClusterDictionary = true; bool mVerifyDecoder = false; bool mDumpFrom1stPipeline = false; + bool mRunEnsureContinuousROF = true; + bool mDisableRectifyContinuousROF = false; int mDumpOnError = 0; int mNThreads = 1; int mVerbosity = 0; diff --git a/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx b/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx index 8fb6ba4e6aa97..79372160c6ade 100644 --- a/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx +++ b/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx @@ -63,6 +63,7 @@ STFDecoder::STFDecoder(const STFDecoderInp& inp, std::shared_ptr void STFDecoder::init(InitContext& ic) { + int lane = ic.services().get().inputTimesliceId; o2::base::GRPGeomHelper::instance().setRequest(mGGCCDBRequest); try { auto v0 = o2::utils::Str::tokenize(mInputSpec, ':'); @@ -72,11 +73,22 @@ void STFDecoder::init(InitContext& ic) header::DataDescription dataDesc; dataOrig.runtimeInit(v1[0].c_str()); dataDesc.runtimeInit(v2[0].c_str()); + Mapping map; for (int iLayer{0}; iLayer < mLayers; ++iLayer) { auto& dec = mDecoder.emplace_back(std::make_unique>()); dec->setUserDataOrigin(dataOrig); dec->setUserDataDescription(dataDesc); dec->init(); // is this no-op? + + if (mDoStaggering) { + std::vector filter; + for (const auto feeID : map.getLayer2FEEIDs(iLayer)) { + filter.emplace_back("filter", ConcreteDataMatcher{dataOrig, dataDesc, (o2::header::DataHeader::SubSpecificationType)feeID}); + } + dec->setInputFilter(filter); + } else { + dec->setInputFilter({InputSpec{"filter", ConcreteDataTypeMatcher(dataOrig, dataDesc)}}); + } } } catch (const std::exception& e) { LOG(error) << "exception was thrown in decoder creation: " << e.what(); @@ -126,22 +138,36 @@ void STFDecoder::init(InitContext& ic) LOG(error) << "non-std::exception was thrown in decoder configuration"; throw; } + if (mDoCalibData) { + std::string warnMsg; + bool enforceEnsureContinuousROFinCalib = ic.options().get("enforce-continuous-rof-with-calib"); + if (ic.options().get("enforce-continuous-rof-with-calib")) { + warnMsg = "Calibration data requested but the ensureContinuousROF is explicitly enforced!"; + } else { + mRunEnsureContinuousROF = false; + warnMsg = "Calibration data requested, disabling ensureContinuousROF!"; + } + if (lane == 0) { + LOGP(alarm, "{}", warnMsg); + } else { + LOGP(info, "{}", warnMsg); + } + } + + mDisableRectifyContinuousROF = ic.options().get("disable-rectify-continuous-rof"); + if (mDisableRectifyContinuousROF && mRunEnsureContinuousROF) { + std::string warnMsg = "Rectification of clusters/digits is explicitly disabled after the ensureContinuousROF!"; + if (lane == 0) { + LOGP(alarm, "{}", warnMsg); + } else { + LOGP(info, "{}", warnMsg); + } + } if (mDoClusters) { mClusterer = std::make_unique(); mClusterer->setNChips(Mapping::getNChips()); } - - if (mDoStaggering) { - Mapping map; - for (uint32_t iLayer{0}; iLayer < mLayers; ++iLayer) { - std::vector filter; - for (const auto feeID : map.getLayer2FEEIDs(iLayer)) { - filter.emplace_back("filter", ConcreteDataMatcher{Mapping::getOrigin(), o2::header::gDataDescriptionRawData, (o2::header::DataHeader::SubSpecificationType)feeID}); - } - mDecoder[iLayer]->setInputFilter(filter); - } - } } ///_______________________________________ @@ -258,21 +284,29 @@ void STFDecoder::run(ProcessingContext& pc) } } if (mDoDigits) { + std::vector expDigRofVec; + if (ensureContinuousROF(digROFVec, expDigRofVec, iLayer, nROFsTF, "digits") && !mDisableRectifyContinuousROF) { + auto oldNDig = digVec.size(); + rectifyDigits(expDigRofVec, digVec); + LOGP(warn, "Rectified {} digits out of original {} on layer {} following ensureContinuousROF", digVec.size(), oldNDig, iLayer); + } pc.outputs().snapshot(Output{orig, "DIGITS", iLayer}, digVec); - std::vector expDigRofVec(nROFsTF); - ensureContinuousROF(digROFVec, expDigRofVec, iLayer, nROFsTF, "digits"); - pc.outputs().snapshot(Output{orig, "DIGITSROF", iLayer}, digROFVec); + pc.outputs().snapshot(Output{orig, "DIGITSROF", iLayer}, expDigRofVec); mEstNDig[iLayer] = std::max(mEstNDig[iLayer], size_t(digVec.size() * 1.2)); if (mDoCalibData) { pc.outputs().snapshot(Output{orig, "GBTCALIB", iLayer}, calVec); mEstNCalib[iLayer] = std::max(mEstNCalib[iLayer], size_t(calVec.size() * 1.2)); } - LOG(debug) << mSelfName << " Decoded " << digVec.size() << " Digits in " << digROFVec.size() << " ROFs" << ((mDoStaggering) ? std::format(" on layer {}", iLayer) : ""); + LOG(debug) << mSelfName << " Decoded " << digVec.size() << " Digits in " << expDigRofVec.size() << " ROFs" << ((mDoStaggering) ? std::format(" on layer {}", iLayer) : ""); } if (mDoClusters) { // we are not obliged to create vectors which are not requested, but other devices might not know the options of this one - std::vector expClusRofVec(nROFsTF); - ensureContinuousROF(clusROFVec, expClusRofVec, iLayer, nROFsTF, "clusters"); + std::vector expClusRofVec; + if (ensureContinuousROF(clusROFVec, expClusRofVec, iLayer, nROFsTF, "clusters") && !mDisableRectifyContinuousROF) { + auto oldNClus = clusCompVec.size(), oldNPatt = clusPattVec.size(); + rectifyClusters(expClusRofVec, clusCompVec, clusPattVec); + LOGP(warn, "Rectified {} clusters and {} patterns out of original {} and {} on layer {} following ensureContinuousROF", clusCompVec.size(), clusPattVec.size(), oldNClus, oldNPatt, iLayer); + } pc.outputs().snapshot(Output{orig, "COMPCLUSTERS", iLayer}, clusCompVec); pc.outputs().snapshot(Output{orig, "PATTERNS", iLayer}, clusPattVec); pc.outputs().snapshot(Output{orig, "CLUSTERSROF", iLayer}, expClusRofVec); @@ -416,8 +450,12 @@ void STFDecoder::reset() ///_______________________________________ template -void STFDecoder::ensureContinuousROF(const std::vector& rofVec, std::vector& expROFVec, int lr, int nROFsTF, const char* name) +bool STFDecoder::ensureContinuousROF(const std::vector& rofVec, std::vector& expROFVec, int lr, int nROFsTF, const char* name) { + if (!mRunEnsureContinuousROF) { + expROFVec = rofVec; + return false; + } const auto& par = AlpideParam::Instance(); // ensure that the rof output is continuous // we will preserve the digits/clusters as they are but the stray ROFs will be removed (leaving their clusters/digits unaddressed). @@ -465,13 +503,82 @@ void STFDecoder::ensureContinuousROF(const std::vector& rofV } } } - int prevFirst{0}; + int prevLast{0}; + bool reReference = false; // in case a non-last ROF with non-0 entries is removed, ROF references need to be shifted and clusters/digits rewritten for (auto& rof : expROFVec) { if (rof.getFirstEntry() < 0) { - rof.setFirstEntry(prevFirst); + rof.setFirstEntry(prevLast); + } else if (rof.getFirstEntry() != prevLast) { + reReference = true; // there is jump + } + prevLast = rof.getFirstEntry() + rof.getNEntries(); + } + return reReference; +} + +///_______________________________________ +template +void STFDecoder::rectifyDigits(std::vector& rofVec, std::vector& digVec) +{ + // following ensureContinuousROF call some old ROFs might have been dropped, need to rebuild digits vector and rereference ROF + std::vector digVecTmp; + digVecTmp.reserve(digVec.size()); + auto beg0 = digVec.begin(); + for (auto& rof : rofVec) { + int firstEntry = digVecTmp.size(); + if (rof.getNEntries()) { + auto beg = beg0 + rof.getFirstEntry(), end = beg + rof.getNEntries(); + std::copy(beg, end, std::back_inserter(digVecTmp)); + } + rof.setFirstEntry(firstEntry); + } + digVec.swap(digVecTmp); +} + +///_______________________________________ +template +void STFDecoder::rectifyClusters(std::vector& rofVec, std::vector& clusVec, std::vector& pattVec) +{ + // following ensureContinuousROF call some old ROFs might have been dropped, need to rebuild clusters and patterns vectors and rereference ROF + std::vector clusVecTmp; + clusVecTmp.reserve(clusVec.size()); + std::vector pattVecTmp; + pattVecTmp.reserve(pattVec.size()); + const auto& dict = mClusterer->getDictionary(); + auto begCl0 = clusVec.begin(), begClForPatt = begCl0; + auto pattIt = pattVec.begin(); + + auto skipToLastPattern = [&begClForPatt, &pattIt, &dict](const decltype(begCl0) tgt) { + while (begClForPatt < tgt) { // iterate clusters skipping their patterns until we reach targed cluster + const auto& clp = *begClForPatt; + auto pattID = clp.getPatternID(); + if (pattID == itsmft::CompCluster::InvalidPatternID || dict.isGroup(pattID)) { + ClusterPattern::skipPattern(pattIt); + } + begClForPatt++; + } + }; + + for (auto& rof : rofVec) { + int firstEntry = clusVecTmp.size(); + if (rof.getNEntries()) { + auto begClROF = begCl0 + rof.getFirstEntry(), endClROF = begClROF + rof.getNEntries(); // clusters to copy start/end here + if (mDoPatterns) { + if (begClForPatt > begClROF) { // normally should no happen unless original ROFs were not ordered + begClForPatt = begCl0; // start from the beginning + } + skipToLastPattern(begClROF); // iterate clusters skipping their patterns until we reach the 1st cluster to be copied + auto begPattToCopy = pattIt; // the 1st pattern corresponding to the needed ROF + skipToLastPattern(endClROF); // iterate clusters skipping their patterns until we reach the last cluster to be copied + std::copy(begPattToCopy, pattIt, std::back_inserter(pattVecTmp)); + } + std::copy(begClROF, endClROF, std::back_inserter(clusVecTmp)); } - prevFirst = rof.getFirstEntry(); + // copy patterns corresponding to this ROF + rof.setFirstEntry(firstEntry); } + clusVec.swap(clusVecTmp); + pattVec.swap(pattVecTmp); } ///_______________________________________ @@ -544,6 +651,8 @@ DataProcessorSpec getSTFDecoderSpec(const STFDecoderInp& inp) {"unmute-extra-lanes", VariantType::Bool, false, {"allow extra lanes to be as verbose as 1st one"}}, {"allow-empty-rofs", VariantType::Bool, false, {"record ROFs w/o any hit"}}, {"ignore-noise-map", VariantType::Bool, false, {"do not mask pixels flagged in the noise map"}}, + {"enforce-continuous-rof-with-calib", VariantType::Bool, false, {"enforce ensureContinuousROF call even when calibration data is requested (not recommended)"}}, + {"disable-rectify-continuous-rof", VariantType::Bool, false, {"do not rectify clusters and digits after ensureContinuousROF (not recommended)"}}, {"accept-rof-rampup-data", VariantType::Bool, false, {"do not discard data during ROF ramp up"}}, {"rof-length-error-freq", VariantType::Float, 60.f, {"do not report ROF length error more frequently than this value, disable if negative"}}, {"ignore-cluster-dictionary", VariantType::Bool, false, {"do not use cluster dictionary, always store explicit patterns"}}}}; diff --git a/Detectors/Raw/README.md b/Detectors/Raw/README.md index 557245030b980..1fece239723ec 100644 --- a/Detectors/Raw/README.md +++ b/Detectors/Raw/README.md @@ -548,6 +548,87 @@ list of detectors for which raw outputs are discarded. The raw data will be propagated (if present) only if the detector is selected in `--onlyDet` and `NOT` selected in `--non-raw-only-det`. The non-raw data will be propagated (if defined for the given detector and present in the file) only if the detector is selected in `--onlyDet` and `NOT` selected in `--raw-only-det`. +## Raw TF (DD format) dumping workflow + +Use `o2-raw-tf-dump-workflow` to dump raw TF data in DD format. The options are: +``` +--dataspec arg (=tst:TST/A) +``` +Optional selection string for the data to be dumped, e.g. the same string supplied to the input raw proxy +``` +--triggerspec arg (="") +``` +Selection string for the external trigger to dump particular TF. Must be contained in the `--dataspec`. The workflow will loop over all available trigger inputs, interpreting them as span: any `span[0]==true` will trigger writing process (modulo throttling). +``` +--include-deadbeef (false) +``` +Include data with DPL-generated 0xdeadbeef subspecs (for data missing in the original TF). +``` +--exclude-trigger-specs (="") + +``` +Ignore trigger seen in these inputs of triggerspec (e.g. to suppress noisy trigger inputs) +``` +--max-dump-rate arg (=0) +``` +Fraction in (`%`) of TFs to dump. W/o external trigger: random(>0) or periodic(<0) rejection. With external trigger: throttle dumping to have the lowest estimated acceptance rate compatible with this rate. +``` +--rate-est-conf-limit arg (=0.05) +``` +Quantile for the lowest rate estimate confidence limit +``` +--max-warn arg (=5) +``` +If throttling, max allowed warnings +``` +--mute-warn-period arg (=100) +``` +Mute warnings about throttling for this number of TFs +``` +--output-dir arg (=none) +``` +Dumped TFs output directory, must exist. `none` means current dir., `/dev/null`: ignort writing (dry run) +``` +--meta-output-dir arg (=/dev/null) +``` +TF metadata output directory, must exist (if not /dev/null, in which case the metadata will not be created) +``` +--md5-for-meta (false) +``` +Fill CTF file MD5 sum in the metadata file +``` +--min-file-size arg (=0) +``` +Accumulate TFs until given file size reached +``` +--max-file-size arg (=0) +``` +If > 0, try to avoid exceeding given file size, also used for space check +``` +--max-tf-per-file arg (=0) +``` +If > 0, avoid storing more than requested CTFs per file +``` +--require-free-disk arg (=0) +``` +Pause writing op. if available disk space is below this margin, in bytes if >0, as a fraction of total if <0 +``` +--wait-for-free-disk arg (=10) +``` +If paused due to the low disk space, recheck after this time (in s) +``` +--max-wait-for-free-disk arg (=60) +``` +Produce fatal if paused due to the low disk space for more than time in seconds. +``` +--verbosity-level (=0) +``` +Verbose mode: 1: decision on every TF, 2: details of saved TF, 3: more details. +``` +--ignore-partition-run-dir +``` +Do not creare partition-run directory in output-dir + ## TF rate limiting To apply TF rate limiting (i.e. make sure that no more than N TFs are in processing) provide `--timeframes-rate-limit --timeframes-rate-limit-ipcid ` diff --git a/Detectors/Raw/TFReaderDD/CMakeLists.txt b/Detectors/Raw/TFReaderDD/CMakeLists.txt index 12ecc9ca8795d..f87d1b5a7704e 100644 --- a/Detectors/Raw/TFReaderDD/CMakeLists.txt +++ b/Detectors/Raw/TFReaderDD/CMakeLists.txt @@ -26,3 +26,10 @@ o2_add_executable(tf-reader-workflow SOURCES src/TFReaderSpec.cxx src/tf-reader-workflow.cxx PUBLIC_LINK_LIBRARIES O2::TFReaderDD) + + +o2_add_executable(tf-dump-workflow + COMPONENT_NAME raw + SOURCES src/RawTFDumpSpec.cxx + src/tf-data-dump-workflow.cxx + PUBLIC_LINK_LIBRARIES O2::TFReaderDD) diff --git a/Detectors/Raw/TFReaderDD/include/TFReaderDD/SubTimeFrameFile.h b/Detectors/Raw/TFReaderDD/include/TFReaderDD/SubTimeFrameFile.h index 340027642b74c..eeabf8e8d4117 100644 --- a/Detectors/Raw/TFReaderDD/include/TFReaderDD/SubTimeFrameFile.h +++ b/Detectors/Raw/TFReaderDD/include/TFReaderDD/SubTimeFrameFile.h @@ -21,6 +21,8 @@ #include #include +#include "Framework/DataSpecUtils.h" +#include "Framework/OutputSpec.h" #include "Framework/Logger.h" namespace o2 @@ -151,13 +153,13 @@ struct SubTimeFrameFileMeta { /// std::uint64_t mWriteTimeMs; - auto getTimePoint() + auto getTimePoint() const { using namespace std::chrono; return time_point{milliseconds{mWriteTimeMs}}; } - std::string getTimeString() + std::string getTimeString() const { using namespace std::chrono; std::time_t lTime = system_clock::to_time_t(getTimePoint()); @@ -167,6 +169,11 @@ struct SubTimeFrameFileMeta { return lTimeStream.str(); } + const std::string info() const + { + return fmt::format("Size in file: {} Time: {} Version: {}", mStfSizeInFile, getTimeString(), mStfFileVersion); + } + SubTimeFrameFileMeta(const std::uint64_t pStfSize) : SubTimeFrameFileMeta() { @@ -220,6 +227,11 @@ struct SubTimeFrameFileDataIndex { static_assert(sizeof(DataIndexElem) == 48, "DataIndexElem changed -> Binary compatibility is lost!"); } + + const std::string info() const + { + return fmt::format("DH: {} Cnt:{} Size:{} Offset:{}", o2::framework::DataSpecUtils::describe(o2::framework::OutputSpec{mDataOrigin, mDataDescription, mSubSpecification}), mDataBlockCnt, mSize, mOffset); + } }; SubTimeFrameFileDataIndex() = default; @@ -240,6 +252,8 @@ struct SubTimeFrameFileDataIndex { return sizeof(o2::header::DataHeader) + (sizeof(DataIndexElem) * mDataIndex.size()); } + const std::vector& getDataIndex() const { return mDataIndex; } + friend std::ostream& operator<<(std::ostream& pStream, const SubTimeFrameFileDataIndex& pIndex); private: diff --git a/Detectors/Raw/TFReaderDD/include/TFReaderDD/SubTimeFrameFileReader.h b/Detectors/Raw/TFReaderDD/include/TFReaderDD/SubTimeFrameFileReader.h index 3b926e0a79206..2b7d2b7ab8e74 100644 --- a/Detectors/Raw/TFReaderDD/include/TFReaderDD/SubTimeFrameFileReader.h +++ b/Detectors/Raw/TFReaderDD/include/TFReaderDD/SubTimeFrameFileReader.h @@ -46,11 +46,11 @@ class SubTimeFrameFileReader public: SubTimeFrameFileReader() = delete; - SubTimeFrameFileReader(const std::string& pFileName, o2::detectors::DetID::mask_t detMask); + SubTimeFrameFileReader(const std::string& pFileName, o2::detectors::DetID::mask_t detMask, int verb, bool sup0xccdb, bool repaireHeaders, bool rejectDistSTF); ~SubTimeFrameFileReader(); /// Read a single TF from the file - std::unique_ptr read(fair::mq::Device* device, const std::vector& outputRoutes, const std::string& rawChannel, size_t slice, bool sup0xccdb, int verbosity); + std::unique_ptr read(fair::mq::Device* device, const std::vector& outputRoutes, const std::string& rawChannel, size_t slice); /// Tell the current position of the file inline std::uint64_t position() const { return mFileMapOffset; } @@ -76,6 +76,13 @@ class SubTimeFrameFileReader std::uint64_t mFileMapOffset = 0; std::uint64_t mFileSize = 0; + int mVerbosity = 0; + bool mSup0xccdb = true; + bool mRepaireHeaders = true; + bool mRejectDistSTF = true; + + const std::string describeHeader(const o2::header::DataHeader& hd, bool full = false) const; + // helper to make sure written chunks are buffered, only allow pointers template ::value>> diff --git a/Detectors/Raw/TFReaderDD/src/RawTFDumpSpec.cxx b/Detectors/Raw/TFReaderDD/src/RawTFDumpSpec.cxx new file mode 100644 index 0000000000000..8d9986eddef48 --- /dev/null +++ b/Detectors/Raw/TFReaderDD/src/RawTFDumpSpec.cxx @@ -0,0 +1,618 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include "Framework/WorkflowSpec.h" +#include "Framework/ConfigParamRegistry.h" +#include "Framework/RawDeviceService.h" +#include "Framework/DataProcessingHelpers.h" +#include "Framework/InputRecordWalker.h" +#include "Framework/Task.h" +#include "Framework/DataTakingContext.h" +#include "Framework/TimingInfo.h" +#include "DataFormatsParameters/GRPECSObject.h" +#include "DetectorsCommonDataFormats/FileMetaData.h" +#include "DetectorsRaw/RDHUtils.h" +#include "RawTFDumpSpec.h" +#include "TFReaderDD/SubTimeFrameFile.h" +#include "CommonUtils/NameConf.h" +#include "CommonUtils/FileSystemUtils.h" +#include "CommonUtils/StringUtils.h" +#include "Algorithm/RangeTokenizer.h" +#include +#include +#include +#include +#include + +namespace o2::rawdd +{ +namespace o2h = o2::header; +using namespace o2::framework; +using DataHeader = o2::header::DataHeader; +using DetID = o2::detectors::DetID; +using ios = std::ios_base; + +class RawTFDump : public Task +{ + public: + static constexpr o2h::DataDescription DESCRaw{"RAWDATA"}, DESCCRaw{"CRAWDATA"}; + + RawTFDump(const std::string& trigger); + void init(InitContext& ic) final; + void run(ProcessingContext& pc) final; + void endOfStream(EndOfStreamContext& ec) final; + + private: + bool triggerTF(ProcessingContext& pc); + void updateTimeDependentParams(ProcessingContext& pc); + void prepareTFForWriting(ProcessingContext& pc); + size_t getTFSizeInFile() const; + size_t getCurrentFileSize(); + void prepareTFFile(); + void closeTFFile(); + bool checkFreeSpace(ProcessingContext& pc); + std::string reportRates() const; + + SubTimeFrameFileDataIndex mTFDataIndex; + std::vector> mTFData; + std::map> mDataMap; + std::vector mFilter{}; + std::vector mTriggerFilter{}; + std::vector mExclTriggerFilter{}; + + size_t mTFSize = 0; + size_t mMinFileSize = 0; // if > 0, accumulate TFs in the same file until the total size exceeds this minimum + size_t mMaxFileSize = 0; // if > MinSize, and accumulated size will exceed this value, stop accumulation (even if mMinFileSize is not reached) + + int mNTFsSeen = 0; // total number of TFs seen + int mNTFsExtTrig = 0; // total nunber of TFs externally triggered + int mNTFsAccepted = 0; // total number of TFs written + int mNTFsInFile = 0; // total number of TFs accumulated in the current file + int mNTFFiles = 0; // total number of TF files written + int mLastWarned = 0; // TF when last warned about throttling + int mMaxTFPerFile = 0; // max TFs per files to store + int mNWarnThrottle = 0; // number of times we warned about the throttling + int mMaxWarnThrottle = 0; // max allowed warnings about the throttling + int mWarnThrottleTF = 0; // min period (in TFs) between the warnings about the throttling + int mWaitDiskFull = 0; // if mCheckDiskFull triggers, pause for this amount of ms before new attempt + int mWaitDiskFullMax = -1; // produce fatal mCheckDiskFull block the workflow for more than this time (in ms) + float mCheckDiskFull = 0.; // wait for if available abs. disk space is < mCheckDiskFull (if >0) or if its fraction is < -mCheckDiskFull (if <0) + float mMaxAccRate = 0.f; // max acceptance rate + float mConfLim = 0.05f; // confidence limit for rate esimate (lower quantile) + float mRateEstAccLow = 0.f; // lower limit on accepted TFs rate + float mRateEstAccUpp = 0.f; // upper limit on accepted TFs rate + float mRateEstTrgLow = 0.f; // lower limit on triggered TFs rate + float mRateEstTrgUpp = 0.f; // upper limit on triggered TFs rate + + bool mFillMD5 = false; + bool mWriteTF = true; // for dry run + bool mStoreMetaFile = false; + bool mCreateRunEnvDir = true; + bool mAcceptCurrentTF = false; + bool mRejectDEADBEEF = false; + bool mRejectDistSTF = true; + int mVerbose = 0; + std::vector mTFOrbits{}; // 1st orbits of TF accumulated in current file + o2::framework::DataTakingContext mDataTakingContext{}; + o2::framework::TimingInfo mTimingInfo{}; + + std::string mTrigger{}; // external trigger input + std::string mExclTriggerSpecs{}; // trigger specs to ignore + std::string mHostName{}; + std::string mTFDir{}; + std::string mTFMetaFileDir = "/dev/null"; + std::string mCurrentTFFileName{}; + std::string mCurrentTFFileNameFull{}; + std::string mCurrentTFFileNameFullTmp{}; + std::string mMetaDataType{}; + + static constexpr size_t MiB = 1ul << 20; + static constexpr std::streamsize sBuffSize = MiB; // 1 MiB + static constexpr std::streamsize sChunkSize = 512; + static const std::string TMPFileEnding; + std::unique_ptr mFileBuf; + std::ofstream mFile; + std::uniform_real_distribution mUniformDist{0.0, 100.0}; + std::default_random_engine mRGen; + + // helper to make sure the written blocks are buffered + template < + typename pointer, + typename std::enable_if< + std::is_pointer::value && // pointers only + (std::is_void>::value || // void* or standard layout! + std::is_standard_layout>::value)>::type* = nullptr> + void buffered_write(const pointer p, std::streamsize pCount) + { + // make sure we're not doing a short write + assert((pCount % sizeof(std::conditional_t>::value, + char, std::remove_pointer_t>) == + 0) && + "Performing short write?"); + + const char* lPtr = reinterpret_cast(p); + // avoid the optimization if the write is large enough + if (pCount >= sBuffSize) { + mFile.write(lPtr, pCount); + } else { + // split the write to smaller chunks + while (pCount > 0) { + const auto lToWrite = std::min(pCount, sChunkSize); + assert(lToWrite > 0 && lToWrite <= sChunkSize && lToWrite <= pCount); + + mFile.write(lPtr, lToWrite); + lPtr += lToWrite; + pCount -= lToWrite; + } + } + } +}; + +const std::string RawTFDump::TMPFileEnding{".part"}; + +//________________________________________ +RawTFDump::RawTFDump(const std::string& trigger) : mTrigger{trigger} +{ + mTriggerFilter = select(trigger.c_str()); + mFileBuf = std::make_unique(sBuffSize); + mFile.rdbuf()->pubsetbuf(mFileBuf.get(), sBuffSize); + mFile.clear(); + mFile.exceptions(std::fstream::failbit | std::fstream::badbit); +} + +//________________________________________ +void RawTFDump::init(InitContext& ic) +{ + mRGen = std::default_random_engine(getpid()); + mTFMetaFileDir = ic.options().get("meta-output-dir"); + if (mTFMetaFileDir != "/dev/null") { + mTFMetaFileDir = o2::utils::Str::rectifyDirectory(mTFMetaFileDir); + mStoreMetaFile = true; + mFillMD5 = ic.options().get("md5-for-meta"); + } + + mTFDir = ic.options().get("output-dir"); + if (mTFDir != "/dev/null") { + mTFDir = o2::utils::Str::rectifyDirectory(mTFDir); + mWriteTF = true; + } else { + mWriteTF = false; + mStoreMetaFile = false; + } + mRejectDistSTF = !ic.options().get("include-dist-stf"); + mRejectDEADBEEF = !ic.options().get("include-deadbeef"); + mCreateRunEnvDir = !ic.options().get("ignore-partition-run-dir"); + mMinFileSize = ic.options().get("min-file-size"); + mMaxFileSize = ic.options().get("max-file-size"); + mMaxTFPerFile = ic.options().get("max-tf-per-file"); + mMaxAccRate = ic.options().get("max-dump-rate"); + float cl = ic.options().get("rate-est-conf-limit"); + if (mConfLim < 0.001 || mConfLim > 0.32) { + LOGP(warn, "Bad confidence limit {} for rate estimate, setting to default {}", cl, mConfLim); + } else { + mConfLim = cl; + } + mMaxWarnThrottle = ic.options().get("max-warn"); + mWarnThrottleTF = ic.options().get("mute-warn-period"); + + mVerbose = ic.options().get("verbosity-level"); + mExclTriggerSpecs = ic.options().get("exclude-trigger-specs"); + if (!mExclTriggerSpecs.empty()) { + mExclTriggerFilter = select(mExclTriggerSpecs.c_str()); + } + if (mTrigger.empty()) { + if (mMaxAccRate >= 0.f) { + LOGP(info, "Will accept randomly {}% of TFs", mMaxAccRate); + } else { + LOGP(info, "Will accept every {}-th TF", int(std::ceil(-100.f / mMaxAccRate))); + } + } else { + mMaxAccRate = std::abs(mMaxAccRate); + LOGP(info, "Will limit TFs triggered with {} by {}% at most", mTrigger, mMaxAccRate); + if (!mExclTriggerFilter.empty()) { + LOGP(info, "Inputs excluded from the trigger: {}", mExclTriggerSpecs); + } + } + + if (mWriteTF) { + if (mMinFileSize > 0) { + LOGP(info, "Multiple TFs will be accumulated in the file until its size exceeds {}{}", + mMinFileSize, mMaxFileSize > mMinFileSize ? fmt::format(" but does not exceed {} B", mMaxFileSize) : std::string{}); + } + } + + mCheckDiskFull = ic.options().get("require-free-disk"); + mWaitDiskFull = 1000 * ic.options().get("wait-for-free-disk"); + mWaitDiskFullMax = 1000 * ic.options().get("max-wait-for-free-disk"); + + char hostname[_POSIX_HOST_NAME_MAX]; + gethostname(hostname, _POSIX_HOST_NAME_MAX); + mHostName = hostname; + mHostName = mHostName.substr(0, mHostName.find('.')); +} + +//________________________________________ +void RawTFDump::run(ProcessingContext& pc) +{ + mNTFsSeen++; + updateTimeDependentParams(pc); + mAcceptCurrentTF = triggerTF(pc); + if (mAcceptCurrentTF) { + prepareTFForWriting(pc); + } else { + return; + } + + prepareTFFile(); + if (mWriteTF && checkFreeSpace(pc)) { // write data + try { + size_t lTFSizeInFile = getTFSizeInFile(); + SubTimeFrameFileMeta lTFFileMeta(lTFSizeInFile); + lTFFileMeta.mWriteTimeMs = mTimingInfo.creation; + + mFile << lTFFileMeta; // Write DataHeader + SubTimeFrameFileMeta + mFile << mTFDataIndex; // Write DataHeader + SubTimeFrameFileDataIndex + + for (const auto& eqEntry : mDataMap) { + auto& [lSize, lCnt, lEntry, lHeader] = eqEntry.second; + for (size_t part = 0; part < lCnt; part++) { + const auto& dataPtr = mTFData[lEntry + part]; + DataHeader hdToWrite = *reinterpret_cast(lHeader); // make a local DataHeader copy to clear flagsNextHeader bit and set the parts correctly + hdToWrite.flagsNextHeader = 0; + hdToWrite.splitPayloadIndex = part; + hdToWrite.payloadSize = dataPtr.first; + if (mVerbose > 2) { + LOGP(info, "Writing part:{}/{} of {} | TFCounter:{} part{}/{}, size:{}", part, lCnt, DataSpecUtils::describe(OutputSpec{hdToWrite.dataOrigin, hdToWrite.dataDescription, hdToWrite.subSpecification}), hdToWrite.firstTForbit, hdToWrite.splitPayloadIndex, hdToWrite.splitPayloadParts, hdToWrite.payloadSize); + } + buffered_write(reinterpret_cast(&hdToWrite), sizeof(DataHeader)); + buffered_write(dataPtr.second, hdToWrite.payloadSize); + } + } + mFile.flush(); // flush the buffer and check the state + mTFOrbits.push_back(mTimingInfo.firstTForbit); + mNTFsInFile++; + } catch (const std::ios_base::failure& eFailExc) { + LOGP(error, "Writing of TF {} to file {} failed. error={}", mTimingInfo.tfCounter, mCurrentTFFileNameFullTmp, eFailExc.what()); + } + } + // cleanup + mTFData.clear(); + mDataMap.clear(); + mTFDataIndex.clear(); + mTFSize = 0; +} + +//____________________________________________________________ +void RawTFDump::endOfStream(EndOfStreamContext&) +{ + closeTFFile(); + LOGP(info, "Dumped {} TFs to {} files", mNTFsAccepted, mNTFFiles); + if (!mTriggerFilter.empty()) { + LOGP(info, "External trigger summary: {}", reportRates()); + } +} + +//________________________________________ +size_t RawTFDump::getTFSizeInFile() const +{ + return SubTimeFrameFileMeta::getSizeInFile() + mTFDataIndex.getSizeInFile() + mTFSize; +} + +//________________________________________ +size_t RawTFDump::getCurrentFileSize() +{ + return mFile.is_open() ? size_t(mFile.tellp()) : 0; +} + +//___________________________________________________________________ +void RawTFDump::prepareTFFile() +{ + if (!mWriteTF) { + return; + } + bool needToOpen; + if (!mFile.is_open()) { + needToOpen = true; + } else { + auto currSize = getCurrentFileSize(); + if ((mNTFsInFile >= mMaxTFPerFile) || + (currSize >= mMinFileSize) || // min size exceeded, may close the file. + (currSize && mMaxFileSize > mMinFileSize && ((currSize + mTFSize) > mMaxFileSize))) { // this is not the 1st TF in the file and the new size will exceed allowed max + needToOpen = true; + } else { + LOGP(info, "Will add new TF of size {} to existing file of size {} with {} TFs", mTFSize, currSize, mNTFsInFile); + needToOpen = false; + } + } + if (needToOpen) { + closeTFFile(); + auto TFDir = mTFDir.empty() ? o2::utils::Str::rectifyDirectory("./") : mTFDir; + if (mCreateRunEnvDir && !mDataTakingContext.envId.empty() && (mDataTakingContext.envId != o2::framework::DataTakingContext::UNKNOWN)) { + TFDir += fmt::format("{}_{}tf/", mDataTakingContext.envId, mDataTakingContext.runNumber); + if (!TFDir.empty()) { + o2::utils::createDirectoriesIfAbsent(TFDir); + LOGP(info, "Created {} directory for TFs output", TFDir); + } + } + mCurrentTFFileName = o2::base::NameConf::getRawTFFileName(mTimingInfo.runNumber, mTimingInfo.firstTForbit, mTimingInfo.tfCounter, mHostName); + mCurrentTFFileNameFull = fmt::format("{}{}", TFDir, mCurrentTFFileName); + mCurrentTFFileNameFullTmp = TMPFileEnding.empty() ? mCurrentTFFileNameFull : o2::utils::Str::concat_string(mCurrentTFFileNameFull, TMPFileEnding); + mFile.open(mCurrentTFFileNameFullTmp.c_str(), ios::binary | ios::trunc | ios::out | ios::ate); + LOGP(info, "Opened new raw-tf dump file {}[{}]", mCurrentTFFileNameFull, TMPFileEnding); + mNTFFiles++; + } +} + +//___________________________________________________________________ +void RawTFDump::updateTimeDependentParams(ProcessingContext& pc) +{ + namespace GRPECS = o2::parameters::GRPECS; + mTimingInfo = pc.services().get(); + if (mTimingInfo.globalRunNumberChanged) { + mDataTakingContext = pc.services().get(); + // determine the output type for the TF metadata + mMetaDataType = GRPECS::getRawDataPersistencyMode(mDataTakingContext.runType, mDataTakingContext.forcedRaw); + } +} + +//___________________________________________________________________ +void RawTFDump::closeTFFile() +{ + if (!mFile.is_open()) { + return; + } + try { + LOGP(info, "Closing output file {}[{}]", mCurrentTFFileNameFull, TMPFileEnding); + mFile.close(); + // write TF file metaFile data + if (mStoreMetaFile) { + o2::dataformats::FileMetaData TFMetaData; + if (!TFMetaData.fillFileData(mCurrentTFFileNameFullTmp, mFillMD5, TMPFileEnding)) { + throw std::runtime_error("metadata file was requested but not created"); + } + TFMetaData.setDataTakingContext(mDataTakingContext); + TFMetaData.type = mMetaDataType; + TFMetaData.priority = "high"; + TFMetaData.tfOrbits.swap(mTFOrbits); + auto metaFileNameTmp = fmt::format("{}{}.tmp", mTFMetaFileDir, mCurrentTFFileName); + auto metaFileName = fmt::format("{}{}.done", mTFMetaFileDir, mCurrentTFFileName); + try { + std::ofstream metaFileOut(metaFileNameTmp); + metaFileOut << TFMetaData; + metaFileOut.close(); + if (!TMPFileEnding.empty()) { + std::filesystem::rename(mCurrentTFFileNameFullTmp, mCurrentTFFileNameFull); + } + std::filesystem::rename(metaFileNameTmp, metaFileName); + LOGP(info, "wrote meta file {}", metaFileName); + } catch (std::exception const& e) { + LOGP(error, "Failed to store TF meta data file {}, reason {}", metaFileName, e.what()); + } + } else if (!TMPFileEnding.empty()) { + std::filesystem::rename(mCurrentTFFileNameFullTmp, mCurrentTFFileNameFull); + } + } catch (std::exception const& e) { + LOGP(error, "Failed to finalize TF file {}, reason: ", mCurrentTFFileNameFull, e.what()); + } + mTFOrbits.clear(); + mNTFsInFile = 0; +} + +//________________________________________ +bool RawTFDump::checkFreeSpace(ProcessingContext& pc) +{ + int totalWait = 0, nwaitCycles = 0; + while (mCheckDiskFull) { + constexpr int showFirstN = 10, prsecaleWarnings = 50; + try { + const auto si = std::filesystem::space(mCurrentTFFileNameFullTmp); + std::string wmsg{}; + if (mCheckDiskFull > 0.f && si.available < mCheckDiskFull) { + nwaitCycles++; + wmsg = fmt::format("Disk has {} MiB available while at least {} MiB is requested, wait for {} ms (on top of {} ms)", si.available / MiB, size_t(mCheckDiskFull) / MiB, mWaitDiskFull, totalWait); + } else if (mCheckDiskFull < 0.f && float(si.available) / si.capacity < -mCheckDiskFull) { // relative margin requested + nwaitCycles++; + wmsg = fmt::format("Disk has {:.3f}% available while at least {:.3f}% is requested, wait for {} ms (on top of {} ms)", si.capacity ? float(si.available) / si.capacity * 100.f : 0., -mCheckDiskFull, mWaitDiskFull, totalWait); + } else { + nwaitCycles = 0; + } + if (nwaitCycles) { + if (mWaitDiskFullMax > 0 && totalWait > mWaitDiskFullMax) { + closeTFFile(); // try to save whatever we have + LOGP(fatal, "Disk has {} MiB available out of {} MiB after waiting for {} ms", si.available / MiB, si.capacity / MiB, mWaitDiskFullMax); + } + if (nwaitCycles < showFirstN + 1 || (prsecaleWarnings && (nwaitCycles % prsecaleWarnings) == 0)) { + LOGP(alarm, "{}", wmsg); + } + pc.services().get().waitFor((unsigned int)(mWaitDiskFull)); + totalWait += mWaitDiskFull; + continue; + } + } catch (std::exception const& e) { + LOGP(fatal, "unable to query disk space info for path {}, reason {}", mCurrentTFFileNameFull, e.what()); // do we want this? + } + break; + } + return true; +} + +//________________________________________ +bool RawTFDump::triggerTF(ProcessingContext& pc) +{ + bool trig = false; + if (mTrigger.empty()) { // random + if (mMaxAccRate > 0.f) { + trig = (mUniformDist(mRGen) <= mMaxAccRate); + } else if (mMaxAccRate < 0.f) { + trig = (mTimingInfo.tfCounter % int(std::ceil(-100.f / mMaxAccRate))) == 0; + } + } else { + for (auto const& ref : InputRecordWalker(pc.inputs(), mTriggerFilter)) { + auto const* dh = DataRefUtils::getHeader(ref); + if (!dh) { + LOGP(error, "Failed to extract header for trigger input"); + continue; + } + auto extTrig = DataRefUtils::as(ref); + if (mVerbose > 1 || (mVerbose > 0 && extTrig.size() > 0 && extTrig[0])) { + LOGP(info, "trigger input {}, part: {} of {}, payload {}, 1stTFOrbit: {} TF: {} | span size: {} span[0]={}", + DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}), + dh->splitPayloadIndex, dh->splitPayloadParts, dh->payloadSize, dh->firstTForbit, dh->tfCounter, extTrig.size(), extTrig.size() > 0 ? extTrig[0] : false); + } + if (extTrig.size() && extTrig[0]) { + // is the input with this trigger vetoed? + bool veto = false; + for (const auto& excl : mExclTriggerFilter) { + if (DataRefUtils::match(ref, excl)) { + if (mVerbose > 0) { + LOGP(info, "ignoring trigger from black-listed {}", DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification})); + } + veto = true; + break; + } + } + if (veto) { + continue; + } + trig = true; + break; + } + } + if (trig) { // do we need to throttle? + mNTFsExtTrig++; + mRateEstTrgLow = TMath::ChisquareQuantile(mConfLim, 2 * (mNTFsExtTrig)) / (2 * mNTFsSeen); + mRateEstTrgUpp = TMath::ChisquareQuantile(1. - mConfLim, 2 * (mNTFsExtTrig + 1)) / (2 * mNTFsSeen); + mRateEstAccLow = TMath::ChisquareQuantile(mConfLim, 2 * (mNTFsAccepted)) / (2 * mNTFsSeen); + mRateEstAccUpp = TMath::ChisquareQuantile(1. - mConfLim, 2 * (mNTFsAccepted + 1)) / (2 * mNTFsSeen); + if (mRateEstAccLow > 0.01 * mMaxAccRate) { // current lowest estimate on the acceptance rate exceeds desired limit -> ignore trigger + trig = false; + // do we need to warn? + if ((mNTFsSeen - mLastWarned) > mWarnThrottleTF && ((mNWarnThrottle < mMaxWarnThrottle) || mMaxWarnThrottle < 0)) { + mLastWarned = mNTFsSeen; + std::string swarn = reportRates(); + if (++mNWarnThrottle == mMaxWarnThrottle) { + swarn += " Will not warn anymore."; + } else { + swarn += fmt::format(" Will suppress this warnings for {} TFs", mWarnThrottleTF); + } + LOGP(alarm, "Ignoring TF triggered for dumping: {}", swarn); + } + } + } + } + if (trig) { + mNTFsAccepted++; + } + if (mVerbose > 0) { + LOGP(info, "TF#{} (slice#{}) will{} be written, {}", mTimingInfo.tfCounter, mTimingInfo.timeslice, trig ? "" : " not", reportRates()); + } + return trig; +} + +//________________________________________ +void RawTFDump::prepareTFForWriting(ProcessingContext& pc) +{ + for (auto const& ref : InputRecordWalker(pc.inputs(), mFilter)) { + auto const* dh = DataRefUtils::getHeader(ref); + if (!dh) { + LOGP(error, "Failed to extract header"); + continue; + } + if ((dh->subSpecification == 0xdeadbeef && mRejectDEADBEEF) || + (dh->dataOrigin == o2::header::gDataOriginFLP && dh->dataDescription == o2::header::gDataDescriptionDISTSTF && mRejectDistSTF)) { + if (mVerbose > 2) { + LOGP(info, "Rejecting {}", DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification})); + } + continue; + } + const auto payloadSize = DataRefUtils::getPayloadSize(ref); + const auto lHdrDataSize = sizeof(DataHeader) + payloadSize; + mTFSize += lHdrDataSize; + + auto& [lSize, lCnt, lEntry, lHeader] = mDataMap[EquipmentIdentifier(*dh)]; + if (!lCnt) { + lEntry = mTFData.size(); // flag where the data of this spec starts + lHeader = ref.header; + } + lSize += lHdrDataSize; + lCnt++; + mTFData.push_back({payloadSize, ref.payload}); + if (mVerbose > 2) { + const auto* dph = DataRefUtils::getHeader(ref); + LOGP(info, "{}, part: {}({}) of {}, payload {}({}), 1stTFOrbit: {} TF: {}, creation: {} | counter:{} size:{} entry:{}", + DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}), + dh->splitPayloadIndex, lCnt - 1, dh->splitPayloadParts, dh->payloadSize, payloadSize, dh->firstTForbit, dh->tfCounter, dph ? dph->creation : -1UL, lCnt, lSize, lEntry); + // if (o2::raw::RDHUtils::checkRDH(ref.payload)) { + // o2::raw::RDHUtils::printRDH(ref.payload); + // } + } + } + + // build the index + { + LOGP(info, "Creating dump image for TF {} of run {}, starting orbit {}, size = {}", mTimingInfo.tfCounter, mTimingInfo.runNumber, mTimingInfo.firstTForbit, mTFSize); + std::uint64_t lCurrOff = 0; + for (const auto& eqEntry : mDataMap) { + const auto& eq = eqEntry.first; + auto& [lSize, lCnt, lEntry, lHeader] = eqEntry.second; + assert(lSize > sizeof(DataHeader)); + + OutputSpec spec{eq.mDataOrigin, eq.mDataDescription, eq.mSubSpecification}; + if (mVerbose > 1) { + LOGP(info, "{} : {} parts of size {} entry {}| offset: {}", DataSpecUtils::describe(spec), lCnt, lSize, lEntry, lCurrOff); + } + mTFDataIndex.AddStfElement(eq, lCnt, lCurrOff, lSize); + lCurrOff += lSize; + } + } +} + +//____________________________________________________________ +std::string RawTFDump::reportRates() const +{ + std::string rep = fmt::format("{} TFs seen, {} accepted", mNTFsSeen, mNTFsAccepted); + if (!mTrigger.empty()) { + rep += fmt::format(", {} ext.triggered, est.rate: [{:.2e}:{:.2e}]/[{:.2e}:{:.2e}].", mNTFsExtTrig, mRateEstAccLow, mRateEstAccUpp, mRateEstTrgLow, mRateEstTrgUpp); + } + return rep; +} + +//__________________________________________________________ +DataProcessorSpec getRawTFDumpSpec(const std::string& inpconfig, const std::string& trigger) +{ + std::vector inputs = select(inpconfig.c_str()); + return DataProcessorSpec{ + "raw-tf-dump", + inputs, + {}, + AlgorithmSpec{adaptFromTask(trigger)}, + Options{ + {"include-deadbeef", VariantType::Bool, false, {"Include DPL-generated 0xdeadbeef subspecs for missing data"}}, + {"include-dist-stf", VariantType::Bool, false, {"Include FLP/DISTSUBTIMEFRAME input"}}, + {"exclude-trigger-specs", VariantType::String, "", {"Ignore trigger seen in these inputs of triggerspec"}}, + {"max-dump-rate", VariantType::Float, 0.f, {"%-age of TFs to dump. W/o external trigger: random(>0) or periodic(<0) rejection, with: max limit"}}, + {"rate-est-conf-limit", VariantType::Float, 0.05f, {"quantile for the lowest rate estimate confidence limit"}}, + {"max-warn", VariantType::Int, 5, {"max allowed warnings on throttling"}}, + {"mute-warn-period", VariantType::Int, 100, {"mute warnings on throttling for this number of TFs"}}, + {"output-dir", VariantType::String, "none", {"TF output directory, must exist"}}, + {"meta-output-dir", VariantType::String, "/dev/null", {"TF metadata output directory, must exist (if not /dev/null)"}}, + {"md5-for-meta", VariantType::Bool, false, {"fill CTF file MD5 sum in the metadata file"}}, + {"min-file-size", VariantType::Int64, 0l, {"accumulate TFs until given file size reached"}}, + {"max-file-size", VariantType::Int64, 0l, {"if > 0, try to avoid exceeding given file size, also used for space check"}}, + {"max-tf-per-file", VariantType::Int, 0, {"if > 0, avoid storing more than requested CTFs per file"}}, + {"require-free-disk", VariantType::Float, 0.f, {"pause writing op. if available disk space is below this margin, in bytes if >0, as a fraction of total if <0"}}, + {"wait-for-free-disk", VariantType::Float, 10.f, {"if paused due to the low disk space, recheck after this time (in s)"}}, + {"max-wait-for-free-disk", VariantType::Float, 60.f, {"produce fatal if paused due to the low disk space for more than this amount in s."}}, + {"verbosity-level", VariantType::Int, 0, {"Verbose mode: 1: decision on every TF, 2: details of saved TF, 3: more details"}}, + {"ignore-partition-run-dir", VariantType::Bool, false, {"Do not creare partition-run directory in output-dir"}}}}; +} + +} // namespace o2::rawdd diff --git a/Detectors/Raw/TFReaderDD/src/RawTFDumpSpec.h b/Detectors/Raw/TFReaderDD/src/RawTFDumpSpec.h new file mode 100644 index 0000000000000..a39cfb026ed52 --- /dev/null +++ b/Detectors/Raw/TFReaderDD/src/RawTFDumpSpec.h @@ -0,0 +1,23 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef O2_RAW_TF_DUMP_SPEC_ +#define O2_RAW_TF_DUMP_SPEC_ + +#include "DetectorsCommonDataFormats/DetID.h" +#include "Framework/DeviceSpec.h" + +namespace o2::rawdd +{ +o2::framework::DataProcessorSpec getRawTFDumpSpec(const std::string& inpconfig, const std::string& trigger); +} + +#endif diff --git a/Detectors/Raw/TFReaderDD/src/SubTimeFrameFileReader.cxx b/Detectors/Raw/TFReaderDD/src/SubTimeFrameFileReader.cxx index f227390e67ef3..c8bc6ff374ead 100644 --- a/Detectors/Raw/TFReaderDD/src/SubTimeFrameFileReader.cxx +++ b/Detectors/Raw/TFReaderDD/src/SubTimeFrameFileReader.cxx @@ -45,8 +45,8 @@ namespace o2f = o2::framework; /// SubTimeFrameFileReader //////////////////////////////////////////////////////////////////////////////// -SubTimeFrameFileReader::SubTimeFrameFileReader(const std::string& pFileName, o2::detectors::DetID::mask_t detMask) - : mFileName(pFileName) +SubTimeFrameFileReader::SubTimeFrameFileReader(const std::string& pFileName, o2::detectors::DetID::mask_t detMask, int verb, bool sup0xccdb, bool repaireHeaders, bool rejectDistSTF) + : mFileName(pFileName), mVerbosity(verb), mSup0xccdb(sup0xccdb), mRepaireHeaders(repaireHeaders), mRejectDistSTF(rejectDistSTF) { mFileMap.open(mFileName); if (!mFileMap.is_open()) { @@ -115,7 +115,7 @@ std::size_t SubTimeFrameFileReader::getHeaderStackSize() // throws ios_base::fai LOGP(error, "FileReader: Reached max number of headers allowed: {}.", cMaxHeaders); return 0; } - + LOGP(debug, "getHeaderStackSize, pos = {}, size = {}", lFilePosStart, lStackSize); return lStackSize; } @@ -178,13 +178,21 @@ Stack SubTimeFrameFileReader::getHeaderStack(std::size_t& pOrigsize) return Stack(lStackMem); } +const std::string SubTimeFrameFileReader::describeHeader(const o2::header::DataHeader& hd, bool full) const +{ + std::string res = fmt::format("{}", o2f::DataSpecUtils::describe(o2::framework::OutputSpec{hd.dataOrigin, hd.dataDescription, hd.subSpecification})); + if (full) { + res += fmt::format(" part:{}/{} sz:{} TF:{} Orb:{} Run:{}", hd.splitPayloadIndex, hd.splitPayloadParts, hd.payloadSize, hd.tfCounter, hd.firstTForbit, hd.runNumber); + } + return res; +} + std::uint32_t sRunNumber = 0; // TODO: add id to files metadata std::uint32_t sFirstTForbit = 0; // TODO: add id to files metadata std::uint64_t sCreationTime = 0; std::mutex stfMtx; -std::unique_ptr SubTimeFrameFileReader::read(fair::mq::Device* device, const std::vector& outputRoutes, - const std::string& rawChannel, size_t slice, bool sup0xccdb, int verbosity) +std::unique_ptr SubTimeFrameFileReader::read(fair::mq::Device* device, const std::vector& outputRoutes, const std::string& rawChannel, size_t slice) { std::unique_ptr messagesPerRoute = std::make_unique(); auto& msgMap = *messagesPerRoute.get(); @@ -252,9 +260,15 @@ std::unique_ptr SubTimeFrameFileReader::read(fair::mq::Device* return nullptr; } lStfMetaDataHdr = o2::header::DataHeader::Get(lMetaHdrStack.first()); + if (mVerbosity > 0) { + LOGP(info, "read filemeta, pos = {}, size = {}", position(), sizeof(SubTimeFrameFileMeta)); + } if (!read_advance(&lStfFileMeta, sizeof(SubTimeFrameFileMeta))) { return nullptr; } + if (mVerbosity > 0) { + LOGP(info, "TFMeta : {}", lStfFileMeta.info()); + } if (lStfFileMeta.mWriteTimeMs == 0 && creationFallBack != 0) { if (!creation0Notified) { creation0Notified = true; @@ -318,9 +332,9 @@ std::unique_ptr SubTimeFrameFileReader::read(fair::mq::Device* std::int64_t lLeftToRead = lStfDataSize; STFHeader stfHeader{tfID, -1u, -1u}; + DataHeader prevHeader; // read pairs while (lLeftToRead > 0) { - // allocate and read the Headers std::size_t lDataHeaderStackSize = 0; Stack lDataHeaderStack = getHeaderStack(lDataHeaderStackSize); @@ -335,6 +349,25 @@ std::unique_ptr SubTimeFrameFileReader::read(fair::mq::Device* return nullptr; } DataHeader locDataHeader(*lDataHeader); + + if (mRepaireHeaders) { + if (locDataHeader == prevHeader) { + if (prevHeader.tfCounter == locDataHeader.tfCounter && (prevHeader.splitPayloadIndex + 1) != locDataHeader.splitPayloadIndex) { + if (mVerbosity > 3) { + LOGP(warn, "Repairing wrong part index for {} to {}", describeHeader(locDataHeader, true), (prevHeader.splitPayloadIndex + 1) % prevHeader.splitPayloadParts); + } + locDataHeader.splitPayloadIndex = (++prevHeader.splitPayloadIndex) % prevHeader.splitPayloadParts; + } + } else { // new header + if (locDataHeader.splitPayloadIndex != 0) { + if (mVerbosity > 2) { + LOGP(warn, "Repairing wrong part index for new {} to {}", describeHeader(locDataHeader, true), (prevHeader.splitPayloadIndex + 1) % prevHeader.splitPayloadParts); + } + locDataHeader.splitPayloadIndex = 0; + } + } + prevHeader = locDataHeader; + } // sanity check if (int(locDataHeader.firstTForbit) == -1) { if (!negativeOrbitNotified) { @@ -350,6 +383,18 @@ std::unique_ptr SubTimeFrameFileReader::read(fair::mq::Device* } locDataHeader.runNumber = runNumberFallBack; } + const std::uint64_t lDataSize = locDataHeader.payloadSize; + + if (locDataHeader.dataOrigin == o2::header::gDataOriginFLP && locDataHeader.dataDescription == o2::header::gDataDescriptionDISTSTF && mRejectDistSTF) { + if (mVerbosity > 0) { + LOGP(warn, "Ignoring stored {}", describeHeader(locDataHeader)); + } + if (!ignore_nbytes(lDataSize)) { + return nullptr; + } + lLeftToRead -= (lDataHeaderStackSize + lDataSize); // update the counter + continue; + } o2::header::Stack headerStack{locDataHeader, o2f::DataProcessingHeader{tfID, 1, lStfFileMeta.mWriteTimeMs}}; if (stfHeader.runNumber == -1) { stfHeader.id = locDataHeader.tfCounter; @@ -359,8 +404,6 @@ std::unique_ptr SubTimeFrameFileReader::read(fair::mq::Device* sRunNumber = stfHeader.runNumber; sFirstTForbit = stfHeader.firstOrbit; } - - const std::uint64_t lDataSize = locDataHeader.payloadSize; // do we accept these data? auto detOrigStatus = mDetOrigMap.find(locDataHeader.dataOrigin); if (detOrigStatus != mDetOrigMap.end() && !detOrigStatus->second) { // this is a detector data and we don't want to read it @@ -398,14 +441,15 @@ std::unique_ptr SubTimeFrameFileReader::read(fair::mq::Device* msgSW.Stop(); #endif memcpy(lHdrStackMsg->GetData(), headerStack.data(), headerStack.size()); + LOGP(debug, "read data, pos = {}, size = {} leftToRead {}", position(), lDataSize, lLeftToRead); if (!read_advance(lDataMsg->GetData(), lDataSize)) { return nullptr; } - if (verbosity > 0) { - if (verbosity > 1 || locDataHeader.splitPayloadIndex == 0) { + if (mVerbosity > 0) { + if (mVerbosity > 1 || locDataHeader.splitPayloadIndex == 0) { printStack(headerStack); - if (o2::raw::RDHUtils::checkRDH(lDataMsg->GetData()) && verbosity > 2) { + if (o2::raw::RDHUtils::checkRDH(lDataMsg->GetData()) && mVerbosity > 2) { o2::raw::RDHUtils::printRDH(lDataMsg->GetData()); } } @@ -413,6 +457,9 @@ std::unique_ptr SubTimeFrameFileReader::read(fair::mq::Device* #ifdef _RUN_TIMING_MEASUREMENT_ addPartSW.Start(false); #endif + if (mVerbosity > 2) { + LOGP(info, "addPart {} to {} | HdrSize:{} DataSize:{}", describeHeader(locDataHeader, true), fmqChannel, lHdrStackMsg->GetSize(), lDataMsg->GetSize()); + } addPart(std::move(lHdrStackMsg), std::move(lDataMsg), fmqChannel); #ifdef _RUN_TIMING_MEASUREMENT_ addPartSW.Stop(); @@ -434,7 +481,7 @@ std::unique_ptr SubTimeFrameFileReader::read(fair::mq::Device* } unsigned stfSS[2] = {0, 0xccdb}; - for (int iss = 0; iss < (sup0xccdb ? 1 : 2); iss++) { + for (int iss = 0; iss < (mSup0xccdb ? 1 : 2); iss++) { o2::header::DataHeader stfDistDataHeader(o2::header::gDataDescriptionDISTSTF, o2::header::gDataOriginFLP, stfSS[iss], sizeof(STFHeader), 0, 1); stfDistDataHeader.payloadSerializationMethod = o2::header::gSerializationMethodNone; stfDistDataHeader.firstTForbit = stfHeader.firstOrbit; @@ -444,7 +491,7 @@ std::unique_ptr SubTimeFrameFileReader::read(fair::mq::Device* if (!fmqChannel.empty()) { // no output channel auto fmqFactory = device->GetChannel(fmqChannel, 0).Transport(); o2::header::Stack headerStackSTF{stfDistDataHeader, o2f::DataProcessingHeader{tfID, 1, lStfFileMeta.mWriteTimeMs}}; - if (verbosity > 0) { + if (mVerbosity > 0) { printStack(headerStackSTF); } auto hdMessageSTF = fmqFactory->CreateMessage(headerStackSTF.size(), fair::mq::Alignment{64}); @@ -454,6 +501,9 @@ std::unique_ptr SubTimeFrameFileReader::read(fair::mq::Device* #ifdef _RUN_TIMING_MEASUREMENT_ addPartSW.Start(false); #endif + if (mVerbosity > 2) { + LOGP(info, "addPart forced {} to {} | HdrSize:{} DataSize:{}", describeHeader(stfDistDataHeader, true), fmqChannel, hdMessageSTF->GetSize(), plMessageSTF->GetSize()); + } addPart(std::move(hdMessageSTF), std::move(plMessageSTF), fmqChannel); #ifdef _RUN_TIMING_MEASUREMENT_ addPartSW.Stop(); diff --git a/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx b/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx index 919e76083f595..e9c37933a2e1c 100644 --- a/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx +++ b/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx @@ -118,6 +118,9 @@ void TFReaderSpec::init(o2f::InitContext& ic) mInput.maxTFsPerFile = mInput.maxTFsPerFile > 0 ? mInput.maxTFsPerFile : 0x7fffffff; mInput.maxTFCache = std::max(1, ic.options().get("max-cached-tf")); mInput.maxFileCache = std::max(1, ic.options().get("max-cached-files")); + mInput.repairHeaders = !ic.options().get("ignore-repair-headers"); + mInput.rejectDistSTF = !ic.options().get("read-dist-stf"); + if (!mInput.fileRunTimeSpans.empty()) { loadRunTimeSpans(mInput.fileRunTimeSpans); } @@ -263,7 +266,11 @@ void TFReaderSpec::run(o2f::ProcessingContext& ctx) setTimingInfo(*tfPtr.get()); size_t nparts = 0, dataSize = 0; if (mInput.sendDummyForMissing) { + int cntAck = 0; for (auto& msgIt : *tfPtr.get()) { // complete with empty output for the specs which were requested but were not seen in the data + if (mInput.verbosity > 0) { + LOGP(info, "acknowledgeOutput {}", cntAck++); + } acknowledgeOutput(*msgIt.second.get(), true); } addMissingParts(*tfPtr.get()); @@ -409,7 +416,7 @@ void TFReaderSpec::TFBuilder() } LOG(info) << "Processing file " << tfFileName; - SubTimeFrameFileReader reader(tfFileName, mInput.detMask); + SubTimeFrameFileReader reader(tfFileName, mInput.detMask, mInput.verbosity, mInput.sup0xccdb, mInput.repairHeaders, mInput.rejectDistSTF); size_t locID = 0; // try { @@ -421,7 +428,7 @@ void TFReaderSpec::TFBuilder() std::this_thread::sleep_for(sleepTime); continue; } - auto tf = reader.read(mDevice, mOutputRoutes, mInput.rawChannelConfig, mAccTFCounter, mInput.sup0xccdb, mInput.verbosity); + auto tf = reader.read(mDevice, mOutputRoutes, mInput.rawChannelConfig, mAccTFCounter); bool acceptTF = true; if (tf) { if (mRunTimeRanges.size()) { @@ -610,6 +617,10 @@ o2f::DataProcessorSpec o2::rawdd::getTFReaderSpec(o2::rawdd::TFReaderInp& rinp) spec.outputs.emplace_back(o2f::OutputSpec{o2f::OutputSpec{DetID::getDataOrigin(DetID::CTP), "LUMI", 0}}); rinp.hdVec.emplace_back(o2h::DataHeader{"LUMI", DetID::getDataOrigin(DetID::CTP), 0, 0}); // in abcence of real data this will be sent } + if (id == DetID::TPC) { + spec.outputs.emplace_back(o2f::OutputSpec{o2f::ConcreteDataTypeMatcher{DetID::getDataOrigin(id), "CMVTRIGGER"}}); + rinp.hdVec.emplace_back("CMVTRIGGER", DetID::getDataOrigin(id), 0xDEADBEEF, 0); // in abcence of real data this will be sent + } if (id == DetID::TOF) { spec.outputs.emplace_back(o2f::OutputSpec{o2f::ConcreteDataTypeMatcher{DetID::getDataOrigin(DetID::TOF), "CRAWDATA"}}); rinp.hdVec.emplace_back(o2h::DataHeader{"CRAWDATA", DetID::getDataOrigin(DetID::TOF), 0xDEADBEEF, 0}); // in abcence of real data this will be sent @@ -675,6 +686,8 @@ o2f::DataProcessorSpec o2::rawdd::getTFReaderSpec(o2::rawdd::TFReaderInp& rinp) } spec.options.emplace_back(o2f::ConfigParamSpec{"select-tf-ids", o2f::VariantType::String, "", {"comma-separated list TF IDs to inject (from cumulative counter of TFs seen)"}}); spec.options.emplace_back(o2f::ConfigParamSpec{"fetch-failure-threshold", o2f::VariantType::Float, 0.f, {"Fatil if too many failures( >0: fraction, <0: abs number, 0: no threshold)"}}); + spec.options.emplace_back(o2f::ConfigParamSpec{"ignore-repair-headers", o2f::VariantType::Bool, false, {"do not check/repair headers"}}); + spec.options.emplace_back(o2f::ConfigParamSpec{"read-dist-stf", o2f::VariantType::Bool, false, {"do not ignore stored FLP/DISTSUBTIMEFRAME (will clash with injected one)"}}); spec.options.emplace_back(o2f::ConfigParamSpec{"max-tf", o2f::VariantType::Int, -1, {"max TF ID to process (<= 0 : infinite)"}}); spec.options.emplace_back(o2f::ConfigParamSpec{"max-tf-per-file", o2f::VariantType::Int, -1, {"max TFs to process per raw-tf file (<= 0 : infinite)"}}); spec.options.emplace_back(o2f::ConfigParamSpec{"max-cached-tf", o2f::VariantType::Int, 3, {"max TFs to cache in memory"}}); diff --git a/Detectors/Raw/TFReaderDD/src/TFReaderSpec.h b/Detectors/Raw/TFReaderDD/src/TFReaderSpec.h index 2c1c62ecbb414..6ecce0d032c06 100644 --- a/Detectors/Raw/TFReaderDD/src/TFReaderSpec.h +++ b/Detectors/Raw/TFReaderDD/src/TFReaderSpec.h @@ -49,6 +49,8 @@ struct TFReaderInp { bool sendDummyForMissing = true; bool sup0xccdb = false; bool invertIRFramesSelection = false; + bool repairHeaders = true; + bool rejectDistSTF = true; std::vector hdVec; std::vector tfIDs{}; }; diff --git a/Detectors/Raw/TFReaderDD/src/tf-data-dump-workflow.cxx b/Detectors/Raw/TFReaderDD/src/tf-data-dump-workflow.cxx new file mode 100644 index 0000000000000..fbade100d202f --- /dev/null +++ b/Detectors/Raw/TFReaderDD/src/tf-data-dump-workflow.cxx @@ -0,0 +1,46 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include "CommonUtils/ConfigurableParam.h" +#include "Framework/ConfigParamRegistry.h" +#include "Framework/CompletionPolicy.h" +#include "Framework/CompletionPolicyHelpers.h" + +using namespace o2::framework; + +void customize(std::vector& workflowOptions) +{ + std::vector options; + options.push_back(ConfigParamSpec{"dataspec", VariantType::String, "tst:TST/A", {"selection string for the data to be proxied"}}); + options.push_back(ConfigParamSpec{"triggerspec", VariantType::String, "", {"selection string for the trigger input (must be also in dataspec if non-empty)"}}); + options.push_back(ConfigParamSpec{"configKeyValues", VariantType::String, "", {"semicolon separated key=value strings"}}); + std::swap(workflowOptions, options); +} + +void customize(std::vector& policies) +{ + policies.push_back({CompletionPolicyHelpers::consumeWhenPastOldestPossibleTimeframe("raw-tf-dump", [](auto const&) -> bool { return true; })}); + // policies.push_back({CompletionPolicyHelpers::consumeWhenAllOrdered("raw-tf-dump", [](auto const&) -> bool { return true; })}); // RSTOREM +} + +// ------------------------------------------------------------------ + +#include "Framework/runDataProcessing.h" +#include "RawTFDumpSpec.h" + +WorkflowSpec defineDataProcessing(ConfigContext const& configcontext) +{ + o2::conf::ConfigurableParam::updateFromString(configcontext.options().get("configKeyValues")); + auto inpconfig = configcontext.options().get("dataspec"); + auto trigger = configcontext.options().get("triggerspec"); + WorkflowSpec specs{o2::rawdd::getRawTFDumpSpec(inpconfig, trigger)}; + return specs; +} diff --git a/Detectors/Raw/TFReaderDD/src/tf-reader-workflow.cxx b/Detectors/Raw/TFReaderDD/src/tf-reader-workflow.cxx index b424353531de7..a29b4dadfdb25 100644 --- a/Detectors/Raw/TFReaderDD/src/tf-reader-workflow.cxx +++ b/Detectors/Raw/TFReaderDD/src/tf-reader-workflow.cxx @@ -34,7 +34,7 @@ void customize(std::vector& workflowOptions) options.push_back(ConfigParamSpec{"copy-dir", VariantType::String, "/tmp/", {"copy base directory for remote files"}}); options.push_back(ConfigParamSpec{"tf-file-regex", VariantType::String, ".+\\.tf$", {"regex string to identify TF files"}}); options.push_back(ConfigParamSpec{"remote-regex", VariantType::String, "^(alien://|)/alice/data/.+", {"regex string to identify remote files"}}); // Use "^/eos/aliceo2/.+" for direct EOS access - options.push_back(ConfigParamSpec{"tf-reader-verbosity", VariantType::Int, 0, {"verbosity level (1 or 2: check RDH, print DH/DPH for 1st or all slices, >2 print RDH)"}}); + options.push_back(ConfigParamSpec{"tf-reader-verbosity", VariantType::Int, 0, {"verbosity level (1 or 2: check RDH, print DH/DPH for 1st or all slices, >2 print RDH), report repairs"}}); options.push_back(ConfigParamSpec{"raw-channel-config", VariantType::String, "", {"optional raw FMQ channel for non-DPL output"}}); options.push_back(ConfigParamSpec{"send-diststf-0xccdb", VariantType::Bool, false, {"send explicit FLP/DISTSUBTIMEFRAME/0xccdb output"}}); options.push_back(ConfigParamSpec{"disable-dummy-output", VariantType::Bool, false, {"Disable sending empty output if corresponding data is not found in the data"}}); diff --git a/Detectors/TPC/calibration/CMakeLists.txt b/Detectors/TPC/calibration/CMakeLists.txt index 675f15e89258b..6aeb497c1cf23 100644 --- a/Detectors/TPC/calibration/CMakeLists.txt +++ b/Detectors/TPC/calibration/CMakeLists.txt @@ -60,6 +60,7 @@ o2_add_library(TPCCalibration src/PressureTemperatureHelper.cxx src/CMVContainer.cxx src/CorrectionMapsLoader.cxx + src/CMVHelper.cxx PUBLIC_LINK_LIBRARIES O2::DataFormatsTPC O2::TPCBaseRecSim O2::TPCReconstruction ROOT::Minuit Microsoft.GSL::GSL @@ -119,7 +120,8 @@ o2_target_root_dictionary(TPCCalibration include/TPCCalibration/CorrectdEdxDistortions.h include/TPCCalibration/PressureTemperatureHelper.h include/TPCCalibration/CMVContainer.h - include/TPCCalibration/CorrectionMapsLoader.h) + include/TPCCalibration/CorrectionMapsLoader.h + include/TPCCalibration/CMVHelper.h) o2_add_test_root_macro(macro/comparePedestalsAndNoise.C PUBLIC_LINK_LIBRARIES O2::TPCBaseRecSim diff --git a/Detectors/TPC/calibration/include/TPCCalibration/CMVHelper.h b/Detectors/TPC/calibration/include/TPCCalibration/CMVHelper.h new file mode 100644 index 0000000000000..d687c6872b8df --- /dev/null +++ b/Detectors/TPC/calibration/include/TPCCalibration/CMVHelper.h @@ -0,0 +1,52 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// @file CMVHelper.h +/// @author Tuba Gündem, tuba.gundem@cern.ch +/// @brief Helper utilities for reading CMV ROOT files + +#ifndef ALICEO2_TPC_CMVHELPER_H_ +#define ALICEO2_TPC_CMVHELPER_H_ + +#include + +#include "TFile.h" +#include "TTree.h" + +namespace o2::tpc +{ + +struct CMVPerTF; +struct CMVPerTFCompressed; + +struct CMVFileHandle { + TFile* file{nullptr}; + TTree* tree{nullptr}; + bool isCompressed{false}; + CMVPerTFCompressed* tfCompressed{nullptr}; + CMVPerTF* tfRaw{nullptr}; + CMVPerTF* tfDecoded{nullptr}; ///< scratch buffer used when decompressing + long firstTFInTree{-1}; ///< first global TF index from tree UserInfo ("firstTF"); -1 if absent + long lastTFInTree{-1}; ///< last global TF index from tree UserInfo ("lastTF"); -1 if absent + + /// Open path and set up branch addresses. Returns false on any error + bool open(const std::string& path); + + /// Load entry iEntry and return a pointer to the decoded CMVPerTF, or nullptr on error + const CMVPerTF* getEntry(long long iEntry); + + /// Release all resources + void close(); +}; + +} // namespace o2::tpc + +#endif // ALICEO2_TPC_CMVHELPER_H_ diff --git a/Detectors/TPC/calibration/macro/drawCMV.C b/Detectors/TPC/calibration/macro/drawCMV.C index 4f74db16cecd6..78e951fcfd676 100644 --- a/Detectors/TPC/calibration/macro/drawCMV.C +++ b/Detectors/TPC/calibration/macro/drawCMV.C @@ -11,19 +11,19 @@ #if !defined(__CLING__) || defined(__ROOTCLING__) #include -#include #include +#include #include -#include "TFile.h" -#include "TParameter.h" #include "TTree.h" #include "TH1F.h" #include "TH2F.h" #include "TCanvas.h" -#include "TPCCalibration/CMVContainer.h" #include "TPCBase/Utils.h" +#include "TPCCalibration/CMVContainer.h" +#include "TPCCalibration/CMVHelper.h" + #endif using namespace o2::tpc; @@ -38,39 +38,20 @@ TObjArray* drawCMV(std::string_view filename, std::string_view outDir, std::stri arrCanvases->SetName("CMV"); // open file - TFile f(filename.data(), "READ"); - if (f.IsZombie()) { + CMVFileHandle fh; + if (!fh.open(std::string(filename))) { fmt::print("ERROR: cannot open '{}'\n", filename); return arrCanvases; } fmt::print("Opened file: {}\n", filename); + fmt::print("Tree 'ccdb_object' found, entries: {}\n", fh.tree->GetEntries()); - // get TTree - TTree* tree = nullptr; - f.GetObject("ccdb_object", tree); - if (!tree) { - fmt::print("ERROR: TTree 'ccdb_object' not found\n"); - return arrCanvases; - } - fmt::print("Tree 'ccdb_object' found, entries: {}\n", tree->GetEntries()); - - // read metadata - long firstTF = -1, lastTF = -1; - if (auto* userInfo = tree->GetUserInfo()) { - for (int i = 0; i < userInfo->GetSize(); ++i) { - if (auto* p = dynamic_cast*>(userInfo->At(i))) { - if (std::string(p->GetName()) == "firstTF") - firstTF = p->GetVal(); - if (std::string(p->GetName()) == "lastTF") - lastTF = p->GetVal(); - } - } - } - fmt::print("firstTF: {}, lastTF: {}\n", firstTF, lastTF); + fmt::print("firstTF: {}, lastTF: {}\n", fh.firstTFInTree, fh.lastTFInTree); - const int nEntries = tree->GetEntries(); + const int nEntries = fh.tree->GetEntries(); if (nEntries == 0) { fmt::print("ERROR: no entries in tree\n"); + fh.close(); return arrCanvases; } @@ -80,61 +61,62 @@ TObjArray* drawCMV(std::string_view filename, std::string_view outDir, std::stri TH2F* h2d = new TH2F("hCMVvsTimeBin", ";Timebin (200 ns);Common Mode Values (ADC)", 100, 0, nTimeBins, 110, -100.5, 9.5); + h2d->SetDirectory(nullptr); h2d->SetStats(1); TH1F* h1d = new TH1F("hCMV", ";Common Mode Values (ADC);Counts", 110, -100.5, 9.5); + h1d->SetDirectory(nullptr); h1d->SetStats(1); - // auto-detect branch format: compressed or raw - const bool isCompressed = (tree->GetBranch("CMVPerTFCompressed") != nullptr); - const bool isRaw = (tree->GetBranch("CMVPerTF") != nullptr); - if (!isCompressed && !isRaw) { - fmt::print("ERROR: no recognised branch found (expected 'CMVPerTFCompressed' or 'CMVPerTF')\n"); - return arrCanvases; - } - fmt::print("Branch format: {}\n", isCompressed ? "CMVPerTFCompressed" : "CMVPerTF (raw)"); - - o2::tpc::CMVPerTFCompressed* tfCompressed = nullptr; - o2::tpc::CMVPerTF* tfRaw = nullptr; - CMVPerTF* tfDecoded = isCompressed ? new CMVPerTF() : nullptr; + TH1F* h1dCRU = new TH1F("hCRU", ";CRU;Counts", + 360, -0.5, 359.5); + h1dCRU->SetDirectory(nullptr); + h1dCRU->SetStats(1); + TH2F* h2dCRU = new TH2F("hCMVvsCRU", ";CRU;Common Mode Values (ADC)", + 360, -0.5, 359.5, + 110, -100.5, 9.5); + h2dCRU->SetDirectory(nullptr); + h2dCRU->SetStats(0); - if (isCompressed) { - tree->SetBranchAddress("CMVPerTFCompressed", &tfCompressed); - } else { - tree->SetBranchAddress("CMVPerTF", &tfRaw); - } + fmt::print("Branch format: {}\n", fh.isCompressed ? "CMVPerTFCompressed" : "CMVPerTF (raw)"); long firstOrbit = -1; long firstOrbitDPL = -1; + // Pre-allocate fill arrays once; x-values (timebins) are constant across entries and CRUs + const int fillsPerEntry = nCRUs * nTimeBins; + std::vector xArr(fillsPerEntry), yArr(fillsPerEntry), wArr(fillsPerEntry, 1.0), cruArr(fillsPerEntry); + for (int cru = 0; cru < nCRUs; ++cru) { + for (int tb = 0; tb < nTimeBins; ++tb) { + xArr[cru * nTimeBins + tb] = tb; + cruArr[cru * nTimeBins + tb] = cru; + } + } + for (int i = 0; i < nEntries; ++i) { - tree->GetEntry(i); - - // Decompress if needed; resolve to a unified CMVPerTF pointer - const CMVPerTF* tf = nullptr; - if (isCompressed) { - tfCompressed->decompress(tfDecoded); - tf = tfDecoded; - } else { - tf = tfRaw; + const CMVPerTF* tf = fh.getEntry(i); + if (!tf) { + continue; } firstOrbit = tf->firstOrbit; firstOrbitDPL = tf->firstOrbitDPL; - fmt::print("firstOrbit: {}, firstOrbitDPL: {}\n", firstOrbit, firstOrbitDPL); + + fmt::print("Entry {}: firstOrbit: {}, firstOrbitDPL: {}\n", i, firstOrbit, firstOrbitDPL); for (int cru = 0; cru < nCRUs; ++cru) { for (int tb = 0; tb < nTimeBins; ++tb) { - const float cmvValue = tf->getCMVFloat(cru, tb); - h2d->Fill(tb, cmvValue); - h1d->Fill(cmvValue); + yArr[cru * nTimeBins + tb] = tf->getCMVFloat(cru, tb); + // fmt::print("Entry {}: cru: {}, tb: {}, cmv: {}\n", i, cru, tb, tf->getCMVFloat(cru, tb)); } } + h2d->FillN(fillsPerEntry, xArr.data(), yArr.data(), wArr.data()); + h1d->FillN(fillsPerEntry, yArr.data(), wArr.data()); + h2dCRU->FillN(fillsPerEntry, cruArr.data(), yArr.data(), wArr.data()); + h1dCRU->FillN(fillsPerEntry, cruArr.data(), wArr.data()); } - delete tfDecoded; - tree->ResetBranchAddresses(); - delete tfCompressed; + fh.close(); // draw auto* c = new TCanvas("cCMVvsTimeBin", ""); @@ -149,10 +131,20 @@ TObjArray* drawCMV(std::string_view filename, std::string_view outDir, std::stri arrCanvases->Add(c1); + auto* c2 = new TCanvas("cCRUDistribution", ""); + h1dCRU->Draw(); + + arrCanvases->Add(c2); + + auto* c3 = new TCanvas("cCMVvsCRU", ""); + c3->SetLogz(); + h2dCRU->Draw("colz"); + + arrCanvases->Add(c3); + if (outDir.size()) { utils::saveCanvases(*arrCanvases, outDir, "", rootFileName); } - f.Close(); return arrCanvases; } diff --git a/Detectors/TPC/calibration/src/CMVHelper.cxx b/Detectors/TPC/calibration/src/CMVHelper.cxx new file mode 100644 index 0000000000000..abcbd977a9acb --- /dev/null +++ b/Detectors/TPC/calibration/src/CMVHelper.cxx @@ -0,0 +1,98 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// @file CMVHelper.cxx +/// @author Tuba Gündem, tuba.gundem@cern.ch +/// @brief Helper utilities for reading CMV ROOT files + +#include "TPCCalibration/CMVHelper.h" + +#include + +#include "TPCCalibration/CMVContainer.h" +#include "TParameter.h" + +namespace o2::tpc +{ + +bool CMVFileHandle::open(const std::string& path) +{ + file = TFile::Open(path.c_str()); + if (!file || file->IsZombie()) { + std::cerr << "CMVFileHandle: failed to open: " << path << "\n"; + return false; + } + file->GetObject("ccdb_object", tree); + if (!tree) { + std::cerr << "CMVFileHandle: TTree 'ccdb_object' not found in: " << path << "\n"; + close(); + return false; + } + + // Extract firstTF / lastTF from UserInfo if stored by the aggregation workflow + if (auto* ui = tree->GetUserInfo()) { + if (auto* p = dynamic_cast*>(ui->FindObject("firstTF"))) { + firstTFInTree = p->GetVal(); + } + if (auto* p = dynamic_cast*>(ui->FindObject("lastTF"))) { + lastTFInTree = p->GetVal(); + } + } + + isCompressed = (tree->GetBranch("CMVPerTFCompressed") != nullptr); + const bool isRaw = (tree->GetBranch("CMVPerTF") != nullptr); + if (!isCompressed && !isRaw) { + std::cerr << "CMVFileHandle: no recognised branch (CMVPerTFCompressed / CMVPerTF) in: " + << path << "\n"; + close(); + return false; + } + + if (isCompressed) { + tree->SetBranchAddress("CMVPerTFCompressed", &tfCompressed); + tfDecoded = new CMVPerTF(); + } else { + tree->SetBranchAddress("CMVPerTF", &tfRaw); + } + return true; +} + +const CMVPerTF* CMVFileHandle::getEntry(long long iEntry) +{ + tree->GetEntry(iEntry); + if (isCompressed) { + if (!tfCompressed) { + return nullptr; + } + tfCompressed->decompress(tfDecoded); + return tfDecoded; + } + return tfRaw; +} + +void CMVFileHandle::close() +{ + if (tree) { + tree->ResetBranchAddresses(); + tree = nullptr; + } + tfCompressed = nullptr; + tfRaw = nullptr; + delete tfDecoded; + tfDecoded = nullptr; + if (file) { + file->Close(); + delete file; + file = nullptr; + } +} + +} // namespace o2::tpc diff --git a/Detectors/TPC/calibration/src/CorrectionMapsLoader.cxx b/Detectors/TPC/calibration/src/CorrectionMapsLoader.cxx index 9569e0eb8abd2..c8bdfa0f99350 100644 --- a/Detectors/TPC/calibration/src/CorrectionMapsLoader.cxx +++ b/Detectors/TPC/calibration/src/CorrectionMapsLoader.cxx @@ -28,8 +28,13 @@ using namespace o2::framework; void CorrectionMapsLoader::extractCCDBInputs(ProcessingContext& pc, float tpcScaler) { pc.inputs().get("tpcCorrPar"); - pc.inputs().get("tpcCorrMap"); - pc.inputs().get("tpcCorrMapRef"); + const auto lumiMode = getLumiScaleMode(); + if (lumiMode != LumiScaleMode::NoCorrection && lumiMode != LumiScaleMode::StaticMapOnly) { + pc.inputs().get("tpcCorrMap"); + } + if (lumiMode != LumiScaleMode::NoCorrection) { + pc.inputs().get("tpcCorrMapRef"); + } const int maxDumRep = 5; int dumRep = 0; o2::ctp::LumiInfo lumiObj; @@ -97,6 +102,10 @@ void CorrectionMapsLoader::requestCCDBInputs(std::vector& inputs, con // for MC corrections addInput(inputs, {"tpcCorrMap", "TPC", "CorrMap", 0, Lifetime::Condition, ccdbParamSpec(CDBTypeMap.at(CDBType::CalCorrMapMC), {}, 1)}); // time-dependent addInput(inputs, {"tpcCorrMapRef", "TPC", "CorrMapRef", 0, Lifetime::Condition, ccdbParamSpec(CDBTypeMap.at(CDBType::CalCorrDerivMapMC), {}, 1)}); // time-dependent + } else if (gloOpts.lumiMode == LumiScaleMode::NoCorrection) { + // no correction maps needed — a dummy map is created at runtime + } else if (gloOpts.lumiMode == LumiScaleMode::StaticMapOnly) { + addInput(inputs, {"tpcCorrMapRef", "TPC", "CorrMapRef", 0, Lifetime::Condition, ccdbParamSpec(CDBTypeMap.at(CDBType::CalCorrMapRef), {}, 0)}); // load once } else { LOG(fatal) << "Correction mode unknown! Choose either 0 (default) or 1 (derivative map) for flag corrmap-lumi-mode."; } diff --git a/Detectors/TPC/calibration/src/CorrectionMapsOptions.cxx b/Detectors/TPC/calibration/src/CorrectionMapsOptions.cxx index 604b7c680385b..45c3771db57bf 100644 --- a/Detectors/TPC/calibration/src/CorrectionMapsOptions.cxx +++ b/Detectors/TPC/calibration/src/CorrectionMapsOptions.cxx @@ -21,13 +21,13 @@ CorrectionMapsGloOpts CorrectionMapsOptions::parseGlobalOptions(const o2::framew { CorrectionMapsGloOpts tpcopt; auto lumiTypeVal = opts.get("lumi-type"); - if (lumiTypeVal < -1 || lumiTypeVal > 2) { + if (lumiTypeVal < static_cast(LumiScaleType::Unset) || lumiTypeVal >= static_cast(LumiScaleType::Count)) { LOGP(fatal, "Invalid lumi-type value: {}", lumiTypeVal); } tpcopt.lumiType = static_cast(lumiTypeVal); auto lumiModeVal = opts.get("corrmap-lumi-mode"); - if (lumiModeVal < -1 || lumiModeVal > 2) { + if (lumiModeVal < static_cast(LumiScaleMode::Unset) || lumiModeVal >= static_cast(LumiScaleMode::Count)) { LOGP(fatal, "Invalid corrmap-lumi-mode value: {}", lumiModeVal); } tpcopt.lumiMode = static_cast(lumiModeVal); diff --git a/Detectors/TPC/calibration/src/TPCCalibrationLinkDef.h b/Detectors/TPC/calibration/src/TPCCalibrationLinkDef.h index 14d3d0a8ffb8e..847ae5ad7d788 100644 --- a/Detectors/TPC/calibration/src/TPCCalibrationLinkDef.h +++ b/Detectors/TPC/calibration/src/TPCCalibrationLinkDef.h @@ -124,6 +124,7 @@ #pragma link C++ class std::vector < o2::tpc::DigitAdd> + ; #pragma link C++ class o2::tpc::PressureTemperatureHelper + ; +#pragma link C++ struct o2::tpc::CMVFileHandle + ; #pragma link C++ class o2::tpc::CMVPerTF + ; #pragma link C++ class o2::tpc::CMVPerTFCompressed + ; diff --git a/Detectors/TPC/workflow/CMakeLists.txt b/Detectors/TPC/workflow/CMakeLists.txt index 37ac398db40ec..f64a223f683d8 100644 --- a/Detectors/TPC/workflow/CMakeLists.txt +++ b/Detectors/TPC/workflow/CMakeLists.txt @@ -309,4 +309,9 @@ o2_add_executable(cmv-aggregate SOURCES src/tpc-aggregate-cmv.cxx PUBLIC_LINK_LIBRARIES O2::TPCWorkflow) +o2_add_executable(cmv-trigger + COMPONENT_NAME tpc + SOURCES test/test_cmv-trigger.cxx + PUBLIC_LINK_LIBRARIES O2::TPCWorkflow) + add_subdirectory(readers) diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/CMVToVectorSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/CMVToVectorSpec.h index add37af5706e5..2f9209ee07da8 100644 --- a/Detectors/TPC/workflow/include/TPCWorkflow/CMVToVectorSpec.h +++ b/Detectors/TPC/workflow/include/TPCWorkflow/CMVToVectorSpec.h @@ -23,7 +23,7 @@ namespace o2::tpc /// create a processor spec /// convert CMV raw values to a vector in a CRU -o2::framework::DataProcessorSpec getCMVToVectorSpec(const std::string inputSpec, std::vector const& crus); +o2::framework::DataProcessorSpec getCMVToVectorSpec(std::string const& inputSpec, std::vector const& crus); } // end namespace o2::tpc diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/TPCAggregateCMVSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/TPCAggregateCMVSpec.h index b46f2169f06c9..3383da527cccf 100644 --- a/Detectors/TPC/workflow/include/TPCWorkflow/TPCAggregateCMVSpec.h +++ b/Detectors/TPC/workflow/include/TPCWorkflow/TPCAggregateCMVSpec.h @@ -49,9 +49,6 @@ #include "CommonUtils/StringUtils.h" #include "DetectorsCommonDataFormats/FileMetaData.h" -using namespace o2::framework; -using o2::header::gDataOriginTPC; - namespace o2::tpc { @@ -114,7 +111,7 @@ class TPCAggregateCMVDevice : public o2::framework::Task initIntervalTree(); } - void finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) final + void finaliseCCDB(o2::framework::ConcreteDataMatcher& matcher, void* obj) final { o2::base::GRPGeomHelper::instance().finaliseCCDB(matcher, obj); } @@ -136,7 +133,7 @@ class TPCAggregateCMVDevice : public o2::framework::Task } if (mSetDataTakingCont) { - mDataTakingContext = pc.services().get(); + mDataTakingContext = pc.services().get(); mSetDataTakingCont = false; } @@ -147,7 +144,7 @@ class TPCAggregateCMVDevice : public o2::framework::Task const auto currTF = processing_helpers::getCurrentTF(pc); if (mTFFirst == -1) { - for (auto& ref : InputRecordWalker(pc.inputs(), mFirstTFFilter)) { + for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mFirstTFFilter)) { mTFFirst = pc.inputs().get(ref); mIntervalFirstTF = mTFFirst; mHasIntervalFirstTF = true; @@ -203,7 +200,7 @@ class TPCAggregateCMVDevice : public o2::framework::Task // Capture orbit info first so setTimestampCCDB can use the measured stride if (!mOrbitInfoSeen[relTF]) { // all CRUs within a batch carry identical timing, so the first one is sufficient - for (auto& ref : InputRecordWalker(pc.inputs(), mOrbitFilter)) { + for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mOrbitFilter)) { mOrbitInfo[relTF] = pc.inputs().get(ref); const auto batchFirstOrbit = static_cast(mOrbitInfo[relTF] >> 32); // TimingInfo.firstTForbit is the orbit of the last real TF in the batch (the TF that triggered the FLP to send). @@ -222,8 +219,8 @@ class TPCAggregateCMVDevice : public o2::framework::Task setTimestampCCDB(relTF, mOrbitStep[relTF], pc); } - for (auto& ref : InputRecordWalker(pc.inputs(), mFilter)) { - auto const* hdr = DataRefUtils::getHeader(ref); + for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mFilter)) { + auto const* hdr = o2::framework::DataRefUtils::getHeader(ref); const unsigned int cru = hdr->subSpecification; if (!(std::binary_search(mCRUs.begin(), mCRUs.end(), cru))) { LOGP(debug, "Received CMV data from CRU {} which is not part of this aggregate lane", cru); @@ -233,7 +230,7 @@ class TPCAggregateCMVDevice : public o2::framework::Task continue; } - auto cmvVec = pc.inputs().get>(ref); + auto cmvVec = pc.inputs().get>(ref); mRawCMVs[relTF][cru] = std::vector(cmvVec.begin(), cmvVec.end()); mProcessedCRUs[relTF][cru] = true; ++mProcessedCRU[relTF]; @@ -257,7 +254,7 @@ class TPCAggregateCMVDevice : public o2::framework::Task materializeBufferedTFs(true); materializeEOSBuffer(); sendOutput(ec.outputs()); - ec.services().get().readyToQuit(QuitRequest::Me); + ec.services().get().readyToQuit(o2::framework::QuitRequest::Me); } static constexpr header::DataDescription getDataDescriptionCCDBCMV() { return header::DataDescription{"TPC_CMV"}; } @@ -314,18 +311,18 @@ class TPCAggregateCMVDevice : public o2::framework::Task std::unique_ptr mIntervalTree{}; ///< in-memory TTree accumulating one entry per real TF; serialised to CCDB/disk at interval end CMVPerTF mCurrentTF{}; ///< staging object written to the TTree branch for the uncompressed path CMVPerTFCompressed mCurrentCompressedTF{}; ///< staging object written to the TTree branch when any compression flags are set - const std::vector mFilter{ + const std::vector mFilter{ {"cmvagg", - ConcreteDataTypeMatcher{gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMV(mLaneId)}, - Lifetime::Sporadic}}; - const std::vector mOrbitFilter{ + o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMV(mLaneId)}, + o2::framework::Lifetime::Sporadic}}; + const std::vector mOrbitFilter{ {"cmvorbit", - ConcreteDataMatcher{gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitInfo(mLaneId), header::DataHeader::SubSpecificationType{static_cast(mLaneId)}}, - Lifetime::Sporadic}}; - const std::vector mFirstTFFilter{ + o2::framework::ConcreteDataMatcher{o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitInfo(mLaneId), header::DataHeader::SubSpecificationType{static_cast(mLaneId)}}, + o2::framework::Lifetime::Sporadic}}; + const std::vector mFirstTFFilter{ {"firstTF", - ConcreteDataMatcher{gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{static_cast(mLaneId)}}, - Lifetime::Sporadic}}; + o2::framework::ConcreteDataMatcher{o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{static_cast(mLaneId)}}, + o2::framework::Lifetime::Sporadic}}; uint8_t buildCompressionFlags() const { @@ -360,7 +357,7 @@ class TPCAggregateCMVDevice : public o2::framework::Task void collectEOSInputs(o2::framework::ProcessingContext& pc) { if (mEOSFirstOrbit == 0) { - for (auto& ref : InputRecordWalker(pc.inputs(), mOrbitFilter)) { + for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mOrbitFilter)) { const auto orbitBC = pc.inputs().get(ref); mEOSFirstOrbit = static_cast(orbitBC >> 32); mEOSFirstBC = static_cast(orbitBC & 0xFFFFu); @@ -368,13 +365,13 @@ class TPCAggregateCMVDevice : public o2::framework::Task } } - for (auto& ref : InputRecordWalker(pc.inputs(), mFilter)) { - auto const* hdr = DataRefUtils::getHeader(ref); + for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mFilter)) { + auto const* hdr = o2::framework::DataRefUtils::getHeader(ref); const unsigned int cru = hdr->subSpecification; if (!(std::binary_search(mCRUs.begin(), mCRUs.end(), cru))) { continue; } - auto cmvVec = pc.inputs().get>(ref); + auto cmvVec = pc.inputs().get>(ref); auto& buffer = mEOSRawCMVs[cru]; buffer.insert(buffer.end(), cmvVec.begin(), cmvVec.end()); } @@ -548,7 +545,7 @@ class TPCAggregateCMVDevice : public o2::framework::Task } } - void sendOutput(DataAllocator& output) + void sendOutput(o2::framework::DataAllocator& output) { using timer = std::chrono::high_resolution_clock; @@ -619,8 +616,8 @@ class TPCAggregateCMVDevice : public o2::framework::Task } LOGP(info, "Sending object {} / {} of size {} bytes, valid for {} : {}", ccdbInfoCMV.getPath(), ccdbInfoCMV.getFileName(), image->size(), ccdbInfoCMV.getStartValidityTimestamp(), ccdbInfoCMV.getEndValidityTimestamp()); - output.snapshot(Output{o2::calibration::Utils::gDataOriginCDBPayload, getDataDescriptionCCDBCMV(), 0}, *image); - output.snapshot(Output{o2::calibration::Utils::gDataOriginCDBWrapper, getDataDescriptionCCDBCMV(), 0}, ccdbInfoCMV); + output.snapshot(o2::framework::Output{o2::calibration::Utils::gDataOriginCDBPayload, getDataDescriptionCCDBCMV(), 0}, *image); + output.snapshot(o2::framework::Output{o2::calibration::Utils::gDataOriginCDBWrapper, getDataDescriptionCCDBCMV(), 0}, ccdbInfoCMV); auto stop = timer::now(); std::chrono::duration elapsed = stop - start; @@ -666,25 +663,25 @@ class TPCAggregateCMVDevice : public o2::framework::Task /// Build a DataProcessorSpec for one aggregate lane /// Each lane receives CMV data from one distribute output lane (matched by lane index) and expects the full CRU list — the distribute stage already routes per-CRU data to the correct lane -inline DataProcessorSpec getTPCAggregateCMVSpec(const int lane, - const std::vector& crus, - const unsigned int timeframes, - const bool sendCCDB, - const bool usePreciseTimestamp, - const int nTFsBuffer = 1) +inline o2::framework::DataProcessorSpec getTPCAggregateCMVSpec(const int lane, + const std::vector& crus, + const unsigned int timeframes, + const bool sendCCDB, + const bool usePreciseTimestamp, + const int nTFsBuffer = 1) { - std::vector outputSpecs; + std::vector outputSpecs; if (sendCCDB) { - outputSpecs.emplace_back(ConcreteDataTypeMatcher{o2::calibration::Utils::gDataOriginCDBPayload, TPCAggregateCMVDevice::getDataDescriptionCCDBCMV()}, Lifetime::Sporadic); - outputSpecs.emplace_back(ConcreteDataTypeMatcher{o2::calibration::Utils::gDataOriginCDBWrapper, TPCAggregateCMVDevice::getDataDescriptionCCDBCMV()}, Lifetime::Sporadic); + outputSpecs.emplace_back(o2::framework::ConcreteDataTypeMatcher{o2::calibration::Utils::gDataOriginCDBPayload, TPCAggregateCMVDevice::getDataDescriptionCCDBCMV()}, o2::framework::Lifetime::Sporadic); + outputSpecs.emplace_back(o2::framework::ConcreteDataTypeMatcher{o2::calibration::Utils::gDataOriginCDBWrapper, TPCAggregateCMVDevice::getDataDescriptionCCDBCMV()}, o2::framework::Lifetime::Sporadic); } - std::vector inputSpecs; - inputSpecs.emplace_back(InputSpec{"cmvagg", ConcreteDataTypeMatcher{gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMV(lane)}, Lifetime::Sporadic}); - inputSpecs.emplace_back(InputSpec{"cmvorbit", gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitInfo(lane), header::DataHeader::SubSpecificationType{static_cast(lane)}, Lifetime::Sporadic}); - inputSpecs.emplace_back(InputSpec{"firstTF", gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{static_cast(lane)}, Lifetime::Sporadic}); + std::vector inputSpecs; + inputSpecs.emplace_back(o2::framework::InputSpec{"cmvagg", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMV(lane)}, o2::framework::Lifetime::Sporadic}); + inputSpecs.emplace_back(o2::framework::InputSpec{"cmvorbit", o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitInfo(lane), header::DataHeader::SubSpecificationType{static_cast(lane)}, o2::framework::Lifetime::Sporadic}); + inputSpecs.emplace_back(o2::framework::InputSpec{"firstTF", o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{static_cast(lane)}, o2::framework::Lifetime::Sporadic}); if (usePreciseTimestamp) { - inputSpecs.emplace_back(InputSpec{"orbitreset", gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitReset(), header::DataHeader::SubSpecificationType{static_cast(lane)}, Lifetime::Sporadic}); + inputSpecs.emplace_back(o2::framework::InputSpec{"orbitreset", o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitReset(), header::DataHeader::SubSpecificationType{static_cast(lane)}, o2::framework::Lifetime::Sporadic}); } // Request GRPECS from CCDB so that GRPGeomHelper::getNHBFPerTF() is valid in this (separate) process @@ -696,21 +693,21 @@ inline DataProcessorSpec getTPCAggregateCMVSpec(const int lane, o2::base::GRPGeomRequest::None, // geometry inputSpecs); - DataProcessorSpec spec{ + o2::framework::DataProcessorSpec spec{ fmt::format("tpc-aggregate-cmv-{:02}", lane).data(), inputSpecs, outputSpecs, - AlgorithmSpec{adaptFromTask(lane, crus, timeframes, sendCCDB, usePreciseTimestamp, nTFsBuffer, ccdbRequest)}, - Options{{"output-dir", VariantType::String, "/dev/null", {"CMV output directory, must exist (if not /dev/null)"}}, - {"meta-output-dir", VariantType::String, "/dev/null", {"calibration metadata output directory, must exist (if not /dev/null)"}}, - {"nthreads-compression", VariantType::Int, 1, {"Number of threads used for CMV per timeframe preprocessing and compression"}}, - {"use-sparse", VariantType::Bool, false, {"Sparse encoding (skip zero time bins). Alone: raw uint16 values. With --use-compression-varint: varint exact values. With --use-compression-huffman: Huffman exact values"}}, - {"use-compression-varint", VariantType::Bool, false, {"Delta+zigzag+varint compression (all values). Combined with --use-sparse: sparse positions + varint encoded exact CMV values"}}, - {"use-compression-huffman", VariantType::Bool, false, {"Huffman encoding. Combined with --use-sparse: sparse positions + Huffman-encoded exact CMV values"}}, - {"cmv-zero-threshold", VariantType::Float, 0.f, {"Zero out CMV values whose float magnitude is below this threshold after optional integer rounding and before compression; 0 disables"}}, - {"cmv-round-integers-threshold", VariantType::Int, 0, {"Round values to nearest integer ADC for |v| <= N ADC before compression; 0 disables"}}, - {"cmv-dynamic-precision-mean", VariantType::Float, 1.f, {"Gaussian centre in |CMV| ADC where the strongest fractional bit trimming is applied"}}, - {"cmv-dynamic-precision-sigma", VariantType::Float, 0.f, {"Gaussian width in ADC for smooth CMV fractional bit trimming; 0 disables"}}}}; + o2::framework::AlgorithmSpec{o2::framework::adaptFromTask(lane, crus, timeframes, sendCCDB, usePreciseTimestamp, nTFsBuffer, ccdbRequest)}, + o2::framework::Options{{"output-dir", o2::framework::VariantType::String, "/dev/null", {"CMV output directory, must exist (if not /dev/null)"}}, + {"meta-output-dir", o2::framework::VariantType::String, "/dev/null", {"calibration metadata output directory, must exist (if not /dev/null)"}}, + {"nthreads-compression", o2::framework::VariantType::Int, 1, {"Number of threads used for CMV per timeframe preprocessing and compression"}}, + {"use-sparse", o2::framework::VariantType::Bool, false, {"Sparse encoding (skip zero time bins). Alone: raw uint16 values. With --use-compression-varint: varint exact values. With --use-compression-huffman: Huffman exact values"}}, + {"use-compression-varint", o2::framework::VariantType::Bool, false, {"Delta+zigzag+varint compression (all values). Combined with --use-sparse: sparse positions + varint encoded exact CMV values"}}, + {"use-compression-huffman", o2::framework::VariantType::Bool, false, {"Huffman encoding. Combined with --use-sparse: sparse positions + Huffman-encoded exact CMV values"}}, + {"cmv-zero-threshold", o2::framework::VariantType::Float, 0.f, {"Zero out CMV values whose float magnitude is below this threshold after optional integer rounding and before compression; 0 disables"}}, + {"cmv-round-integers-threshold", o2::framework::VariantType::Int, 0, {"Round values to nearest integer ADC for |v| <= N ADC before compression; 0 disables"}}, + {"cmv-dynamic-precision-mean", o2::framework::VariantType::Float, 1.f, {"Gaussian centre in |CMV| ADC where the strongest fractional bit trimming is applied"}}, + {"cmv-dynamic-precision-sigma", o2::framework::VariantType::Float, 0.f, {"Gaussian width in ADC for smooth CMV fractional bit trimming; 0 disables"}}}}; spec.rank = lane; return spec; } diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/TPCDistributeCMVSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/TPCDistributeCMVSpec.h index f3373070ab7bb..af576b2f30a5b 100644 --- a/Detectors/TPC/workflow/include/TPCWorkflow/TPCDistributeCMVSpec.h +++ b/Detectors/TPC/workflow/include/TPCWorkflow/TPCDistributeCMVSpec.h @@ -36,10 +36,6 @@ #include "DetectorsBase/GRPGeomHelper.h" #include "CommonDataFormat/Pair.h" -using namespace o2::framework; -using o2::header::gDataOriginTPC; -using namespace o2::tpc; - namespace o2::tpc { @@ -78,8 +74,8 @@ class TPCDistributeCMVSpec : public o2::framework::Task } } - mFilter.emplace_back(InputSpec{"cmvsgroup", ConcreteDataTypeMatcher{gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVGroup()}, Lifetime::Sporadic}); - mOrbitFilter.emplace_back(InputSpec{"cmvorbit", ConcreteDataTypeMatcher{gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVOrbitInfo()}, Lifetime::Sporadic}); + mFilter.emplace_back(o2::framework::InputSpec{"cmvsgroup", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVGroup()}, o2::framework::Lifetime::Sporadic}); + mOrbitFilter.emplace_back(o2::framework::InputSpec{"cmvorbit", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVOrbitInfo()}, o2::framework::Lifetime::Sporadic}); } void init(o2::framework::InitContext& ic) final @@ -97,13 +93,13 @@ class TPCDistributeCMVSpec : public o2::framework::Task } } - void finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) final + void finaliseCCDB(o2::framework::ConcreteDataMatcher& matcher, void* obj) final { o2::base::GRPGeomHelper::instance().finaliseCCDB(matcher, obj); - if (matcher == ConcreteDataMatcher("CTP", "ORBITRESET", 0)) { + if (matcher == o2::framework::ConcreteDataMatcher("CTP", "ORBITRESET", 0)) { LOGP(debug, "Updating ORBITRESET"); std::fill(mSendCCDBOutputOrbitReset.begin(), mSendCCDBOutputOrbitReset.end(), true); - } else if (matcher == ConcreteDataMatcher("GLO", "GRPECS", 0)) { + } else if (matcher == o2::framework::ConcreteDataMatcher("GLO", "GRPECS", 0)) { // check if received object is valid if (o2::base::GRPGeomHelper::instance().getGRPECS()->getRun() != 0) { LOGP(debug, "Updating GRPECS"); @@ -175,18 +171,18 @@ class TPCDistributeCMVSpec : public o2::framework::Task if (mSendOutputStartInfo[currentBuffer]) { mSendOutputStartInfo[currentBuffer] = false; - pc.outputs().snapshot(Output{gDataOriginTPC, getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{currentOutLane}}, mTFStart[currentBuffer]); + pc.outputs().snapshot(o2::framework::Output{o2::header::gDataOriginTPC, getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{currentOutLane}}, mTFStart[currentBuffer]); } if (mSendCCDBOutputOrbitReset[currentOutLane] && mSendCCDBOutputGRPECS[currentOutLane]) { mSendCCDBOutputOrbitReset[currentOutLane] = false; mSendCCDBOutputGRPECS[currentOutLane] = false; - pc.outputs().snapshot(Output{gDataOriginTPC, getDataDescriptionCMVOrbitReset(), header::DataHeader::SubSpecificationType{currentOutLane}}, dataformats::Pair{o2::base::GRPGeomHelper::instance().getOrbitResetTimeMS(), o2::base::GRPGeomHelper::instance().getNHBFPerTF()}); + pc.outputs().snapshot(o2::framework::Output{o2::header::gDataOriginTPC, getDataDescriptionCMVOrbitReset(), header::DataHeader::SubSpecificationType{currentOutLane}}, dataformats::Pair{o2::base::GRPGeomHelper::instance().getOrbitResetTimeMS(), o2::base::GRPGeomHelper::instance().getNHBFPerTF()}); } forwardOrbitInfo(pc, currentBuffer, relTF, currentOutLane); - for (auto& ref : InputRecordWalker(pc.inputs(), mFilter)) { + for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mFilter)) { auto const* tpcCRUHeader = o2::framework::DataRefUtils::getHeader(ref); const unsigned int cru = tpcCRUHeader->subSpecification >> 7; @@ -204,7 +200,7 @@ class TPCDistributeCMVSpec : public o2::framework::Task // to keep track of processed CRUs mProcessedCRUs[currentBuffer][relTF][cru] = true; - sendOutput(pc, currentOutLane, cru, pc.inputs().get>(ref)); + sendOutput(pc, currentOutLane, cru, pc.inputs().get>(ref)); } LOGP(detail, "Number of received CRUs for current TF: {} Needed a total number of processed CRUs of: {} Current TF: {}", mProcessedCRU[currentBuffer][relTF], mCRUs.size(), tf); @@ -223,7 +219,7 @@ class TPCDistributeCMVSpec : public o2::framework::Task } } - void endOfStream(o2::framework::EndOfStreamContext& ec) final { ec.services().get().readyToQuit(QuitRequest::Me); } + void endOfStream(o2::framework::EndOfStreamContext& ec) final { ec.services().get().readyToQuit(o2::framework::QuitRequest::Me); } /// Return data description for aggregated CMVs for a given lane static header::DataDescription getDataDescriptionCMV(const unsigned int lane) @@ -267,8 +263,8 @@ class TPCDistributeCMVSpec : public o2::framework::Task std::array mStartNTFsDataDrop{0}; ///< first relative TF index to check for missing data in each buffer long mProcessedTotalData{0}; ///< call counter used to throttle checkIntervalsForMissingData checks int mCheckEveryNData{1}; ///< check for missing data every N run() calls (0 → default = mTimeFrames/2) - std::vector mFilter{}; ///< filter for looping over CMVGROUP input data from FLPs - std::vector mOrbitFilter{}; ///< filter for CMVORBITINFO input from FLPs + std::vector mFilter{}; ///< filter for looping over CMVGROUP input data from FLPs + std::vector mOrbitFilter{}; ///< filter for CMVORBITINFO input from FLPs std::vector mDataDescrOut{}; ///< per-output-lane CMV data descriptions (CMVAGG0, CMVAGG1, …) std::vector mOrbitDescrOut{}; ///< per-output-lane orbit-info data descriptions (CMVORB0, CMVORB1, …) std::array, 2> mOrbitInfoForwarded{}; ///< tracks whether orbit/BC has been forwarded to the aggregate lane per (buffer, relTF) @@ -280,12 +276,12 @@ class TPCDistributeCMVSpec : public o2::framework::Task void sendOutput(o2::framework::ProcessingContext& pc, const unsigned int currentOutLane, const unsigned int cru, o2::pmr::vector cmvs) { - pc.outputs().adoptContainer(Output{gDataOriginTPC, mDataDescrOut[currentOutLane], header::DataHeader::SubSpecificationType{cru}}, std::move(cmvs)); + pc.outputs().adoptContainer(o2::framework::Output{o2::header::gDataOriginTPC, mDataDescrOut[currentOutLane], header::DataHeader::SubSpecificationType{cru}}, std::move(cmvs)); } void sendOrbitInfo(o2::framework::ProcessingContext& pc, const unsigned int outLane, const uint64_t orbitInfo) { - pc.outputs().snapshot(Output{gDataOriginTPC, mOrbitDescrOut[outLane], header::DataHeader::SubSpecificationType{outLane}}, orbitInfo); + pc.outputs().snapshot(o2::framework::Output{o2::header::gDataOriginTPC, mOrbitDescrOut[outLane], header::DataHeader::SubSpecificationType{outLane}}, orbitInfo); } void forwardOrbitInfo(o2::framework::ProcessingContext& pc, const bool currentBuffer, const unsigned int relTF, const unsigned int currentOutLane) @@ -294,7 +290,7 @@ class TPCDistributeCMVSpec : public o2::framework::Task return; } - for (auto& ref : InputRecordWalker(pc.inputs(), mOrbitFilter)) { + for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mOrbitFilter)) { auto const* hdr = o2::framework::DataRefUtils::getHeader(ref); const unsigned int cru = hdr->subSpecification >> 7; if (!std::binary_search(mCRUs.begin(), mCRUs.end(), cru)) { @@ -313,17 +309,17 @@ class TPCDistributeCMVSpec : public o2::framework::Task if (mSendOutputStartInfo[mBuffer] && (mTFStart[mBuffer] >= 0)) { mSendOutputStartInfo[mBuffer] = false; - pc.outputs().snapshot(Output{gDataOriginTPC, getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{currentOutLane}}, mTFStart[mBuffer]); + pc.outputs().snapshot(o2::framework::Output{o2::header::gDataOriginTPC, getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{currentOutLane}}, mTFStart[mBuffer]); } if (mSendCCDBOutputOrbitReset[currentOutLane] && mSendCCDBOutputGRPECS[currentOutLane]) { mSendCCDBOutputOrbitReset[currentOutLane] = false; mSendCCDBOutputGRPECS[currentOutLane] = false; - pc.outputs().snapshot(Output{gDataOriginTPC, getDataDescriptionCMVOrbitReset(), header::DataHeader::SubSpecificationType{currentOutLane}}, dataformats::Pair{o2::base::GRPGeomHelper::instance().getOrbitResetTimeMS(), o2::base::GRPGeomHelper::instance().getNHBFPerTF()}); + pc.outputs().snapshot(o2::framework::Output{o2::header::gDataOriginTPC, getDataDescriptionCMVOrbitReset(), header::DataHeader::SubSpecificationType{currentOutLane}}, dataformats::Pair{o2::base::GRPGeomHelper::instance().getOrbitResetTimeMS(), o2::base::GRPGeomHelper::instance().getNHBFPerTF()}); } if (!mOrbitInfoForwarded[mBuffer].empty()) { - for (auto& ref : InputRecordWalker(pc.inputs(), mOrbitFilter)) { + for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mOrbitFilter)) { auto const* hdr = o2::framework::DataRefUtils::getHeader(ref); const unsigned int cru = hdr->subSpecification >> 7; if (!std::binary_search(mCRUs.begin(), mCRUs.end(), cru)) { @@ -334,13 +330,13 @@ class TPCDistributeCMVSpec : public o2::framework::Task } } - for (auto& ref : InputRecordWalker(pc.inputs(), mFilter)) { + for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mFilter)) { auto const* hdr = o2::framework::DataRefUtils::getHeader(ref); const unsigned int cru = hdr->subSpecification >> 7; if (!std::binary_search(mCRUs.begin(), mCRUs.end(), cru)) { continue; } - sendOutput(pc, currentOutLane, cru, pc.inputs().get>(ref)); + sendOutput(pc, currentOutLane, cru, pc.inputs().get>(ref)); } } @@ -398,7 +394,7 @@ class TPCDistributeCMVSpec : public o2::framework::Task for (auto& it : mProcessedCRUs[currentBuffer][iTF]) { if (!it.second) { it.second = true; - sendOutput(pc, outLane, it.first, pmr::vector()); + sendOutput(pc, outLane, it.first, o2::pmr::vector()); } } @@ -417,7 +413,7 @@ class TPCDistributeCMVSpec : public o2::framework::Task mNFactorTFs = 0; // ToDo: Find better fix. Set oldestForChannel to a very large value so the DPL dispatcher does not block waiting for older TF data that will never arrive for (unsigned int ilane = 0; ilane < mOutLanes; ++ilane) { - auto& deviceProxy = pc.services().get(); + auto& deviceProxy = pc.services().get(); auto& state = deviceProxy.getOutputChannelState({static_cast(ilane)}); size_t oldest = std::numeric_limits::max() - 1; state.oldestForChannel = {oldest}; @@ -431,18 +427,18 @@ class TPCDistributeCMVSpec : public o2::framework::Task } }; -DataProcessorSpec getTPCDistributeCMVSpec(const int ilane, const std::vector& crus, const unsigned int timeframes, const unsigned int outlanes, const int firstTF, const bool sendPrecisetimeStamp = false, const int nTFsBuffer = 1) +o2::framework::DataProcessorSpec getTPCDistributeCMVSpec(const int ilane, const std::vector& crus, const unsigned int timeframes, const unsigned int outlanes, const int firstTF, const bool sendPrecisetimeStamp = false, const int nTFsBuffer = 1) { - std::vector inputSpecs; - inputSpecs.emplace_back(InputSpec{"cmvsgroup", ConcreteDataTypeMatcher{gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVGroup()}, Lifetime::Sporadic}); - inputSpecs.emplace_back(InputSpec{"cmvorbit", ConcreteDataTypeMatcher{gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVOrbitInfo()}, Lifetime::Sporadic}); + std::vector inputSpecs; + inputSpecs.emplace_back(o2::framework::InputSpec{"cmvsgroup", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVGroup()}, o2::framework::Lifetime::Sporadic}); + inputSpecs.emplace_back(o2::framework::InputSpec{"cmvorbit", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVOrbitInfo()}, o2::framework::Lifetime::Sporadic}); - std::vector outputSpecs; + std::vector outputSpecs; outputSpecs.reserve(3 * outlanes); for (unsigned int lane = 0; lane < outlanes; ++lane) { - outputSpecs.emplace_back(ConcreteDataTypeMatcher{gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMV(lane)}, Lifetime::Sporadic); - outputSpecs.emplace_back(ConcreteDataMatcher{gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitInfo(lane), header::DataHeader::SubSpecificationType{lane}}, Lifetime::Sporadic); - outputSpecs.emplace_back(ConcreteDataMatcher{gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{lane}}, Lifetime::Sporadic); + outputSpecs.emplace_back(o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMV(lane)}, o2::framework::Lifetime::Sporadic); + outputSpecs.emplace_back(o2::framework::ConcreteDataMatcher{o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitInfo(lane), header::DataHeader::SubSpecificationType{lane}}, o2::framework::Lifetime::Sporadic); + outputSpecs.emplace_back(o2::framework::ConcreteDataMatcher{o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{lane}}, o2::framework::Lifetime::Sporadic); } // Only lane 0 fetches CCDB orbit-reset/GRPECS objects and broadcasts them to all aggregate lanes, the other distribute lanes do not need them, avoiding redundant CCDB requests @@ -450,7 +446,7 @@ DataProcessorSpec getTPCDistributeCMVSpec(const int ilane, const std::vector(crus, timeframes, nTFsBuffer, outlanes, firstTF, ccdbRequest)}, - Options{{"drop-data-after-nTFs", VariantType::Int, 0, {"Number of TFs after which to drop the data."}}, - {"check-data-every-n", VariantType::Int, 0, {"Number of run function called after which to check for missing data (-1 for no checking, 0 for default checking)."}}, - {"nFactorTFs", VariantType::Int, 1000, {"Number of TFs to skip for sending oldest TF."}}}}; + o2::framework::AlgorithmSpec{o2::framework::adaptFromTask(crus, timeframes, nTFsBuffer, outlanes, firstTF, ccdbRequest)}, + o2::framework::Options{{"drop-data-after-nTFs", o2::framework::VariantType::Int, 0, {"Number of TFs after which to drop the data."}}, + {"check-data-every-n", o2::framework::VariantType::Int, 0, {"Number of run function called after which to check for missing data (-1 for no checking, 0 for default checking)."}}, + {"nFactorTFs", o2::framework::VariantType::Int, 1000, {"Number of TFs to skip for sending oldest TF."}}}}; spec.rank = ilane; return spec; } diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/TPCFLPCMVSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/TPCFLPCMVSpec.h index 9931c27c9d3fa..d86356234a1c2 100644 --- a/Detectors/TPC/workflow/include/TPCWorkflow/TPCFLPCMVSpec.h +++ b/Detectors/TPC/workflow/include/TPCWorkflow/TPCFLPCMVSpec.h @@ -13,8 +13,8 @@ /// @author Tuba Gündem, tuba.gundem@cern.ch /// @brief TPC device for processing CMVs on FLPs -#ifndef O2_TPCFLPIDCSPEC_H -#define O2_TPCFLPIDCSPEC_H +#ifndef O2_TPCFLPCMVSPEC_H +#define O2_TPCFLPCMVSPEC_H #include #include @@ -28,24 +28,27 @@ #include "Headers/DataHeader.h" #include "TPCWorkflow/ProcessingHelpers.h" #include "TPCBase/CRU.h" +#include "DataFormatsTPC/CMV.h" #include "TFile.h" -using namespace o2::framework; -using o2::header::gDataOriginTPC; -using namespace o2::tpc; - namespace o2::tpc { class TPCFLPCMVDevice : public o2::framework::Task { public: - TPCFLPCMVDevice(const int lane, const std::vector& crus, const int nTFsBuffer) - : mLane{lane}, mCRUs{crus}, mNTFsBuffer{nTFsBuffer} {} + TPCFLPCMVDevice(const int lane, const std::vector& crus, const bool triggerPerFlp, const int nTFsBuffer) + : mLane{lane}, mCRUs{crus}, mTriggerPerFLP{triggerPerFlp}, mNTFsBuffer{nTFsBuffer} {} void init(o2::framework::InitContext& ic) final { mDumpCMVs = ic.options().get("dump-cmvs-flp"); + mEnableTrigger = ic.options().get("trigger"); + mTriggerThresholdCMV = ic.options().get("trigger-threshold-cmv"); + mTriggerThresholdMeanMax = ic.options().get("trigger-threshold-cmvMeanMax"); + mTriggerThresholdMeanMin = ic.options().get("trigger-threshold-cmvMeanMin"); + mTriggerTimebinMin = ic.options().get("trigger-threshold-timebinMin"); + mTriggerTimebinMax = ic.options().get("trigger-threshold-timebinMax"); } void run(o2::framework::ProcessingContext& pc) final @@ -56,7 +59,7 @@ class TPCFLPCMVDevice : public o2::framework::Task // Capture heartbeatOrbit / heartbeatBC from the first TF in the buffer if (mCountTFsForBuffer == 1) { - for (auto& ref : InputRecordWalker(pc.inputs(), mOrbitFilter)) { + for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mOrbitFilter)) { auto const* hdr = o2::framework::DataRefUtils::getHeader(ref); const uint32_t cru = hdr->subSpecification >> 7; if (mFirstOrbitBC.find(cru) == mFirstOrbitBC.end()) { @@ -68,11 +71,23 @@ class TPCFLPCMVDevice : public o2::framework::Task } } - for (auto& ref : InputRecordWalker(pc.inputs(), mFilter)) { + bool triggered = false; + for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mFilter)) { auto const* tpcCRUHeader = o2::framework::DataRefUtils::getHeader(ref); - const int cru = tpcCRUHeader->subSpecification >> 7; + const uint32_t cru = tpcCRUHeader->subSpecification >> 7; auto vecCMVs = pc.inputs().get>(ref); mCMVs[cru].insert(mCMVs[cru].end(), vecCMVs.begin(), vecCMVs.end()); + + const bool cruTriggered = mEnableTrigger && evaluateTrigger(vecCMVs); + if (!mTriggerPerFLP) { + pc.outputs().snapshot(o2::framework::Output{o2::header::gDataOriginTPC, getDataDescriptionCMVTrigger(), tpcCRUHeader->subSpecification}, cruTriggered); + } else { + triggered |= cruTriggered; + } + } + if (mTriggerPerFLP) { + const header::DataHeader::SubSpecificationType trigSubSpec{mCRUs.front() << 7}; + pc.outputs().snapshot(o2::framework::Output{o2::header::gDataOriginTPC, getDataDescriptionCMVTrigger(), trigSubSpec}, triggered); } if (mCountTFsForBuffer >= mNTFsBuffer) { @@ -86,7 +101,7 @@ class TPCFLPCMVDevice : public o2::framework::Task if (mDumpCMVs) { TFile fOut(fmt::format("CMVs_{}_tf_{}.root", mLane, processing_helpers::getCurrentTF(pc)).data(), "RECREATE"); - for (auto& ref : InputRecordWalker(pc.inputs(), mFilter)) { + for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mFilter)) { auto const* tpcCRUHeader = o2::framework::DataRefUtils::getHeader(ref); const int cru = tpcCRUHeader->subSpecification >> 7; auto vec = pc.inputs().get>(ref); @@ -103,7 +118,7 @@ class TPCFLPCMVDevice : public o2::framework::Task sendOutput(ec.outputs(), cru); } } - ec.services().get().readyToQuit(QuitRequest::Me); + ec.services().get().readyToQuit(o2::framework::QuitRequest::Me); } static constexpr header::DataDescription getDataDescriptionCMVGroup() { return header::DataDescription{"CMVGROUP"}; } @@ -111,21 +126,65 @@ class TPCFLPCMVDevice : public o2::framework::Task /// Data description for the packed (orbit<<32|bc) scalar forwarded alongside each CRU's CMVGROUP. static constexpr header::DataDescription getDataDescriptionCMVOrbitInfo() { return header::DataDescription{"CMVORBITINFO"}; } + /// Data description for the per-CRU per-TF trigger flag (empty span = not triggered or disabled; {1} = triggered). + static constexpr header::DataDescription getDataDescriptionCMVTrigger() { return header::DataDescription{"CMVTRIGGER"}; } + private: const int mLane{}; ///< lane number of processor const std::vector mCRUs{}; ///< CRUs to process in this instance int mNTFsBuffer{1}; ///< number of TFs to buffer before sending bool mDumpCMVs{}; ///< dump CMVs to file for debugging + bool mTriggerPerFLP{false}; ///< send per-FLP trigger decision aggregated over CRUs int mCountTFsForBuffer{0}; ///< counts TFs to track when to send output std::unordered_map> mCMVs{}; ///< buffered raw 16-bit CMV values per CRU std::unordered_map mFirstOrbitBC{}; ///< first packed orbit/BC per CRU for the current buffer window + bool mEnableTrigger{false}; ///< enable CMV trigger evaluation + float mTriggerThresholdCMV{-10.f}; ///< CMV value threshold: trigger sequence starts when value drops below this + float mTriggerThresholdMeanMax{-40.f}; ///< upper bound on trigger-sequence mean CMV value + float mTriggerThresholdMeanMin{-80.f}; ///< lower bound on trigger-sequence mean CMV value + int mTriggerTimebinMin{4}; ///< minimum trigger-sequence length (timebins) to accept + int mTriggerTimebinMax{-1}; ///< maximum trigger-sequence length (timebins) to accept; -1 disables /// Filter for CMV float vectors (one CMVVECTOR message per CRU per TF) - const std::vector mFilter = {{"cmvs", ConcreteDataTypeMatcher{gDataOriginTPC, "CMVVECTOR"}, Lifetime::Timeframe}}; + const std::vector mFilter = {{"cmvs", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, "CMVVECTOR"}, o2::framework::Lifetime::Timeframe}}; /// Filter for CMV packet timing info (one CMVORBITS message per CRU per TF, sent by CMVToVectorSpec) - const std::vector mOrbitFilter = {{"cmvorbits", ConcreteDataTypeMatcher{gDataOriginTPC, "CMVORBITS"}, Lifetime::Timeframe}}; + const std::vector mOrbitFilter = {{"cmvorbits", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, "CMVORBITS"}, o2::framework::Lifetime::Timeframe}}; + + // Scan a CRU's CMV vector for contiguous below-threshold sequences. + // Returns true as soon as one sequence satisfies both the length and mean criteria. + bool evaluateTrigger(const o2::pmr::vector& cmvs) const + { + float seqSum = 0.f; + int seqLen = 0; + + auto checkSequence = [&]() -> bool { + if (seqLen == 0) { + return false; + } + const float mean = seqSum / seqLen; + return (seqLen >= mTriggerTimebinMin) && + (mTriggerTimebinMax < 0 || seqLen <= mTriggerTimebinMax) && + (mean >= mTriggerThresholdMeanMin) && + (mean <= mTriggerThresholdMeanMax); + }; + + for (const auto raw : cmvs) { + const float val = cmv::Data{raw}.getCMVFloat(); + if (val < mTriggerThresholdCMV) { + seqSum += val; + ++seqLen; + } else { + if (checkSequence()) { + return true; + } + seqLen = 0; + seqSum = 0.f; + } + } + return checkSequence(); // trailing sequence that reached end of buffer + } - void sendOutput(DataAllocator& output, const uint32_t cru) + void sendOutput(o2::framework::DataAllocator& output, const uint32_t cru) { const header::DataHeader::SubSpecificationType subSpec{cru << 7}; @@ -134,39 +193,54 @@ class TPCFLPCMVDevice : public o2::framework::Task if (auto it = mFirstOrbitBC.find(cru); it != mFirstOrbitBC.end()) { orbitBC = it->second; } - output.snapshot(Output{gDataOriginTPC, getDataDescriptionCMVOrbitInfo(), subSpec}, orbitBC); + output.snapshot(o2::framework::Output{o2::header::gDataOriginTPC, getDataDescriptionCMVOrbitInfo(), subSpec}, orbitBC); - output.adoptContainer(Output{gDataOriginTPC, getDataDescriptionCMVGroup(), subSpec}, std::move(mCMVs[cru])); + output.adoptContainer(o2::framework::Output{o2::header::gDataOriginTPC, getDataDescriptionCMVGroup(), subSpec}, std::move(mCMVs[cru])); } }; -DataProcessorSpec getTPCFLPCMVSpec(const int ilane, const std::vector& crus, const int nTFsBuffer = 1) +o2::framework::DataProcessorSpec getTPCFLPCMVSpec(const int ilane, const std::vector& crus, const bool triggerPerFlp, const int nTFsBuffer = 1) { - std::vector outputSpecs; - std::vector inputSpecs; - outputSpecs.reserve(crus.size()); - inputSpecs.reserve(crus.size()); + std::vector outputSpecs; + std::vector inputSpecs; + outputSpecs.reserve(crus.size() * 2 + 1); + inputSpecs.reserve(crus.size() * 2); for (const auto& cru : crus) { const header::DataHeader::SubSpecificationType subSpec{cru << 7}; // Inputs from CMVToVectorSpec - inputSpecs.emplace_back(InputSpec{"cmvs", gDataOriginTPC, "CMVVECTOR", subSpec, Lifetime::Timeframe}); - inputSpecs.emplace_back(InputSpec{"cmvorbits", gDataOriginTPC, "CMVORBITS", subSpec, Lifetime::Timeframe}); + inputSpecs.emplace_back(o2::framework::InputSpec{"cmvs", o2::header::gDataOriginTPC, "CMVVECTOR", subSpec, o2::framework::Lifetime::Timeframe}); + inputSpecs.emplace_back(o2::framework::InputSpec{"cmvorbits", o2::header::gDataOriginTPC, "CMVORBITS", subSpec, o2::framework::Lifetime::Timeframe}); // Outputs to TPCDistributeCMVSpec - outputSpecs.emplace_back(ConcreteDataMatcher{gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVGroup(), subSpec}, Lifetime::Sporadic); - outputSpecs.emplace_back(ConcreteDataMatcher{gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVOrbitInfo(), subSpec}, Lifetime::Sporadic); + outputSpecs.emplace_back(o2::framework::ConcreteDataMatcher{o2::header::gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVGroup(), subSpec}, o2::framework::Lifetime::Sporadic); + outputSpecs.emplace_back(o2::framework::ConcreteDataMatcher{o2::header::gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVOrbitInfo(), subSpec}, o2::framework::Lifetime::Sporadic); + + if (!triggerPerFlp) { + outputSpecs.emplace_back(o2::framework::ConcreteDataMatcher{o2::header::gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVTrigger(), subSpec}, o2::framework::Lifetime::Timeframe); + } + } + if (triggerPerFlp) { // Single per-FLP trigger output, subspec keyed on the first CRU + const header::DataHeader::SubSpecificationType trigSubSpec{crus.front() << 7}; + outputSpecs.emplace_back(o2::framework::ConcreteDataMatcher{o2::header::gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVTrigger(), trigSubSpec}, o2::framework::Lifetime::Timeframe); } const auto id = fmt::format("tpc-flp-cmv-{:02}", ilane); - return DataProcessorSpec{ + return o2::framework::DataProcessorSpec{ id.data(), inputSpecs, outputSpecs, - AlgorithmSpec{adaptFromTask(ilane, crus, nTFsBuffer)}, - Options{{"dump-cmvs-flp", VariantType::Bool, false, {"Dump CMVs to file"}}}}; + o2::framework::AlgorithmSpec{o2::framework::adaptFromTask(ilane, crus, triggerPerFlp, nTFsBuffer)}, + o2::framework::Options{ + {"dump-cmvs-flp", o2::framework::VariantType::Bool, false, {"Dump CMVs to file"}}, + {"trigger", o2::framework::VariantType::Bool, false, {"Enable CMV trigger evaluation"}}, + {"trigger-threshold-cmv", o2::framework::VariantType::Float, -10.f, {"CMV threshold: sequence starts when value drops below this (ADC units)"}}, + {"trigger-threshold-cmvMeanMax", o2::framework::VariantType::Float, -40.f, {"Upper bound on trigger-sequence mean CMV value"}}, + {"trigger-threshold-cmvMeanMin", o2::framework::VariantType::Float, -80.f, {"Lower bound on trigger-sequence mean CMV value"}}, + {"trigger-threshold-timebinMin", o2::framework::VariantType::Int, 4, {"Minimum trigger-sequence length in timebins"}}, + {"trigger-threshold-timebinMax", o2::framework::VariantType::Int, -1, {"Maximum trigger-sequence length in timebins (-1 disables upper bound)"}}}}; } } // namespace o2::tpc -#endif \ No newline at end of file +#endif diff --git a/Detectors/TPC/workflow/src/CMVToVectorSpec.cxx b/Detectors/TPC/workflow/src/CMVToVectorSpec.cxx index 81ce358d1a809..86cf4ca97aa19 100644 --- a/Detectors/TPC/workflow/src/CMVToVectorSpec.cxx +++ b/Detectors/TPC/workflow/src/CMVToVectorSpec.cxx @@ -76,7 +76,6 @@ class CMVToVectorDevice : public o2::framework::Task { const auto runNumber = processing_helpers::getRunNumber(pc); std::vector filter = {{"check", ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, "RAWDATA"}, Lifetime::Timeframe}}; - const auto& mapper = Mapper::instance(); // open files if necessary if ((mWriteDebug || mWriteDebugOnError) && !mDebugStream) { @@ -95,10 +94,7 @@ class CMVToVectorDevice : public o2::framework::Task mRawOutputFile.open(rawFileName, std::ios::binary); } - uint32_t heartbeatOrbit = 0; - uint16_t heartbeatBC = 0; uint32_t tfCounter = 0; - bool first = true; bool hasErrors = false; for (auto const& ref : InputRecordWalker(pc.inputs(), filter)) { @@ -149,7 +145,7 @@ class CMVToVectorDevice : public o2::framework::Task LOGP(debug, "Processing firstTForbit {:9}, tfCounter {:5}, run {:6}, feeId {:6}, cruID {:3}, link {:2}", dh->firstTForbit, dh->tfCounter, dh->runNumber, feeId, cruID, link); if (std::find(mCRUs.begin(), mCRUs.end(), cruID) == mCRUs.end()) { - LOGP(warning, "CMV CRU {:3} not configured in CRUs, skipping", cruID); + // LOGP(debug, "CMV CRU {:3} not configured in CRUs, skipping", cruID); continue; } @@ -171,7 +167,7 @@ class CMVToVectorDevice : public o2::framework::Task cmvVec.reserve(cmvVec.size() + cmv::NTimeBinsPerPacket); for (uint32_t tb = 0; tb < cmv::NTimeBinsPerPacket; ++tb) { cmvVec.push_back(cmvs.getCMV(tb)); - // LOGP(debug, "Appended CMV {} for timebin {}, CRU {}, orbit {}, bc {}", cmvs.getCMV(tb), tb, cruID, orbit, bc); + // LOGP(debug, "For CRU {}, timebin {}, orbit {}, bc {}, appended CMV {} float: {}", cruID, tb, orbit, bc, cmvs.getCMV(tb), cmvs.getCMVFloat(tb)); } } } catch (const std::exception& e) { @@ -204,7 +200,7 @@ class CMVToVectorDevice : public o2::framework::Task } } - hasErrors |= snapshotCMVs(pc.outputs(), tfCounter); + hasErrors |= snapshotCMVs(pc.outputs()); if (mWriteDebug || (mWriteDebugOnError && hasErrors)) { writeDebugOutput(tfCounter); @@ -274,7 +270,7 @@ class CMVToVectorDevice : public o2::framework::Task std::string mRawOutputFileName; ///< name of the raw output file //____________________________________________________________________________ - bool snapshotCMVs(DataAllocator& output, uint32_t tfCounter) + bool snapshotCMVs(DataAllocator& output) { bool hasErrors = false; @@ -321,12 +317,8 @@ class CMVToVectorDevice : public o2::framework::Task //____________________________________________________________________________ void writeDebugOutput(uint32_t tfCounter) { - const auto& mapper = Mapper::instance(); - mDebugStream->GetFile()->cd(); auto& stream = (*mDebugStream) << "cmvs"; - uint32_t seen = 0; - static uint32_t firstOrbit = std::numeric_limits::max(); for (auto cru : mCRUs) { if (mCMVInfos.find(cru) == mCMVInfos.end()) { @@ -404,7 +396,7 @@ class CMVToVectorDevice : public o2::framework::Task } }; -o2::framework::DataProcessorSpec getCMVToVectorSpec(const std::string inputSpec, std::vector const& crus) +o2::framework::DataProcessorSpec getCMVToVectorSpec(std::string const& inputSpec, std::vector const& crus) { using device = o2::tpc::CMVToVectorDevice; diff --git a/Detectors/TPC/workflow/src/TPCScalerSpec.cxx b/Detectors/TPC/workflow/src/TPCScalerSpec.cxx index 8e2a78d69757b..1df192dd5ec00 100644 --- a/Detectors/TPC/workflow/src/TPCScalerSpec.cxx +++ b/Detectors/TPC/workflow/src/TPCScalerSpec.cxx @@ -183,40 +183,45 @@ class TPCScalerSpec : public Task void buildMap(ProcessingContext& pc) { - // reference map - auto* corrMap = mTPCCorrMapsLoader.getCorrMap(); - - // // new correction map + const auto lumiMode = mTPCCorrMapsLoader.getLumiScaleMode(); o2::gpu::TPCFastTransform finalMap; - finalMap.cloneFromObject(*corrMap, nullptr); - finalMap.setApplyCorrectionOn(); - - const auto* corrMapRef = mTPCCorrMapsLoader.getCorrMapRef(); - const float lumiScale = mTPCCorrMapsLoader.getLumiScale(); std::vector> additionalCorrections; - // if standard scaling is used: map(lumi) = (mean_map - ref_map) * lumiScale + ref_map - if (mTPCCorrMapsLoader.getLumiScaleMode() == LumiScaleMode::Linear) { - const std::vector> step0{{&(corrMapRef->getCorrection()), -1.f}}; - // finalMap = (mean_map - finalMap) - TPCFastSpaceChargeCorrectionHelper::instance()->mergeCorrections(finalMap.getCorrection(), 1, step0, true); - - // finalMap = finalMap * lumiScale + ref_map - const std::vector> step1{{&(corrMapRef->getCorrection()), 1.f}}; - TPCFastSpaceChargeCorrectionHelper::instance()->mergeCorrections(finalMap.getCorrection(), lumiScale, step1, true); - - } else if (mTPCCorrMapsLoader.getLumiScaleMode() == LumiScaleMode::DerivativeMap || mTPCCorrMapsLoader.getLumiScaleMode() == LumiScaleMode::DerivativeMapMC) { - additionalCorrections.emplace_back(&(corrMapRef->getCorrection()), lumiScale); - } + if (lumiMode == LumiScaleMode::NoCorrection) { + std::unique_ptr dummy(TPCFastTransformHelperO2::instance()->create(0)); + finalMap.cloneFromObject(*dummy, nullptr); + finalMap.setApplyCorrectionOff(); + } else { + auto* corrMap = mTPCCorrMapsLoader.getCorrMap(); + const auto* corrMapRef = mTPCCorrMapsLoader.getCorrMapRef(); + finalMap.cloneFromObject(lumiMode == LumiScaleMode::StaticMapOnly && corrMapRef ? *corrMapRef : *corrMap, nullptr); + finalMap.setApplyCorrectionOn(); + + const float lumiScale = mTPCCorrMapsLoader.getLumiScale(); + + // if standard scaling is used: map(lumi) = (mean_map - ref_map) * lumiScale + ref_map + if (lumiMode == LumiScaleMode::Linear) { + const std::vector> step0{{&(corrMapRef->getCorrection()), -1.f}}; + // finalMap = (mean_map - finalMap) + TPCFastSpaceChargeCorrectionHelper::instance()->mergeCorrections(finalMap.getCorrection(), 1, step0, true); + + // finalMap = finalMap * lumiScale + ref_map + const std::vector> step1{{&(corrMapRef->getCorrection()), 1.f}}; + TPCFastSpaceChargeCorrectionHelper::instance()->mergeCorrections(finalMap.getCorrection(), lumiScale, step1, true); + + } else if (lumiMode == LumiScaleMode::DerivativeMap || lumiMode == LumiScaleMode::DerivativeMapMC) { + additionalCorrections.emplace_back(&(corrMapRef->getCorrection()), lumiScale); + } - // if mshape map valid - if (!mTPCCorrMapsLoader.isCorrMapMShapeDummy()) { - LOGP(info, "Adding M-shape correction to the final map with scaling factor {}", mMShapeScalingFac); - additionalCorrections.emplace_back(&(mTPCCorrMapsLoader.getCorrMapMShape()->getCorrection()), 1.f); - } + // if mshape map valid + if (!mTPCCorrMapsLoader.isCorrMapMShapeDummy()) { + LOGP(info, "Adding M-shape correction to the final map with scaling factor {}", mMShapeScalingFac); + additionalCorrections.emplace_back(&(mTPCCorrMapsLoader.getCorrMapMShape()->getCorrection()), 1.f); + } - if (!additionalCorrections.empty()) { - TPCFastSpaceChargeCorrectionHelper::instance()->mergeCorrections(finalMap.getCorrection(), 1, additionalCorrections, true); + if (!additionalCorrections.empty()) { + TPCFastSpaceChargeCorrectionHelper::instance()->mergeCorrections(finalMap.getCorrection(), 1, additionalCorrections, true); + } } Output corrMapOutput{header::gDataOriginTPC, "TPCCORRMAP", 0}; diff --git a/Detectors/TPC/workflow/src/tpc-flp-cmv.cxx b/Detectors/TPC/workflow/src/tpc-flp-cmv.cxx index f41fe5b8fbd15..b7734c5d0b24f 100644 --- a/Detectors/TPC/workflow/src/tpc-flp-cmv.cxx +++ b/Detectors/TPC/workflow/src/tpc-flp-cmv.cxx @@ -32,6 +32,7 @@ void customize(std::vector& workflowOptions) {"time-lanes", VariantType::Int, 1, {"Number of parallel processing lanes (timeframes are split per device)"}}, {"crus", VariantType::String, cruDefault.c_str(), {"List of CRUs, comma separated ranges, e.g. 0-3,7,9-15"}}, {"n-TFs-buffer", VariantType::Int, 1, {"Buffer n-TFs before sending output"}}, + {"trigger-per-flp", VariantType::Bool, false, {"Aggregate triggers of CRUs on FLP to a single trigger"}}, {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}}; std::swap(workflowOptions, options); @@ -48,6 +49,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& config) const auto nLanes = std::min(static_cast(config.options().get("lanes")), nCRUs); const auto time_lanes = static_cast(config.options().get("time-lanes")); const auto crusPerLane = nCRUs / nLanes + ((nCRUs % nLanes) != 0); + const bool triggerPerFLP = config.options().get("trigger-per-flp"); const int nTFsBuffer = config.options().get("n-TFs-buffer"); o2::conf::ConfigurableParam::updateFromFile(config.options().get("configFile")); @@ -65,8 +67,8 @@ WorkflowSpec defineDataProcessing(ConfigContext const& config) } const auto last = std::min(tpcCRUs.end(), first + crusPerLane); const std::vector rangeCRUs(first, last); - workflow.emplace_back(timePipeline(getTPCFLPCMVSpec(ilane, rangeCRUs, nTFsBuffer), time_lanes)); + workflow.emplace_back(timePipeline(getTPCFLPCMVSpec(ilane, rangeCRUs, triggerPerFLP, nTFsBuffer), time_lanes)); } return workflow; -} \ No newline at end of file +} diff --git a/Detectors/TPC/workflow/test/test_cmv-trigger.cxx b/Detectors/TPC/workflow/test/test_cmv-trigger.cxx new file mode 100644 index 0000000000000..c102a5ae531f4 --- /dev/null +++ b/Detectors/TPC/workflow/test/test_cmv-trigger.cxx @@ -0,0 +1,85 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// @file test_cmv-trigger.cxx +/// @author Tuba Gündem, tuba.gundem@cern.ch +/// @brief Test workflow: reads CMVTRIGGER packets from tpc-flp-cmv and logs results + +#include +#include +#include "Framework/WorkflowSpec.h" +#include "Framework/ConfigParamSpec.h" +#include "Framework/Task.h" +#include "Framework/ControlService.h" +#include "Framework/Logger.h" +#include "Framework/DataProcessorSpec.h" +#include "Framework/InputRecordWalker.h" +#include "Framework/DataRefUtils.h" +#include "Headers/DataHeader.h" +#include "TPCWorkflow/ProcessingHelpers.h" +#include "TPCWorkflow/TPCFLPCMVSpec.h" + +using namespace o2::framework; + +void customize(std::vector&) {} + +#include "Framework/runDataProcessing.h" + +namespace o2::tpc +{ + +class CMVTriggerDevice : public o2::framework::Task +{ + public: + void run(o2::framework::ProcessingContext& pc) final + { + const auto tf = processing_helpers::getCurrentTF(pc); + + for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mFilter)) { + auto const* hdr = o2::framework::DataRefUtils::getHeader(ref); + const uint32_t firstCRU = hdr->subSpecification >> 7; + const bool triggered = pc.inputs().get(ref); + if (triggered) { + LOGP(info, "TF {:6} first CRU {:3}: {}", tf, firstCRU, "triggered"); + } + } + } + + void endOfStream(o2::framework::EndOfStreamContext& ec) final + { + ec.services().get().readyToQuit(o2::framework::QuitRequest::Me); + } + + private: + const std::vector mFilter = { + {"cmvtrigger", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, o2::tpc::TPCFLPCMVDevice::getDataDescriptionCMVTrigger()}, o2::framework::Lifetime::Timeframe}}; +}; + +o2::framework::DataProcessorSpec getCMVTriggerSpec() +{ + std::vector inputSpecs; + inputSpecs.emplace_back(o2::framework::InputSpec{"cmvtrigger", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, o2::tpc::TPCFLPCMVDevice::getDataDescriptionCMVTrigger()}, o2::framework::Lifetime::Timeframe}); + + return o2::framework::DataProcessorSpec{ + "tpc-cmv-trigger", + inputSpecs, + {}, + o2::framework::AlgorithmSpec{o2::framework::adaptFromTask()}}; +} + +} // namespace o2::tpc + +WorkflowSpec defineDataProcessing(ConfigContext const& config) +{ + WorkflowSpec workflow; + workflow.emplace_back(o2::tpc::getCMVTriggerSpec()); + return workflow; +} diff --git a/Detectors/TRD/calibration/src/DCSProcessor.cxx b/Detectors/TRD/calibration/src/DCSProcessor.cxx index f110ba844791e..6f719b71e10c3 100644 --- a/Detectors/TRD/calibration/src/DCSProcessor.cxx +++ b/Detectors/TRD/calibration/src/DCSProcessor.cxx @@ -382,7 +382,7 @@ bool DCSProcessor::updateGasDPsCCDB() } std::map md; md["responsible"] = "Ole Schmidt"; - o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSGas, mCcdbGasDPsInfo, "TRD/Calib/DCSDPsGas", md, mGasStartTS, mGasStartTS + 3 * o2::ccdb::CcdbObjectInfo::DAY); + o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSGas, mCcdbGasDPsInfo, "TRD/Calib/DCSDPsGas", md, mGasStartTS, mCurrentTS + 14 * o2::ccdb::CcdbObjectInfo::DAY); return retVal; } @@ -410,7 +410,7 @@ bool DCSProcessor::updateCurrentsDPsCCDB() } std::map md; md["responsible"] = "Ole Schmidt"; - o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSCurrents, mCcdbCurrentsDPsInfo, "TRD/Calib/DCSDPsI", md, mCurrentsStartTS, mCurrentsStartTS + 3 * o2::ccdb::CcdbObjectInfo::DAY); + o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSCurrents, mCcdbCurrentsDPsInfo, "TRD/Calib/DCSDPsI", md, mCurrentsStartTS, mCurrentTS + 14 * o2::ccdb::CcdbObjectInfo::DAY); return retVal; } @@ -437,7 +437,7 @@ bool DCSProcessor::updateVoltagesDPsCCDB() } std::map md; md["responsible"] = "Ole Schmidt"; - o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSVoltages, mCcdbVoltagesDPsInfo, "TRD/Calib/DCSDPsU", md, mVoltagesStartTS, mVoltagesStartTS + 7 * o2::ccdb::CcdbObjectInfo::DAY); + o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSVoltages, mCcdbVoltagesDPsInfo, "TRD/Calib/DCSDPsU", md, mVoltagesStartTS, mCurrentTS + 14 * o2::ccdb::CcdbObjectInfo::DAY); return retVal; } @@ -465,7 +465,7 @@ bool DCSProcessor::updateEnvDPsCCDB() } std::map md; md["responsible"] = "Leonardo Barreto"; - o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSEnv, mCcdbEnvDPsInfo, "TRD/Calib/DCSDPsEnv", md, mEnvStartTS, mEnvStartTS + 3 * o2::ccdb::CcdbObjectInfo::DAY); + o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSEnv, mCcdbEnvDPsInfo, "TRD/Calib/DCSDPsEnv", md, mEnvStartTS, mCurrentTS + 14 * o2::ccdb::CcdbObjectInfo::DAY); return retVal; } @@ -498,7 +498,7 @@ bool DCSProcessor::updateFedChamberStatusDPsCCDB() // LB: set start timestamp 30000 miliseconds before DPs are received o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSFedChamberStatus, mCcdbFedChamberStatusDPsInfo, "TRD/Calib/DCSDPsFedChamberStatus", md, mFedChamberStatusStartTS - 30000, - mFedChamberStatusStartTS + 3 * o2::ccdb::CcdbObjectInfo::DAY); + mCurrentTS + 14 * o2::ccdb::CcdbObjectInfo::DAY); return retVal; } @@ -531,7 +531,7 @@ bool DCSProcessor::updateFedCFGtagDPsCCDB() // LB: set start timestamp 30000 seconds before DPs are received o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSFedCFGtag, mCcdbFedCFGtagDPsInfo, "TRD/Calib/DCSDPsFedCFGtag", md, mFedCFGtagStartTS - 30000, - mFedCFGtagStartTS + 3 * o2::ccdb::CcdbObjectInfo::DAY); + mCurrentTS + 14 * o2::ccdb::CcdbObjectInfo::DAY); return retVal; } diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h index db57e7ec06d4b..be88973561e0a 100644 --- a/GPU/Common/GPUCommonAlgorithm.h +++ b/GPU/Common/GPUCommonAlgorithm.h @@ -354,6 +354,11 @@ GPUdi() uint8_t warp_broadcast_FUNC(uint8_t v, int32_t i) #define warp_scan_inclusive_add(v) warp_scan_inclusive_add_FUNC(v) #define warp_broadcast(v, i) warp_broadcast_FUNC(v, i) +[[nodiscard]] GPUdi() int32_t work_group_count(bool pred) +{ + return work_group_reduce_add((int32_t)pred); +} + #elif (defined(__CUDACC__) || defined(__HIPCC__)) // CUDA and HIP work the same way using cub, need just different header @@ -416,6 +421,16 @@ GPUdi() T warp_broadcast_FUNC(T v, int32_t i) #endif } +[[nodiscard]] GPUdi() bool work_group_any(bool pred) +{ + return __syncthreads_or(pred); +} + +[[nodiscard]] GPUdi() uint32_t work_group_count(bool pred) +{ + return __syncthreads_count(pred); +} + #else // Trivial implementation for the CPU @@ -449,6 +464,16 @@ GPUdi() T warp_broadcast(T v, int32_t i) return v; } +[[nodiscard]] GPUdi() bool work_group_any(bool pred) +{ + return pred; +} + +[[nodiscard]] GPUdi() uint32_t work_group_count(bool pred) +{ + return pred; +} + #endif #ifdef GPUCA_ALGORITHM_STD diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx index 3d8e749e84147..918b2d459a2d6 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx @@ -128,9 +128,11 @@ void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess* tmpClusters[k] = clustersNative->clusters[i][j][k]; if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionTruncate) { GPUTPCCompression::truncateSignificantBitsChargeMax(tmpClusters[k].qMax, param); - GPUTPCCompression::truncateSignificantBitsCharge(tmpClusters[k].qTot, param); GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaPadPacked, param); - GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaTimePacked, param); + if (!tmpClusters[k].isSaturated()) [[likely]] { + GPUTPCCompression::truncateSignificantBitsCharge(tmpClusters[k].qTot, param); + GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaTimePacked, param); + } } } std::sort(tmpClusters.begin(), tmpClusters.end()); diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h index 4efaa7f33257c..8450c3ee59210 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h @@ -44,7 +44,7 @@ class GPUTPCClusterStatistics bool mDecodingError = false; static constexpr uint32_t P_MAX_QMAX = GPUTPCCompression::P_MAX_QMAX; - static constexpr uint32_t P_MAX_QTOT = GPUTPCCompression::P_MAX_QTOT; + static constexpr uint32_t P_MAX_QTOT = GPUTPCCompression::P_MAX_SATURATED_QTOT; static constexpr uint32_t P_MAX_TIME = GPUTPCCompression::P_MAX_TIME; static constexpr uint32_t P_MAX_PAD = GPUTPCCompression::P_MAX_PAD; static constexpr uint32_t P_MAX_SIGMA = GPUTPCCompression::P_MAX_SIGMA; diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h index 82e44eda6f3cc..5efe3936067b7 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h @@ -47,14 +47,15 @@ class GPUTPCCompression : public GPUProcessor #endif static constexpr uint32_t P_MAX_QMAX = 1 << 10; - static constexpr uint32_t P_MAX_QTOT = 5 * 5 * P_MAX_QMAX; + static constexpr uint32_t P_MAX_REGULAR_QTOT = 5 * 5 * P_MAX_QMAX; + static constexpr uint32_t P_MAX_SATURATED_QTOT = 1 << 16; // Need two different limits as saturated clusters use full u16 range for qTot static constexpr uint32_t P_MAX_TIME = 1 << 24; static constexpr uint32_t P_MAX_PAD = 1 << 16; static constexpr uint32_t P_MAX_SIGMA = 1 << 8; static constexpr uint32_t P_MAX_FLAGS = 1 << 8; static constexpr uint32_t P_MAX_QPT = 1 << 8; - GPUd() static void truncateSignificantBitsCharge(uint16_t& charge, const GPUParam& param) { truncateSignificantBits(charge, param.rec.tpc.sigBitsCharge, P_MAX_QTOT); } + GPUd() static void truncateSignificantBitsCharge(uint16_t& charge, const GPUParam& param) { truncateSignificantBits(charge, param.rec.tpc.sigBitsCharge, P_MAX_REGULAR_QTOT); } GPUd() static void truncateSignificantBitsChargeMax(uint16_t& charge, const GPUParam& param) { truncateSignificantBits(charge, param.rec.tpc.sigBitsCharge, P_MAX_QMAX); } GPUd() static void truncateSignificantBitsWidth(uint8_t& width, const GPUParam& param) { truncateSignificantBits(width, param.rec.tpc.sigBitsWidth, P_MAX_SIGMA); } diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index b98f5c28f57b0..bd42c2a2472d4 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -121,9 +121,11 @@ GPUdii() void GPUTPCCompressionKernels::Thread +#include + +namespace o2::gpu +{ +struct GPUTPCExtraADC { + std::array, tpc::constants::MAXSECTOR> digitsBySector; +}; +} // namespace o2::gpu diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 43a5f4f79abdc..eb7d67a913ceb 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -112,6 +112,9 @@ AddOptionRTC(trackletMinSharedNormFactor, float, 0.f, "", 0, "Max shared defined AddOptionRTC(maxTimeBinAboveThresholdIn1000Bin, uint16_t, 500, "", 0, "Except pad from cluster finding if total number of charges in a fragment is above this baseline (disable = 0)") AddOptionRTC(maxConsecTimeBinAboveThreshold, uint16_t, 200, "", 0, "Except pad from cluster finding if number of consecutive charges in a fragment is above this baseline (disable = 0)") AddOptionRTC(noisyPadSaturationThreshold, uint16_t, 700, "", 0, "Threshold where a timebin is considered saturated, disabling the noisy pad check for that pad") +AddOptionRTC(hipTailFilter, uint8_t, 0, "", 0, "Enable Highly Ionising Particle tail filter in CheckPadBaseline (0 = disable, 1 = filter tails)") +AddOptionRTC(hipTailFilterThreshold, uint16_t, 100, "", 0, "Threshold that must be exceeded for a timebin to be counted towards Highly Ionising Particle tail") +AddOptionRTC(hipTailFilterAlpha, float, 0.5f, "", 0, "Smoothing factor for the exponential Highly Ionising Particle tail filter") AddOptionRTC(occupancyMapTimeBins, uint16_t, 16, "", 0, "Number of timebins per histogram bin of occupancy map (0 = disable occupancy map)") AddOptionRTC(occupancyMapTimeBinsAverage, uint16_t, 0, "", 0, "Number of timebins +/- to use for the averaging") AddOptionRTC(trackFitCovLimit, uint16_t, 1000, "", 0, "Abort fit when y/z cov exceed the limit") diff --git a/GPU/GPUTracking/Definitions/Parameters/GPUParameters.csv b/GPU/GPUTracking/Definitions/Parameters/GPUParameters.csv index ef215ba5ca870..823a70b24565b 100644 --- a/GPU/GPUTracking/Definitions/Parameters/GPUParameters.csv +++ b/GPU/GPUTracking/Definitions/Parameters/GPUParameters.csv @@ -60,6 +60,8 @@ GPUTPCGMO2Output_output,256,,,,,,,,,,,,,,,256 GPUTPCStartHitsFinder,256,,"[1024, 2]","[1024, 7]",256,256,256,256,256,512,512,512,,,,608 GPUTPCStartHitsSorter,256,,"[1024, 5]","[512, 7]",256,256,256,256,256,"[512, 1]","[512, 1]","[512, 1]",,,,608 GPUTPCCFCheckPadBaseline,576,,"[576, 2]","[576, 2]",,,,,,"[576, 2]",,,,,,"[576, 2]" +GPUTPCCFHIPTailConnector,256,,256,256,,,,,,256, +GPUTPCCFHIPClusterizer,256,,256,256,,,,,,256, GPUTPCCFChargeMapFiller_fillIndexMap,512,,512,512,,,,,,448,,,,,,448 GPUTPCCFChargeMapFiller_fillFromDigits,512,,512,512,,,,,,448,,,,,,448 GPUTPCCFChargeMapFiller_findFragmentStart,512,,512,512,,,,,,448,,,,,,448 diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 9913762ae34df..78a43856f00f1 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -71,6 +71,7 @@ struct CfFragment; class GPUTPCClusterFinder; struct GPUSettingsProcessing; struct GPUSettingsRec; +struct GPUTPCExtraADC; class GPUChainTracking : public GPUChain { @@ -298,13 +299,15 @@ class GPUChainTracking : public GPUChain int32_t RunChainFinalize(); void OutputSanityCheck(); int32_t RunTPCTrackingSectors_internal(); - int32_t RunTPCClusterizer_prepare(bool restorePointers); + int32_t RunTPCClusterizer_prepare(bool restorePointers, const GPUTPCExtraADC& extraADCs); #ifndef GPUCA_RUN2 - std::pair RunTPCClusterizer_transferZS(int32_t iSector, const CfFragment& fragment, int32_t lane); + std::pair RunTPCClusterizer_transferZS(int32_t iSector, const CfFragment& fragment, int32_t lane, const GPUTPCExtraADC& extraADCs); void RunTPCClusterizer_compactPeaks(GPUTPCClusterFinder& clusterer, GPUTPCClusterFinder& clustererShadow, int32_t stage, bool doGPU, int32_t lane); std::pair TPCClusterizerDecodeZSCount(uint32_t iSector, const CfFragment& fragment); std::pair TPCClusterizerDecodeZSCountUpdate(uint32_t iSector, const CfFragment& fragment); void TPCClusterizerEnsureZSOffsets(uint32_t iSector, const CfFragment& fragment); + void TPCClusterizerTransferExtraADC(GPUTPCClusterFinder& clusterer, GPUTPCClusterFinder& clustererShadow, int lane, const GPUTPCExtraADC& extraADCs); + void TPCClusterizerCheckExtraADCZeros(GPUTPCClusterFinder& clusterer, GPUTPCClusterFinder& clustererShadow, int lane, const GPUTPCExtraADC& extraADCs); #endif void RunTPCTrackingMerger_MergeBorderTracks(uint8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType); void RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index a2a07be7832ca..750cbee7051bf 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -17,6 +17,7 @@ #include "GPUChainTrackingDebug.h" #include "GPULogging.h" #include "GPUO2DataTypes.h" +#include "GPUTPCExtraADC.h" #include "GPUMemorySizeScalers.h" #include "GPUTrackingInputProvider.h" #include "GPUNewCalibValues.h" @@ -56,10 +57,13 @@ #include "utils/VcShim.h" #include "utils/strtag.h" -#include +#include "utils/vecpod.h" #include +#include #include +// #define INSERT_SATURATED_SIGNALS + using namespace o2::gpu; using namespace o2::tpc; using namespace o2::tpc::constants; @@ -155,11 +159,171 @@ void GPUChainTracking::TPCClusterizerEnsureZSOffsets(uint32_t iSector, const CfF } } +void GPUChainTracking::TPCClusterizerTransferExtraADC(GPUTPCClusterFinder& clusterer, GPUTPCClusterFinder& clustererShadow, int lane, const GPUTPCExtraADC& extraADCs) +{ + const int32_t iSector = clusterer.mISector; + const auto& fragment = clusterer.mPmemory->fragment; + const auto& digits = extraADCs.digitsBySector[iSector]; + + if (fragment.index != 0) { + return; + } + + if (digits.empty()) { + return; + } + + const size_t chargeMapSize = TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen); + const size_t chargeMapSizeBytes = chargeMapSize * sizeof(PackedCharge); + + vecpod chargeMapHostData; + chargeMapHostData.resize(chargeMapSize); + + CfArray2D chargeMapHost(reinterpret_cast(chargeMapHostData.data())); + + vecpod extraPositions; + extraPositions.reserve(digits.size()); + + GPUMemCpy(RecoStep::TPCClusterFinding, chargeMapHostData.data(), clustererShadow.mPchargeMap, chargeMapSizeBytes, lane, false); + SynchronizeStream(lane); + + for (const auto& d : digits) { + if (!fragment.contains(d.getTimeStamp())) { + continue; + } + + CfChargePos pos{(tpccf::Row)d.getRow(), (tpccf::Pad)d.getPad(), (tpccf::TPCFragmentTime)(d.getTimeStamp() - fragment.start)}; + chargeMapHost[pos] = PackedCharge(d.getChargeFloat()); + + extraPositions.push_back(pos); + } + + GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPchargeMap, chargeMapHostData.data(), chargeMapSizeBytes, lane, true); + + const size_t nPositions = clusterer.mPmemory->counters.nPositions; + const size_t extraPositionsOffset = nPositions - extraPositions.size(); + GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPpositions + extraPositionsOffset, extraPositions.data(), extraPositions.size() * sizeof(CfChargePos), lane, true); +} + +void GPUChainTracking::TPCClusterizerCheckExtraADCZeros(GPUTPCClusterFinder& clusterer, GPUTPCClusterFinder& clustererShadow, int lane, const GPUTPCExtraADC& extraADCs) +{ + const int32_t iSector = clusterer.mISector; + const auto& fragment = clusterer.mPmemory->fragment; + const auto& digits = extraADCs.digitsBySector[iSector]; + + if (fragment.index != 0) { + return; + } + + if (digits.empty()) { + return; + } + + const size_t chargeMapSize = TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen); + const size_t chargeMapSizeBytes = chargeMapSize * sizeof(PackedCharge); + + vecpod chargeMapHostData; + chargeMapHostData.resize(chargeMapSize); + + CfArray2D chargeMapHost(reinterpret_cast(chargeMapHostData.data())); + + GPUMemCpy(RecoStep::TPCClusterFinding, chargeMapHostData.data(), clustererShadow.mPchargeMap, chargeMapSizeBytes, lane, false); + SynchronizeStream(lane); + + size_t nNonZeroADCs = 0; + + for (const auto& d : digits) { + if (!fragment.contains(d.getTimeStamp())) { + continue; + } + + CfChargePos pos{(tpccf::Row)d.getRow(), (tpccf::Pad)d.getPad(), (tpccf::TPCFragmentTime)(d.getTimeStamp() - fragment.start)}; + + auto adc = chargeMapHost[pos].unpack(); + + if (adc != 0) { + nNonZeroADCs++; + } + } + + if (nNonZeroADCs > 0) { + GPUInfo("Non Zero ADCs: %zu", nNonZeroADCs); + } else { + GPUInfo("Cleared all extra ADC values!", nNonZeroADCs); + } +} + namespace { struct TPCCFDecodeScanTmp { int32_t zsPtrFirst, zsPageFirst, zsPtrLast, zsPageLast, hasData, pageCounter; }; + +// Additional ADC values must be generated at start of clusterizer +// This is required, so enough memory is allocated for the charge points +// And ADCs can be injected by "simply" +// -> copying chargeMap + chargePositions to host +// -> writing additional adcs to chargeMap + positions +// -> copying values to device +GPUTPCExtraADC GenerateSaturatedSignals(size_t seed = 42) +{ + constexpr int32_t MinTailLength = 50; + constexpr int32_t MaxTailLength = 200; + constexpr int32_t TailWidth = 3; // Assume tails are 3 pads wide at the moment + + constexpr GPUTPCGeometry geo; + + GPUTPCExtraADC adcs; + + const int32_t nHIPs = 50; + const int32_t firstTB = 0; // Place all HIPs in first fragment for now + const int32_t lastTB = 4000 - MaxTailLength; // Don't allow cut off tails at fragment borders + const int32_t tailADC = 250; // charge should decrease over time, but for now just hardcode ADC above the threshold + + std::mt19937 gen{(uint32_t)seed}; + std::uniform_int_distribution<> randomRow(0, GPUTPCGeometry::NROWS - 1); + std::uniform_int_distribution<> randomTB(firstTB, lastTB); + std::uniform_int_distribution<> randomTailLength(MinTailLength, MaxTailLength); + // std::normal_distribution<> tailLengthNoise(8, 2.0); + + for (int32_t iHIP = 0; iHIP < nHIPs; iHIP++) { + + const int32_t row = randomRow(gen); + const int32_t nPads = geo.NPads(row); + std::uniform_int_distribution<> randomPad(0, nPads - 1); + + const int32_t basePad = randomPad(gen); + const int32_t baseTb = randomTB(gen); + + auto& digits = adcs.digitsBySector[0]; + + const int32_t tailLength = randomTailLength(gen); + + for (int32_t dPad = -TailWidth; dPad <= TailWidth; dPad++) { + const int32_t iPad = basePad + dPad; + if (iPad < 0 || iPad >= nPads) { + continue; + } + + for (int32_t dTime = 0; dTime < tailLength; dTime++) { + const int32_t iTime = baseTb + dTime; + + if (iTime >= 4000) { + break; + } + + const auto adc = dTime == 0 && dPad == 0 ? 1023 : tailADC; + + digits.emplace_back(0, adc, row, iPad, iTime); + } + } + } + + GPUInfo("Generated %zu ADCs!", adcs.digitsBySector[0].size()); + + return adcs; +} + } // namespace std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint32_t iSector, const CfFragment& fragment) @@ -437,13 +601,16 @@ void GPUChainTracking::RunTPCClusterizer_compactPeaks(GPUTPCClusterFinder& clust } } -std::pair GPUChainTracking::RunTPCClusterizer_transferZS(int32_t iSector, const CfFragment& fragment, int32_t lane) +std::pair GPUChainTracking::RunTPCClusterizer_transferZS(int32_t iSector, const CfFragment& fragment, int32_t lane, const GPUTPCExtraADC& extraADCs) { bool doGPU = GetRecoStepsGPU() & RecoStep::TPCClusterFinding; if (mCFContext->abandonTimeframe) { return {0, 0}; } - const auto& retVal = TPCClusterizerDecodeZSCountUpdate(iSector, fragment); + auto retVal = TPCClusterizerDecodeZSCountUpdate(iSector, fragment); + if (fragment.index == 0) { + retVal.first += extraADCs.digitsBySector[iSector].size(); + } if (doGPU) { GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer; @@ -473,7 +640,7 @@ std::pair GPUChainTracking::RunTPCClusterizer_transferZS(int return retVal; } -int32_t GPUChainTracking::RunTPCClusterizer_prepare(bool restorePointers) +int32_t GPUChainTracking::RunTPCClusterizer_prepare(bool restorePointers, const GPUTPCExtraADC& extraADCs) { bool doGPU = mRec->GetRecoStepsGPU() & gpudatatypes::RecoStep::TPCClusterFinding; if (restorePointers) { @@ -569,7 +736,7 @@ int32_t GPUChainTracking::RunTPCClusterizer_prepare(bool restorePointers) mCFContext->fragmentFirst = CfFragment{std::max(mCFContext->tpcMaxTimeBin + 1, maxFragmentLen), maxFragmentLen}; for (int32_t iSector = 0; iSector < GetProcessingSettings().nTPCClustererLanes && iSector < NSECTORS; iSector++) { if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSector] && mCFContext->zsVersion != -1) { - mCFContext->nextPos[iSector] = RunTPCClusterizer_transferZS(iSector, mCFContext->fragmentFirst, GetProcessingSettings().nTPCClustererLanes + iSector); + mCFContext->nextPos[iSector] = RunTPCClusterizer_transferZS(iSector, mCFContext->fragmentFirst, GetProcessingSettings().nTPCClustererLanes + iSector, extraADCs); } } @@ -595,7 +762,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) mRec->PushNonPersistentMemory(qStr2Tag("TPCCLUST")); const auto& threadContext = GetThreadContext(); const bool doGPU = GetRecoStepsGPU() & RecoStep::TPCClusterFinding; - if (RunTPCClusterizer_prepare(mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer)) { + + GPUTPCExtraADC extraADCs; +#ifdef INSERT_SATURATED_SIGNALS + extraADCs = GenerateSaturatedSignals(); +#endif + + if (RunTPCClusterizer_prepare(mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer, extraADCs)) { return 1; } if (GetProcessingSettings().autoAdjustHostThreads && !doGPU) { @@ -625,7 +798,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) SetupGPUProcessor(&processors()->tpcClusterer[iSector], true); // Now we allocate } if (mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer) { - RunTPCClusterizer_prepare(true); // Restore some pointers, allocated by the other pipeline, and set to 0 by SetupGPUProcessor (since not allocated in this pipeline) + RunTPCClusterizer_prepare(true, extraADCs); // Restore some pointers, allocated by the other pipeline, and set to 0 by SetupGPUProcessor (since not allocated in this pipeline) } if (doGPU && mIOPtrs.tpcZS) { @@ -769,6 +942,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) const bool propagateMCLabels = buildNativeHost && GetProcessingSettings().runMC && processors()->ioPtrs.tpcPackedDigits && processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC; const bool sortClusters = buildNativeHost && (GetProcessingSettings().deterministicGPUReconstruction || GetProcessingSettings().debugLevel >= 4); + if (GetProcessingSettings().runMC && (!processors()->ioPtrs.tpcPackedDigits || !processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC)) { + GPUWarning("Requested to process MC labels, but no labels present"); + } + auto* digitsMC = propagateMCLabels ? processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC : nullptr; mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = mRec->MemoryScalers()->nTPCHits * tpcHitLowOccupancyScalingFactor; @@ -938,7 +1115,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) f = mCFContext->fragmentFirst; } if (nextSector < NSECTORS && mIOPtrs.tpcZS && mCFContext->nPagesSector[nextSector] && mCFContext->zsVersion != -1 && !mCFContext->abandonTimeframe) { - mCFContext->nextPos[nextSector] = RunTPCClusterizer_transferZS(nextSector, f, GetProcessingSettings().nTPCClustererLanes + lane); + mCFContext->nextPos[nextSector] = RunTPCClusterizer_transferZS(nextSector, f, GetProcessingSettings().nTPCClustererLanes + lane, extraADCs); } } GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; @@ -949,9 +1126,8 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (!mIOPtrs.tpcZS) { runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); } - if (DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererDigits, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) { - clusterer.DumpChargeMap(*mDebugFile, "Charges"); - } + + TPCClusterizerTransferExtraADC(clusterer, clustererShadow, lane, extraADCs); if (propagateMCLabels) { runKernel({GetGrid(clusterer.mPmemory->counters.nDigitsInFragment, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}); @@ -960,14 +1136,33 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) bool checkForNoisyPads = (rec()->GetParam().rec.tpc.maxTimeBinAboveThresholdIn1000Bin > 0) || (rec()->GetParam().rec.tpc.maxConsecTimeBinAboveThreshold > 0); checkForNoisyPads &= (rec()->GetParam().rec.tpc.noisyPadsQuickCheck ? fragment.index == 0 : true); checkForNoisyPads &= !GetProcessingSettings().disableTPCNoisyPadFilter; + // TODO Move hipTailFilter flag to ProcessingSettings? + // TODO Add some warning when re enabling pad filter with this flag, so it's not just silently enabled when disabling was requested + checkForNoisyPads |= rec()->GetParam().rec.tpc.hipTailFilter; + + if (rec()->GetParam().rec.tpc.hipTailFilter && !doGPU) { + GPUError("HIP tail filter enabled, but this is currently not supported on CPU"); + } if (checkForNoisyPads) { + if (rec()->GetParam().rec.tpc.hipTailFilter) { + runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPhipTailsByRow, GPUTPCGeometry::NROWS * sizeof(*clustererShadow.mPhipTailsByRow) * GPUTPCCFHIPClusterizer::MaxHIPTailsPerRow); + runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPnHIPTails, GPUTPCGeometry::NROWS * sizeof(*clustererShadow.mPnHIPTails)); + } const int32_t nBlocks = GPUTPCCFCheckPadBaseline::GetNBlocks(doGPU); runKernel({GetGridBlk(nBlocks, lane), {iSector}}); getKernelTimer(RecoStep::TPCClusterFinding, iSector, TPC_REAL_PADS_IN_SECTOR * fragment.lengthWithoutOverlap() * sizeof(PackedCharge), false); } + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererDigits, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile); + // Avoid additional sync when also dumping digits + const bool debugSyncChargeMap = !(GetProcessingSettings().debugMask & GPUChainTrackingDebugFlags::TPCClustererDigits); + // DumpChargeMap should run after noisy pad filter to avoid yet another dump of intermediate data. When chargemap without pad filter is required, disable pad filter instead. + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, debugSyncChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Charges"); + + TPCClusterizerCheckExtraADCZeros(clusterer, clustererShadow, lane, extraADCs); + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); if (DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererPeaks, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) { clusterer.DumpPeakMap(*mDebugFile, "Peaks"); @@ -1002,6 +1197,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer; + if (clusterer.mPmemory->counters.nPositions == 0) { + return; + } + if (doGPU) { SynchronizeStream(lane); } @@ -1015,156 +1214,183 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding), krnlRunRangeNone, {nullptr, waitEvent}}, clustererShadow.mPclusterInRow, GPUTPCGeometry::NROWS * sizeof(*clustererShadow.mPclusterInRow)); } - if (clusterer.mPmemory->counters.nClusters == 0) { - return; - } - - if (GetProcessingSettings().nn.applyNNclusterizer) { + const auto nRegularClusters = clusterer.mPmemory->counters.nClusters; + if (nRegularClusters != 0) { + if (GetProcessingSettings().nn.applyNNclusterizer) { #ifdef GPUCA_HAS_ONNX - GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[lane]; - GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[lane] : clustererNN; - GPUTPCNNClusterizerHost& nnApplication = nnApplications[lane]; + GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[lane]; + GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[lane] : clustererNN; + GPUTPCNNClusterizerHost& nnApplication = nnApplications[lane]; - // int withMC = (doGPU && propagateMCLabels); + // int withMC = (doGPU && propagateMCLabels); - if (nn_settings.nnClusterizerApplyCfDeconvolution) { - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true); - } else if (clustererNNShadow.mNnClusterizerSetDeconvolutionFlags) { - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, false); - } - - // float time_clusterizer = 0, time_fill = 0, time_networks = 0; - if (nn_settings.nnClusterizerVerbosity > 2) { - LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Starting loop over batched data. clustererNNShadow.mNnClusterizerBatchedMode=" << clustererNNShadow.mNnClusterizerBatchedMode << ", numLoops=" << std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNNShadow.mNnClusterizerBatchedMode) << ", numClusters=" << clusterer.mPmemory->counters.nClusters << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")"; - } - for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNNShadow.mNnClusterizerBatchedMode); batch++) { - if (nn_settings.nnClusterizerVerbosity > 3) { - LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Start. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")"; + if (nn_settings.nnClusterizerApplyCfDeconvolution) { + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true); + } else if (clustererNNShadow.mNnClusterizerSetDeconvolutionFlags) { + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, false); } - uint batchStart = batch * clustererNNShadow.mNnClusterizerBatchedMode; - size_t iSize = CAMath::Min((uint)clustererNNShadow.mNnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart)); - // Filling the data - if (mRec->IsGPU() || GetProcessingSettings().nn.nnClusterizerForceGpuInputFill) { - // Fills element by element of each input matrix -> better parallelizability, but worse on CPU due to unnecessary computations - runKernel({GetGrid(iSize * clustererNNShadow.mNnClusterizerRowTimeSizeThreads , lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart); - } else { - // Fills the whole input matrix at once -> better performance on CPU, but worse parallelizability - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart); - } - if (nn_settings.nnClusterizerVerbosity > 3) { - LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done filling data. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")"; + // float time_clusterizer = 0, time_fill = 0, time_networks = 0; + if (nn_settings.nnClusterizerVerbosity > 2) { + LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Starting loop over batched data. clustererNNShadow.mNnClusterizerBatchedMode=" << clustererNNShadow.mNnClusterizerBatchedMode << ", numLoops=" << std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNNShadow.mNnClusterizerBatchedMode) << ", numClusters=" << clusterer.mPmemory->counters.nClusters << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")"; } - - if (clustererNNShadow.mNnClusterizerSetDeconvolutionFlags) { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart); // Publishing the deconvolution flags + for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNNShadow.mNnClusterizerBatchedMode); batch++) { if (nn_settings.nnClusterizerVerbosity > 3) { - LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done setting deconvolution flags. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")"; + LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Start. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")"; } - } - - // NN evaluations - if(clustererNNShadow.mNnClusterizerUseClassification) { - if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane]->Start(); } - if (clustererNNShadow.mNnInferenceInputDType == 0) { - if (clustererNNShadow.mNnInferenceOutputDType == 0) { - (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_16); - } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { - (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_32); - } - } else if (clustererNNShadow.mNnInferenceInputDType == 1) { - if (clustererNNShadow.mNnInferenceOutputDType == 0) { - (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_16); - } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { - (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_32); - } + uint batchStart = batch * clustererNNShadow.mNnClusterizerBatchedMode; + size_t iSize = CAMath::Min((uint)clustererNNShadow.mNnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart)); + + // Filling the data + if (mRec->IsGPU() || GetProcessingSettings().nn.nnClusterizerForceGpuInputFill) { + // Fills element by element of each input matrix -> better parallelizability, but worse on CPU due to unnecessary computations + runKernel({GetGrid(iSize * clustererNNShadow.mNnClusterizerRowTimeSizeThreads , lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart); + } else { + // Fills the whole input matrix at once -> better performance on CPU, but worse parallelizability + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart); } - if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane]->Stop(); } // doGPU || lane<4 -> only for GPU or first 4 CPU lanes (to limit number of concurrent timers). At least gives some statistics for CPU time... if (nn_settings.nnClusterizerVerbosity > 3) { - LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with NN classification inference. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")"; + LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done filling data. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")"; } - } - if (!clustererNNShadow.mNnClusterizerUseCfRegression) { - if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 1]->Start(); } - if (clustererNNShadow.mNnInferenceInputDType == 0) { - if (clustererNNShadow.mNnInferenceOutputDType == 0) { - (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_16); - } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { - (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_32); + + if (clustererNNShadow.mNnClusterizerSetDeconvolutionFlags) { + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart); // Publishing the deconvolution flags + if (nn_settings.nnClusterizerVerbosity > 3) { + LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done setting deconvolution flags. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")"; + } + } + + // NN evaluations + if(clustererNNShadow.mNnClusterizerUseClassification) { + if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane]->Start(); } + if (clustererNNShadow.mNnInferenceInputDType == 0) { + if (clustererNNShadow.mNnInferenceOutputDType == 0) { + (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_16); + } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { + (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_32); + } + } else if (clustererNNShadow.mNnInferenceInputDType == 1) { + if (clustererNNShadow.mNnInferenceOutputDType == 0) { + (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_16); + } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { + (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_32); + } } - } else if (clustererNNShadow.mNnInferenceInputDType == 1) { - if (clustererNNShadow.mNnInferenceOutputDType == 0) { - (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_16); - } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { - (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_32); + if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane]->Stop(); } // doGPU || lane<4 -> only for GPU or first 4 CPU lanes (to limit number of concurrent timers). At least gives some statistics for CPU time... + if (nn_settings.nnClusterizerVerbosity > 3) { + LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with NN classification inference. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")"; } } - if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 1]->Stop(); } - if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) { - if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 2]->Start(); } + if (!clustererNNShadow.mNnClusterizerUseCfRegression) { + if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 1]->Start(); } if (clustererNNShadow.mNnInferenceInputDType == 0) { if (clustererNNShadow.mNnInferenceOutputDType == 0) { - (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_16); + (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_16); } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { - (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_32); + (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_32); } } else if (clustererNNShadow.mNnInferenceInputDType == 1) { if (clustererNNShadow.mNnInferenceOutputDType == 0) { - (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_16); + (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_16); } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { - (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_32); + (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_32); + } + } + if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 1]->Stop(); } + if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) { + if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 2]->Start(); } + if (clustererNNShadow.mNnInferenceInputDType == 0) { + if (clustererNNShadow.mNnInferenceOutputDType == 0) { + (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_16); + } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { + (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_32); + } + } else if (clustererNNShadow.mNnInferenceInputDType == 1) { + if (clustererNNShadow.mNnInferenceOutputDType == 0) { + (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_16); + } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { + (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_32); + } } + if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 2]->Stop(); } + } + if (nn_settings.nnClusterizerVerbosity > 3) { + LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with NN regression inference. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")"; + } + } + + // Publishing kernels for class labels and regression results + // In case classification should not be used, this kernel should still be executed to fill the mOutputDataClass array with default values + if (nnApplication.mModelClass.getNumOutputNodes()[0][1] == 1) { + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels + } else { + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels + } + if (!clustererNNShadow.mNnClusterizerUseCfRegression) { + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Publishing class 1 regression results + if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) { + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Publishing class 2 regression results } - if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 2]->Stop(); } } if (nn_settings.nnClusterizerVerbosity > 3) { - LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with NN regression inference. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")"; + LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done publishing. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")"; } } - // Publishing kernels for class labels and regression results - // In case classification should not be used, this kernel should still be executed to fill the mOutputDataClass array with default values - if (nnApplication.mModelClass.getNumOutputNodes()[0][1] == 1) { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels - } else { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels - } - if (!clustererNNShadow.mNnClusterizerUseCfRegression) { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Publishing class 1 regression results - if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Publishing class 2 regression results + if (clustererNNShadow.mNnClusterizerUseCfRegression) { + if(!nn_settings.nnClusterizerApplyCfDeconvolution) { // If it is already applied don't do it twice, otherwise apply now + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true); + } + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMapSplit, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, 0); // Running the CF regression kernel - no batching needed: batchStart = 0 + if (nn_settings.nnClusterizerVerbosity > 3) { + LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with CF regression. (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")"; } } - if (nn_settings.nnClusterizerVerbosity > 3) { - LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done publishing. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")"; +#else + GPUFatal("Project not compiled with neural network clusterization. Aborting."); +#endif + } else { + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true); + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMapSplit, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0); + } // if (GetProcessingSettings().nn.applyNNclusterizer) + + if (doGPU && propagateMCLabels) { + TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mScratchId, lane); + if (doGPU) { + SynchronizeStream(lane); } + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, 1); // Computes MC labels } + } // if (nRegularClusters != 0) { - if (clustererNNShadow.mNnClusterizerUseCfRegression) { - if(!nn_settings.nnClusterizerApplyCfDeconvolution) { // If it is already applied don't do it twice, otherwise apply now - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true); - } - DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, 0); // Running the CF regression kernel - no batching needed: batchStart = 0 - if (nn_settings.nnClusterizerVerbosity > 3) { - LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with CF regression. (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")"; - } + + // TODO: Move this right after CheckPadBaseline once tail zeroing is moved into this kernel. + if (rec()->GetParam().rec.tpc.hipTailFilter) { + runKernel({GetGridBlk(GPUTPCGeometry::NROWS, lane), {iSector}}); + runKernel({GetGridBlk(GPUTPCGeometry::NROWS, lane), {iSector}}); + if (doGPU && (nRegularClusters == 0 || GetProcessingSettings().debugLevel >= 3)) { + TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); + SynchronizeStream(lane); } -#else - GPUFatal("Project not compiled with neural network clusterization. Aborting."); -#endif - } else { - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true); - DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0); } - if (doGPU && propagateMCLabels) { - TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mScratchId, lane); - if (doGPU) { - SynchronizeStream(lane); + bool hasClusters = nRegularClusters != 0; + + // Paranoid edge case: If no regular clusters were found, need to still check that no HIP clusters were created + // HIPClusterizer kernel doesn't update counters.nClusters, because: + // - 64bit atomic support in OpenCL is flaky + // - nClusters is only used internally by clusterizer to track #peaks that will probably become clusters, + // so storing the number of HIP clusters there is only asking for trouble anyway + if (rec()->GetParam().rec.tpc.hipTailFilter && nRegularClusters == 0) { + for (uint32_t row = 0; row < GPUTPCGeometry::NROWS; row++) { + hasClusters |= clusterer.mPclusterInRow[row] != 0; } - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, 1); // Computes MC labels + } + + if (!hasClusters) { + return; } if (GetProcessingSettings().debugLevel >= 3) { @@ -1173,8 +1399,6 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) TransferMemoryResourcesToHost(RecoStep::TPCClusterFinding, &clusterer, lane); laneHasData[lane] = true; - // Include clusters in default debug mask, exclude other debug output by default - DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererClusters, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off }); mRec->SetNActiveThreadsOuterLoop(1); } @@ -1192,6 +1416,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (laneHasData[lane]) { anyLaneHasData = true; + // Include clusters in default debug mask, exclude other debug output by default. + // The cluster buffers are accumulated per sector, so dump them once after all fragments. + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererClusters, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off if (buildNativeGPU && GetProcessingSettings().tpccfGatherKernel) { runKernel({GetGridBlk(GPUTPCGeometry::NROWS, mRec->NStreams() - 1), {iSector}}, &mInputsShadow->mPclusterNativeBuffer[nClsTotal]); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index dda15d403407e..6ca50f24351c4 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -16,6 +16,7 @@ #include "GPUChainTrackingDebug.h" #include "GPULogging.h" #include "GPUO2DataTypes.h" +#include "GPUTPCExtraADC.h" #include "GPUTrackingInputProvider.h" #include "GPUTPCCFChainContext.h" #include "TPCClusterDecompressor.h" @@ -63,7 +64,7 @@ int32_t GPUChainTracking::RunTPCCompression() if (mPipelineFinalizationCtx && GetProcessingSettings().doublePipelineClusterizer) { SynchronizeEventAndRelease(mEvents->single); auto* foreignChain = (GPUChainTracking*)GetNextChainInQueue(); - foreignChain->RunTPCClusterizer_prepare(false); + foreignChain->RunTPCClusterizer_prepare(false, {}); foreignChain->mCFContext->ptrClusterNativeSave = processorsShadow()->ioPtrs.clustersNative; } #endif diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebug.h b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h index a0be9d833d5a9..39e48f9f14d9a 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebug.h +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h @@ -45,7 +45,8 @@ enum GPUChainTrackingDebugFlags : uint32_t { TPCClustererPeaks = 1 << 19, TPCClustererSuppressedPeaks = 1 << 20, TPCClustererChargeMap = 1 << 21, - TPCClustererZeroedCharges = 1 << 22 + TPCClustererChargeMapSplit = 1 << 22, + TPCClustererZeroedCharges = 1 << 23 }; template diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx index 204d9d6a8b81a..105df23453624 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx @@ -16,14 +16,187 @@ #include "CfArray2D.h" #include "PackedCharge.h" #include "clusterFinderDefs.h" +#include "DataFormatsTPC/ClusterNative.h" #ifndef GPUCA_GPUCODE #include "utils/VcShim.h" #endif +#if 0 +#define DPRINT(...) printf(__VA_ARGS__) +#define DPRINTB(...) \ + if (iThread == 0) \ + printf(__VA_ARGS__) +#define DPRINTB_IF(test, ...) \ + if (iThread == 0 && (test)) \ + printf(__VA_ARGS__) +#else +#define DPRINT(...) ((void)0) +#define DPRINTB(...) ((void)0) +#define DPRINTB_IF(test, ...) ((void)0) +#endif + using namespace o2::gpu; using namespace o2::gpu::tpccf; +using Kernel = GPUTPCCFCheckPadBaseline; + +static GPUdi() HIPTailDescriptor* GetHIPTails(GPUTPCClusterFinder& clusterer, int32_t row) +{ + // HIP TAILS: indexing starts at 1, so 0 index indicates no connection + return clusterer.mPhipTailsByRow + row * GPUTPCCFHIPClusterizer::MaxHIPTailsPerRow; +} + +static GPUdi() Charge UpdateHIPTailFilter(Charge filteredCharge, Charge charge, Charge alpha) +{ + return filteredCharge + alpha * (charge - filteredCharge); +} + +static GPUdi() float HIPTailTimeMean(const HIPTailDescriptor& tail) +{ + const float length = tail.tailEnd > tail.tailStart ? float(tail.tailEnd - tail.tailStart) : 1.f; + return tail.tailStart + 0.5f * (length - 1.f); +} + +static GPUdi() float HIPTailTimeVariance(const HIPTailDescriptor& tail) +{ + const float length = tail.tailEnd > tail.tailStart ? float(tail.tailEnd - tail.tailStart) : 1.f; + return (length * length - 1.f) * (1.f / 12.f); +} + +// Collect tails marked for closing across the workgroup using a prefix scan, +// then cooperatively zero the charge map entries for each closed tail. +// Caller must set acc.activeHIPTail.end before calling if the tail is open. +static GPUdi() uint16_t CloseHIPTails( + Kernel::GPUSharedMemory& smem, + GPUTPCClusterFinder& clusterer, + int32_t iThread, int32_t nThreads, + int16_t iPadHandle, + CfChargePos basePos, + CfArray2D& chargeMap, + Kernel::PadChargeAccu& acc, + bool shouldCloseTail) +{ + const uint32_t row = basePos.row(); + const uint16_t nClosedTails = work_group_count(shouldCloseTail); + + auto* nHIPTails = clusterer.mPnHIPTails; + auto* hipTails = GetHIPTails(clusterer, row); + + if (nClosedTails > 0) { + int16_t iClosedTail = work_group_scan_inclusive_add((int16_t)shouldCloseTail) - 1; + const bool shouldStoreTail = shouldCloseTail && acc.activeHIPTail.Length() > 0; + uint16_t nStoredTails = work_group_count(shouldStoreTail); + int16_t iStoredTail = work_group_scan_inclusive_add((int16_t)shouldStoreTail) - 1; + + // Use exactly one atomic add per closing call to reduce differences in + // tail ordering between runs. + if (nStoredTails > 0) { + if (iThread == 0) { + smem.tailStoreBase = CAMath::AtomicAdd(&nHIPTails[row], (uint32_t)nStoredTails); + } + GPUbarrier(); + } + if (shouldCloseTail) { + smem.tailsClosedPad[iClosedTail] = iPadHandle; + smem.tailsClosed[iClosedTail] = acc.activeHIPTail; + smem.tailsClosedStoreIdx[iClosedTail] = GPUTPCCFHIPTailConnector::MaxHIPTailsPerRow; + + if (shouldStoreTail) { + const uint32_t idx = smem.tailStoreBase + iStoredTail + 1; + smem.tailsClosedStoreIdx[iClosedTail] = idx; + if (idx < GPUTPCCFHIPTailConnector::MaxHIPTailsPerRow) { + hipTails[idx] = {0, 0, (uint16_t)iPadHandle, + (uint16_t)acc.activeHIPTail.start, (uint16_t)acc.activeHIPTail.end, + 0.f, 0.f}; + } + } + + acc.tailFilterCharge = 0; + acc.activeHIPTail.Reset(); + } + + GPUbarrier(); + } + + // TODO: performance improvement -> parallelize this loop across tails + for (uint16_t iTail = 0; iTail < nClosedTails; iTail++) { + const auto tailPad = smem.tailsClosedPad[iTail]; + const auto tail = smem.tailsClosed[iTail]; + const uint32_t tailStoreIdx = smem.tailsClosedStoreIdx[iTail]; + + Charge qTot = 0.f; + Charge qMax = 0.f; + for (uint16_t iTime = iThread; iTime < tail.Length(); iTime += nThreads) { + const int16_t time = tail.start + iTime; + auto pos = basePos.delta({tailPad, time}); + const Charge q = chargeMap[pos].unpack(); + qTot += q; + qMax = CAMath::Max(qMax, q); + chargeMap[pos] = PackedCharge{0}; + } + + smem.tailQTotScratch[iThread] = qTot; + smem.tailQMaxScratch[iThread] = qMax; + GPUbarrier(); + for (uint16_t active = nThreads; active > 1;) { + const uint16_t stride = (active + 1) / 2; + if (iThread < active - stride) { + smem.tailQTotScratch[iThread] += smem.tailQTotScratch[iThread + stride]; + smem.tailQMaxScratch[iThread] = CAMath::Max(smem.tailQMaxScratch[iThread], smem.tailQMaxScratch[iThread + stride]); + } + active = stride; + GPUbarrier(); + } + + if (iThread == 0 && tailStoreIdx < GPUTPCCFHIPTailConnector::MaxHIPTailsPerRow) { + HIPTailDescriptor& tailDescriptor = hipTails[tailStoreIdx]; + tailDescriptor.qTot = smem.tailQTotScratch[0]; + tailDescriptor.qMax = smem.tailQMaxScratch[0]; + } + } + + return nClosedTails; +} + +template +static GPUdi() void ScanCachedCharges(Kernel::GPUSharedMemory& smem, uint16_t timeOffset, uint16_t pad, Charge hipTailThreshold, Charge hipTailFilterAlpha, Kernel::PadChargeAccu& acc) +{ + for (int32_t i = 0; i < Kernel::NumOfCachedTBs; i++) { + const Charge qs = smem.charges[i][pad]; + const int16_t curTB = timeOffset + i; + + acc.totalCharges += qs > 0; + acc.consecCharges = qs > 0 ? acc.consecCharges + 1 : 0; + acc.maxConsecCharges = CAMath::Max(acc.consecCharges, acc.maxConsecCharges); + acc.maxCharge = CAMath::Max(qs, acc.maxCharge); + + if (qs >= hipTailThreshold) { + if (acc.aboveThresholdStart < 0) { + acc.aboveThresholdStart = curTB; + } + } else { + acc.aboveThresholdStart = -1; + } + + if constexpr (CheckHIPTrigger) { + if (acc.HIPtb < 0 && qs >= Charge(Kernel::MaxADC)) { + acc.HIPtb = acc.aboveThresholdStart; // start of rising edge, not first sat TB + smem.tails[pad] = {acc.HIPtb, 0}; // Broadcast HIP start TB to neighboring pads / threads + } + } + + if constexpr (CheckHIPTailEnd) { + if (acc.activeHIPTail.IsOpen()) { + acc.tailFilterCharge = UpdateHIPTailFilter(acc.tailFilterCharge, qs, hipTailFilterAlpha); + if (acc.tailFilterCharge < hipTailThreshold) { + acc.activeHIPTail.end = curTB; + } + } + } + } +} + template <> GPUd() void GPUTPCCFCheckPadBaseline::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) { @@ -49,6 +222,9 @@ GPUd() void GPUTPCCFCheckPadBaseline::CheckBaselineGPU(int32_t nBlocks, int32_t } const CfFragment& fragment = clusterer.mPmemory->fragment; + const bool hipFilterOn = clusterer.Param().rec.tpc.hipTailFilter; + const Charge hipTailThreshold = clusterer.Param().rec.tpc.hipTailFilterThreshold; + const Charge hipTailFilterAlpha = clusterer.Param().rec.tpc.hipTailFilterAlpha; CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); constexpr GPUTPCGeometry geo; @@ -57,45 +233,139 @@ GPUd() void GPUTPCCFCheckPadBaseline::CheckBaselineGPU(int32_t nBlocks, int32_t const auto nPads = geo.NPads(iRow); const CfChargePos basePos{(Row)iRow, 0, 0}; - int32_t totalCharges = 0; - int32_t consecCharges = 0; - int32_t maxConsecCharges = 0; - Charge maxCharge = 0; + PadChargeAccu acc; const int16_t iPadOffset = iThread % MaxNPadsPerRow; const int16_t iTimeOffset = iThread / MaxNPadsPerRow; const int16_t iPadHandle = iThread; const bool handlePad = iPadHandle < nPads; - const auto firstTB = fragment.firstNonOverlapTimeBin(); - const auto lastTB = fragment.lastNonOverlapTimeBin(); + if (iPadHandle < MaxNPadsPerRow) { + smem.tails[iPadHandle] = {-1, -1}; + } + GPUbarrier(); + + // Pad filter scans the entire fragments including overlap. + // Minimal runtime overhead and prevents headaches later on as + // saturated signal in overlap region can create tails in the next fragment + // even when cleared in current fragment as they're decoded twice + const TPCFragmentTime firstTB = 0; + const TPCFragmentTime lastTB = fragment.length; - for (auto t = firstTB; t < lastTB; t += NumOfCachedTBs) { + for (uint16_t t = firstTB; t < lastTB; t += NumOfCachedTBs) { - const TPCFragmentTime iTime = t + iTimeOffset; + bool thisThreadHasTrigger = false; + for (uint16_t tt = 0; tt < NumOfCachedTBs; tt += TimebinsPerCacheline) { + const TPCFragmentTime iTimeLoad = t + tt + iTimeOffset; - const CfChargePos pos = basePos.delta({iPadOffset, iTime}); + const CfChargePos pos = basePos.delta({iPadOffset, iTimeLoad}); - smem.charges[iTimeOffset][iPadOffset] = iTime < lastTB && iPadOffset < nPads ? chargeMap[pos].unpack() : 0; + const Charge ql = iTimeLoad < lastTB && iPadOffset < nPads ? chargeMap[pos].unpack() : 0; + smem.charges[tt + iTimeOffset][iPadOffset] = ql; - GPUbarrier(); + thisThreadHasTrigger |= ql >= Charge(MaxADC); + } + + bool hasHIPTrigger = false; + if (hipFilterOn) { + hasHIPTrigger = work_group_any(thisThreadHasTrigger); + } else { + // Need a barrier here even if HIP filter is disabled + GPUbarrier(); + } + + acc.HIPtb = -1; if (handlePad) { - for (int32_t i = 0; i < NumOfCachedTBs; i++) { - const Charge q = smem.charges[i][iPadHandle]; - totalCharges += (q > 0); - consecCharges = (q > 0) ? consecCharges + 1 : 0; - maxConsecCharges = CAMath::Max(consecCharges, maxConsecCharges); - maxCharge = CAMath::Max(q, maxCharge); + + // TODO: is this really necessary? + // Why is the old version so much slower, when we just add short branches to the loop??? + if (!hasHIPTrigger) [[likely]] { + if (!acc.activeHIPTail.IsOpen()) { + ScanCachedCharges(smem, t, iPadHandle, hipTailThreshold, hipTailFilterAlpha, acc); + } else { + ScanCachedCharges(smem, t, iPadHandle, hipTailThreshold, hipTailFilterAlpha, acc); + } + } else { + if (!acc.activeHIPTail.IsOpen()) { + ScanCachedCharges(smem, t, iPadHandle, hipTailThreshold, hipTailFilterAlpha, acc); + } else { + ScanCachedCharges(smem, t, iPadHandle, hipTailThreshold, hipTailFilterAlpha, acc); + } } } GPUbarrier(); - } + + if (hasHIPTrigger) [[unlikely]] { + + DPRINTB("%d: Trigger!\n", iBlock); + + if (handlePad && acc.HIPtb < 0) { + + // Search neighboring pads for trigger + for (int16_t i = -SSClusterPadWidth; i < 0; i++) { + const auto p = iPadHandle + i; + if (p > -1) { + acc.HIPtb = CAMath::Max(smem.tails[p].start, acc.HIPtb); + } + } + + for (int16_t i = 1; i <= SSClusterPadWidth; i++) { + const auto p = iPadHandle + i; + if (p < MaxNPadsPerRow) { + acc.HIPtb = CAMath::Max(smem.tails[p].start, acc.HIPtb); + } + } + } + + bool shouldCloseTail = acc.HIPtb > -1 && acc.activeHIPTail.HasValue(); + if (shouldCloseTail && acc.activeHIPTail.IsOpen()) { + DPRINT("%d: end = %d\n", iThread, acc.HIPtb); + acc.activeHIPTail.end = acc.HIPtb; + } + + CloseHIPTails(smem, clusterer, iThread, nThreads, iPadHandle, basePos, chargeMap, acc, shouldCloseTail); + + GPUbarrier(); + + if (acc.HIPtb > -1) { + DPRINT("%d: start = %d\n", iThread, acc.HIPtb); + acc.activeHIPTail.SetOpen(acc.HIPtb); + acc.tailFilterCharge = Charge(MaxADC); + } + + // Clear smem between iterations to prevent stale entries + if (handlePad) { + smem.tails[iPadHandle].Reset(); + } + + GPUbarrier(); + + } // if (hipTriggerFound) + + } // for (uint16_t t = firstTB; t < lastTB; t += NumOfCachedTBs) if (handlePad) { - updatePadBaseline(basePos.gpad + iPadHandle, clusterer, totalCharges, maxConsecCharges, maxCharge); + updatePadBaseline(basePos.gpad + iPadHandle, clusterer, acc.totalCharges, acc.maxConsecCharges, acc.maxCharge); + } + + // --- Close remaining tails + const bool shouldCloseTail = acc.activeHIPTail.HasValue(); + + // Call `work_group_any` here, instead of always counting. + // This is important as `work_group_count` is a lot slower + // and has a lot of overhead if no HIPs were found. + if (work_group_any(shouldCloseTail)) { + if (shouldCloseTail && acc.activeHIPTail.IsOpen()) { + acc.activeHIPTail.end = lastTB; + } + + [[maybe_unused]] const uint16_t nClosedTails = CloseHIPTails(smem, clusterer, iThread, nThreads, iPadHandle, basePos, chargeMap, acc, shouldCloseTail); + + DPRINTB_IF(nClosedTails > 0, "%d: Close remaining tails (%d)\n", iBlock, nClosedTails); } + #endif } @@ -172,3 +442,138 @@ GPUd() void GPUTPCCFCheckPadBaseline::updatePadBaseline(int32_t pad, const GPUTP clusterer.mPpadIsNoisy[pad] = true; } } + +// ======== HIP Tail Connector Kernel ======== + +template <> +GPUd() void GPUTPCCFHIPTailConnector::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) +{ + if (iBlock >= (int32_t)GPUTPCGeometry::NROWS) { + return; + } + const uint32_t row = iBlock; + + const uint32_t nTails = CAMath::Min(clusterer.mPnHIPTails[row], (uint32_t)MaxHIPTailsPerRow - 1); + + // HIP TAILS: indexing starts at 1, so 0 index indicates no connection + HIPTailDescriptor* tails = GetHIPTails(clusterer, row); + +#ifdef GPUCA_DETERMINISTIC_MODE + // Races in tail comparisons and atomic swap can lead to slightly different clusters. + // So need a sequential fallback for deterministic mode + if (iThread > 0) { + return; + } + nThreads = 1; + GPUCommonAlgorithm::sortInBlock(tails + 1, tails + nTails + 1, [](auto&& t1, auto&& t2) { + if (t1.pad != t2.pad) { + return t1.pad < t2.pad; + } + return t1.tailStart < t2.tailStart; + }); +#endif + + for (uint32_t iTail = iThread + 1; iTail <= nTails; iTail += nThreads) { + auto* tail = &tails[iTail]; + + // TODO: this is needed because tailStarts may vary due to rising edge + // Better approach would be to also track the triggered timebin and match that instead + uint16_t overlapWindowStart = tail->tailStart >= 5 ? tail->tailStart - 5 : 0; + uint16_t overlapWindowEnd = tail->tailStart + 5; + + for (uint32_t jTail = iTail + 1; jTail <= nTails; jTail++) { + auto* tailNext = &tails[jTail]; + if (tailNext->iPrev > 0) { + continue; + } + + const bool overlapPad = tailNext->pad >= tail->pad - GPUTPCCFCheckPadBaseline::SSClusterPadWidth && tailNext->pad <= tail->pad + GPUTPCCFCheckPadBaseline::SSClusterPadWidth; + const bool overlapTime = tailNext->tailStart >= overlapWindowStart && tailNext->tailStart < overlapWindowEnd; + + if (overlapPad && overlapTime) { + if (CAMath::AtomicCAS(&tailNext->iPrev, 0u, iTail)) { + tail->iNext = jTail; + break; + } + } + } + } +} + +// ======== HIP Clusterizer Kernel ======== + +template <> +GPUd() void GPUTPCCFHIPClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) +{ + if (iBlock >= (int32_t)GPUTPCGeometry::NROWS) { + return; + } + + const uint32_t row = iBlock; + uint32_t nTails = clusterer.mPnHIPTails[row]; + nTails = CAMath::Min(nTails, (uint32_t)MaxHIPTailsPerRow - 1); + + HIPTailDescriptor* tails = GetHIPTails(clusterer, row); + const auto& fragment = clusterer.mPmemory->fragment; + + for (uint32_t iTail = iThread + 1; iTail <= nTails; iTail += nThreads) { + + auto* tail = &tails[iTail]; + + if (tail->iPrev != 0) { + continue; + } + + float qTot = tail->qTot; + float qMax = tail->qMax; + const float firstWeight = tail->qTot; + const float firstPad = tail->pad; + const float firstTime = HIPTailTimeMean(*tail); + float padSum = firstWeight * firstPad; + float padSqSum = firstWeight * firstPad * firstPad; + float timeSum = firstWeight * firstTime; + + uint32_t tailStart = tail->tailStart; + uint32_t tailEnd = tail->tailEnd; + + while (tail->iNext != 0) { + + tail = &tails[tail->iNext]; + + const float tailWeight = tail->qTot; + const float tailPad = tail->pad; + const float tailTime = HIPTailTimeMean(*tail); + qMax = CAMath::Max(qMax, tail->qMax); + qTot += tail->qTot; + padSum += tailWeight * tailPad; + padSqSum += tailWeight * tailPad * tailPad; + timeSum += tailWeight * tailTime; + tailStart = CAMath::Min(tailStart, tail->tailStart); + tailEnd = CAMath::Max(tailEnd, tail->tailEnd); + } + + const float weightSum = CAMath::Max(qTot, 1.f); + float padMean = padSum / weightSum; + float timeMean = timeSum / weightSum; // TODO: Use timebin of saturated signal instead! Time mean is biased for long tails. + float padSigma = CAMath::Sqrt(CAMath::Max(0.f, padSqSum / weightSum - padMean * padMean)); + + tpc::ClusterNative cn; + cn.qMax = qMax; + cn.setSaturatedQtot(qTot); + cn.setSaturatedTailLength(tailEnd - tailStart); + float clusterTime = fragment.start + timeMean - clusterer.Param().rec.tpc.clustersShiftTimebinsClusterizer; + cn.setTimeFlags(clusterTime, 0); + cn.setPad(padMean); + cn.setSigmaPad(padSigma); + + if (cn.qMax >= 1023) { + // Cut off clusters where the tail connection failed for some reason + // TODO: Deduplicate with GPUTPCCFClusterizer::sortIntoBuckets (can't call cross-kernel). + // TODO: Add error reporting for row cluster overflow. + uint32_t index = CAMath::AtomicAdd(&clusterer.mPclusterInRow[row], 1u); + if (index < clusterer.mNMaxClusterPerRow) { + clusterer.mPclusterByRow[clusterer.mNMaxClusterPerRow * row + index] = cn; + } + } + } +} diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h index 7638b95ee7f0b..f78f91a548ac9 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h @@ -15,6 +15,11 @@ /// Kernel identifies noisy TPC pads by analyzing charge patterns over time. /// A pad is marked noisy if it exceeds thresholds for total or consecutive /// time bins with charge, unless the charge exceeds a saturation threshold. +/// +/// Optionally detects Highly Ionising Particle (HIP) tails: when a saturated +/// ADC value (1023) is found, the tail region on the triggering pad and its +/// neighbors is zeroed in the charge map until an exponential charge filter +/// drops below a configurable threshold. #ifndef O2_GPU_GPU_TPC_CF_CHECK_PAD_BASELINE_H #define O2_GPU_GPU_TPC_CF_CHECK_PAD_BASELINE_H @@ -29,6 +34,16 @@ namespace o2::gpu { +struct HIPTailDescriptor { + uint32_t iPrev; + uint32_t iNext; + uint16_t pad; + uint16_t tailStart; + uint16_t tailEnd; + float qTot; + float qMax; +}; + class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate { @@ -39,15 +54,65 @@ class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate EntriesPerCacheline = PadsPerCacheline * TimebinsPerCacheline, NumOfCachedPads = GPUCA_WARP_SIZE / TimebinsPerCacheline, NumCLsPerWarp = GPUCA_WARP_SIZE / EntriesPerCacheline, - NumOfCachedTBs = TimebinsPerCacheline, + NumOfCachedTBs = TimebinsPerCacheline * 8, // Threads index shared memory as [iThread / MaxNPadsPerRow][iThread % MaxNPadsPerRow]. // Rounding up to a multiple of PadsPerCacheline ensures iThread / MaxNPadsPerRow < NumOfCachedTBs // for all threads, avoiding out-of-bounds access. MaxNPadsPerRow = CAMath::nextMultipleOf(GPUTPCGeometry::MaxNPadsPerRow()), + + MaxADC = 1023, + + NThreads = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFCheckPadBaseline), + SSClusterPadWidth = 5, }; - struct GPUSharedMemory { + union HipTailRange { + struct { + int16_t start; + int16_t end; + }; + + // Be careful with using default initialized values. + // Need default constructor, so can be placed in shared memory. + // Might be zero initialized, but invalid tail needs start = end = -1 instead. + GPUdDefault() HipTailRange() = default; + GPUdi() HipTailRange(int16_t st, int16_t e) : start(st), end(e) {} + + GPUdi() bool HasValue() const { return start > -1; } + GPUdi() bool IsOpen() const { return start > -1 && end < 0; } + + GPUdi() void SetOpen(int16_t st) + { + start = st; + end = -1; + } + + GPUdi() int16_t Length() const { return end - start; } + + GPUdi() void Reset() { start = end = -1; } + }; + + struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64 { tpccf::Charge charges[NumOfCachedTBs][MaxNPadsPerRow]; + HipTailRange tails[MaxNPadsPerRow]; + uint8_t tailsClosedPad[MaxNPadsPerRow]; + HipTailRange tailsClosed[MaxNPadsPerRow]; + uint32_t tailsClosedStoreIdx[MaxNPadsPerRow]; + tpccf::Charge tailQTotScratch[NThreads]; + tpccf::Charge tailQMaxScratch[NThreads]; + uint32_t tailStoreBase; + }; + + // Accumulated values from scanning cached charges in a pad + struct PadChargeAccu { + int32_t totalCharges = 0; + int32_t consecCharges = 0; + int32_t maxConsecCharges = 0; + tpccf::Charge maxCharge = 0; + int16_t HIPtb = -1; + int16_t aboveThresholdStart = -1; // first TB of current above-hipTailThreshold streak; used to extend the tail back over the rising edge before saturation + HipTailRange activeHIPTail{-1, -1}; + tpccf::Charge tailFilterCharge = 0; }; typedef GPUTPCClusterFinder processorType; @@ -79,6 +144,58 @@ class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate GPUd() static void updatePadBaseline(int32_t pad, const GPUTPCClusterFinder&, int32_t totalCharges, int32_t consecCharges, tpccf::Charge maxCharge); }; +class GPUTPCCFHIPTailConnector : public GPUKernelTemplate +{ + public: + enum { + MaxHIPTails = 1 << 15, + MaxHIPTailsPerRow = MaxHIPTails, + }; + + struct GPUSharedMemory { + }; + + typedef GPUTPCClusterFinder processorType; + GPUhdi() static processorType* Processor(GPUConstantMem& processors) + { + return processors.tpcClusterer; + } + + GPUhdi() constexpr static gpudatatypes::RecoStep GetRecoStep() + { + return gpudatatypes::RecoStep::TPCClusterFinding; + } + + template + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer); +}; + +class GPUTPCCFHIPClusterizer : public GPUKernelTemplate +{ + public: + enum { + MaxHIPTails = GPUTPCCFHIPTailConnector::MaxHIPTails, + MaxHIPTailsPerRow = GPUTPCCFHIPTailConnector::MaxHIPTailsPerRow, + }; + + struct GPUSharedMemory { + }; + + typedef GPUTPCClusterFinder processorType; + GPUhdi() static processorType* Processor(GPUConstantMem& processors) + { + return processors.tpcClusterer; + } + + GPUhdi() constexpr static gpudatatypes::RecoStep GetRecoStep() + { + return gpudatatypes::RecoStep::TPCClusterFinding; + } + + template + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer); +}; + } // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx index 7fef277138632..3d1ebbd54490e 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx @@ -477,7 +477,10 @@ GPUd() void GPUTPCCFDecodeZSLinkBase::WriteCharge(processorType& clusterer, floa CfChargePos pos(padAndRow.getRow(), padAndRow.getPad(), localTime); positions[positionOffset] = pos; - charge *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, padAndRow.getRow(), padAndRow.getPad()); + // Only apply gain correction if ADC not fully saturated + if (charge < 1023.f) { + charge *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, padAndRow.getRow(), padAndRow.getPad()); + } chargeMap[pos] = PackedCharge(charge); } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h index 3ad463f469cd6..74b76f6bf7598 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h @@ -132,7 +132,7 @@ class GPUTPCCFDecodeZSLink : public GPUTPCCFDecodeZSLinkBase { public: // constants for decoding - static inline constexpr int32_t DECODE_BITS = o2::tpc::TPCZSHDRV2::TPC_ZS_NBITS_V34; + static inline constexpr int32_t DECODE_BITS = tpc::TPCZSHDRV2::TPC_ZS_NBITS_V34; static inline constexpr float DECODE_BITS_FACTOR = 1.f / (1 << (DECODE_BITS - 10)); static inline constexpr uint32_t DECODE_MASK = (1 << DECODE_BITS) - 1; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx index d79cdc2333b76..3248185a8be00 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx @@ -46,13 +46,20 @@ template <> GPUd() void GPUTPCCFMCLabelFlattener::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory&, processorType& clusterer) { #if !defined(GPUCA_GPUCODE) - Row row = get_global_id(0); + const Row row = get_global_id(0); + const size_t clusterInRow = clusterer.mPclusterInRow[row]; + + // Label Flattener assumes 1 label container per cluster, + // but HIP clusters don't support MC labels yet and containers are missing for those clusters. + // So append empty label container for each HIP cluster. + // Note: This assumes that HIP cluster are store behind regular clusters! + auto& labels = clusterer.mPlabelsByRow[row].data; + labels.resize(std::max(labels.size(), clusterInRow)); - uint32_t clusterInRow = clusterer.mPclusterInRow[row]; uint32_t labelCount = 0; - for (uint32_t i = 0; i < clusterInRow; i++) { - auto& interim = clusterer.mPlabelsByRow[row].data[i]; + for (size_t i = 0; i < clusterInRow; i++) { + auto& interim = labels[i]; labelCount += interim.labels.size(); } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx index e34163d3803fe..67be936ab4627 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx @@ -25,6 +25,7 @@ #include "CfChargePos.h" #include "CfArray2D.h" +#include "GPUTPCCFCheckPadBaseline.h" using namespace o2::gpu; using namespace o2::tpc; @@ -95,6 +96,10 @@ void* GPUTPCClusterFinder::SetPointersScratch(void* mem) if ((mRec->GetRecoStepsGPU() & gpudatatypes::RecoStep::TPCClusterFinding)) { computePointerWithAlignment(mem, mPscanBuf, mBufSize * mNBufs); } + // TODO: Use memory scalers for MaxHIPTails. + // NOTE: Always allocate since Param() is not available during size computation. + computePointerWithAlignment(mem, mPhipTailsByRow, GPUTPCGeometry::NROWS * GPUTPCCFHIPClusterizer::MaxHIPTailsPerRow); + computePointerWithAlignment(mem, mPnHIPTails, GPUTPCGeometry::NROWS); return mem; } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h index 4d036c2056cc5..bc49d225133fa 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h @@ -46,6 +46,7 @@ namespace o2::gpu { struct GPUTPCClusterMCInterimArray; struct TPCPadGainCalib; +struct HIPTailDescriptor; struct CfChargePos; @@ -113,6 +114,8 @@ class GPUTPCClusterFinder : public GPUProcessor tpc::ClusterNative* mPclusterByRow = nullptr; GPUTPCClusterMCInterimArray* mPlabelsByRow = nullptr; int32_t* mPscanBuf = nullptr; + HIPTailDescriptor* mPhipTailsByRow = nullptr; + uint32_t* mPnHIPTails = nullptr; // one counter per row Memory* mPmemory = nullptr; GPUdi() int32_t* GetScanBuffer(int32_t iBuf) const { return mPscanBuf + iBuf * mBufSize; } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx index 1e5030956df01..2b21af6a08bed 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx @@ -15,7 +15,6 @@ #include "GPUTPCClusterFinder.h" #include "GPUReconstruction.h" #include "CfArray2D.h" -#include "DataFormatsTPC/Digit.h" #include "DataFormatsTPC/ClusterNative.h" #include "GPUSettings.h" @@ -155,7 +154,7 @@ void GPUTPCClusterFinder::DumpSuppressedPeaksCompacted(std::ostream& out) void GPUTPCClusterFinder::DumpClusters(std::ostream& out) { - out << "\nClusterer - Clusters - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << "\n"; + out << "\nClusterer - Clusters - Sector " << mISector << " - All Fragments\n"; for (uint32_t i = 0; i < GPUTPCGeometry::NROWS; i++) { size_t N = mPclusterInRow[i]; @@ -167,7 +166,13 @@ void GPUTPCClusterFinder::DumpClusters(std::ostream& out) out << "Row: " << i << ": " << N << "\n"; for (const auto& cl : sortedCluster) { - out << std::hex << cl.timeFlagsPacked << std::dec << " " << cl.padPacked << " " << int32_t{cl.sigmaTimePacked} << " " << int32_t{cl.sigmaPadPacked} << " " << cl.qMax << " " << cl.qTot << "\n"; + uint32_t qTot = cl.qTot; + uint32_t sigmaTime = cl.sigmaTimePacked; + if (cl.isSaturated()) { + qTot = cl.getSaturatedQtot(); + sigmaTime = cl.getSaturatedTailLength(); + } + out << std::hex << cl.timeFlagsPacked << std::dec << " " << cl.padPacked << " " << sigmaTime << " " << int32_t{cl.sigmaPadPacked} << " " << cl.qMax << " " << qTot << "\n"; } } } diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 2176ea2dc3804..3041c2b869de2 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -103,6 +103,8 @@ o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, sortPerSectorRow" "GPUTP o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, countFilteredClusters" "GPUTPCDecompressionKernels" LB) o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, storeFilteredClusters" "GPUTPCDecompressionKernels" LB) o2_gpu_add_kernel("GPUTPCCFCheckPadBaseline" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFHIPTailConnector" "GPUTPCCFCheckPadBaseline TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFHIPClusterizer" "GPUTPCCFCheckPadBaseline TPCCLUSTERFINDER" LB) o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillIndexMap" "= TPCCLUSTERFINDER" LB) o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillFromDigits" "= TPCCLUSTERFINDER" LB) o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, findFragmentStart" "= TPCCLUSTERFINDER" LB int8_t setPositions) diff --git a/GPU/GPUTracking/utils/VcShim.h b/GPU/GPUTracking/utils/VcShim.h index 21a9a6a5c95c2..2bbc1d471bbbb 100644 --- a/GPU/GPUTracking/utils/VcShim.h +++ b/GPU/GPUTracking/utils/VcShim.h @@ -19,7 +19,7 @@ #ifndef GPUCA_NO_VC -#include +#include // IWYU pragma: export #else diff --git a/GPU/TPCFastTransformation/CorrectionMapsHelper.cxx b/GPU/TPCFastTransformation/CorrectionMapsHelper.cxx index 7f7deddafe1c8..4bfedc117dec7 100644 --- a/GPU/TPCFastTransformation/CorrectionMapsHelper.cxx +++ b/GPU/TPCFastTransformation/CorrectionMapsHelper.cxx @@ -38,6 +38,10 @@ void CorrectionMapsHelper::setCorrMapMShape(std::unique_ptr&& void CorrectionMapsHelper::updateLumiScale(bool report) { if (!canUseCorrections()) { + if (mLumiScaleMode != LumiScaleMode::NoCorrection) { + LOGP(warning, "Negative meanLumi={} detected, switching to NoCorrection mode for backward compatibility", mMeanLumi); + mLumiScaleMode = LumiScaleMode::NoCorrection; + } mLumiScale = -1.f; } else if ((mLumiScaleMode == LumiScaleMode::DerivativeMap) || (mLumiScaleMode == LumiScaleMode::DerivativeMapMC)) { mLumiScale = mMeanLumiRef ? (mInstLumi - mMeanLumi) / mMeanLumiRef : 0.f; @@ -54,7 +58,40 @@ void CorrectionMapsHelper::updateLumiScale(bool report) //________________________________________________________ void CorrectionMapsHelper::reportScaling() { - LOGP(info, "Map scaling update: LumiScaleType={} instLumi(CTP)={} instLumi(scaling)={} meanLumiRef={}, meanLumi={} -> LumiScale={} lumiScaleMode={}, M-Shape map valid: {}, M-Shape default: {}", - mLumiScaleType == LumiScaleType::NoScaling ? "NoScaling" : (mLumiScaleType == LumiScaleType::CTPLumi ? "LumiCTP" : "TPCScaler"), getInstLumiCTP(), getInstLumi(), getMeanLumiRef(), getMeanLumi(), getLumiScale(), - mLumiScaleMode == LumiScaleMode::Linear ? "Linear" : "Derivative", (mCorrMapMShape != nullptr), isCorrMapMShapeDummy()); + auto lumiTypeName = [](LumiScaleType t) { + switch (t) { + case LumiScaleType::NoScaling: + return "NoScaling"; + case LumiScaleType::CTPLumi: + return "CTPLumi"; + case LumiScaleType::TPCScaler: + return "TPCScaler"; + default: + return "Unknown"; + } + }; + + const bool mshapeValid = (mCorrMapMShape != nullptr) && !isCorrMapMShapeDummy(); + + if (mLumiScaleMode == LumiScaleMode::NoCorrection) { + LOGP(info, "Map scaling update: mode=NoCorrection (corrections disabled, dummy map in use)"); + } else if (mLumiScaleMode == LumiScaleMode::StaticMapOnly) { + LOGP(info, "Map scaling update: mode=StaticMapOnly (static reference map, no lumi scaling), M-Shape correction: {}", mshapeValid ? "applied" : "not applied"); + } else { + auto lumiModeName = [](LumiScaleMode m) { + switch (m) { + case LumiScaleMode::Linear: + return "Linear"; + case LumiScaleMode::DerivativeMap: + return "DerivativeMap"; + case LumiScaleMode::DerivativeMapMC: + return "DerivativeMapMC"; + default: + return "Unknown"; + } + }; + LOGP(info, "Map scaling update: LumiScaleType={} instLumi(CTP)={} instLumi(scaling)={} meanLumiRef={} meanLumi={} -> LumiScale={} lumiScaleMode={}, M-Shape correction: {}", + lumiTypeName(mLumiScaleType), getInstLumiCTP(), getInstLumi(), getMeanLumiRef(), getMeanLumi(), getLumiScale(), + lumiModeName(mLumiScaleMode), mshapeValid ? "applied" : "not applied"); + } } diff --git a/GPU/TPCFastTransformation/CorrectionMapsTypes.h b/GPU/TPCFastTransformation/CorrectionMapsTypes.h index e239b668ab751..092a2927ebe3e 100644 --- a/GPU/TPCFastTransformation/CorrectionMapsTypes.h +++ b/GPU/TPCFastTransformation/CorrectionMapsTypes.h @@ -22,14 +22,18 @@ enum class LumiScaleType : int { Unset = -1, ///< init value NoScaling = 0, ///< no scaling, use map as is CTPLumi = 1, ///< use CTP luminosity for scaling - TPCScaler = 2 ///< use TPC scaler for scaling + TPCScaler = 2, ///< use TPC scaler for scaling + Count ///< sentinel - keep last }; enum class LumiScaleMode : int { - Unset = -1, ///< init value - Linear = 0, ///< map(lumi) = (mean_map - referenceMap) * lumiScale + referenceMap - DerivativeMap = 1, ///< map(lumi) = mean_map + lumiScale * (derivativeMap) where derivativeMap = (mean_map_A - mean_map_B) - DerivativeMapMC = 2 ///< same DerivativeMap, but for MC + Unset = -1, ///< init value + Linear = 0, ///< map(lumi) = (mean_map - referenceMap) * lumiScale + referenceMap + DerivativeMap = 1, ///< map(lumi) = mean_map + lumiScale * (derivativeMap) where derivativeMap = (mean_map_A - mean_map_B) + DerivativeMapMC = 2, ///< same DerivativeMap, but for MC + NoCorrection = 3, ///< no corrections at all + StaticMapOnly = 4, ///< use only static map instead of main map + Count ///< sentinel - keep last }; struct CorrectionMapsGloOpts { diff --git a/log.txt b/log.txt deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/prodtests/full-system-test/aggregator-workflow.sh b/prodtests/full-system-test/aggregator-workflow.sh index 0dc30df93669d..e8469465d0572 100755 --- a/prodtests/full-system-test/aggregator-workflow.sh +++ b/prodtests/full-system-test/aggregator-workflow.sh @@ -155,25 +155,35 @@ if workflow_has_parameter CALIB_PROXIES; then if [[ -n ${CALIBDATASPEC_BARREL_SPORADIC:-} ]]; then add_W o2-dpl-raw-proxy "--dataspec \"$CALIBDATASPEC_BARREL_SPORADIC\" $(get_proxy_connection barrel_sp input sporadic)" "" 0 fi - elif [[ $AGGREGATOR_TASKS == TPC_IDCBOTH_SAC ]]; then + elif [[ $AGGREGATOR_TASKS == TPC_IDCBOTH_SAC || $AGGREGATOR_TASKS == TPC_CMV ]]; then if [[ $EPNSYNCMODE != 1 ]]; then - echo "ERROR: TPC IDC / SAC calib workflow enabled without EPNSYNCMODE, please note that there will not be input data for it" 1>&2 + echo "ERROR: TPC IDC / SAC / CMV calib workflow enabled without EPNSYNCMODE, please note that there will not be input data for it" 1>&2 fi CHANNELS_LIST= [[ $EPNSYNCMODE == 0 ]] && FLP_ADDRESS="tcp://localhost:29950" if [[ -n ${CALIBDATASPEC_TPCIDC_A:-} ]] || [[ -n ${CALIBDATASPEC_TPCIDC_C:-} ]] || [[ -n ${CALIBDATASPEC_TPCCMV:-} ]]; then - # define port for FLP - : ${TPC_IDC_FLP_PORT:=29950} + # define port for FLP and channel prefix + TPC_FLP_PORT= + TPC_FLP_CHAN_PREFIX= + if [[ $AGGREGATOR_TASKS == TPC_CMV ]] && [[ -n ${CALIBDATASPEC_TPCCMV:-} ]]; then + TPC_FLP_PORT=29952 + TPC_FLP_CHAN_PREFIX=tpccmv + elif [[ $AGGREGATOR_TASKS == TPC_IDCBOTH_SAC ]] && [[ -n ${CALIBDATASPEC_TPCIDC_A:-} || -n ${CALIBDATASPEC_TPCIDC_C:-} ]]; then + TPC_FLP_PORT=29950 + TPC_FLP_CHAN_PREFIX=tpcidc + fi # expand FLPs; TPC uses from 001 to 145, but 145 is reserved for SAC - if [[ "${GEN_TOPO_DEPLOYMENT_TYPE:-}" == "ALICE_STAGING" ]]; then - FLP_ADDRESS="tcp://alio2-cr1-mvs03-ib:${TPC_IDC_FLP_PORT}" - CHANNELS_LIST+="type=pull,name=tpcidc_flp,transport=zeromq,address=$FLP_ADDRESS,method=connect,rateLogging=10;" - else - for flp in $(seq -f "%03g" 1 144); do - [[ ! $FLP_IDS =~ (^|,)"$flp"(,|$) ]] && continue - [[ $EPNSYNCMODE == 1 ]] && FLP_ADDRESS="tcp://alio2-cr1-flp${flp}-ib:${TPC_IDC_FLP_PORT}" - CHANNELS_LIST+="type=pull,name=tpcidc_flp${flp},transport=zeromq,address=$FLP_ADDRESS,method=connect,rateLogging=10;" - done + if [[ -n $TPC_FLP_PORT ]]; then + if [[ "${GEN_TOPO_DEPLOYMENT_TYPE:-}" == "ALICE_STAGING" ]]; then + FLP_ADDRESS="tcp://alio2-cr1-mvs03-ib:${TPC_FLP_PORT}" + CHANNELS_LIST+="type=pull,name=${TPC_FLP_CHAN_PREFIX}_flp,transport=zeromq,address=$FLP_ADDRESS,method=connect,rateLogging=10;" + else + for flp in $(seq -f "%03g" 1 144); do + [[ ! $FLP_IDS =~ (^|,)"$flp"(,|$) ]] && continue + [[ $EPNSYNCMODE == 1 ]] && FLP_ADDRESS="tcp://alio2-cr1-flp${flp}-ib:${TPC_FLP_PORT}" + CHANNELS_LIST+="type=pull,name=${TPC_FLP_CHAN_PREFIX}_flp${flp},transport=zeromq,address=$FLP_ADDRESS,method=connect,rateLogging=10;" + done + fi fi fi if [[ -n ${CALIBDATASPEC_TPCSAC:-} ]]; then @@ -184,22 +194,25 @@ if workflow_has_parameter CALIB_PROXIES; then fi if [[ -n $CHANNELS_LIST ]]; then DATASPEC_LIST= - if [[ -n ${CALIBDATASPEC_TPCIDC_A:-} ]]; then - add_semicolon_separated DATASPEC_LIST "\"$CALIBDATASPEC_TPCIDC_A\"" - fi - if [[ -n ${CALIBDATASPEC_TPCIDC_C:-} ]]; then - add_semicolon_separated DATASPEC_LIST "\"$CALIBDATASPEC_TPCIDC_C\"" - fi - if [[ -n ${CALIBDATASPEC_TPCCMV:-} ]]; then - add_semicolon_separated DATASPEC_LIST "\"$CALIBDATASPEC_TPCCMV\"" - fi - if [[ -n ${CALIBDATASPEC_TPCSAC:-} ]]; then - add_semicolon_separated DATASPEC_LIST "\"$CALIBDATASPEC_TPCSAC\"" + if [[ $AGGREGATOR_TASKS == TPC_CMV ]]; then + if [[ -n ${CALIBDATASPEC_TPCCMV:-} ]]; then + add_semicolon_separated DATASPEC_LIST "\"$CALIBDATASPEC_TPCCMV\"" + fi + else + if [[ -n ${CALIBDATASPEC_TPCIDC_A:-} ]]; then + add_semicolon_separated DATASPEC_LIST "\"$CALIBDATASPEC_TPCIDC_A\"" + fi + if [[ -n ${CALIBDATASPEC_TPCIDC_C:-} ]]; then + add_semicolon_separated DATASPEC_LIST "\"$CALIBDATASPEC_TPCIDC_C\"" + fi + if [[ -n ${CALIBDATASPEC_TPCSAC:-} ]]; then + add_semicolon_separated DATASPEC_LIST "\"$CALIBDATASPEC_TPCSAC\"" + fi fi if [[ -z ${O2_TPC_IDC_CMV_IO_THREADS:-} ]]; then O2_TPC_IDC_CMV_IO_THREADS=4; - fi - add_W o2-dpl-raw-proxy "--proxy-name tpcidc --io-threads ${O2_TPC_IDC_CMV_IO_THREADS} --dataspec \"$DATASPEC_LIST\" --sporadic-outputs --channel-config \"$CHANNELS_LIST\" ${TIMEFRAME_SHM_LIMIT+--timeframes-shm-limit} $TIMEFRAME_SHM_LIMIT" "" 0 + fi + add_W o2-dpl-raw-proxy "--proxy-name ${TPC_FLP_CHAN_PREFIX} --io-threads ${O2_TPC_IDC_CMV_IO_THREADS} --dataspec \"$DATASPEC_LIST\" --sporadic-outputs --channel-config \"$CHANNELS_LIST\" ${TIMEFRAME_SHM_LIMIT+--timeframes-shm-limit} $TIMEFRAME_SHM_LIMIT" "" 0 fi elif [[ $AGGREGATOR_TASKS == CALO_TF ]]; then if [[ -n ${CALIBDATASPEC_CALO_TF:-} ]]; then @@ -308,7 +321,10 @@ nTFs=$((1000 * 128 / ${NHBPERTF})) nTFs_SAC=$((10000 * 128 / ${NHBPERTF})) nBuffer=$((100 * 128 / ${NHBPERTF})) nBuffer_cmv=$((50 * 128 / ${NHBPERTF})) -lanesCMVaggregate=${O2_TPC_CMV_AGGREGATE_NLANES:-8} +lanesCMVaggregate=${O2_TPC_CMV_AGGREGATE_NLANES:-4} +lanesCMVdistribute=${O2_TPC_CMV_DISTRIBUTE_NLANES:-2} +cmvCompression=${O2_TPC_CMV_COMPRESSION:---use-sparse --cmv-zero-threshold 1.0 --cmv-dynamic-precision-mean 1.0 --cmv-dynamic-precision-sigma 8.0 --use-compression-huffman} +cmvTimeframes=${O2_TPC_CMV_TIMEFRAMES:-4000} IDC_DELTA="--disable-IDCDelta true" # off by default # deltas are on by default; you need to request explicitly to switch them off; if [[ "${DISABLE_IDC_DELTA:-}" == "1" ]]; then IDC_DELTA=""; fi @@ -316,24 +332,25 @@ if [[ "${ENABLE_IDC_DELTA_FILE:-}" == "1" ]]; then IDC_DELTA+=" --dump-IDCDelta- if [[ "${DISABLE_IDC_PAD_MAP_WRITING:-}" == 1 ]]; then TPC_WRITING_PAD_STATUS_MAP=""; else TPC_WRITING_PAD_STATUS_MAP="--enableWritingPadStatusMap true"; fi -if ! workflow_has_parameter CALIB_LOCAL_INTEGRATED_AGGREGATOR && [[ $AGGREGATOR_TASKS == TPC_IDCBOTH_SAC || $AGGREGATOR_TASKS == ALL ]]; then - if [[ $CALIB_TPC_IDC == 1 ]]; then - add_W o2-tpc-idc-distribute "--crus ${crus} --timeframes ${nTFs} --output-lanes ${lanesFactorize} --send-precise-timestamp true --condition-tf-per-query ${nTFs} --n-TFs-buffer ${nBuffer}" - add_W o2-tpc-idc-factorize "--n-TFs-buffer ${nBuffer} --input-lanes ${lanesFactorize} --crus ${crus} --timeframes ${nTFs} --nthreads-grouping ${threadFactorize} --nthreads-IDC-factorization ${threadFactorize} --sendOutputFFT true --enable-CCDB-output true --enablePadStatusMap true ${TPC_WRITING_PAD_STATUS_MAP} --use-precise-timestamp true $IDC_DELTA" "TPCIDCGroupParam.groupPadsSectorEdges=32211" - add_W o2-tpc-idc-ft-aggregator "--rangeIDC 200 --inputLanes ${lanesFactorize} --nFourierCoeff 40 --nthreads 8" - fi - if [[ $CALIB_TPC_CMV == 1 ]]; then - if [[ -z ${O2_TPC_CMV_COMPRESSION:-} ]]; then O2_TPC_CMV_COMPRESSION="--use-sparse --cmv-zero-threshold 1.0 --cmv-dynamic-precision-mean 1.0 --cmv-dynamic-precision-sigma 8.0 --use-compression-huffman"; fi - if [[ -z ${O2_TPC_CMV_TIMEFRAMES:-} ]]; then O2_TPC_CMV_TIMEFRAMES="2000"; fi - add_W o2-tpc-cmv-distribute "--crus ${crus} --lanes 1 --output-lanes ${lanesCMVaggregate} --n-TFs-buffer ${nBuffer_cmv} --timeframes ${O2_TPC_CMV_TIMEFRAMES} --send-precise-timestamp " - add_W o2-tpc-cmv-aggregate "--crus ${crus} --input-lanes ${lanesCMVaggregate} --n-TFs-buffer ${nBuffer_cmv} --nthreads-compression 4 --timeframes ${O2_TPC_CMV_TIMEFRAMES} --use-precise-timestamp ${O2_TPC_CMV_COMPRESSION} --output-dir $CALIB_DIR --meta-output-dir $EPN2EOS_METAFILES_DIR " - fi - if [[ $CALIB_TPC_SAC == 1 ]]; then - add_W o2-tpc-sac-distribute "--timeframes ${nTFs_SAC} --output-lanes 1 " - add_W o2-tpc-sac-factorize "--timeframes ${nTFs_SAC} --nthreads-SAC-factorization 4 --input-lanes 1 --compression 2" - add_W o2-tpc-idc-ft-aggregator "--rangeIDC 200 --nFourierCoeff 40 --process-SACs true --inputLanes 1" +if ! workflow_has_parameter CALIB_LOCAL_INTEGRATED_AGGREGATOR; then + if [[ $AGGREGATOR_TASKS == TPC_IDCBOTH_SAC || $AGGREGATOR_TASKS == ALL ]]; then + if [[ $CALIB_TPC_IDC == 1 ]]; then + add_W o2-tpc-idc-distribute "--crus ${crus} --timeframes ${nTFs} --output-lanes ${lanesFactorize} --send-precise-timestamp true --condition-tf-per-query ${nTFs} --n-TFs-buffer ${nBuffer}" + add_W o2-tpc-idc-factorize "--n-TFs-buffer ${nBuffer} --input-lanes ${lanesFactorize} --crus ${crus} --timeframes ${nTFs} --nthreads-grouping ${threadFactorize} --nthreads-IDC-factorization ${threadFactorize} --sendOutputFFT true --enable-CCDB-output true --enablePadStatusMap true ${TPC_WRITING_PAD_STATUS_MAP} --use-precise-timestamp true $IDC_DELTA" "TPCIDCGroupParam.groupPadsSectorEdges=32211" + add_W o2-tpc-idc-ft-aggregator "--rangeIDC 200 --inputLanes ${lanesFactorize} --nFourierCoeff 40 --nthreads 8" + fi + if [[ $CALIB_TPC_SAC == 1 ]]; then + add_W o2-tpc-sac-distribute "--timeframes ${nTFs_SAC} --output-lanes 1 " + add_W o2-tpc-sac-factorize "--timeframes ${nTFs_SAC} --nthreads-SAC-factorization 4 --input-lanes 1 --compression 2" + add_W o2-tpc-idc-ft-aggregator "--rangeIDC 200 --nFourierCoeff 40 --process-SACs true --inputLanes 1" + fi + elif [[ $AGGREGATOR_TASKS == TPC_CMV || $AGGREGATOR_TASKS == ALL ]]; then + if [[ $CALIB_TPC_CMV == 1 ]]; then + add_W o2-tpc-cmv-distribute "--crus ${crus} --lanes ${lanesCMVdistribute} --output-lanes ${lanesCMVaggregate} --n-TFs-buffer ${nBuffer_cmv} --timeframes ${cmvTimeframes} --send-precise-timestamp " + add_W o2-tpc-cmv-aggregate "--crus ${crus} --input-lanes ${lanesCMVaggregate} --n-TFs-buffer ${nBuffer_cmv} --nthreads-compression 8 --timeframes ${cmvTimeframes} --use-precise-timestamp ${cmvCompression} --output-dir $CALIB_DIR --meta-output-dir $EPN2EOS_METAFILES_DIR " + CCDB_POPULATOR_UPLOAD_PATH=none + fi fi - [[ $AGGREGATOR_TASKS == TPC_IDCBOTH_SAC ]] && [[ $CALIB_TPC_IDC == 0 && $CALIB_TPC_SAC == 0 && $CALIB_TPC_CMV == 1 ]] && CCDB_POPULATOR_UPLOAD_PATH="none" fi # Calo cal diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index 5ab54f9cf4b43..6351f2236d1a1 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -71,6 +71,50 @@ elif [[ -z ${SYNCRAWMODE:-} ]]; then SYNCRAWMODE=0 fi +# --------------------------------------------------------------------------------------------------------------------- +# build incoming raw inputs specs +define_raw_inputs() +{ + PROXY_INSPEC="dd:FLP/DISTSUBTIMEFRAME/0" + PROXY_IN_N=0 + for i in ${INPUT_DETECTOR_LIST//,/ }; do + if has_detector_flp_processing $i; then + case $i in + TOF) + PROXY_INTYPE="CRAWDATA";; + FT0 | FV0 | FDD) + PROXY_INTYPE="DIGITSBC/0 DIGITSCH/0";; + PHS) + PROXY_INTYPE="CELLS CELLTRIGREC";; + CPV) + PROXY_INTYPE="DIGITS/0 DIGITTRIGREC/0 RAWHWERRORS";; + EMC) + PROXY_INTYPE="CELLS/0 CELLSTRGR/0 DECODERERR";; + CTP) + PROXY_INTYPE="LUMI/0 RAWDATA" + CTP_CONFIG=" --no-lumi " + ;; + *) + echo Input type for detector $i with FLP processing not defined 1>&2 + exit 1;; + esac + else + PROXY_INTYPE=RAWDATA + fi + for j in $PROXY_INTYPE; do + PROXY_INNAME="RAWIN$PROXY_IN_N" + let PROXY_IN_N=$PROXY_IN_N+1 + PROXY_INSPEC+=";$PROXY_INNAME:$i/$j" + done + done + # do we have DPL_RAWTFDUMP_TRIGGER trigger (e.g. TPC/CMVTRIGGER)? If so, add its spec + if has_detector TPC && [[ -n ${DPL_RAWTFDUMP_TRIGGER:-} ]]; then + PROXY_INNAME="RAWIN$PROXY_IN_N" + let PROXY_IN_N=$PROXY_IN_N+1 + PROXY_INSPEC+=";$PROXY_INNAME:${DPL_RAWTFDUMP_TRIGGER}" + fi +} + # --------------------------------------------------------------------------------------------------------------------- # Set some individual workflow arguments depending on configuration GPU_INPUT=zsraw @@ -109,6 +153,7 @@ EVE_OPT=" --jsons-folder $EDJSONS_DIR" : ${ALPIDE_ERR_DUMPS:=} : ${ITSSTAGGERED:=} : ${MFTSTAGGERED:=} +: ${PROXY_INSPEC:=} [[ -z $ALPIDE_ERR_DUMPS ]] && [[ $EPNSYNCMODE == 1 && $RUNTYPE == "PHYSICS" ]] && ALPIDE_ERR_DUMPS=1 || ALPIDE_ERR_DUMPS=0 @@ -483,38 +528,7 @@ if [[ -n $INPUT_DETECTOR_LIST ]]; then add_W o2-raw-tf-reader-workflow "--delay $TFDELAY $TFRAWOPT --loop $TFLOOP $NTIMEFRAMES_CMD --input-data ${TFName} ${INPUT_FILE_COPY_CMD+--copy-cmd} ${INPUT_FILE_COPY_CMD:-} --onlyDet $INPUT_DETECTOR_LIST ${TIMEFRAME_SHM_LIMIT+--timeframes-shm-limit} ${TIMEFRAME_SHM_LIMIT:-}" elif [[ $EXTINPUT == 1 ]]; then PROXY_CHANNEL="name=readout-proxy,type=pull,method=connect,address=ipc://${UDS_PREFIX}${INRAWCHANNAME},transport=shmem,rateLogging=$EPNSYNCMODE" - PROXY_INSPEC="dd:FLP/DISTSUBTIMEFRAME/0" - PROXY_IN_N=0 - for i in ${INPUT_DETECTOR_LIST//,/ }; do - if has_detector_flp_processing $i; then - case $i in - TOF) - PROXY_INTYPE="CRAWDATA";; - FT0 | FV0 | FDD) - PROXY_INTYPE="DIGITSBC/0 DIGITSCH/0";; - PHS) - PROXY_INTYPE="CELLS CELLTRIGREC";; - CPV) - PROXY_INTYPE="DIGITS/0 DIGITTRIGREC/0 RAWHWERRORS";; - EMC) - PROXY_INTYPE="CELLS/0 CELLSTRGR/0 DECODERERR";; - CTP) - PROXY_INTYPE="LUMI/0 RAWDATA" - CTP_CONFIG=" --no-lumi " - ;; - *) - echo Input type for detector $i with FLP processing not defined 1>&2 - exit 1;; - esac - else - PROXY_INTYPE=RAWDATA - fi - for j in $PROXY_INTYPE; do - PROXY_INNAME="RAWIN$PROXY_IN_N" - let PROXY_IN_N=$PROXY_IN_N+1 - PROXY_INSPEC+=";$PROXY_INNAME:$i/$j" - done - done + define_raw_inputs [[ -n ${TIMEFRAME_RATE_LIMIT:-} ]] && [[ $TIMEFRAME_RATE_LIMIT != 0 ]] && PROXY_CHANNEL+=";name=metric-feedback,type=pull,method=connect,address=ipc://${UDS_PREFIX}metric-feedback-${O2JOBID:-$NUMAID},transport=shmem,rateLogging=0" if [[ $EPNSYNCMODE == 1 ]]; then RAWPROXY_CONFIG="--print-input-sizes 1000" @@ -544,8 +558,18 @@ if [[ -z ${WORKFLOW_DETECTORS_USE_GLOBAL_READER_TRACKS} ]] && [[ -z ${WORKFLOW_D fi # --------------------------------------------------------------------------------------------------------------------- -# Raw decoder workflows - disabled in async mode + if [[ $CTFINPUT == 0 && $DIGITINPUT == 0 ]]; then +# Check if raw TF data dump was requested, RAWTF_DUMPRATE must be in % + if [[ ${DPL_RAWTFDUMP:-} == 1 ]]; then + [[ -z ${PROXY_INSPEC} ]] && define_raw_inputs + CONFIG_RAWTFDUMP="--dataspec \"${PROXY_INSPEC}\" --output-dir \"${RAWTF_DIR:-$CTF_DIR}\" --meta-output-dir \"${EPN2EOS_METAFILES_DIR}\" --max-dump-rate ${RAWTF_DUMPRATE:-0.1} " + CONFIG_RAWTFDUMP+=" --min-file-size ${RAWTF_MINSIZE:-$CTF_MINSIZE} --max-tf-per-file ${RAWTF_MAX_PER_FILE:-$CTF_MAX_PER_FILE} --mute-warn-period ${RAWTF_MUTE_PERIOD:-200} --max-warn ${RAWTF_MAX_WARN:-5} " + [[ -n ${DPL_RAWTFDUMP_TRIGGER:-} ]] && CONFIG_RAWTFDUMP+=" --triggerspec \"DMPTRG:${DPL_RAWTFDUMP_TRIGGER}\" " + add_W o2-raw-tf-dump-workflow "$CONFIG_RAWTFDUMP" + fi + +# Raw decoder workflows - disabled in async mode if has_detector TPC && [[ "${TPC_CONVERT_LINKZS_TO_RAW:-}" == "1" ]]; then GPU_INPUT=zsonthefly RAWTODIGITOPTIONS=