diff --git a/Common/Utils/include/CommonUtils/NameConf.h b/Common/Utils/include/CommonUtils/NameConf.h
index fb10f929c9782..8d4c0a2c1c4f8 100644
--- a/Common/Utils/include/CommonUtils/NameConf.h
+++ b/Common/Utils/include/CommonUtils/NameConf.h
@@ -100,6 +100,9 @@ class NameConf : public o2::conf::ConfigurableParamHelper<NameConf>
   // CTF Dictionary
   static std::string getCTFDictFileName();
 
+  // O2 Raw TF Filename
+  static std::string getRawTFFileName(uint32_t run, uint32_t orb, uint32_t id, const std::string& host, const std::string_view prefix = "o2_rawtf_dump");
+
   // Default CCDB server
   static std::string getCCDBServer();
 
diff --git a/Common/Utils/src/NameConf.cxx b/Common/Utils/src/NameConf.cxx
index 45646284a878b..48cefacaf14c7 100644
--- a/Common/Utils/src/NameConf.cxx
+++ b/Common/Utils/src/NameConf.cxx
@@ -95,6 +95,11 @@ std::string NameConf::getCTFFileName(uint32_t run, uint32_t orb, uint32_t id, co
   return o2::utils::Str::concat_string(prefix, '_', fmt::format("run{:08d}_orbit{:010d}_tf{:010d}_{}", run, orb, id, host), ".root");
 }
 
+std::string NameConf::getRawTFFileName(uint32_t run, uint32_t orb, uint32_t id, const std::string& host, const std::string_view prefix)
+{
+  return o2::utils::Str::concat_string(prefix, '_', fmt::format("run{:08d}_orbit{:010d}_tf{:010d}_{}", run, orb, id, host), ".tf");
+}
+
 std::string NameConf::getCTFDictFileName()
 {
   return o2::utils::Str::concat_string(CTFDICT, ".root");
diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/CMV.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/CMV.h
index 109eff2654466..8195b3e39c689 100644
--- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/CMV.h
+++ b/DataFormats/Detectors/TPC/include/DataFormatsTPC/CMV.h
@@ -85,11 +85,13 @@ struct Data {
     return positive ? magnitude : -magnitude;
   }
 
-  // Encode from float: clamps magnitude to 15 bits, range ±255.992
+  // Encode from float: truncates magnitude to 15 bits, range ±255.992
   void setCMVFloat(float value)
   {
     const bool positive = (value >= 0.f);
-    const uint16_t magnitude = static_cast<uint16_t>(std::abs(value) * 128.f + 0.5f) & 0x7FFF;
+    const uint16_t magnitude = static_cast<uint16_t>(
+                                 std::lround(std::abs(value) * 128.f)) &
+                               0x7FFF;
     cmv = (positive ? 0x8000 : 0x0000) | magnitude;
   }
 };
@@ -119,4 +121,4 @@ struct Container {
 
 } // namespace o2::tpc::cmv
 
-#endif
\ No newline at end of file
+#endif
diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h
index f3070d456afb1..7939387bc76a8 100644
--- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h
+++ b/DataFormats/Detectors/TPC/include/DataFormatsTPC/ClusterNative.h
@@ -15,6 +15,7 @@
 #ifndef ALICEO2_DATAFORMATSTPC_CLUSTERNATIVE_H
 #define ALICEO2_DATAFORMATSTPC_CLUSTERNATIVE_H
 #ifndef GPUCA_GPUCODE_DEVICE
+#include <climits>
 #include <cstdint>
 #include <cstddef> // for size_t
 #include <utility>
@@ -62,6 +63,9 @@ struct ClusterNative {
   static constexpr int scalePadPacked = 64;       //< ~60 is needed for 0.1mm precision, but power of two avoids rounding
   static constexpr int scaleSigmaTimePacked = 32; // 1/32nd of pad/timebin precision for cluster size
   static constexpr int scaleSigmaPadPacked = 32;
+  static constexpr int scaleSaturatedQtot = 8;
+  static constexpr int maxRegularQtot = 25 * 1024;
+  static constexpr int maxSaturatedQtot = (USHRT_MAX - maxRegularQtot) * scaleSaturatedQtot;
 
   uint32_t timeFlagsPacked; //< Contains the time in the lower 24 bits in a packed format, contains the flags in the
                             // upper 8 bits
@@ -83,7 +87,14 @@ struct ClusterNative {
   }
 
   GPUd() uint16_t getQmax() const { return qMax; }
-  GPUd() uint16_t getQtot() const { return qTot; }
+  GPUd() uint16_t getQtot() const
+  {
+    if (isSaturated()) [[unlikely]] {
+      auto sQtot = getSaturatedQtot();
+      return sQtot < USHRT_MAX ? sQtot : USHRT_MAX;
+    }
+    return qTot;
+  }
   GPUd() uint8_t getFlags() const { return timeFlagsPacked >> 24; }
   GPUd() uint32_t getTimePacked() const { return timeFlagsPacked & 0xFFFFFF; }
   GPUd() void setTimePackedFlags(uint32_t timePacked, uint8_t flags)
@@ -119,7 +130,13 @@ struct ClusterNative {
   /// Y = (12.4 - 0.5 * (66 - 1)) * 4.16mm = -83.616mm
   GPUd() float getPad() const { return unpackPad(padPacked); }
   GPUd() void setPad(float pad) { padPacked = packPad(pad); }
-  GPUd() float getSigmaTime() const { return float(sigmaTimePacked) * (1.f / scaleSigmaTimePacked); }
+  GPUd() float getSigmaTime() const
+  {
+    if (isSaturated()) [[unlikely]] {
+      return 0;
+    }
+    return float(sigmaTimePacked) * (1.f / scaleSigmaTimePacked);
+  }
   GPUd() void setSigmaTime(float sigmaTime)
   {
     uint32_t tmp = sigmaTime * scaleSigmaTimePacked + 0.5;
@@ -138,6 +155,31 @@ struct ClusterNative {
     sigmaPadPacked = tmp;
   }
 
+  GPUd() bool isSaturated() const { return qTot > maxRegularQtot; }
+
+  GPUd() void setSaturatedQtot(uint32_t qtot)
+  {
+    this->qTot = USHRT_MAX;
+    if (qtot < maxSaturatedQtot) {
+      this->qTot = ((qtot + scaleSaturatedQtot / 2) / scaleSaturatedQtot) + maxRegularQtot;
+    }
+  }
+
+  GPUd() uint32_t getSaturatedQtot() const
+  {
+    return uint32_t(qTot - maxRegularQtot) * scaleSaturatedQtot;
+  }
+
+  GPUd() void setSaturatedTailLength(uint32_t tail)
+  {
+    sigmaTimePacked = encodeTailLength(tail);
+  }
+
+  GPUd() uint32_t getSaturatedTailLength() const
+  {
+    return decodeTailLength(sigmaTimePacked);
+  }
+
   GPUd() bool operator<(const ClusterNative& rhs) const
   {
     if (this->getTimePacked() != rhs.getTimePacked()) {
@@ -167,6 +209,93 @@ struct ClusterNative {
            this->qTot == rhs.qTot &&
            this->getFlags() == rhs.getFlags();
   }
+
+ private:
+  static constexpr GPUd() uint32_t decodeTailLength(uint8_t code)
+  {
+    // Quantize tail length into 8bits.
+    // Max expected length is 1500 tbs.
+    // But allow outliers up to 8000 tbs.
+    //
+    // Full code layout is:
+    //
+    // | Code range | Decoded values |  Step | Codes |
+    // | ---------: | -------------: | ----: | ----: |
+    // |    `0..63` |        `0..63` |   `1` |  `64` |
+    // |   `64..95` |      `64..126` |   `2` |  `32` |
+    // |  `96..127` |     `128..252` |   `4` |  `32` |
+    // | `128..159` |     `256..504` |   `8` |  `32` |
+    // | `160..223` |    `512..1520` |  `16` |  `64` |
+    // | `224..239` |   `1552..2032` |  `32` |  `16` |
+    // | `240..255` |   `2048..8048` | `400` |  `16` |
+    //
+
+    if (code < 64) {
+      return code;
+    }
+
+    if (code < 160) {
+      uint32_t q = (uint32_t)code - 64u;
+      uint32_t exponent = (q >> 5) + 1u; // 1, 2, 3
+      uint32_t mantissa = q & 31u;       // 0..31
+
+      return (32u + mantissa) << exponent;
+    }
+
+    if (code < 224) {
+      return 512u + 16u * ((uint32_t)code - 160u);
+    }
+
+    if (code < 240) {
+      return 1552u + 32u * ((uint32_t)code - 224u);
+    }
+
+    return 2048u + 400u * ((uint32_t)code - 240u);
+  }
+
+  static constexpr GPUd() uint8_t encodeTailLength(uint32_t value)
+  {
+    // Saturate above representable range.
+    if (value >= decodeTailLength(255)) [[unlikely]] {
+      return 255;
+    }
+
+    // Binary search for the first code whose decoded value >= value.
+    uint8_t lo = 0;
+    uint8_t hi = 255;
+
+    while (lo < hi) {
+      uint8_t mid = lo + ((hi - lo) >> 1);
+      uint32_t decoded = decodeTailLength(mid);
+
+      if (decoded < value) {
+        lo = mid + 1;
+      } else {
+        hi = mid;
+      }
+    }
+
+    // lo is now the first code with decoded >= value.
+    if (lo == 0) [[unlikely]] {
+      return 0;
+    }
+
+    uint8_t above_code = lo;
+    uint8_t below_code = lo - 1;
+
+    uint32_t above_value = decodeTailLength(above_code);
+    uint32_t below_value = decodeTailLength(below_code);
+
+    uint32_t above_error = above_value - value;
+    uint32_t below_error = value - below_value;
+
+    // Tie-break downward.
+    if (below_error <= above_error) {
+      return below_code;
+    } else {
+      return above_code;
+    }
+  }
 };
 
 // This is an index struct to access TPC clusters inside sectors and rows. It shall not own the data, but just point to
diff --git a/Detectors/CTF/workflow/src/CTFWriterSpec.cxx b/Detectors/CTF/workflow/src/CTFWriterSpec.cxx
index 5d6db7d613674..f175bf4c2e5d3 100644
--- a/Detectors/CTF/workflow/src/CTFWriterSpec.cxx
+++ b/Detectors/CTF/workflow/src/CTFWriterSpec.cxx
@@ -310,7 +310,7 @@ size_t CTFWriterSpec::processDet(o2::framework::ProcessingContext& pc, DetID det
   if (det == DetID::ITS) {
     nLayers = mInput.doITSStaggering ? o2::itsmft::DPLAlpideParam<DetID::ITS>::getNLayers() : 1;
   } else if (det == DetID::MFT) {
-    nLayers = mInput.doMFTStaggering ? o2::itsmft::DPLAlpideParam<DetID::ITS>::getNLayers() : 1;
+    nLayers = mInput.doMFTStaggering ? o2::itsmft::DPLAlpideParam<DetID::MFT>::getNLayers() : 1;
   }
   for (uint32_t iLayer = 0; iLayer < nLayers; iLayer++) {
     auto binding = getBinding(det.getName(), iLayer);
@@ -431,7 +431,7 @@ size_t CTFWriterSpec::estimateCTFSize(ProcessingContext& pc)
     if (det == DetID::ITS) {
       nLayers = mInput.doITSStaggering ? o2::itsmft::DPLAlpideParam<DetID::ITS>::getNLayers() : 1;
     } else if (det == DetID::MFT) {
-      nLayers = mInput.doMFTStaggering ? o2::itsmft::DPLAlpideParam<DetID::ITS>::getNLayers() : 1;
+      nLayers = mInput.doMFTStaggering ? o2::itsmft::DPLAlpideParam<DetID::MFT>::getNLayers() : 1;
     }
     for (uint32_t iLayer = 0; iLayer < nLayers; iLayer++) {
       auto binding = getBinding(det.getName(), iLayer);
@@ -818,7 +818,7 @@ DataProcessorSpec getCTFWriterSpec(const o2::ctf::CTFWriterInp& inp)
       if (det == DetID::ITS) {
         nLayers = inp.doITSStaggering ? o2::itsmft::DPLAlpideParam<DetID::ITS>::getNLayers() : 1;
       } else if (det == DetID::MFT) {
-        nLayers = inp.doMFTStaggering ? o2::itsmft::DPLAlpideParam<DetID::ITS>::getNLayers() : 1;
+        nLayers = inp.doMFTStaggering ? o2::itsmft::DPLAlpideParam<DetID::MFT>::getNLayers() : 1;
       }
       for (uint32_t iLayer = 0; iLayer < nLayers; iLayer++) {
         inputs.emplace_back(CTFWriterSpec::getBinding(det.getName(), iLayer), det.getDataOrigin(), "CTFDATA", iLayer, Lifetime::Timeframe);
diff --git a/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/CTFCoder.h b/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/CTFCoder.h
index 4f9bc90c1c758..76ac8878562de 100644
--- a/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/CTFCoder.h
+++ b/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/CTFCoder.h
@@ -356,8 +356,10 @@ void CTFCoder<N>::decompress(const CompressedClusters& compCl, VROF& rofRecVec,
   assert(chipCount == compCl.header.nChips);
 
   if (clCount != compCl.header.nClusters) {
-    LOG(error) << "expected " << compCl.header.nClusters << " but counted " << clCount << " in ROFRecords";
-    throw std::runtime_error("mismatch between expected and counter number of clusters");
+    LOGP(error, "expected {} but counted {} clusters in {} ROFRecords", compCl.header.nClusters, clCount, compCl.header.nROFs);
+    if (clCount > compCl.header.nClusters) {
+      throw std::runtime_error("mismatch between expected and counter number of clusters");
+    }
   }
 }
 
@@ -456,8 +458,10 @@ void CTFCoder<N>::decompress(const CompressedClusters& compCl, VROF& rofRecVec,
   assert(chipCount == compCl.header.nChips);
 
   if (clCount != compCl.header.nClusters) {
-    LOG(error) << "expected " << compCl.header.nClusters << " but counted " << clCount << " in ROFRecords";
-    throw std::runtime_error("mismatch between expected and counter number of clusters");
+    LOGP(error, "expected {} but counted {} clusters in {} ROFRecords", compCl.header.nClusters, clCount, compCl.header.nROFs);
+    if (clCount > compCl.header.nClusters) {
+      throw std::runtime_error("mismatch between expected and counter number of clusters");
+    }
   }
 }
 
diff --git a/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/ChipMappingMFT.h b/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/ChipMappingMFT.h
index eee9bdbb6a4dc..63d37a25ffbc9 100644
--- a/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/ChipMappingMFT.h
+++ b/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/ChipMappingMFT.h
@@ -73,16 +73,15 @@ class ChipMappingMFT
   ///< total number of RUs
   static constexpr Int_t getNRUs() { return NRUs; }
 
-  ///< get FEEId of the RU (software id of the RU), read via given link
+  ///< get software id of the RU, from first 8 bits of FEEID (HW id of RU)
   uint8_t FEEId2RUSW(uint16_t hw) const { return mFEEId2RUSW[hw & 0xff]; }
 
-  ///< get HW id of the RU (software id of the RU)
+  ///< get FEEID, from software id of the RU and link number
   uint16_t RUSW2FEEId(uint16_t sw, uint16_t linkID = 0) const { return ((linkID << 8) + mRUInfo[sw].idHW); }
 
   ///< compose FEEid for given stave (ru) relative to layer and link, see documentation in the constructor
   uint16_t composeFEEId(uint16_t layer, uint16_t ruOnLayer, uint16_t link) const
   {
-    // only one link is used
     // ruOnLayer is 0, 1, 2, 3 for half = 0
     //              4, 5, 6, 7            1
     auto dhalf = std::div(ruOnLayer, 4);
@@ -114,7 +113,7 @@ class ChipMappingMFT
     face = (feeID >> 2) & 0x1;
   }
 
-  ///< get info on sw RU
+  ///< get info on sw RU corresponding to given FEEID
   const RUInfo* getRUInfoFEEId(Int_t feeID) const { return &mRUInfo[FEEId2RUSW(feeID)]; }
 
   ///< get number of chips served by single cable on given RU type
@@ -123,13 +122,13 @@ class ChipMappingMFT
     return ((0x1 << 7) + (cableHW & 0x1f));
   }
 
-  ///< convert HW cable ID to its position on the ActiveLanes word in the GBT.header for given RU type
+  ///< convert HW cable ID to its position on the ActiveLanes word in the GBT.header for given RU type (note: this position is equal to the HW cable ID)
   uint8_t cableHW2Pos(uint8_t ruType, uint8_t hwid) const { return mCableHW2Pos[ruType][hwid]; }
 
   ///< convert HW cable ID to SW ID for give RU type
   uint8_t cableHW2SW(uint8_t ruType, uint8_t hwid) const { return hwid < mCableHW2SW[ruType].size() ? mCableHW2SW[ruType][hwid] : 0xff; }
 
-  ///< convert cable iterator ID to its position on the ActiveLanes word in the GBT.header for given RU type
+  ///< convert cable iterator ID (i.e. chipOnModule) to its position on the ActiveLanes word in the GBT.header for given RU type (note: this position is equal to the HW cable ID)
   uint8_t cablePos(uint8_t ruType, uint8_t id) const { return mCablePos[ruType][id]; }
 
   ///< get chipID on module from chip global SW ID, cable SW ID and stave (RU) info
@@ -139,7 +138,7 @@ class ChipMappingMFT
     return 0xffff;
   }
 
-  ///< get chip global SW ID from chipID on module, cable SW ID and stave (RU) info
+  ///< get chip global SW ID from cable HW ID and stave (RU) info (note: chOnModuleHW is unused)
   uint16_t getGlobalChipID(uint16_t chOnModuleHW, int cableHW, const RUInfo& ruInfo) const
   {
     auto chipOnRU = cableHW2SW(ruInfo.ruType, cableHW);
@@ -393,11 +392,11 @@ class ChipMappingMFT
 
  private:
   Int_t invalid() const;
-  static constexpr Int_t NRUs = NLayers * NZonesPerLayer;
+  static constexpr Int_t NRUs = NLayers * NZonesPerLayer; // 10 layers * 8 zones per layer
   static constexpr Int_t NModules = 280;
   static constexpr Int_t NChipsInfo = 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 16 + 17 + 18 + 19 + 14;
   static constexpr Int_t NChipsPerCable = 1;
-  static constexpr Int_t NLinks = 1;
+  static constexpr Int_t NLinks = 3;
   static constexpr Int_t NConnectors = 5;
   static constexpr Int_t NMaxChipsPerLadder = 5;
   static constexpr Int_t NRUCables = 25;
diff --git a/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/Clusterer.h b/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/Clusterer.h
index 0bdbb701a9356..dd3052e2cc5bd 100644
--- a/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/Clusterer.h
+++ b/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/Clusterer.h
@@ -236,6 +236,8 @@ class Clusterer
   ///< load the dictionary of cluster topologies
   void loadDictionary(const std::string& fileName) { mPattIdConverter.loadDictionary(fileName); }
   void setDictionary(const TopologyDictionary* dict) { mPattIdConverter.setDictionary(dict); }
+  const TopologyDictionary& getDictionary() const { return mPattIdConverter.getDictionary(); }
+  auto& getPattIdConverter() const { return mPattIdConverter; }
 
   TStopwatch& getTimer() { return mTimer; }           // cannot be const
   TStopwatch& getTimerMerge() { return mTimerMerge; } // cannot be const
diff --git a/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/LookUp.h b/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/LookUp.h
index 3537a1f408886..4f84a838efc70 100644
--- a/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/LookUp.h
+++ b/Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/LookUp.h
@@ -43,7 +43,7 @@ class LookUp
   bool isGroup(int id) const { return mDictionary.isGroup(id); }
   int size() const { return mDictionary.getSize(); }
   auto getPattern(int id) const { return mDictionary.getPattern(id); }
-  auto getDictionaty() const { return mDictionary; }
+  auto& getDictionary() const { return mDictionary; }
 
  private:
   TopologyDictionary mDictionary;
diff --git a/Detectors/ITSMFT/common/reconstruction/src/ChipMappingMFT.cxx b/Detectors/ITSMFT/common/reconstruction/src/ChipMappingMFT.cxx
index de2358469e894..b79c529bef803 100644
--- a/Detectors/ITSMFT/common/reconstruction/src/ChipMappingMFT.cxx
+++ b/Detectors/ITSMFT/common/reconstruction/src/ChipMappingMFT.cxx
@@ -1624,7 +1624,7 @@ ChipMappingMFT::ChipMappingMFT()
 {
   // init chips info
 
-  uint32_t maxRUHW = composeFEEId(NLayers - 1, NZonesPerLayer - 1, NLinks - 1); // Max possible FEE ID
+  uint32_t maxRUHW = composeFEEId(NLayers - 1, NZonesPerLayer - 1, 0); // Max possible RU HW ID (first 8 bits of max FEEID, while link stored in 9th and 10th bit of FEEID)
   mFEEId2RUSW.resize(maxRUHW + 1, 0xff);
 
   int curLayer = -1, curZone = -1, curHalf = -1;
@@ -1698,8 +1698,8 @@ ChipMappingMFT::ChipMappingMFT()
       auto& ruInfo = mRUInfo[ctrRU];
       ruInfo.idSW = ctrRU++;
 
-      // map FEEIds (RU read out by at most 3 GBT links) to SW ID
-      ruInfo.idHW = composeFEEId(iLayer, iZone, 0); // FEEId for link 0
+      // map RU HW ID (RU read out by at most 3 GBT links) to SW ID
+      ruInfo.idHW = composeFEEId(iLayer, iZone, 0); // RU HW ID (first 8 bits of FEEID)
       mFEEId2RUSW[ruInfo.idHW] = ruInfo.idSW;
       ruInfo.layer = iLayer;
       ruInfo.ruType = ZoneRUType[iZone % 4][iLayer / 2];
diff --git a/Detectors/ITSMFT/common/reconstruction/src/RawPixelDecoder.cxx b/Detectors/ITSMFT/common/reconstruction/src/RawPixelDecoder.cxx
index 7158551e02e20..874c432b097ce 100644
--- a/Detectors/ITSMFT/common/reconstruction/src/RawPixelDecoder.cxx
+++ b/Detectors/ITSMFT/common/reconstruction/src/RawPixelDecoder.cxx
@@ -40,8 +40,7 @@ RawPixelDecoder<Mapping>::RawPixelDecoder()
   mTimerDecode.Stop();
   mTimerFetchData.Stop();
   mSelfName = o2::utils::Str::concat_string(Mapping::getName(), "Decoder");
-  DPLRawParser<>::setCheckIncompleteHBF(false);                                                                             // Disable incomplete HBF checking, see ErrPacketCounterJump check in GBTLink.cxx
-  mInputFilter = {InputSpec{"filter", ConcreteDataTypeMatcher{Mapping::getOrigin(), o2::header::gDataDescriptionRawData}}}; // by default take all raw data
+  DPLRawParser<>::setCheckIncompleteHBF(false); // Disable incomplete HBF checking, see ErrPacketCounterJump check in GBTLink.cxx
 }
 
 ///______________________________________________________________
@@ -235,13 +234,9 @@ void RawPixelDecoder<Mapping>::setupLinks(InputRecord& inputs)
   auto nLinks = mGBTLinks.size();
   auto origin = (mUserDataOrigin == o2::header::gDataOriginInvalid) ? mMAP.getOrigin() : mUserDataOrigin;
   auto datadesc = (mUserDataDescription == o2::header::gDataDescriptionInvalid) ? o2::header::gDataDescriptionRawData : mUserDataDescription;
-  if (mUserDataDescription != o2::header::gDataDescriptionInvalid) { // overwrite data filter origin&descriptions with user defined ones if possible
-    for (auto& filt : mInputFilter) {
-      if (std::holds_alternative<o2::framework::ConcreteDataMatcher>(filt.matcher)) {
-        std::get<o2::framework::ConcreteDataMatcher>(filt.matcher).origin = origin;
-        std::get<o2::framework::ConcreteDataMatcher>(filt.matcher).description = datadesc;
-      }
-    }
+
+  if (mInputFilter.empty()) { // if no filter set externally, create a global one from imposed or default origin / description
+    mInputFilter.emplace_back("filter", ConcreteDataTypeMatcher{origin, datadesc});
   }
 
   // if we see requested data type input with 0xDEADBEEF subspec and 0 payload this means that the "delayed message"
diff --git a/Detectors/ITSMFT/common/workflow/include/ITSMFTWorkflow/STFDecoderSpec.h b/Detectors/ITSMFT/common/workflow/include/ITSMFTWorkflow/STFDecoderSpec.h
index 29b9f75bcbc4e..8c16759e16726 100644
--- a/Detectors/ITSMFT/common/workflow/include/ITSMFTWorkflow/STFDecoderSpec.h
+++ b/Detectors/ITSMFT/common/workflow/include/ITSMFTWorkflow/STFDecoderSpec.h
@@ -75,7 +75,9 @@ class STFDecoder : public Task
   void finalize();
   void reset();
   std::unique_ptr<o2::itsmft::Clusterer> setupClusterer(const std::string& dictName);
-  void ensureContinuousROF(const std::vector<ROFRecord>& in, std::vector<ROFRecord>& out, int lr, int nROFsTF, const char* name);
+  bool ensureContinuousROF(const std::vector<ROFRecord>& in, std::vector<ROFRecord>& out, int lr, int nROFsTF, const char* name);
+  void rectifyDigits(std::vector<ROFRecord>& rofVec, std::vector<Digit>& digVec);
+  void rectifyClusters(std::vector<ROFRecord>& rofVec, std::vector<CompClusterExt>& clusVec, std::vector<unsigned char>& pattVec);
 
   TStopwatch mTimer;
   bool mDoClusters = false;
@@ -90,6 +92,8 @@ class STFDecoder : public Task
   bool mUseClusterDictionary = true;
   bool mVerifyDecoder = false;
   bool mDumpFrom1stPipeline = false;
+  bool mRunEnsureContinuousROF = true;
+  bool mDisableRectifyContinuousROF = false;
   int mDumpOnError = 0;
   int mNThreads = 1;
   int mVerbosity = 0;
diff --git a/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx b/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx
index 8fb6ba4e6aa97..79372160c6ade 100644
--- a/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx
+++ b/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx
@@ -63,6 +63,7 @@ STFDecoder<Mapping>::STFDecoder(const STFDecoderInp& inp, std::shared_ptr<o2::ba
 template <class Mapping>
 void STFDecoder<Mapping>::init(InitContext& ic)
 {
+  int lane = ic.services().get<const o2::framework::DeviceSpec>().inputTimesliceId;
   o2::base::GRPGeomHelper::instance().setRequest(mGGCCDBRequest);
   try {
     auto v0 = o2::utils::Str::tokenize(mInputSpec, ':');
@@ -72,11 +73,22 @@ void STFDecoder<Mapping>::init(InitContext& ic)
     header::DataDescription dataDesc;
     dataOrig.runtimeInit(v1[0].c_str());
     dataDesc.runtimeInit(v2[0].c_str());
+    Mapping map;
     for (int iLayer{0}; iLayer < mLayers; ++iLayer) {
       auto& dec = mDecoder.emplace_back(std::make_unique<RawPixelDecoder<Mapping>>());
       dec->setUserDataOrigin(dataOrig);
       dec->setUserDataDescription(dataDesc);
       dec->init(); // is this no-op?
+
+      if (mDoStaggering) {
+        std::vector<o2::framework::InputSpec> filter;
+        for (const auto feeID : map.getLayer2FEEIDs(iLayer)) {
+          filter.emplace_back("filter", ConcreteDataMatcher{dataOrig, dataDesc, (o2::header::DataHeader::SubSpecificationType)feeID});
+        }
+        dec->setInputFilter(filter);
+      } else {
+        dec->setInputFilter({InputSpec{"filter", ConcreteDataTypeMatcher(dataOrig, dataDesc)}});
+      }
     }
   } catch (const std::exception& e) {
     LOG(error) << "exception was thrown in decoder creation: " << e.what();
@@ -126,22 +138,36 @@ void STFDecoder<Mapping>::init(InitContext& ic)
     LOG(error) << "non-std::exception was thrown in decoder configuration";
     throw;
   }
+  if (mDoCalibData) {
+    std::string warnMsg;
+    bool enforceEnsureContinuousROFinCalib = ic.options().get<bool>("enforce-continuous-rof-with-calib");
+    if (ic.options().get<bool>("enforce-continuous-rof-with-calib")) {
+      warnMsg = "Calibration data requested but the ensureContinuousROF is explicitly enforced!";
+    } else {
+      mRunEnsureContinuousROF = false;
+      warnMsg = "Calibration data requested, disabling ensureContinuousROF!";
+    }
+    if (lane == 0) {
+      LOGP(alarm, "{}", warnMsg);
+    } else {
+      LOGP(info, "{}", warnMsg);
+    }
+  }
+
+  mDisableRectifyContinuousROF = ic.options().get<bool>("disable-rectify-continuous-rof");
+  if (mDisableRectifyContinuousROF && mRunEnsureContinuousROF) {
+    std::string warnMsg = "Rectification of clusters/digits is explicitly disabled after the ensureContinuousROF!";
+    if (lane == 0) {
+      LOGP(alarm, "{}", warnMsg);
+    } else {
+      LOGP(info, "{}", warnMsg);
+    }
+  }
 
   if (mDoClusters) {
     mClusterer = std::make_unique<Clusterer>();
     mClusterer->setNChips(Mapping::getNChips());
   }
-
-  if (mDoStaggering) {
-    Mapping map;
-    for (uint32_t iLayer{0}; iLayer < mLayers; ++iLayer) {
-      std::vector<o2::framework::InputSpec> filter;
-      for (const auto feeID : map.getLayer2FEEIDs(iLayer)) {
-        filter.emplace_back("filter", ConcreteDataMatcher{Mapping::getOrigin(), o2::header::gDataDescriptionRawData, (o2::header::DataHeader::SubSpecificationType)feeID});
-      }
-      mDecoder[iLayer]->setInputFilter(filter);
-    }
-  }
 }
 
 ///_______________________________________
@@ -258,21 +284,29 @@ void STFDecoder<Mapping>::run(ProcessingContext& pc)
       }
     }
     if (mDoDigits) {
+      std::vector<o2::itsmft::ROFRecord> expDigRofVec;
+      if (ensureContinuousROF(digROFVec, expDigRofVec, iLayer, nROFsTF, "digits") && !mDisableRectifyContinuousROF) {
+        auto oldNDig = digVec.size();
+        rectifyDigits(expDigRofVec, digVec);
+        LOGP(warn, "Rectified {} digits out of original {} on layer {} following ensureContinuousROF", digVec.size(), oldNDig, iLayer);
+      }
       pc.outputs().snapshot(Output{orig, "DIGITS", iLayer}, digVec);
-      std::vector<o2::itsmft::ROFRecord> expDigRofVec(nROFsTF);
-      ensureContinuousROF(digROFVec, expDigRofVec, iLayer, nROFsTF, "digits");
-      pc.outputs().snapshot(Output{orig, "DIGITSROF", iLayer}, digROFVec);
+      pc.outputs().snapshot(Output{orig, "DIGITSROF", iLayer}, expDigRofVec);
       mEstNDig[iLayer] = std::max(mEstNDig[iLayer], size_t(digVec.size() * 1.2));
       if (mDoCalibData) {
         pc.outputs().snapshot(Output{orig, "GBTCALIB", iLayer}, calVec);
         mEstNCalib[iLayer] = std::max(mEstNCalib[iLayer], size_t(calVec.size() * 1.2));
       }
-      LOG(debug) << mSelfName << " Decoded " << digVec.size() << " Digits in " << digROFVec.size() << " ROFs" << ((mDoStaggering) ? std::format(" on layer {}", iLayer) : "");
+      LOG(debug) << mSelfName << " Decoded " << digVec.size() << " Digits in " << expDigRofVec.size() << " ROFs" << ((mDoStaggering) ? std::format(" on layer {}", iLayer) : "");
     }
 
     if (mDoClusters) { // we are not obliged to create vectors which are not requested, but other devices might not know the options of this one
-      std::vector<o2::itsmft::ROFRecord> expClusRofVec(nROFsTF);
-      ensureContinuousROF(clusROFVec, expClusRofVec, iLayer, nROFsTF, "clusters");
+      std::vector<o2::itsmft::ROFRecord> expClusRofVec;
+      if (ensureContinuousROF(clusROFVec, expClusRofVec, iLayer, nROFsTF, "clusters") && !mDisableRectifyContinuousROF) {
+        auto oldNClus = clusCompVec.size(), oldNPatt = clusPattVec.size();
+        rectifyClusters(expClusRofVec, clusCompVec, clusPattVec);
+        LOGP(warn, "Rectified {} clusters and {} patterns out of original {} and {} on layer {} following ensureContinuousROF", clusCompVec.size(), clusPattVec.size(), oldNClus, oldNPatt, iLayer);
+      }
       pc.outputs().snapshot(Output{orig, "COMPCLUSTERS", iLayer}, clusCompVec);
       pc.outputs().snapshot(Output{orig, "PATTERNS", iLayer}, clusPattVec);
       pc.outputs().snapshot(Output{orig, "CLUSTERSROF", iLayer}, expClusRofVec);
@@ -416,8 +450,12 @@ void STFDecoder<Mapping>::reset()
 
 ///_______________________________________
 template <class Mapping>
-void STFDecoder<Mapping>::ensureContinuousROF(const std::vector<ROFRecord>& rofVec, std::vector<ROFRecord>& expROFVec, int lr, int nROFsTF, const char* name)
+bool STFDecoder<Mapping>::ensureContinuousROF(const std::vector<ROFRecord>& rofVec, std::vector<ROFRecord>& expROFVec, int lr, int nROFsTF, const char* name)
 {
+  if (!mRunEnsureContinuousROF) {
+    expROFVec = rofVec;
+    return false;
+  }
   const auto& par = AlpideParam::Instance();
   // ensure that the rof output is continuous
   // we will preserve the digits/clusters as they are but the stray ROFs will be removed (leaving their clusters/digits unaddressed).
@@ -465,13 +503,82 @@ void STFDecoder<Mapping>::ensureContinuousROF(const std::vector<ROFRecord>& rofV
       }
     }
   }
-  int prevFirst{0};
+  int prevLast{0};
+  bool reReference = false; // in case a non-last ROF with non-0 entries is removed, ROF references need to be shifted and clusters/digits rewritten
   for (auto& rof : expROFVec) {
     if (rof.getFirstEntry() < 0) {
-      rof.setFirstEntry(prevFirst);
+      rof.setFirstEntry(prevLast);
+    } else if (rof.getFirstEntry() != prevLast) {
+      reReference = true; // there is jump
+    }
+    prevLast = rof.getFirstEntry() + rof.getNEntries();
+  }
+  return reReference;
+}
+
+///_______________________________________
+template <class Mapping>
+void STFDecoder<Mapping>::rectifyDigits(std::vector<ROFRecord>& rofVec, std::vector<Digit>& digVec)
+{
+  // following ensureContinuousROF call some old ROFs might have been dropped, need to rebuild digits vector and rereference ROF
+  std::vector<Digit> digVecTmp;
+  digVecTmp.reserve(digVec.size());
+  auto beg0 = digVec.begin();
+  for (auto& rof : rofVec) {
+    int firstEntry = digVecTmp.size();
+    if (rof.getNEntries()) {
+      auto beg = beg0 + rof.getFirstEntry(), end = beg + rof.getNEntries();
+      std::copy(beg, end, std::back_inserter(digVecTmp));
+    }
+    rof.setFirstEntry(firstEntry);
+  }
+  digVec.swap(digVecTmp);
+}
+
+///_______________________________________
+template <class Mapping>
+void STFDecoder<Mapping>::rectifyClusters(std::vector<ROFRecord>& rofVec, std::vector<CompClusterExt>& clusVec, std::vector<unsigned char>& pattVec)
+{
+  // following ensureContinuousROF call some old ROFs might have been dropped, need to rebuild clusters and patterns vectors and rereference ROF
+  std::vector<CompClusterExt> clusVecTmp;
+  clusVecTmp.reserve(clusVec.size());
+  std::vector<unsigned char> pattVecTmp;
+  pattVecTmp.reserve(pattVec.size());
+  const auto& dict = mClusterer->getDictionary();
+  auto begCl0 = clusVec.begin(), begClForPatt = begCl0;
+  auto pattIt = pattVec.begin();
+
+  auto skipToLastPattern = [&begClForPatt, &pattIt, &dict](const decltype(begCl0) tgt) {
+    while (begClForPatt < tgt) { // iterate clusters skipping their patterns until we reach targed cluster
+      const auto& clp = *begClForPatt;
+      auto pattID = clp.getPatternID();
+      if (pattID == itsmft::CompCluster::InvalidPatternID || dict.isGroup(pattID)) {
+        ClusterPattern::skipPattern(pattIt);
+      }
+      begClForPatt++;
+    }
+  };
+
+  for (auto& rof : rofVec) {
+    int firstEntry = clusVecTmp.size();
+    if (rof.getNEntries()) {
+      auto begClROF = begCl0 + rof.getFirstEntry(), endClROF = begClROF + rof.getNEntries(); // clusters to copy start/end here
+      if (mDoPatterns) {
+        if (begClForPatt > begClROF) { // normally should no happen unless original ROFs were not ordered
+          begClForPatt = begCl0;       // start from the beginning
+        }
+        skipToLastPattern(begClROF); // iterate clusters skipping their patterns until we reach the 1st cluster to be copied
+        auto begPattToCopy = pattIt; // the 1st pattern corresponding to the needed ROF
+        skipToLastPattern(endClROF); // iterate clusters skipping their patterns until we reach the last cluster to be copied
+        std::copy(begPattToCopy, pattIt, std::back_inserter(pattVecTmp));
+      }
+      std::copy(begClROF, endClROF, std::back_inserter(clusVecTmp));
     }
-    prevFirst = rof.getFirstEntry();
+    // copy patterns corresponding to this ROF
+    rof.setFirstEntry(firstEntry);
   }
+  clusVec.swap(clusVecTmp);
+  pattVec.swap(pattVecTmp);
 }
 
 ///_______________________________________
@@ -544,6 +651,8 @@ DataProcessorSpec getSTFDecoderSpec(const STFDecoderInp& inp)
       {"unmute-extra-lanes", VariantType::Bool, false, {"allow extra lanes to be as verbose as 1st one"}},
       {"allow-empty-rofs", VariantType::Bool, false, {"record ROFs w/o any hit"}},
       {"ignore-noise-map", VariantType::Bool, false, {"do not mask pixels flagged in the noise map"}},
+      {"enforce-continuous-rof-with-calib", VariantType::Bool, false, {"enforce ensureContinuousROF call even when calibration data is requested (not recommended)"}},
+      {"disable-rectify-continuous-rof", VariantType::Bool, false, {"do not rectify clusters and digits after ensureContinuousROF (not recommended)"}},
       {"accept-rof-rampup-data", VariantType::Bool, false, {"do not discard data during ROF ramp up"}},
       {"rof-length-error-freq", VariantType::Float, 60.f, {"do not report ROF length error more frequently than this value, disable if negative"}},
       {"ignore-cluster-dictionary", VariantType::Bool, false, {"do not use cluster dictionary, always store explicit patterns"}}}};
diff --git a/Detectors/Raw/README.md b/Detectors/Raw/README.md
index 557245030b980..1fece239723ec 100644
--- a/Detectors/Raw/README.md
+++ b/Detectors/Raw/README.md
@@ -548,6 +548,87 @@ list of detectors for which raw outputs are discarded.
 
 The raw data will be propagated (if present) only if the detector is selected in `--onlyDet` and `NOT` selected in `--non-raw-only-det`. The non-raw data will be propagated (if defined for the given detector and present in the file) only if the detector is selected in `--onlyDet` and `NOT` selected in `--raw-only-det`.
 
+## Raw TF (DD format) dumping workflow
+
+Use `o2-raw-tf-dump-workflow` to dump raw TF data in DD format. The options are:
+```
+--dataspec arg (=tst:TST/A)
+```
+Optional selection string for the data to be dumped, e.g. the same string supplied to the input raw proxy
+```
+--triggerspec arg (="")
+```
+Selection string for the external trigger to dump particular TF. Must be contained in the `--dataspec`. The workflow will loop over all available trigger inputs, interpreting them as span<bool>: any `span[0]==true` will trigger writing process (modulo throttling).
+```
+--include-deadbeef (false)
+```
+Include data with DPL-generated 0xdeadbeef subspecs (for data missing in the original TF).
+```
+--exclude-trigger-specs (="")
+
+```
+Ignore trigger seen in these inputs of triggerspec (e.g. to suppress noisy trigger inputs)
+```
+--max-dump-rate arg (=0)
+```
+Fraction in (`%`) of TFs to dump. W/o external trigger: random(>0) or periodic(<0) rejection. With external trigger: throttle dumping to have the lowest estimated acceptance rate compatible with this rate.
+```
+--rate-est-conf-limit arg (=0.05)
+```
+Quantile for the lowest rate estimate confidence limit
+```
+--max-warn arg (=5)
+```
+If throttling, max allowed warnings
+```
+--mute-warn-period arg (=100)
+```
+Mute warnings about throttling for this number of TFs
+```
+--output-dir arg (=none)
+```
+Dumped TFs output directory, must exist. `none` means current dir., `/dev/null`: ignort writing (dry run)
+```
+--meta-output-dir arg (=/dev/null)
+```
+TF metadata output directory, must exist (if not /dev/null, in which case the metadata will not be created)
+```
+--md5-for-meta (false)
+```
+Fill CTF file MD5 sum in the metadata file
+```
+--min-file-size arg (=0)
+```
+Accumulate TFs until given file size reached
+```
+--max-file-size arg (=0)
+```
+If > 0, try to avoid exceeding given file size, also used for space check
+```
+--max-tf-per-file arg (=0)
+```
+If > 0, avoid storing more than requested CTFs per file
+```
+--require-free-disk arg (=0)
+```
+Pause writing op. if available disk space is below this margin, in bytes if >0, as a fraction of total if <0
+```
+--wait-for-free-disk arg (=10)
+```
+If paused due to the low disk space, recheck after this time (in s)
+```
+--max-wait-for-free-disk arg (=60)
+```
+Produce fatal if paused due to the low disk space for more than time in seconds.
+```
+--verbosity-level (=0)
+```
+Verbose mode: 1: decision on every TF, 2: details of saved TF, 3: more details.
+```
+--ignore-partition-run-dir
+```
+Do not creare partition-run directory in output-dir
+
 ## TF rate limiting
 
 To apply TF rate limiting (i.e. make sure that no more than N TFs are in processing) provide `--timeframes-rate-limit <N> --timeframes-rate-limit-ipcid <IPCID>`
diff --git a/Detectors/Raw/TFReaderDD/CMakeLists.txt b/Detectors/Raw/TFReaderDD/CMakeLists.txt
index 12ecc9ca8795d..f87d1b5a7704e 100644
--- a/Detectors/Raw/TFReaderDD/CMakeLists.txt
+++ b/Detectors/Raw/TFReaderDD/CMakeLists.txt
@@ -26,3 +26,10 @@ o2_add_executable(tf-reader-workflow
                   SOURCES src/TFReaderSpec.cxx
                           src/tf-reader-workflow.cxx
                   PUBLIC_LINK_LIBRARIES O2::TFReaderDD)
+
+
+o2_add_executable(tf-dump-workflow
+                  COMPONENT_NAME raw
+                  SOURCES src/RawTFDumpSpec.cxx
+                          src/tf-data-dump-workflow.cxx
+                  PUBLIC_LINK_LIBRARIES O2::TFReaderDD)
diff --git a/Detectors/Raw/TFReaderDD/include/TFReaderDD/SubTimeFrameFile.h b/Detectors/Raw/TFReaderDD/include/TFReaderDD/SubTimeFrameFile.h
index 340027642b74c..eeabf8e8d4117 100644
--- a/Detectors/Raw/TFReaderDD/include/TFReaderDD/SubTimeFrameFile.h
+++ b/Detectors/Raw/TFReaderDD/include/TFReaderDD/SubTimeFrameFile.h
@@ -21,6 +21,8 @@
 #include <vector>
 
 #include <Headers/DataHeader.h>
+#include "Framework/DataSpecUtils.h"
+#include "Framework/OutputSpec.h"
 #include "Framework/Logger.h"
 
 namespace o2
@@ -151,13 +153,13 @@ struct SubTimeFrameFileMeta {
   ///
   std::uint64_t mWriteTimeMs;
 
-  auto getTimePoint()
+  auto getTimePoint() const
   {
     using namespace std::chrono;
     return time_point<system_clock, milliseconds>{milliseconds{mWriteTimeMs}};
   }
 
-  std::string getTimeString()
+  std::string getTimeString() const
   {
     using namespace std::chrono;
     std::time_t lTime = system_clock::to_time_t(getTimePoint());
@@ -167,6 +169,11 @@ struct SubTimeFrameFileMeta {
     return lTimeStream.str();
   }
 
+  const std::string info() const
+  {
+    return fmt::format("Size in file: {} Time: {} Version: {}", mStfSizeInFile, getTimeString(), mStfFileVersion);
+  }
+
   SubTimeFrameFileMeta(const std::uint64_t pStfSize)
     : SubTimeFrameFileMeta()
   {
@@ -220,6 +227,11 @@ struct SubTimeFrameFileDataIndex {
       static_assert(sizeof(DataIndexElem) == 48,
                     "DataIndexElem changed -> Binary compatibility is lost!");
     }
+
+    const std::string info() const
+    {
+      return fmt::format("DH: {} Cnt:{} Size:{} Offset:{}", o2::framework::DataSpecUtils::describe(o2::framework::OutputSpec{mDataOrigin, mDataDescription, mSubSpecification}), mDataBlockCnt, mSize, mOffset);
+    }
   };
 
   SubTimeFrameFileDataIndex() = default;
@@ -240,6 +252,8 @@ struct SubTimeFrameFileDataIndex {
     return sizeof(o2::header::DataHeader) + (sizeof(DataIndexElem) * mDataIndex.size());
   }
 
+  const std::vector<DataIndexElem>& getDataIndex() const { return mDataIndex; }
+
   friend std::ostream& operator<<(std::ostream& pStream, const SubTimeFrameFileDataIndex& pIndex);
 
  private:
diff --git a/Detectors/Raw/TFReaderDD/include/TFReaderDD/SubTimeFrameFileReader.h b/Detectors/Raw/TFReaderDD/include/TFReaderDD/SubTimeFrameFileReader.h
index 3b926e0a79206..2b7d2b7ab8e74 100644
--- a/Detectors/Raw/TFReaderDD/include/TFReaderDD/SubTimeFrameFileReader.h
+++ b/Detectors/Raw/TFReaderDD/include/TFReaderDD/SubTimeFrameFileReader.h
@@ -46,11 +46,11 @@ class SubTimeFrameFileReader
  public:
 
   SubTimeFrameFileReader() = delete;
-  SubTimeFrameFileReader(const std::string& pFileName, o2::detectors::DetID::mask_t detMask);
+  SubTimeFrameFileReader(const std::string& pFileName, o2::detectors::DetID::mask_t detMask, int verb, bool sup0xccdb, bool repaireHeaders, bool rejectDistSTF);
   ~SubTimeFrameFileReader();
 
   /// Read a single TF from the file
-  std::unique_ptr<MessagesPerRoute> read(fair::mq::Device* device, const std::vector<o2f::OutputRoute>& outputRoutes, const std::string& rawChannel, size_t slice, bool sup0xccdb, int verbosity);
+  std::unique_ptr<MessagesPerRoute> read(fair::mq::Device* device, const std::vector<o2f::OutputRoute>& outputRoutes, const std::string& rawChannel, size_t slice);
 
   /// Tell the current position of the file
   inline std::uint64_t position() const { return mFileMapOffset; }
@@ -76,6 +76,13 @@ class SubTimeFrameFileReader
   std::uint64_t mFileMapOffset = 0;
   std::uint64_t mFileSize = 0;
 
+  int mVerbosity = 0;
+  bool mSup0xccdb = true;
+  bool mRepaireHeaders = true;
+  bool mRejectDistSTF = true;
+
+  const std::string describeHeader(const o2::header::DataHeader& hd, bool full = false) const;
+
   // helper to make sure written chunks are buffered, only allow pointers
   template <typename pointer,
             typename = std::enable_if_t<std::is_pointer<pointer>::value>>
diff --git a/Detectors/Raw/TFReaderDD/src/RawTFDumpSpec.cxx b/Detectors/Raw/TFReaderDD/src/RawTFDumpSpec.cxx
new file mode 100644
index 0000000000000..8d9986eddef48
--- /dev/null
+++ b/Detectors/Raw/TFReaderDD/src/RawTFDumpSpec.cxx
@@ -0,0 +1,618 @@
+// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+#include "Framework/WorkflowSpec.h"
+#include "Framework/ConfigParamRegistry.h"
+#include "Framework/RawDeviceService.h"
+#include "Framework/DataProcessingHelpers.h"
+#include "Framework/InputRecordWalker.h"
+#include "Framework/Task.h"
+#include "Framework/DataTakingContext.h"
+#include "Framework/TimingInfo.h"
+#include "DataFormatsParameters/GRPECSObject.h"
+#include "DetectorsCommonDataFormats/FileMetaData.h"
+#include "DetectorsRaw/RDHUtils.h"
+#include "RawTFDumpSpec.h"
+#include "TFReaderDD/SubTimeFrameFile.h"
+#include "CommonUtils/NameConf.h"
+#include "CommonUtils/FileSystemUtils.h"
+#include "CommonUtils/StringUtils.h"
+#include "Algorithm/RangeTokenizer.h"
+#include <unistd.h>
+#include <TMath.h>
+#include <filesystem>
+#include <random>
+#include <set>
+
+namespace o2::rawdd
+{
+namespace o2h = o2::header;
+using namespace o2::framework;
+using DataHeader = o2::header::DataHeader;
+using DetID = o2::detectors::DetID;
+using ios = std::ios_base;
+
+class RawTFDump : public Task
+{
+ public:
+  static constexpr o2h::DataDescription DESCRaw{"RAWDATA"}, DESCCRaw{"CRAWDATA"};
+
+  RawTFDump(const std::string& trigger);
+  void init(InitContext& ic) final;
+  void run(ProcessingContext& pc) final;
+  void endOfStream(EndOfStreamContext& ec) final;
+
+ private:
+  bool triggerTF(ProcessingContext& pc);
+  void updateTimeDependentParams(ProcessingContext& pc);
+  void prepareTFForWriting(ProcessingContext& pc);
+  size_t getTFSizeInFile() const;
+  size_t getCurrentFileSize();
+  void prepareTFFile();
+  void closeTFFile();
+  bool checkFreeSpace(ProcessingContext& pc);
+  std::string reportRates() const;
+
+  SubTimeFrameFileDataIndex mTFDataIndex;
+  std::vector<std::pair<size_t, const void*>> mTFData;
+  std::map<EquipmentIdentifier, std::tuple<size_t, size_t, size_t, const void*>> mDataMap;
+  std::vector<InputSpec> mFilter{};
+  std::vector<InputSpec> mTriggerFilter{};
+  std::vector<InputSpec> mExclTriggerFilter{};
+
+  size_t mTFSize = 0;
+  size_t mMinFileSize = 0; // if > 0, accumulate TFs in the same file until the total size exceeds this minimum
+  size_t mMaxFileSize = 0; // if > MinSize, and accumulated size will exceed this value, stop accumulation (even if mMinFileSize is not reached)
+
+  int mNTFsSeen = 0;          // total number of TFs seen
+  int mNTFsExtTrig = 0;       // total nunber of TFs externally triggered
+  int mNTFsAccepted = 0;      // total number of TFs written
+  int mNTFsInFile = 0;        // total number of TFs accumulated in the current file
+  int mNTFFiles = 0;          // total number of TF files written
+  int mLastWarned = 0;        // TF when last warned about throttling
+  int mMaxTFPerFile = 0;      // max TFs per files to store
+  int mNWarnThrottle = 0;     // number of times we warned about the throttling
+  int mMaxWarnThrottle = 0;   // max allowed warnings about the throttling
+  int mWarnThrottleTF = 0;    // min period (in TFs) between the warnings about the throttling
+  int mWaitDiskFull = 0;      // if mCheckDiskFull triggers, pause for this amount of ms before new attempt
+  int mWaitDiskFullMax = -1;  // produce fatal mCheckDiskFull block the workflow for more than this time (in ms)
+  float mCheckDiskFull = 0.;  // wait for if available abs. disk space is < mCheckDiskFull (if >0) or if its fraction is < -mCheckDiskFull (if <0)
+  float mMaxAccRate = 0.f;    // max acceptance rate
+  float mConfLim = 0.05f;     // confidence limit for rate esimate (lower quantile)
+  float mRateEstAccLow = 0.f; // lower limit on accepted TFs rate
+  float mRateEstAccUpp = 0.f; // upper limit on accepted TFs rate
+  float mRateEstTrgLow = 0.f; // lower limit on triggered TFs rate
+  float mRateEstTrgUpp = 0.f; // upper limit on triggered TFs rate
+
+  bool mFillMD5 = false;
+  bool mWriteTF = true; // for dry run
+  bool mStoreMetaFile = false;
+  bool mCreateRunEnvDir = true;
+  bool mAcceptCurrentTF = false;
+  bool mRejectDEADBEEF = false;
+  bool mRejectDistSTF = true;
+  int mVerbose = 0;
+  std::vector<uint32_t> mTFOrbits{}; // 1st orbits of TF accumulated in current file
+  o2::framework::DataTakingContext mDataTakingContext{};
+  o2::framework::TimingInfo mTimingInfo{};
+
+  std::string mTrigger{}; // external trigger input
+  std::string mExclTriggerSpecs{}; // trigger specs to ignore
+  std::string mHostName{};
+  std::string mTFDir{};
+  std::string mTFMetaFileDir = "/dev/null";
+  std::string mCurrentTFFileName{};
+  std::string mCurrentTFFileNameFull{};
+  std::string mCurrentTFFileNameFullTmp{};
+  std::string mMetaDataType{};
+
+  static constexpr size_t MiB = 1ul << 20;
+  static constexpr std::streamsize sBuffSize = MiB; // 1 MiB
+  static constexpr std::streamsize sChunkSize = 512;
+  static const std::string TMPFileEnding;
+  std::unique_ptr<char[]> mFileBuf;
+  std::ofstream mFile;
+  std::uniform_real_distribution<double> mUniformDist{0.0, 100.0};
+  std::default_random_engine mRGen;
+
+  // helper to make sure the written blocks are buffered
+  template <
+    typename pointer,
+    typename std::enable_if<
+      std::is_pointer<pointer>::value &&                      // pointers only
+      (std::is_void<std::remove_pointer_t<pointer>>::value || // void* or standard layout!
+       std::is_standard_layout<std::remove_pointer_t<pointer>>::value)>::type* = nullptr>
+  void buffered_write(const pointer p, std::streamsize pCount)
+  {
+    // make sure we're not doing a short write
+    assert((pCount % sizeof(std::conditional_t<std::is_void<std::remove_pointer_t<pointer>>::value,
+                                               char, std::remove_pointer_t<pointer>>) ==
+            0) &&
+           "Performing short write?");
+
+    const char* lPtr = reinterpret_cast<const char*>(p);
+    // avoid the optimization if the write is large enough
+    if (pCount >= sBuffSize) {
+      mFile.write(lPtr, pCount);
+    } else {
+      // split the write to smaller chunks
+      while (pCount > 0) {
+        const auto lToWrite = std::min(pCount, sChunkSize);
+        assert(lToWrite > 0 && lToWrite <= sChunkSize && lToWrite <= pCount);
+
+        mFile.write(lPtr, lToWrite);
+        lPtr += lToWrite;
+        pCount -= lToWrite;
+      }
+    }
+  }
+};
+
+const std::string RawTFDump::TMPFileEnding{".part"};
+
+//________________________________________
+RawTFDump::RawTFDump(const std::string& trigger) : mTrigger{trigger}
+{
+  mTriggerFilter = select(trigger.c_str());
+  mFileBuf = std::make_unique<char[]>(sBuffSize);
+  mFile.rdbuf()->pubsetbuf(mFileBuf.get(), sBuffSize);
+  mFile.clear();
+  mFile.exceptions(std::fstream::failbit | std::fstream::badbit);
+}
+
+//________________________________________
+void RawTFDump::init(InitContext& ic)
+{
+  mRGen = std::default_random_engine(getpid());
+  mTFMetaFileDir = ic.options().get<std::string>("meta-output-dir");
+  if (mTFMetaFileDir != "/dev/null") {
+    mTFMetaFileDir = o2::utils::Str::rectifyDirectory(mTFMetaFileDir);
+    mStoreMetaFile = true;
+    mFillMD5 = ic.options().get<bool>("md5-for-meta");
+  }
+
+  mTFDir = ic.options().get<std::string>("output-dir");
+  if (mTFDir != "/dev/null") {
+    mTFDir = o2::utils::Str::rectifyDirectory(mTFDir);
+    mWriteTF = true;
+  } else {
+    mWriteTF = false;
+    mStoreMetaFile = false;
+  }
+  mRejectDistSTF = !ic.options().get<bool>("include-dist-stf");
+  mRejectDEADBEEF = !ic.options().get<bool>("include-deadbeef");
+  mCreateRunEnvDir = !ic.options().get<bool>("ignore-partition-run-dir");
+  mMinFileSize = ic.options().get<int64_t>("min-file-size");
+  mMaxFileSize = ic.options().get<int64_t>("max-file-size");
+  mMaxTFPerFile = ic.options().get<int>("max-tf-per-file");
+  mMaxAccRate = ic.options().get<float>("max-dump-rate");
+  float cl = ic.options().get<float>("rate-est-conf-limit");
+  if (mConfLim < 0.001 || mConfLim > 0.32) {
+    LOGP(warn, "Bad confidence limit {} for rate estimate, setting to default {}", cl, mConfLim);
+  } else {
+    mConfLim = cl;
+  }
+  mMaxWarnThrottle = ic.options().get<int>("max-warn");
+  mWarnThrottleTF = ic.options().get<int>("mute-warn-period");
+
+  mVerbose = ic.options().get<int>("verbosity-level");
+  mExclTriggerSpecs = ic.options().get<std::string>("exclude-trigger-specs");
+  if (!mExclTriggerSpecs.empty()) {
+    mExclTriggerFilter = select(mExclTriggerSpecs.c_str());
+  }
+  if (mTrigger.empty()) {
+    if (mMaxAccRate >= 0.f) {
+      LOGP(info, "Will accept randomly {}% of TFs", mMaxAccRate);
+    } else {
+      LOGP(info, "Will accept every {}-th TF", int(std::ceil(-100.f / mMaxAccRate)));
+    }
+  } else {
+    mMaxAccRate = std::abs(mMaxAccRate);
+    LOGP(info, "Will limit TFs triggered with {} by {}% at most", mTrigger, mMaxAccRate);
+    if (!mExclTriggerFilter.empty()) {
+      LOGP(info, "Inputs excluded from the trigger: {}", mExclTriggerSpecs);
+    }
+  }
+
+  if (mWriteTF) {
+    if (mMinFileSize > 0) {
+      LOGP(info, "Multiple TFs will be accumulated in the file until its size exceeds {}{}",
+           mMinFileSize, mMaxFileSize > mMinFileSize ? fmt::format(" but does not exceed {} B", mMaxFileSize) : std::string{});
+    }
+  }
+
+  mCheckDiskFull = ic.options().get<float>("require-free-disk");
+  mWaitDiskFull = 1000 * ic.options().get<float>("wait-for-free-disk");
+  mWaitDiskFullMax = 1000 * ic.options().get<float>("max-wait-for-free-disk");
+
+  char hostname[_POSIX_HOST_NAME_MAX];
+  gethostname(hostname, _POSIX_HOST_NAME_MAX);
+  mHostName = hostname;
+  mHostName = mHostName.substr(0, mHostName.find('.'));
+}
+
+//________________________________________
+void RawTFDump::run(ProcessingContext& pc)
+{
+  mNTFsSeen++;
+  updateTimeDependentParams(pc);
+  mAcceptCurrentTF = triggerTF(pc);
+  if (mAcceptCurrentTF) {
+    prepareTFForWriting(pc);
+  } else {
+    return;
+  }
+
+  prepareTFFile();
+  if (mWriteTF && checkFreeSpace(pc)) { // write data
+    try {
+      size_t lTFSizeInFile = getTFSizeInFile();
+      SubTimeFrameFileMeta lTFFileMeta(lTFSizeInFile);
+      lTFFileMeta.mWriteTimeMs = mTimingInfo.creation;
+
+      mFile << lTFFileMeta;  // Write DataHeader + SubTimeFrameFileMeta
+      mFile << mTFDataIndex; // Write DataHeader + SubTimeFrameFileDataIndex
+
+      for (const auto& eqEntry : mDataMap) {
+        auto& [lSize, lCnt, lEntry, lHeader] = eqEntry.second;
+        for (size_t part = 0; part < lCnt; part++) {
+          const auto& dataPtr = mTFData[lEntry + part];
+          DataHeader hdToWrite = *reinterpret_cast<const DataHeader*>(lHeader); // make a local DataHeader copy to clear flagsNextHeader bit and set the parts correctly
+          hdToWrite.flagsNextHeader = 0;
+          hdToWrite.splitPayloadIndex = part;
+          hdToWrite.payloadSize = dataPtr.first;
+          if (mVerbose > 2) {
+            LOGP(info, "Writing part:{}/{} of {} | TFCounter:{} part{}/{}, size:{}", part, lCnt, DataSpecUtils::describe(OutputSpec{hdToWrite.dataOrigin, hdToWrite.dataDescription, hdToWrite.subSpecification}), hdToWrite.firstTForbit, hdToWrite.splitPayloadIndex, hdToWrite.splitPayloadParts, hdToWrite.payloadSize);
+          }
+          buffered_write(reinterpret_cast<const char*>(&hdToWrite), sizeof(DataHeader));
+          buffered_write(dataPtr.second, hdToWrite.payloadSize);
+        }
+      }
+      mFile.flush(); // flush the buffer and check the state
+      mTFOrbits.push_back(mTimingInfo.firstTForbit);
+      mNTFsInFile++;
+    } catch (const std::ios_base::failure& eFailExc) {
+      LOGP(error, "Writing of TF {} to file {} failed. error={}", mTimingInfo.tfCounter, mCurrentTFFileNameFullTmp, eFailExc.what());
+    }
+  }
+  // cleanup
+  mTFData.clear();
+  mDataMap.clear();
+  mTFDataIndex.clear();
+  mTFSize = 0;
+}
+
+//____________________________________________________________
+void RawTFDump::endOfStream(EndOfStreamContext&)
+{
+  closeTFFile();
+  LOGP(info, "Dumped {} TFs to {} files", mNTFsAccepted, mNTFFiles);
+  if (!mTriggerFilter.empty()) {
+    LOGP(info, "External trigger summary: {}", reportRates());
+  }
+}
+
+//________________________________________
+size_t RawTFDump::getTFSizeInFile() const
+{
+  return SubTimeFrameFileMeta::getSizeInFile() + mTFDataIndex.getSizeInFile() + mTFSize;
+}
+
+//________________________________________
+size_t RawTFDump::getCurrentFileSize()
+{
+  return mFile.is_open() ? size_t(mFile.tellp()) : 0;
+}
+
+//___________________________________________________________________
+void RawTFDump::prepareTFFile()
+{
+  if (!mWriteTF) {
+    return;
+  }
+  bool needToOpen;
+  if (!mFile.is_open()) {
+    needToOpen = true;
+  } else {
+    auto currSize = getCurrentFileSize();
+    if ((mNTFsInFile >= mMaxTFPerFile) ||
+        (currSize >= mMinFileSize) ||                                                         // min size exceeded, may close the file.
+        (currSize && mMaxFileSize > mMinFileSize && ((currSize + mTFSize) > mMaxFileSize))) { // this is not the 1st TF in the file and the new size will exceed allowed max
+      needToOpen = true;
+    } else {
+      LOGP(info, "Will add new TF of size {} to existing file of size {} with {} TFs", mTFSize, currSize, mNTFsInFile);
+      needToOpen = false;
+    }
+  }
+  if (needToOpen) {
+    closeTFFile();
+    auto TFDir = mTFDir.empty() ? o2::utils::Str::rectifyDirectory("./") : mTFDir;
+    if (mCreateRunEnvDir && !mDataTakingContext.envId.empty() && (mDataTakingContext.envId != o2::framework::DataTakingContext::UNKNOWN)) {
+      TFDir += fmt::format("{}_{}tf/", mDataTakingContext.envId, mDataTakingContext.runNumber);
+      if (!TFDir.empty()) {
+        o2::utils::createDirectoriesIfAbsent(TFDir);
+        LOGP(info, "Created {} directory for TFs output", TFDir);
+      }
+    }
+    mCurrentTFFileName = o2::base::NameConf::getRawTFFileName(mTimingInfo.runNumber, mTimingInfo.firstTForbit, mTimingInfo.tfCounter, mHostName);
+    mCurrentTFFileNameFull = fmt::format("{}{}", TFDir, mCurrentTFFileName);
+    mCurrentTFFileNameFullTmp = TMPFileEnding.empty() ? mCurrentTFFileNameFull : o2::utils::Str::concat_string(mCurrentTFFileNameFull, TMPFileEnding);
+    mFile.open(mCurrentTFFileNameFullTmp.c_str(), ios::binary | ios::trunc | ios::out | ios::ate);
+    LOGP(info, "Opened new raw-tf dump file {}[{}]", mCurrentTFFileNameFull, TMPFileEnding);
+    mNTFFiles++;
+  }
+}
+
+//___________________________________________________________________
+void RawTFDump::updateTimeDependentParams(ProcessingContext& pc)
+{
+  namespace GRPECS = o2::parameters::GRPECS;
+  mTimingInfo = pc.services().get<o2::framework::TimingInfo>();
+  if (mTimingInfo.globalRunNumberChanged) {
+    mDataTakingContext = pc.services().get<DataTakingContext>();
+    // determine the output type for the TF metadata
+    mMetaDataType = GRPECS::getRawDataPersistencyMode(mDataTakingContext.runType, mDataTakingContext.forcedRaw);
+  }
+}
+
+//___________________________________________________________________
+void RawTFDump::closeTFFile()
+{
+  if (!mFile.is_open()) {
+    return;
+  }
+  try {
+    LOGP(info, "Closing output file {}[{}]", mCurrentTFFileNameFull, TMPFileEnding);
+    mFile.close();
+    // write TF file metaFile data
+    if (mStoreMetaFile) {
+      o2::dataformats::FileMetaData TFMetaData;
+      if (!TFMetaData.fillFileData(mCurrentTFFileNameFullTmp, mFillMD5, TMPFileEnding)) {
+        throw std::runtime_error("metadata file was requested but not created");
+      }
+      TFMetaData.setDataTakingContext(mDataTakingContext);
+      TFMetaData.type = mMetaDataType;
+      TFMetaData.priority = "high";
+      TFMetaData.tfOrbits.swap(mTFOrbits);
+      auto metaFileNameTmp = fmt::format("{}{}.tmp", mTFMetaFileDir, mCurrentTFFileName);
+      auto metaFileName = fmt::format("{}{}.done", mTFMetaFileDir, mCurrentTFFileName);
+      try {
+        std::ofstream metaFileOut(metaFileNameTmp);
+        metaFileOut << TFMetaData;
+        metaFileOut.close();
+        if (!TMPFileEnding.empty()) {
+          std::filesystem::rename(mCurrentTFFileNameFullTmp, mCurrentTFFileNameFull);
+        }
+        std::filesystem::rename(metaFileNameTmp, metaFileName);
+        LOGP(info, "wrote meta file {}", metaFileName);
+      } catch (std::exception const& e) {
+        LOGP(error, "Failed to store TF meta data file {}, reason {}", metaFileName, e.what());
+      }
+    } else if (!TMPFileEnding.empty()) {
+      std::filesystem::rename(mCurrentTFFileNameFullTmp, mCurrentTFFileNameFull);
+    }
+  } catch (std::exception const& e) {
+    LOGP(error, "Failed to finalize TF file {}, reason: ", mCurrentTFFileNameFull, e.what());
+  }
+  mTFOrbits.clear();
+  mNTFsInFile = 0;
+}
+
+//________________________________________
+bool RawTFDump::checkFreeSpace(ProcessingContext& pc)
+{
+  int totalWait = 0, nwaitCycles = 0;
+  while (mCheckDiskFull) {
+    constexpr int showFirstN = 10, prsecaleWarnings = 50;
+    try {
+      const auto si = std::filesystem::space(mCurrentTFFileNameFullTmp);
+      std::string wmsg{};
+      if (mCheckDiskFull > 0.f && si.available < mCheckDiskFull) {
+        nwaitCycles++;
+        wmsg = fmt::format("Disk has {} MiB available while at least {} MiB is requested, wait for {} ms (on top of {} ms)", si.available / MiB, size_t(mCheckDiskFull) / MiB, mWaitDiskFull, totalWait);
+      } else if (mCheckDiskFull < 0.f && float(si.available) / si.capacity < -mCheckDiskFull) { // relative margin requested
+        nwaitCycles++;
+        wmsg = fmt::format("Disk has {:.3f}% available while at least {:.3f}% is requested, wait for {} ms (on top of {} ms)", si.capacity ? float(si.available) / si.capacity * 100.f : 0., -mCheckDiskFull, mWaitDiskFull, totalWait);
+      } else {
+        nwaitCycles = 0;
+      }
+      if (nwaitCycles) {
+        if (mWaitDiskFullMax > 0 && totalWait > mWaitDiskFullMax) {
+          closeTFFile(); // try to save whatever we have
+          LOGP(fatal, "Disk has {} MiB available out of {} MiB after waiting for {} ms", si.available / MiB, si.capacity / MiB, mWaitDiskFullMax);
+        }
+        if (nwaitCycles < showFirstN + 1 || (prsecaleWarnings && (nwaitCycles % prsecaleWarnings) == 0)) {
+          LOGP(alarm, "{}", wmsg);
+        }
+        pc.services().get<RawDeviceService>().waitFor((unsigned int)(mWaitDiskFull));
+        totalWait += mWaitDiskFull;
+        continue;
+      }
+    } catch (std::exception const& e) {
+      LOGP(fatal, "unable to query disk space info for path {}, reason {}", mCurrentTFFileNameFull, e.what()); // do we want this?
+    }
+    break;
+  }
+  return true;
+}
+
+//________________________________________
+bool RawTFDump::triggerTF(ProcessingContext& pc)
+{
+  bool trig = false;
+  if (mTrigger.empty()) { // random
+    if (mMaxAccRate > 0.f) {
+      trig = (mUniformDist(mRGen) <= mMaxAccRate);
+    } else if (mMaxAccRate < 0.f) {
+      trig = (mTimingInfo.tfCounter % int(std::ceil(-100.f / mMaxAccRate))) == 0;
+    }
+  } else {
+    for (auto const& ref : InputRecordWalker(pc.inputs(), mTriggerFilter)) {
+      auto const* dh = DataRefUtils::getHeader<DataHeader*>(ref);
+      if (!dh) {
+        LOGP(error, "Failed to extract header for trigger input");
+        continue;
+      }
+      auto extTrig = DataRefUtils::as<bool>(ref);
+      if (mVerbose > 1 || (mVerbose > 0 && extTrig.size() > 0 && extTrig[0])) {
+        LOGP(info, "trigger input {}, part: {} of {}, payload {}, 1stTFOrbit: {} TF: {} | span size: {} span[0]={}",
+             DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}),
+             dh->splitPayloadIndex, dh->splitPayloadParts, dh->payloadSize, dh->firstTForbit, dh->tfCounter, extTrig.size(), extTrig.size() > 0 ? extTrig[0] : false);
+      }
+      if (extTrig.size() && extTrig[0]) {
+        // is the input with this trigger vetoed?
+        bool veto = false;
+        for (const auto& excl : mExclTriggerFilter) {
+          if (DataRefUtils::match(ref, excl)) {
+            if (mVerbose > 0) {
+              LOGP(info, "ignoring trigger from black-listed {}", DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}));
+            }
+            veto = true;
+            break;
+          }
+        }
+        if (veto) {
+          continue;
+        }
+        trig = true;
+        break;
+      }
+    }
+    if (trig) { // do we need to throttle?
+      mNTFsExtTrig++;
+      mRateEstTrgLow = TMath::ChisquareQuantile(mConfLim, 2 * (mNTFsExtTrig)) / (2 * mNTFsSeen);
+      mRateEstTrgUpp = TMath::ChisquareQuantile(1. - mConfLim, 2 * (mNTFsExtTrig + 1)) / (2 * mNTFsSeen);
+      mRateEstAccLow = TMath::ChisquareQuantile(mConfLim, 2 * (mNTFsAccepted)) / (2 * mNTFsSeen);
+      mRateEstAccUpp = TMath::ChisquareQuantile(1. - mConfLim, 2 * (mNTFsAccepted + 1)) / (2 * mNTFsSeen);
+      if (mRateEstAccLow > 0.01 * mMaxAccRate) { // current lowest estimate on the acceptance rate exceeds desired limit -> ignore trigger
+        trig = false;
+        // do we need to warn?
+        if ((mNTFsSeen - mLastWarned) > mWarnThrottleTF && ((mNWarnThrottle < mMaxWarnThrottle) || mMaxWarnThrottle < 0)) {
+          mLastWarned = mNTFsSeen;
+          std::string swarn = reportRates();
+          if (++mNWarnThrottle == mMaxWarnThrottle) {
+            swarn += " Will not warn anymore.";
+          } else {
+            swarn += fmt::format(" Will suppress this warnings for {} TFs", mWarnThrottleTF);
+          }
+          LOGP(alarm, "Ignoring TF triggered for dumping: {}", swarn);
+        }
+      }
+    }
+  }
+  if (trig) {
+    mNTFsAccepted++;
+  }
+  if (mVerbose > 0) {
+    LOGP(info, "TF#{} (slice#{}) will{} be written, {}", mTimingInfo.tfCounter, mTimingInfo.timeslice, trig ? "" : " not", reportRates());
+  }
+  return trig;
+}
+
+//________________________________________
+void RawTFDump::prepareTFForWriting(ProcessingContext& pc)
+{
+  for (auto const& ref : InputRecordWalker(pc.inputs(), mFilter)) {
+    auto const* dh = DataRefUtils::getHeader<DataHeader*>(ref);
+    if (!dh) {
+      LOGP(error, "Failed to extract header");
+      continue;
+    }
+    if ((dh->subSpecification == 0xdeadbeef && mRejectDEADBEEF) ||
+        (dh->dataOrigin == o2::header::gDataOriginFLP && dh->dataDescription == o2::header::gDataDescriptionDISTSTF && mRejectDistSTF)) {
+      if (mVerbose > 2) {
+        LOGP(info, "Rejecting {}", DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}));
+      }
+      continue;
+    }
+    const auto payloadSize = DataRefUtils::getPayloadSize(ref);
+    const auto lHdrDataSize = sizeof(DataHeader) + payloadSize;
+    mTFSize += lHdrDataSize;
+
+    auto& [lSize, lCnt, lEntry, lHeader] = mDataMap[EquipmentIdentifier(*dh)];
+    if (!lCnt) {
+      lEntry = mTFData.size(); // flag where the data of this spec starts
+      lHeader = ref.header;
+    }
+    lSize += lHdrDataSize;
+    lCnt++;
+    mTFData.push_back({payloadSize, ref.payload});
+    if (mVerbose > 2) {
+      const auto* dph = DataRefUtils::getHeader<DataProcessingHeader*>(ref);
+      LOGP(info, "{}, part: {}({}) of {}, payload {}({}), 1stTFOrbit: {} TF: {}, creation: {} | counter:{} size:{} entry:{}",
+           DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}),
+           dh->splitPayloadIndex, lCnt - 1, dh->splitPayloadParts, dh->payloadSize, payloadSize, dh->firstTForbit, dh->tfCounter, dph ? dph->creation : -1UL, lCnt, lSize, lEntry);
+      // if (o2::raw::RDHUtils::checkRDH(ref.payload)) {
+      //   o2::raw::RDHUtils::printRDH(ref.payload);
+      //  }
+    }
+  }
+
+  // build the index
+  {
+    LOGP(info, "Creating dump image for TF {} of run {}, starting orbit {}, size = {}", mTimingInfo.tfCounter, mTimingInfo.runNumber, mTimingInfo.firstTForbit, mTFSize);
+    std::uint64_t lCurrOff = 0;
+    for (const auto& eqEntry : mDataMap) {
+      const auto& eq = eqEntry.first;
+      auto& [lSize, lCnt, lEntry, lHeader] = eqEntry.second;
+      assert(lSize > sizeof(DataHeader));
+
+      OutputSpec spec{eq.mDataOrigin, eq.mDataDescription, eq.mSubSpecification};
+      if (mVerbose > 1) {
+        LOGP(info, "{} : {} parts of size {} entry {}| offset: {}", DataSpecUtils::describe(spec), lCnt, lSize, lEntry, lCurrOff);
+      }
+      mTFDataIndex.AddStfElement(eq, lCnt, lCurrOff, lSize);
+      lCurrOff += lSize;
+    }
+  }
+}
+
+//____________________________________________________________
+std::string RawTFDump::reportRates() const
+{
+  std::string rep = fmt::format("{} TFs seen, {} accepted", mNTFsSeen, mNTFsAccepted);
+  if (!mTrigger.empty()) {
+    rep += fmt::format(", {} ext.triggered,  est.rate: [{:.2e}:{:.2e}]/[{:.2e}:{:.2e}].", mNTFsExtTrig, mRateEstAccLow, mRateEstAccUpp, mRateEstTrgLow, mRateEstTrgUpp);
+  }
+  return rep;
+}
+
+//__________________________________________________________
+DataProcessorSpec getRawTFDumpSpec(const std::string& inpconfig, const std::string& trigger)
+{
+  std::vector<InputSpec> inputs = select(inpconfig.c_str());
+  return DataProcessorSpec{
+    "raw-tf-dump",
+    inputs,
+    {},
+    AlgorithmSpec{adaptFromTask<RawTFDump>(trigger)},
+    Options{
+      {"include-deadbeef", VariantType::Bool, false, {"Include DPL-generated 0xdeadbeef subspecs for missing data"}},
+      {"include-dist-stf", VariantType::Bool, false, {"Include FLP/DISTSUBTIMEFRAME input"}},
+      {"exclude-trigger-specs", VariantType::String, "", {"Ignore trigger seen in these inputs of triggerspec"}},
+      {"max-dump-rate", VariantType::Float, 0.f, {"%-age of TFs to dump. W/o external trigger: random(>0) or periodic(<0) rejection, with: max limit"}},
+      {"rate-est-conf-limit", VariantType::Float, 0.05f, {"quantile for the lowest rate estimate confidence limit"}},
+      {"max-warn", VariantType::Int, 5, {"max allowed warnings on throttling"}},
+      {"mute-warn-period", VariantType::Int, 100, {"mute warnings on throttling for this number of TFs"}},
+      {"output-dir", VariantType::String, "none", {"TF output directory, must exist"}},
+      {"meta-output-dir", VariantType::String, "/dev/null", {"TF metadata output directory, must exist (if not /dev/null)"}},
+      {"md5-for-meta", VariantType::Bool, false, {"fill CTF file MD5 sum in the metadata file"}},
+      {"min-file-size", VariantType::Int64, 0l, {"accumulate TFs until given file size reached"}},
+      {"max-file-size", VariantType::Int64, 0l, {"if > 0, try to avoid exceeding given file size, also used for space check"}},
+      {"max-tf-per-file", VariantType::Int, 0, {"if > 0, avoid storing more than requested CTFs per file"}},
+      {"require-free-disk", VariantType::Float, 0.f, {"pause writing op. if available disk space is below this margin, in bytes if >0, as a fraction of total if <0"}},
+      {"wait-for-free-disk", VariantType::Float, 10.f, {"if paused due to the low disk space, recheck after this time (in s)"}},
+      {"max-wait-for-free-disk", VariantType::Float, 60.f, {"produce fatal if paused due to the low disk space for more than this amount in s."}},
+      {"verbosity-level", VariantType::Int, 0, {"Verbose mode: 1: decision on every TF, 2: details of saved TF, 3: more details"}},
+      {"ignore-partition-run-dir", VariantType::Bool, false, {"Do not creare partition-run directory in output-dir"}}}};
+}
+
+} // namespace o2::rawdd
diff --git a/Detectors/Raw/TFReaderDD/src/RawTFDumpSpec.h b/Detectors/Raw/TFReaderDD/src/RawTFDumpSpec.h
new file mode 100644
index 0000000000000..a39cfb026ed52
--- /dev/null
+++ b/Detectors/Raw/TFReaderDD/src/RawTFDumpSpec.h
@@ -0,0 +1,23 @@
+// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+#ifndef O2_RAW_TF_DUMP_SPEC_
+#define O2_RAW_TF_DUMP_SPEC_
+
+#include "DetectorsCommonDataFormats/DetID.h"
+#include "Framework/DeviceSpec.h"
+
+namespace o2::rawdd
+{
+o2::framework::DataProcessorSpec getRawTFDumpSpec(const std::string& inpconfig, const std::string& trigger);
+}
+
+#endif
diff --git a/Detectors/Raw/TFReaderDD/src/SubTimeFrameFileReader.cxx b/Detectors/Raw/TFReaderDD/src/SubTimeFrameFileReader.cxx
index f227390e67ef3..c8bc6ff374ead 100644
--- a/Detectors/Raw/TFReaderDD/src/SubTimeFrameFileReader.cxx
+++ b/Detectors/Raw/TFReaderDD/src/SubTimeFrameFileReader.cxx
@@ -45,8 +45,8 @@ namespace o2f = o2::framework;
 /// SubTimeFrameFileReader
 ////////////////////////////////////////////////////////////////////////////////
 
-SubTimeFrameFileReader::SubTimeFrameFileReader(const std::string& pFileName, o2::detectors::DetID::mask_t detMask)
-  : mFileName(pFileName)
+SubTimeFrameFileReader::SubTimeFrameFileReader(const std::string& pFileName, o2::detectors::DetID::mask_t detMask, int verb, bool sup0xccdb, bool repaireHeaders, bool rejectDistSTF)
+  : mFileName(pFileName), mVerbosity(verb), mSup0xccdb(sup0xccdb), mRepaireHeaders(repaireHeaders), mRejectDistSTF(rejectDistSTF)
 {
   mFileMap.open(mFileName);
   if (!mFileMap.is_open()) {
@@ -115,7 +115,7 @@ std::size_t SubTimeFrameFileReader::getHeaderStackSize() // throws ios_base::fai
     LOGP(error, "FileReader: Reached max number of headers allowed: {}.", cMaxHeaders);
     return 0;
   }
-
+  LOGP(debug, "getHeaderStackSize, pos = {}, size = {}", lFilePosStart, lStackSize);
   return lStackSize;
 }
 
@@ -178,13 +178,21 @@ Stack SubTimeFrameFileReader::getHeaderStack(std::size_t& pOrigsize)
   return Stack(lStackMem);
 }
 
+const std::string SubTimeFrameFileReader::describeHeader(const o2::header::DataHeader& hd, bool full) const
+{
+  std::string res = fmt::format("{}", o2f::DataSpecUtils::describe(o2::framework::OutputSpec{hd.dataOrigin, hd.dataDescription, hd.subSpecification}));
+  if (full) {
+    res += fmt::format(" part:{}/{} sz:{} TF:{} Orb:{} Run:{}", hd.splitPayloadIndex, hd.splitPayloadParts, hd.payloadSize, hd.tfCounter, hd.firstTForbit, hd.runNumber);
+  }
+  return res;
+}
+
 std::uint32_t sRunNumber = 0;                     // TODO: add id to files metadata
 std::uint32_t sFirstTForbit = 0;                  // TODO: add id to files metadata
 std::uint64_t sCreationTime = 0;
 std::mutex stfMtx;
 
-std::unique_ptr<MessagesPerRoute> SubTimeFrameFileReader::read(fair::mq::Device* device, const std::vector<o2f::OutputRoute>& outputRoutes,
-                                                               const std::string& rawChannel, size_t slice, bool sup0xccdb, int verbosity)
+std::unique_ptr<MessagesPerRoute> SubTimeFrameFileReader::read(fair::mq::Device* device, const std::vector<o2f::OutputRoute>& outputRoutes, const std::string& rawChannel, size_t slice)
 {
   std::unique_ptr<MessagesPerRoute> messagesPerRoute = std::make_unique<MessagesPerRoute>();
   auto& msgMap = *messagesPerRoute.get();
@@ -252,9 +260,15 @@ std::unique_ptr<MessagesPerRoute> SubTimeFrameFileReader::read(fair::mq::Device*
     return nullptr;
   }
   lStfMetaDataHdr = o2::header::DataHeader::Get(lMetaHdrStack.first());
+  if (mVerbosity > 0) {
+    LOGP(info, "read filemeta, pos = {}, size = {}", position(), sizeof(SubTimeFrameFileMeta));
+  }
   if (!read_advance(&lStfFileMeta, sizeof(SubTimeFrameFileMeta))) {
     return nullptr;
   }
+  if (mVerbosity > 0) {
+    LOGP(info, "TFMeta : {}", lStfFileMeta.info());
+  }
   if (lStfFileMeta.mWriteTimeMs == 0 && creationFallBack != 0) {
     if (!creation0Notified) {
       creation0Notified = true;
@@ -318,9 +332,9 @@ std::unique_ptr<MessagesPerRoute> SubTimeFrameFileReader::read(fair::mq::Device*
 
   std::int64_t lLeftToRead = lStfDataSize;
   STFHeader stfHeader{tfID, -1u, -1u};
+  DataHeader prevHeader;
   // read <hdrStack + data> pairs
   while (lLeftToRead > 0) {
-
     // allocate and read the Headers
     std::size_t lDataHeaderStackSize = 0;
     Stack lDataHeaderStack = getHeaderStack(lDataHeaderStackSize);
@@ -335,6 +349,25 @@ std::unique_ptr<MessagesPerRoute> SubTimeFrameFileReader::read(fair::mq::Device*
       return nullptr;
     }
     DataHeader locDataHeader(*lDataHeader);
+
+    if (mRepaireHeaders) {
+      if (locDataHeader == prevHeader) {
+        if (prevHeader.tfCounter == locDataHeader.tfCounter && (prevHeader.splitPayloadIndex + 1) != locDataHeader.splitPayloadIndex) {
+          if (mVerbosity > 3) {
+            LOGP(warn, "Repairing wrong part index for {} to {}", describeHeader(locDataHeader, true), (prevHeader.splitPayloadIndex + 1) % prevHeader.splitPayloadParts);
+          }
+          locDataHeader.splitPayloadIndex = (++prevHeader.splitPayloadIndex) % prevHeader.splitPayloadParts;
+        }
+      } else { // new header
+        if (locDataHeader.splitPayloadIndex != 0) {
+          if (mVerbosity > 2) {
+            LOGP(warn, "Repairing wrong part index for new {} to {}", describeHeader(locDataHeader, true), (prevHeader.splitPayloadIndex + 1) % prevHeader.splitPayloadParts);
+          }
+          locDataHeader.splitPayloadIndex = 0;
+        }
+      }
+      prevHeader = locDataHeader;
+    }
     // sanity check
     if (int(locDataHeader.firstTForbit) == -1) {
       if (!negativeOrbitNotified) {
@@ -350,6 +383,18 @@ std::unique_ptr<MessagesPerRoute> SubTimeFrameFileReader::read(fair::mq::Device*
       }
       locDataHeader.runNumber = runNumberFallBack;
     }
+    const std::uint64_t lDataSize = locDataHeader.payloadSize;
+
+    if (locDataHeader.dataOrigin == o2::header::gDataOriginFLP && locDataHeader.dataDescription == o2::header::gDataDescriptionDISTSTF && mRejectDistSTF) {
+      if (mVerbosity > 0) {
+        LOGP(warn, "Ignoring stored {}", describeHeader(locDataHeader));
+      }
+      if (!ignore_nbytes(lDataSize)) {
+        return nullptr;
+      }
+      lLeftToRead -= (lDataHeaderStackSize + lDataSize); // update the counter
+      continue;
+    }
     o2::header::Stack headerStack{locDataHeader, o2f::DataProcessingHeader{tfID, 1, lStfFileMeta.mWriteTimeMs}};
     if (stfHeader.runNumber == -1) {
       stfHeader.id = locDataHeader.tfCounter;
@@ -359,8 +404,6 @@ std::unique_ptr<MessagesPerRoute> SubTimeFrameFileReader::read(fair::mq::Device*
       sRunNumber = stfHeader.runNumber;
       sFirstTForbit = stfHeader.firstOrbit;
     }
-
-    const std::uint64_t lDataSize = locDataHeader.payloadSize;
     // do we accept these data?
     auto detOrigStatus = mDetOrigMap.find(locDataHeader.dataOrigin);
     if (detOrigStatus != mDetOrigMap.end() && !detOrigStatus->second) { // this is a detector data and we don't want to read it
@@ -398,14 +441,15 @@ std::unique_ptr<MessagesPerRoute> SubTimeFrameFileReader::read(fair::mq::Device*
     msgSW.Stop();
 #endif
     memcpy(lHdrStackMsg->GetData(), headerStack.data(), headerStack.size());
+    LOGP(debug, "read data, pos = {}, size = {} leftToRead {}", position(), lDataSize, lLeftToRead);
 
     if (!read_advance(lDataMsg->GetData(), lDataSize)) {
       return nullptr;
     }
-    if (verbosity > 0) {
-      if (verbosity > 1 || locDataHeader.splitPayloadIndex == 0) {
+    if (mVerbosity > 0) {
+      if (mVerbosity > 1 || locDataHeader.splitPayloadIndex == 0) {
         printStack(headerStack);
-        if (o2::raw::RDHUtils::checkRDH(lDataMsg->GetData()) && verbosity > 2) {
+        if (o2::raw::RDHUtils::checkRDH(lDataMsg->GetData()) && mVerbosity > 2) {
           o2::raw::RDHUtils::printRDH(lDataMsg->GetData());
         }
       }
@@ -413,6 +457,9 @@ std::unique_ptr<MessagesPerRoute> SubTimeFrameFileReader::read(fair::mq::Device*
 #ifdef _RUN_TIMING_MEASUREMENT_
     addPartSW.Start(false);
 #endif
+    if (mVerbosity > 2) {
+      LOGP(info, "addPart {} to {} | HdrSize:{} DataSize:{}", describeHeader(locDataHeader, true), fmqChannel, lHdrStackMsg->GetSize(), lDataMsg->GetSize());
+    }
     addPart(std::move(lHdrStackMsg), std::move(lDataMsg), fmqChannel);
 #ifdef _RUN_TIMING_MEASUREMENT_
     addPartSW.Stop();
@@ -434,7 +481,7 @@ std::unique_ptr<MessagesPerRoute> SubTimeFrameFileReader::read(fair::mq::Device*
   }
 
   unsigned stfSS[2] = {0, 0xccdb};
-  for (int iss = 0; iss < (sup0xccdb ? 1 : 2); iss++) {
+  for (int iss = 0; iss < (mSup0xccdb ? 1 : 2); iss++) {
     o2::header::DataHeader stfDistDataHeader(o2::header::gDataDescriptionDISTSTF, o2::header::gDataOriginFLP, stfSS[iss], sizeof(STFHeader), 0, 1);
     stfDistDataHeader.payloadSerializationMethod = o2::header::gSerializationMethodNone;
     stfDistDataHeader.firstTForbit = stfHeader.firstOrbit;
@@ -444,7 +491,7 @@ std::unique_ptr<MessagesPerRoute> SubTimeFrameFileReader::read(fair::mq::Device*
     if (!fmqChannel.empty()) { // no output channel
       auto fmqFactory = device->GetChannel(fmqChannel, 0).Transport();
       o2::header::Stack headerStackSTF{stfDistDataHeader, o2f::DataProcessingHeader{tfID, 1, lStfFileMeta.mWriteTimeMs}};
-      if (verbosity > 0) {
+      if (mVerbosity > 0) {
         printStack(headerStackSTF);
       }
       auto hdMessageSTF = fmqFactory->CreateMessage(headerStackSTF.size(), fair::mq::Alignment{64});
@@ -454,6 +501,9 @@ std::unique_ptr<MessagesPerRoute> SubTimeFrameFileReader::read(fair::mq::Device*
 #ifdef _RUN_TIMING_MEASUREMENT_
       addPartSW.Start(false);
 #endif
+      if (mVerbosity > 2) {
+        LOGP(info, "addPart forced {} to {} | HdrSize:{} DataSize:{}", describeHeader(stfDistDataHeader, true), fmqChannel, hdMessageSTF->GetSize(), plMessageSTF->GetSize());
+      }
       addPart(std::move(hdMessageSTF), std::move(plMessageSTF), fmqChannel);
 #ifdef _RUN_TIMING_MEASUREMENT_
       addPartSW.Stop();
diff --git a/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx b/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx
index 919e76083f595..e9c37933a2e1c 100644
--- a/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx
+++ b/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx
@@ -118,6 +118,9 @@ void TFReaderSpec::init(o2f::InitContext& ic)
   mInput.maxTFsPerFile = mInput.maxTFsPerFile > 0 ? mInput.maxTFsPerFile : 0x7fffffff;
   mInput.maxTFCache = std::max(1, ic.options().get<int>("max-cached-tf"));
   mInput.maxFileCache = std::max(1, ic.options().get<int>("max-cached-files"));
+  mInput.repairHeaders = !ic.options().get<bool>("ignore-repair-headers");
+  mInput.rejectDistSTF = !ic.options().get<bool>("read-dist-stf");
+
   if (!mInput.fileRunTimeSpans.empty()) {
     loadRunTimeSpans(mInput.fileRunTimeSpans);
   }
@@ -263,7 +266,11 @@ void TFReaderSpec::run(o2f::ProcessingContext& ctx)
       setTimingInfo(*tfPtr.get());
       size_t nparts = 0, dataSize = 0;
       if (mInput.sendDummyForMissing) {
+        int cntAck = 0;
         for (auto& msgIt : *tfPtr.get()) { // complete with empty output for the specs which were requested but were not seen in the data
+          if (mInput.verbosity > 0) {
+            LOGP(info, "acknowledgeOutput {}", cntAck++);
+          }
           acknowledgeOutput(*msgIt.second.get(), true);
         }
         addMissingParts(*tfPtr.get());
@@ -409,7 +416,7 @@ void TFReaderSpec::TFBuilder()
     }
 
     LOG(info) << "Processing file " << tfFileName;
-    SubTimeFrameFileReader reader(tfFileName, mInput.detMask);
+    SubTimeFrameFileReader reader(tfFileName, mInput.detMask, mInput.verbosity, mInput.sup0xccdb, mInput.repairHeaders, mInput.rejectDistSTF);
     size_t locID = 0;
     // try
     {
@@ -421,7 +428,7 @@ void TFReaderSpec::TFBuilder()
           std::this_thread::sleep_for(sleepTime);
           continue;
         }
-        auto tf = reader.read(mDevice, mOutputRoutes, mInput.rawChannelConfig, mAccTFCounter, mInput.sup0xccdb, mInput.verbosity);
+        auto tf = reader.read(mDevice, mOutputRoutes, mInput.rawChannelConfig, mAccTFCounter);
         bool acceptTF = true;
         if (tf) {
           if (mRunTimeRanges.size()) {
@@ -610,6 +617,10 @@ o2f::DataProcessorSpec o2::rawdd::getTFReaderSpec(o2::rawdd::TFReaderInp& rinp)
           spec.outputs.emplace_back(o2f::OutputSpec{o2f::OutputSpec{DetID::getDataOrigin(DetID::CTP), "LUMI", 0}});
           rinp.hdVec.emplace_back(o2h::DataHeader{"LUMI", DetID::getDataOrigin(DetID::CTP), 0, 0}); // in abcence of real data this will be sent
         }
+        if (id == DetID::TPC) {
+          spec.outputs.emplace_back(o2f::OutputSpec{o2f::ConcreteDataTypeMatcher{DetID::getDataOrigin(id), "CMVTRIGGER"}});
+          rinp.hdVec.emplace_back("CMVTRIGGER", DetID::getDataOrigin(id), 0xDEADBEEF, 0); // in abcence of real data this will be sent
+        }
         if (id == DetID::TOF) {
           spec.outputs.emplace_back(o2f::OutputSpec{o2f::ConcreteDataTypeMatcher{DetID::getDataOrigin(DetID::TOF), "CRAWDATA"}});
           rinp.hdVec.emplace_back(o2h::DataHeader{"CRAWDATA", DetID::getDataOrigin(DetID::TOF), 0xDEADBEEF, 0}); // in abcence of real data this will be sent
@@ -675,6 +686,8 @@ o2f::DataProcessorSpec o2::rawdd::getTFReaderSpec(o2::rawdd::TFReaderInp& rinp)
   }
   spec.options.emplace_back(o2f::ConfigParamSpec{"select-tf-ids", o2f::VariantType::String, "", {"comma-separated list TF IDs to inject (from cumulative counter of TFs seen)"}});
   spec.options.emplace_back(o2f::ConfigParamSpec{"fetch-failure-threshold", o2f::VariantType::Float, 0.f, {"Fatil if too many failures( >0: fraction, <0: abs number, 0: no threshold)"}});
+  spec.options.emplace_back(o2f::ConfigParamSpec{"ignore-repair-headers", o2f::VariantType::Bool, false, {"do not check/repair headers"}});
+  spec.options.emplace_back(o2f::ConfigParamSpec{"read-dist-stf", o2f::VariantType::Bool, false, {"do not ignore stored FLP/DISTSUBTIMEFRAME (will clash with injected one)"}});
   spec.options.emplace_back(o2f::ConfigParamSpec{"max-tf", o2f::VariantType::Int, -1, {"max TF ID to process (<= 0 : infinite)"}});
   spec.options.emplace_back(o2f::ConfigParamSpec{"max-tf-per-file", o2f::VariantType::Int, -1, {"max TFs to process per raw-tf file (<= 0 : infinite)"}});
   spec.options.emplace_back(o2f::ConfigParamSpec{"max-cached-tf", o2f::VariantType::Int, 3, {"max TFs to cache in memory"}});
diff --git a/Detectors/Raw/TFReaderDD/src/TFReaderSpec.h b/Detectors/Raw/TFReaderDD/src/TFReaderSpec.h
index 2c1c62ecbb414..6ecce0d032c06 100644
--- a/Detectors/Raw/TFReaderDD/src/TFReaderSpec.h
+++ b/Detectors/Raw/TFReaderDD/src/TFReaderSpec.h
@@ -49,6 +49,8 @@ struct TFReaderInp {
   bool sendDummyForMissing = true;
   bool sup0xccdb = false;
   bool invertIRFramesSelection = false;
+  bool repairHeaders = true;
+  bool rejectDistSTF = true;
   std::vector<o2::header::DataHeader> hdVec;
   std::vector<int> tfIDs{};
 };
diff --git a/Detectors/Raw/TFReaderDD/src/tf-data-dump-workflow.cxx b/Detectors/Raw/TFReaderDD/src/tf-data-dump-workflow.cxx
new file mode 100644
index 0000000000000..fbade100d202f
--- /dev/null
+++ b/Detectors/Raw/TFReaderDD/src/tf-data-dump-workflow.cxx
@@ -0,0 +1,46 @@
+// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+#include "CommonUtils/ConfigurableParam.h"
+#include "Framework/ConfigParamRegistry.h"
+#include "Framework/CompletionPolicy.h"
+#include "Framework/CompletionPolicyHelpers.h"
+
+using namespace o2::framework;
+
+void customize(std::vector<ConfigParamSpec>& workflowOptions)
+{
+  std::vector<ConfigParamSpec> options;
+  options.push_back(ConfigParamSpec{"dataspec", VariantType::String, "tst:TST/A", {"selection string for the data to be proxied"}});
+  options.push_back(ConfigParamSpec{"triggerspec", VariantType::String, "", {"selection string for the trigger input (must be also in dataspec if non-empty)"}});
+  options.push_back(ConfigParamSpec{"configKeyValues", VariantType::String, "", {"semicolon separated key=value strings"}});
+  std::swap(workflowOptions, options);
+}
+
+void customize(std::vector<CompletionPolicy>& policies)
+{
+  policies.push_back({CompletionPolicyHelpers::consumeWhenPastOldestPossibleTimeframe("raw-tf-dump", [](auto const&) -> bool { return true; })});
+  // policies.push_back({CompletionPolicyHelpers::consumeWhenAllOrdered("raw-tf-dump", [](auto const&) -> bool { return true; })}); // RSTOREM
+}
+
+// ------------------------------------------------------------------
+
+#include "Framework/runDataProcessing.h"
+#include "RawTFDumpSpec.h"
+
+WorkflowSpec defineDataProcessing(ConfigContext const& configcontext)
+{
+  o2::conf::ConfigurableParam::updateFromString(configcontext.options().get<std::string>("configKeyValues"));
+  auto inpconfig = configcontext.options().get<std::string>("dataspec");
+  auto trigger = configcontext.options().get<std::string>("triggerspec");
+  WorkflowSpec specs{o2::rawdd::getRawTFDumpSpec(inpconfig, trigger)};
+  return specs;
+}
diff --git a/Detectors/Raw/TFReaderDD/src/tf-reader-workflow.cxx b/Detectors/Raw/TFReaderDD/src/tf-reader-workflow.cxx
index b424353531de7..a29b4dadfdb25 100644
--- a/Detectors/Raw/TFReaderDD/src/tf-reader-workflow.cxx
+++ b/Detectors/Raw/TFReaderDD/src/tf-reader-workflow.cxx
@@ -34,7 +34,7 @@ void customize(std::vector<ConfigParamSpec>& workflowOptions)
   options.push_back(ConfigParamSpec{"copy-dir", VariantType::String, "/tmp/", {"copy base directory for remote files"}});
   options.push_back(ConfigParamSpec{"tf-file-regex", VariantType::String, ".+\\.tf$", {"regex string to identify TF files"}});
   options.push_back(ConfigParamSpec{"remote-regex", VariantType::String, "^(alien://|)/alice/data/.+", {"regex string to identify remote files"}}); // Use "^/eos/aliceo2/.+" for direct EOS access
-  options.push_back(ConfigParamSpec{"tf-reader-verbosity", VariantType::Int, 0, {"verbosity level (1 or 2: check RDH, print DH/DPH for 1st or all slices, >2 print RDH)"}});
+  options.push_back(ConfigParamSpec{"tf-reader-verbosity", VariantType::Int, 0, {"verbosity level (1 or 2: check RDH, print DH/DPH for 1st or all slices, >2 print RDH), report repairs"}});
   options.push_back(ConfigParamSpec{"raw-channel-config", VariantType::String, "", {"optional raw FMQ channel for non-DPL output"}});
   options.push_back(ConfigParamSpec{"send-diststf-0xccdb", VariantType::Bool, false, {"send explicit FLP/DISTSUBTIMEFRAME/0xccdb output"}});
   options.push_back(ConfigParamSpec{"disable-dummy-output", VariantType::Bool, false, {"Disable sending empty output if corresponding data is not found in the data"}});
diff --git a/Detectors/TPC/calibration/CMakeLists.txt b/Detectors/TPC/calibration/CMakeLists.txt
index 675f15e89258b..6aeb497c1cf23 100644
--- a/Detectors/TPC/calibration/CMakeLists.txt
+++ b/Detectors/TPC/calibration/CMakeLists.txt
@@ -60,6 +60,7 @@ o2_add_library(TPCCalibration
                        src/PressureTemperatureHelper.cxx
                        src/CMVContainer.cxx
                        src/CorrectionMapsLoader.cxx
+                       src/CMVHelper.cxx
                PUBLIC_LINK_LIBRARIES O2::DataFormatsTPC O2::TPCBaseRecSim
                                      O2::TPCReconstruction ROOT::Minuit
                                      Microsoft.GSL::GSL
@@ -119,7 +120,8 @@ o2_target_root_dictionary(TPCCalibration
                                   include/TPCCalibration/CorrectdEdxDistortions.h
                                   include/TPCCalibration/PressureTemperatureHelper.h
                                   include/TPCCalibration/CMVContainer.h
-                                  include/TPCCalibration/CorrectionMapsLoader.h)
+                                  include/TPCCalibration/CorrectionMapsLoader.h
+                                  include/TPCCalibration/CMVHelper.h)
 
 o2_add_test_root_macro(macro/comparePedestalsAndNoise.C
                        PUBLIC_LINK_LIBRARIES O2::TPCBaseRecSim
diff --git a/Detectors/TPC/calibration/include/TPCCalibration/CMVHelper.h b/Detectors/TPC/calibration/include/TPCCalibration/CMVHelper.h
new file mode 100644
index 0000000000000..d687c6872b8df
--- /dev/null
+++ b/Detectors/TPC/calibration/include/TPCCalibration/CMVHelper.h
@@ -0,0 +1,52 @@
+// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+/// @file   CMVHelper.h
+/// @author Tuba Gündem, tuba.gundem@cern.ch
+/// @brief  Helper utilities for reading CMV ROOT files
+
+#ifndef ALICEO2_TPC_CMVHELPER_H_
+#define ALICEO2_TPC_CMVHELPER_H_
+
+#include <string>
+
+#include "TFile.h"
+#include "TTree.h"
+
+namespace o2::tpc
+{
+
+struct CMVPerTF;
+struct CMVPerTFCompressed;
+
+struct CMVFileHandle {
+  TFile* file{nullptr};
+  TTree* tree{nullptr};
+  bool isCompressed{false};
+  CMVPerTFCompressed* tfCompressed{nullptr};
+  CMVPerTF* tfRaw{nullptr};
+  CMVPerTF* tfDecoded{nullptr}; ///< scratch buffer used when decompressing
+  long firstTFInTree{-1};       ///< first global TF index from tree UserInfo ("firstTF"); -1 if absent
+  long lastTFInTree{-1};        ///< last  global TF index from tree UserInfo ("lastTF");  -1 if absent
+
+  /// Open path and set up branch addresses. Returns false on any error
+  bool open(const std::string& path);
+
+  /// Load entry iEntry and return a pointer to the decoded CMVPerTF, or nullptr on error
+  const CMVPerTF* getEntry(long long iEntry);
+
+  /// Release all resources
+  void close();
+};
+
+} // namespace o2::tpc
+
+#endif // ALICEO2_TPC_CMVHELPER_H_
diff --git a/Detectors/TPC/calibration/macro/drawCMV.C b/Detectors/TPC/calibration/macro/drawCMV.C
index 4f74db16cecd6..78e951fcfd676 100644
--- a/Detectors/TPC/calibration/macro/drawCMV.C
+++ b/Detectors/TPC/calibration/macro/drawCMV.C
@@ -11,19 +11,19 @@
 
 #if !defined(__CLING__) || defined(__ROOTCLING__)
 #include <string>
-#include <vector>
 #include <string_view>
+#include <vector>
 #include <fmt/format.h>
 
-#include "TFile.h"
-#include "TParameter.h"
 #include "TTree.h"
 #include "TH1F.h"
 #include "TH2F.h"
 #include "TCanvas.h"
 
-#include "TPCCalibration/CMVContainer.h"
 #include "TPCBase/Utils.h"
+#include "TPCCalibration/CMVContainer.h"
+#include "TPCCalibration/CMVHelper.h"
+
 #endif
 
 using namespace o2::tpc;
@@ -38,39 +38,20 @@ TObjArray* drawCMV(std::string_view filename, std::string_view outDir, std::stri
   arrCanvases->SetName("CMV");
 
   // open file
-  TFile f(filename.data(), "READ");
-  if (f.IsZombie()) {
+  CMVFileHandle fh;
+  if (!fh.open(std::string(filename))) {
     fmt::print("ERROR: cannot open '{}'\n", filename);
     return arrCanvases;
   }
   fmt::print("Opened file: {}\n", filename);
+  fmt::print("Tree 'ccdb_object' found, entries: {}\n", fh.tree->GetEntries());
 
-  // get TTree
-  TTree* tree = nullptr;
-  f.GetObject("ccdb_object", tree);
-  if (!tree) {
-    fmt::print("ERROR: TTree 'ccdb_object' not found\n");
-    return arrCanvases;
-  }
-  fmt::print("Tree 'ccdb_object' found, entries: {}\n", tree->GetEntries());
-
-  // read metadata
-  long firstTF = -1, lastTF = -1;
-  if (auto* userInfo = tree->GetUserInfo()) {
-    for (int i = 0; i < userInfo->GetSize(); ++i) {
-      if (auto* p = dynamic_cast<TParameter<long>*>(userInfo->At(i))) {
-        if (std::string(p->GetName()) == "firstTF")
-          firstTF = p->GetVal();
-        if (std::string(p->GetName()) == "lastTF")
-          lastTF = p->GetVal();
-      }
-    }
-  }
-  fmt::print("firstTF: {}, lastTF: {}\n", firstTF, lastTF);
+  fmt::print("firstTF: {}, lastTF: {}\n", fh.firstTFInTree, fh.lastTFInTree);
 
-  const int nEntries = tree->GetEntries();
+  const int nEntries = fh.tree->GetEntries();
   if (nEntries == 0) {
     fmt::print("ERROR: no entries in tree\n");
+    fh.close();
     return arrCanvases;
   }
 
@@ -80,61 +61,62 @@ TObjArray* drawCMV(std::string_view filename, std::string_view outDir, std::stri
   TH2F* h2d = new TH2F("hCMVvsTimeBin", ";Timebin (200 ns);Common Mode Values (ADC)",
                        100, 0, nTimeBins,
                        110, -100.5, 9.5);
+  h2d->SetDirectory(nullptr);
   h2d->SetStats(1);
   TH1F* h1d = new TH1F("hCMV", ";Common Mode Values (ADC);Counts",
                        110, -100.5, 9.5);
+  h1d->SetDirectory(nullptr);
   h1d->SetStats(1);
 
-  // auto-detect branch format: compressed or raw
-  const bool isCompressed = (tree->GetBranch("CMVPerTFCompressed") != nullptr);
-  const bool isRaw = (tree->GetBranch("CMVPerTF") != nullptr);
-  if (!isCompressed && !isRaw) {
-    fmt::print("ERROR: no recognised branch found (expected 'CMVPerTFCompressed' or 'CMVPerTF')\n");
-    return arrCanvases;
-  }
-  fmt::print("Branch format: {}\n", isCompressed ? "CMVPerTFCompressed" : "CMVPerTF (raw)");
-
-  o2::tpc::CMVPerTFCompressed* tfCompressed = nullptr;
-  o2::tpc::CMVPerTF* tfRaw = nullptr;
-  CMVPerTF* tfDecoded = isCompressed ? new CMVPerTF() : nullptr;
+  TH1F* h1dCRU = new TH1F("hCRU", ";CRU;Counts",
+                          360, -0.5, 359.5);
+  h1dCRU->SetDirectory(nullptr);
+  h1dCRU->SetStats(1);
+  TH2F* h2dCRU = new TH2F("hCMVvsCRU", ";CRU;Common Mode Values (ADC)",
+                          360, -0.5, 359.5,
+                          110, -100.5, 9.5);
+  h2dCRU->SetDirectory(nullptr);
+  h2dCRU->SetStats(0);
 
-  if (isCompressed) {
-    tree->SetBranchAddress("CMVPerTFCompressed", &tfCompressed);
-  } else {
-    tree->SetBranchAddress("CMVPerTF", &tfRaw);
-  }
+  fmt::print("Branch format: {}\n", fh.isCompressed ? "CMVPerTFCompressed" : "CMVPerTF (raw)");
 
   long firstOrbit = -1;
   long firstOrbitDPL = -1;
 
+  // Pre-allocate fill arrays once; x-values (timebins) are constant across entries and CRUs
+  const int fillsPerEntry = nCRUs * nTimeBins;
+  std::vector<double> xArr(fillsPerEntry), yArr(fillsPerEntry), wArr(fillsPerEntry, 1.0), cruArr(fillsPerEntry);
+  for (int cru = 0; cru < nCRUs; ++cru) {
+    for (int tb = 0; tb < nTimeBins; ++tb) {
+      xArr[cru * nTimeBins + tb] = tb;
+      cruArr[cru * nTimeBins + tb] = cru;
+    }
+  }
+
   for (int i = 0; i < nEntries; ++i) {
-    tree->GetEntry(i);
-
-    // Decompress if needed; resolve to a unified CMVPerTF pointer
-    const CMVPerTF* tf = nullptr;
-    if (isCompressed) {
-      tfCompressed->decompress(tfDecoded);
-      tf = tfDecoded;
-    } else {
-      tf = tfRaw;
+    const CMVPerTF* tf = fh.getEntry(i);
+    if (!tf) {
+      continue;
     }
 
     firstOrbit = tf->firstOrbit;
     firstOrbitDPL = tf->firstOrbitDPL;
-    fmt::print("firstOrbit: {}, firstOrbitDPL: {}\n", firstOrbit, firstOrbitDPL);
+
+    fmt::print("Entry {}: firstOrbit: {}, firstOrbitDPL: {}\n", i, firstOrbit, firstOrbitDPL);
 
     for (int cru = 0; cru < nCRUs; ++cru) {
       for (int tb = 0; tb < nTimeBins; ++tb) {
-        const float cmvValue = tf->getCMVFloat(cru, tb);
-        h2d->Fill(tb, cmvValue);
-        h1d->Fill(cmvValue);
+        yArr[cru * nTimeBins + tb] = tf->getCMVFloat(cru, tb);
+        // fmt::print("Entry {}: cru: {}, tb: {}, cmv: {}\n", i, cru, tb, tf->getCMVFloat(cru, tb));
       }
     }
+    h2d->FillN(fillsPerEntry, xArr.data(), yArr.data(), wArr.data());
+    h1d->FillN(fillsPerEntry, yArr.data(), wArr.data());
+    h2dCRU->FillN(fillsPerEntry, cruArr.data(), yArr.data(), wArr.data());
+    h1dCRU->FillN(fillsPerEntry, cruArr.data(), wArr.data());
   }
 
-  delete tfDecoded;
-  tree->ResetBranchAddresses();
-  delete tfCompressed;
+  fh.close();
 
   // draw
   auto* c = new TCanvas("cCMVvsTimeBin", "");
@@ -149,10 +131,20 @@ TObjArray* drawCMV(std::string_view filename, std::string_view outDir, std::stri
 
   arrCanvases->Add(c1);
 
+  auto* c2 = new TCanvas("cCRUDistribution", "");
+  h1dCRU->Draw();
+
+  arrCanvases->Add(c2);
+
+  auto* c3 = new TCanvas("cCMVvsCRU", "");
+  c3->SetLogz();
+  h2dCRU->Draw("colz");
+
+  arrCanvases->Add(c3);
+
   if (outDir.size()) {
     utils::saveCanvases(*arrCanvases, outDir, "", rootFileName);
   }
 
-  f.Close();
   return arrCanvases;
 }
diff --git a/Detectors/TPC/calibration/src/CMVHelper.cxx b/Detectors/TPC/calibration/src/CMVHelper.cxx
new file mode 100644
index 0000000000000..abcbd977a9acb
--- /dev/null
+++ b/Detectors/TPC/calibration/src/CMVHelper.cxx
@@ -0,0 +1,98 @@
+// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+/// @file   CMVHelper.cxx
+/// @author Tuba Gündem, tuba.gundem@cern.ch
+/// @brief  Helper utilities for reading CMV ROOT files
+
+#include "TPCCalibration/CMVHelper.h"
+
+#include <iostream>
+
+#include "TPCCalibration/CMVContainer.h"
+#include "TParameter.h"
+
+namespace o2::tpc
+{
+
+bool CMVFileHandle::open(const std::string& path)
+{
+  file = TFile::Open(path.c_str());
+  if (!file || file->IsZombie()) {
+    std::cerr << "CMVFileHandle: failed to open: " << path << "\n";
+    return false;
+  }
+  file->GetObject("ccdb_object", tree);
+  if (!tree) {
+    std::cerr << "CMVFileHandle: TTree 'ccdb_object' not found in: " << path << "\n";
+    close();
+    return false;
+  }
+
+  // Extract firstTF / lastTF from UserInfo if stored by the aggregation workflow
+  if (auto* ui = tree->GetUserInfo()) {
+    if (auto* p = dynamic_cast<TParameter<long>*>(ui->FindObject("firstTF"))) {
+      firstTFInTree = p->GetVal();
+    }
+    if (auto* p = dynamic_cast<TParameter<long>*>(ui->FindObject("lastTF"))) {
+      lastTFInTree = p->GetVal();
+    }
+  }
+
+  isCompressed = (tree->GetBranch("CMVPerTFCompressed") != nullptr);
+  const bool isRaw = (tree->GetBranch("CMVPerTF") != nullptr);
+  if (!isCompressed && !isRaw) {
+    std::cerr << "CMVFileHandle: no recognised branch (CMVPerTFCompressed / CMVPerTF) in: "
+              << path << "\n";
+    close();
+    return false;
+  }
+
+  if (isCompressed) {
+    tree->SetBranchAddress("CMVPerTFCompressed", &tfCompressed);
+    tfDecoded = new CMVPerTF();
+  } else {
+    tree->SetBranchAddress("CMVPerTF", &tfRaw);
+  }
+  return true;
+}
+
+const CMVPerTF* CMVFileHandle::getEntry(long long iEntry)
+{
+  tree->GetEntry(iEntry);
+  if (isCompressed) {
+    if (!tfCompressed) {
+      return nullptr;
+    }
+    tfCompressed->decompress(tfDecoded);
+    return tfDecoded;
+  }
+  return tfRaw;
+}
+
+void CMVFileHandle::close()
+{
+  if (tree) {
+    tree->ResetBranchAddresses();
+    tree = nullptr;
+  }
+  tfCompressed = nullptr;
+  tfRaw = nullptr;
+  delete tfDecoded;
+  tfDecoded = nullptr;
+  if (file) {
+    file->Close();
+    delete file;
+    file = nullptr;
+  }
+}
+
+} // namespace o2::tpc
diff --git a/Detectors/TPC/calibration/src/CorrectionMapsLoader.cxx b/Detectors/TPC/calibration/src/CorrectionMapsLoader.cxx
index 9569e0eb8abd2..c8bdfa0f99350 100644
--- a/Detectors/TPC/calibration/src/CorrectionMapsLoader.cxx
+++ b/Detectors/TPC/calibration/src/CorrectionMapsLoader.cxx
@@ -28,8 +28,13 @@ using namespace o2::framework;
 void CorrectionMapsLoader::extractCCDBInputs(ProcessingContext& pc, float tpcScaler)
 {
   pc.inputs().get<o2::tpc::CorrMapParam*>("tpcCorrPar");
-  pc.inputs().get<o2::gpu::TPCFastTransform*>("tpcCorrMap");
-  pc.inputs().get<o2::gpu::TPCFastTransform*>("tpcCorrMapRef");
+  const auto lumiMode = getLumiScaleMode();
+  if (lumiMode != LumiScaleMode::NoCorrection && lumiMode != LumiScaleMode::StaticMapOnly) {
+    pc.inputs().get<o2::gpu::TPCFastTransform*>("tpcCorrMap");
+  }
+  if (lumiMode != LumiScaleMode::NoCorrection) {
+    pc.inputs().get<o2::gpu::TPCFastTransform*>("tpcCorrMapRef");
+  }
   const int maxDumRep = 5;
   int dumRep = 0;
   o2::ctp::LumiInfo lumiObj;
@@ -97,6 +102,10 @@ void CorrectionMapsLoader::requestCCDBInputs(std::vector<InputSpec>& inputs, con
     // for MC corrections
     addInput(inputs, {"tpcCorrMap", "TPC", "CorrMap", 0, Lifetime::Condition, ccdbParamSpec(CDBTypeMap.at(CDBType::CalCorrMapMC), {}, 1)});            // time-dependent
     addInput(inputs, {"tpcCorrMapRef", "TPC", "CorrMapRef", 0, Lifetime::Condition, ccdbParamSpec(CDBTypeMap.at(CDBType::CalCorrDerivMapMC), {}, 1)}); // time-dependent
+  } else if (gloOpts.lumiMode == LumiScaleMode::NoCorrection) {
+    // no correction maps needed — a dummy map is created at runtime
+  } else if (gloOpts.lumiMode == LumiScaleMode::StaticMapOnly) {
+    addInput(inputs, {"tpcCorrMapRef", "TPC", "CorrMapRef", 0, Lifetime::Condition, ccdbParamSpec(CDBTypeMap.at(CDBType::CalCorrMapRef), {}, 0)}); // load once
   } else {
     LOG(fatal) << "Correction mode unknown! Choose either 0 (default) or 1 (derivative map) for flag corrmap-lumi-mode.";
   }
diff --git a/Detectors/TPC/calibration/src/CorrectionMapsOptions.cxx b/Detectors/TPC/calibration/src/CorrectionMapsOptions.cxx
index 604b7c680385b..45c3771db57bf 100644
--- a/Detectors/TPC/calibration/src/CorrectionMapsOptions.cxx
+++ b/Detectors/TPC/calibration/src/CorrectionMapsOptions.cxx
@@ -21,13 +21,13 @@ CorrectionMapsGloOpts CorrectionMapsOptions::parseGlobalOptions(const o2::framew
 {
   CorrectionMapsGloOpts tpcopt;
   auto lumiTypeVal = opts.get<int>("lumi-type");
-  if (lumiTypeVal < -1 || lumiTypeVal > 2) {
+  if (lumiTypeVal < static_cast<int>(LumiScaleType::Unset) || lumiTypeVal >= static_cast<int>(LumiScaleType::Count)) {
     LOGP(fatal, "Invalid lumi-type value: {}", lumiTypeVal);
   }
   tpcopt.lumiType = static_cast<LumiScaleType>(lumiTypeVal);
 
   auto lumiModeVal = opts.get<int>("corrmap-lumi-mode");
-  if (lumiModeVal < -1 || lumiModeVal > 2) {
+  if (lumiModeVal < static_cast<int>(LumiScaleMode::Unset) || lumiModeVal >= static_cast<int>(LumiScaleMode::Count)) {
     LOGP(fatal, "Invalid corrmap-lumi-mode value: {}", lumiModeVal);
   }
   tpcopt.lumiMode = static_cast<LumiScaleMode>(lumiModeVal);
diff --git a/Detectors/TPC/calibration/src/TPCCalibrationLinkDef.h b/Detectors/TPC/calibration/src/TPCCalibrationLinkDef.h
index 14d3d0a8ffb8e..847ae5ad7d788 100644
--- a/Detectors/TPC/calibration/src/TPCCalibrationLinkDef.h
+++ b/Detectors/TPC/calibration/src/TPCCalibrationLinkDef.h
@@ -124,6 +124,7 @@
 #pragma link C++ class std::vector < o2::tpc::DigitAdd> + ;
 #pragma link C++ class o2::tpc::PressureTemperatureHelper + ;
 
+#pragma link C++ struct o2::tpc::CMVFileHandle + ;
 #pragma link C++ class o2::tpc::CMVPerTF + ;
 #pragma link C++ class o2::tpc::CMVPerTFCompressed + ;
 
diff --git a/Detectors/TPC/workflow/CMakeLists.txt b/Detectors/TPC/workflow/CMakeLists.txt
index 37ac398db40ec..f64a223f683d8 100644
--- a/Detectors/TPC/workflow/CMakeLists.txt
+++ b/Detectors/TPC/workflow/CMakeLists.txt
@@ -309,4 +309,9 @@ o2_add_executable(cmv-aggregate
                   SOURCES src/tpc-aggregate-cmv.cxx
                   PUBLIC_LINK_LIBRARIES O2::TPCWorkflow)
 
+o2_add_executable(cmv-trigger
+                  COMPONENT_NAME tpc
+                  SOURCES test/test_cmv-trigger.cxx
+                  PUBLIC_LINK_LIBRARIES O2::TPCWorkflow)
+
 add_subdirectory(readers)
diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/CMVToVectorSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/CMVToVectorSpec.h
index add37af5706e5..2f9209ee07da8 100644
--- a/Detectors/TPC/workflow/include/TPCWorkflow/CMVToVectorSpec.h
+++ b/Detectors/TPC/workflow/include/TPCWorkflow/CMVToVectorSpec.h
@@ -23,7 +23,7 @@ namespace o2::tpc
 
 /// create a processor spec
 /// convert CMV raw values to a vector in a CRU
-o2::framework::DataProcessorSpec getCMVToVectorSpec(const std::string inputSpec, std::vector<uint32_t> const& crus);
+o2::framework::DataProcessorSpec getCMVToVectorSpec(std::string const& inputSpec, std::vector<uint32_t> const& crus);
 
 } // end namespace o2::tpc
 
diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/TPCAggregateCMVSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/TPCAggregateCMVSpec.h
index b46f2169f06c9..3383da527cccf 100644
--- a/Detectors/TPC/workflow/include/TPCWorkflow/TPCAggregateCMVSpec.h
+++ b/Detectors/TPC/workflow/include/TPCWorkflow/TPCAggregateCMVSpec.h
@@ -49,9 +49,6 @@
 #include "CommonUtils/StringUtils.h"
 #include "DetectorsCommonDataFormats/FileMetaData.h"
 
-using namespace o2::framework;
-using o2::header::gDataOriginTPC;
-
 namespace o2::tpc
 {
 
@@ -114,7 +111,7 @@ class TPCAggregateCMVDevice : public o2::framework::Task
     initIntervalTree();
   }
 
-  void finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) final
+  void finaliseCCDB(o2::framework::ConcreteDataMatcher& matcher, void* obj) final
   {
     o2::base::GRPGeomHelper::instance().finaliseCCDB(matcher, obj);
   }
@@ -136,7 +133,7 @@ class TPCAggregateCMVDevice : public o2::framework::Task
     }
 
     if (mSetDataTakingCont) {
-      mDataTakingContext = pc.services().get<DataTakingContext>();
+      mDataTakingContext = pc.services().get<o2::framework::DataTakingContext>();
       mSetDataTakingCont = false;
     }
 
@@ -147,7 +144,7 @@ class TPCAggregateCMVDevice : public o2::framework::Task
     const auto currTF = processing_helpers::getCurrentTF(pc);
 
     if (mTFFirst == -1) {
-      for (auto& ref : InputRecordWalker(pc.inputs(), mFirstTFFilter)) {
+      for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mFirstTFFilter)) {
         mTFFirst = pc.inputs().get<long>(ref);
         mIntervalFirstTF = mTFFirst;
         mHasIntervalFirstTF = true;
@@ -203,7 +200,7 @@ class TPCAggregateCMVDevice : public o2::framework::Task
     // Capture orbit info first so setTimestampCCDB can use the measured stride
     if (!mOrbitInfoSeen[relTF]) {
       // all CRUs within a batch carry identical timing, so the first one is sufficient
-      for (auto& ref : InputRecordWalker(pc.inputs(), mOrbitFilter)) {
+      for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mOrbitFilter)) {
         mOrbitInfo[relTF] = pc.inputs().get<uint64_t>(ref);
         const auto batchFirstOrbit = static_cast<uint32_t>(mOrbitInfo[relTF] >> 32);
         // TimingInfo.firstTForbit is the orbit of the last real TF in the batch (the TF that triggered the FLP to send).
@@ -222,8 +219,8 @@ class TPCAggregateCMVDevice : public o2::framework::Task
       setTimestampCCDB(relTF, mOrbitStep[relTF], pc);
     }
 
-    for (auto& ref : InputRecordWalker(pc.inputs(), mFilter)) {
-      auto const* hdr = DataRefUtils::getHeader<o2::header::DataHeader*>(ref);
+    for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mFilter)) {
+      auto const* hdr = o2::framework::DataRefUtils::getHeader<o2::header::DataHeader*>(ref);
       const unsigned int cru = hdr->subSpecification;
       if (!(std::binary_search(mCRUs.begin(), mCRUs.end(), cru))) {
         LOGP(debug, "Received CMV data from CRU {} which is not part of this aggregate lane", cru);
@@ -233,7 +230,7 @@ class TPCAggregateCMVDevice : public o2::framework::Task
         continue;
       }
 
-      auto cmvVec = pc.inputs().get<pmr::vector<uint16_t>>(ref);
+      auto cmvVec = pc.inputs().get<o2::pmr::vector<uint16_t>>(ref);
       mRawCMVs[relTF][cru] = std::vector<uint16_t>(cmvVec.begin(), cmvVec.end());
       mProcessedCRUs[relTF][cru] = true;
       ++mProcessedCRU[relTF];
@@ -257,7 +254,7 @@ class TPCAggregateCMVDevice : public o2::framework::Task
     materializeBufferedTFs(true);
     materializeEOSBuffer();
     sendOutput(ec.outputs());
-    ec.services().get<ControlService>().readyToQuit(QuitRequest::Me);
+    ec.services().get<o2::framework::ControlService>().readyToQuit(o2::framework::QuitRequest::Me);
   }
 
   static constexpr header::DataDescription getDataDescriptionCCDBCMV() { return header::DataDescription{"TPC_CMV"}; }
@@ -314,18 +311,18 @@ class TPCAggregateCMVDevice : public o2::framework::Task
   std::unique_ptr<TTree> mIntervalTree{};                                      ///< in-memory TTree accumulating one entry per real TF; serialised to CCDB/disk at interval end
   CMVPerTF mCurrentTF{};                                                       ///< staging object written to the TTree branch for the uncompressed path
   CMVPerTFCompressed mCurrentCompressedTF{};                                   ///< staging object written to the TTree branch when any compression flags are set
-  const std::vector<InputSpec> mFilter{
+  const std::vector<o2::framework::InputSpec> mFilter{
     {"cmvagg",
-     ConcreteDataTypeMatcher{gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMV(mLaneId)},
-     Lifetime::Sporadic}};
-  const std::vector<InputSpec> mOrbitFilter{
+     o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMV(mLaneId)},
+     o2::framework::Lifetime::Sporadic}};
+  const std::vector<o2::framework::InputSpec> mOrbitFilter{
     {"cmvorbit",
-     ConcreteDataMatcher{gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitInfo(mLaneId), header::DataHeader::SubSpecificationType{static_cast<unsigned int>(mLaneId)}},
-     Lifetime::Sporadic}};
-  const std::vector<InputSpec> mFirstTFFilter{
+     o2::framework::ConcreteDataMatcher{o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitInfo(mLaneId), header::DataHeader::SubSpecificationType{static_cast<unsigned int>(mLaneId)}},
+     o2::framework::Lifetime::Sporadic}};
+  const std::vector<o2::framework::InputSpec> mFirstTFFilter{
     {"firstTF",
-     ConcreteDataMatcher{gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{static_cast<unsigned int>(mLaneId)}},
-     Lifetime::Sporadic}};
+     o2::framework::ConcreteDataMatcher{o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{static_cast<unsigned int>(mLaneId)}},
+     o2::framework::Lifetime::Sporadic}};
 
   uint8_t buildCompressionFlags() const
   {
@@ -360,7 +357,7 @@ class TPCAggregateCMVDevice : public o2::framework::Task
   void collectEOSInputs(o2::framework::ProcessingContext& pc)
   {
     if (mEOSFirstOrbit == 0) {
-      for (auto& ref : InputRecordWalker(pc.inputs(), mOrbitFilter)) {
+      for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mOrbitFilter)) {
         const auto orbitBC = pc.inputs().get<uint64_t>(ref);
         mEOSFirstOrbit = static_cast<uint32_t>(orbitBC >> 32);
         mEOSFirstBC = static_cast<uint16_t>(orbitBC & 0xFFFFu);
@@ -368,13 +365,13 @@ class TPCAggregateCMVDevice : public o2::framework::Task
       }
     }
 
-    for (auto& ref : InputRecordWalker(pc.inputs(), mFilter)) {
-      auto const* hdr = DataRefUtils::getHeader<o2::header::DataHeader*>(ref);
+    for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mFilter)) {
+      auto const* hdr = o2::framework::DataRefUtils::getHeader<o2::header::DataHeader*>(ref);
       const unsigned int cru = hdr->subSpecification;
       if (!(std::binary_search(mCRUs.begin(), mCRUs.end(), cru))) {
         continue;
       }
-      auto cmvVec = pc.inputs().get<pmr::vector<uint16_t>>(ref);
+      auto cmvVec = pc.inputs().get<o2::pmr::vector<uint16_t>>(ref);
       auto& buffer = mEOSRawCMVs[cru];
       buffer.insert(buffer.end(), cmvVec.begin(), cmvVec.end());
     }
@@ -548,7 +545,7 @@ class TPCAggregateCMVDevice : public o2::framework::Task
     }
   }
 
-  void sendOutput(DataAllocator& output)
+  void sendOutput(o2::framework::DataAllocator& output)
   {
     using timer = std::chrono::high_resolution_clock;
 
@@ -619,8 +616,8 @@ class TPCAggregateCMVDevice : public o2::framework::Task
     }
 
     LOGP(info, "Sending object {} / {} of size {} bytes, valid for {} : {}", ccdbInfoCMV.getPath(), ccdbInfoCMV.getFileName(), image->size(), ccdbInfoCMV.getStartValidityTimestamp(), ccdbInfoCMV.getEndValidityTimestamp());
-    output.snapshot(Output{o2::calibration::Utils::gDataOriginCDBPayload, getDataDescriptionCCDBCMV(), 0}, *image);
-    output.snapshot(Output{o2::calibration::Utils::gDataOriginCDBWrapper, getDataDescriptionCCDBCMV(), 0}, ccdbInfoCMV);
+    output.snapshot(o2::framework::Output{o2::calibration::Utils::gDataOriginCDBPayload, getDataDescriptionCCDBCMV(), 0}, *image);
+    output.snapshot(o2::framework::Output{o2::calibration::Utils::gDataOriginCDBWrapper, getDataDescriptionCCDBCMV(), 0}, ccdbInfoCMV);
 
     auto stop = timer::now();
     std::chrono::duration<float> elapsed = stop - start;
@@ -666,25 +663,25 @@ class TPCAggregateCMVDevice : public o2::framework::Task
 
 /// Build a DataProcessorSpec for one aggregate lane
 /// Each lane receives CMV data from one distribute output lane (matched by lane index) and expects the full CRU list — the distribute stage already routes per-CRU data to the correct lane
-inline DataProcessorSpec getTPCAggregateCMVSpec(const int lane,
-                                                const std::vector<uint32_t>& crus,
-                                                const unsigned int timeframes,
-                                                const bool sendCCDB,
-                                                const bool usePreciseTimestamp,
-                                                const int nTFsBuffer = 1)
+inline o2::framework::DataProcessorSpec getTPCAggregateCMVSpec(const int lane,
+                                                               const std::vector<uint32_t>& crus,
+                                                               const unsigned int timeframes,
+                                                               const bool sendCCDB,
+                                                               const bool usePreciseTimestamp,
+                                                               const int nTFsBuffer = 1)
 {
-  std::vector<OutputSpec> outputSpecs;
+  std::vector<o2::framework::OutputSpec> outputSpecs;
   if (sendCCDB) {
-    outputSpecs.emplace_back(ConcreteDataTypeMatcher{o2::calibration::Utils::gDataOriginCDBPayload, TPCAggregateCMVDevice::getDataDescriptionCCDBCMV()}, Lifetime::Sporadic);
-    outputSpecs.emplace_back(ConcreteDataTypeMatcher{o2::calibration::Utils::gDataOriginCDBWrapper, TPCAggregateCMVDevice::getDataDescriptionCCDBCMV()}, Lifetime::Sporadic);
+    outputSpecs.emplace_back(o2::framework::ConcreteDataTypeMatcher{o2::calibration::Utils::gDataOriginCDBPayload, TPCAggregateCMVDevice::getDataDescriptionCCDBCMV()}, o2::framework::Lifetime::Sporadic);
+    outputSpecs.emplace_back(o2::framework::ConcreteDataTypeMatcher{o2::calibration::Utils::gDataOriginCDBWrapper, TPCAggregateCMVDevice::getDataDescriptionCCDBCMV()}, o2::framework::Lifetime::Sporadic);
   }
 
-  std::vector<InputSpec> inputSpecs;
-  inputSpecs.emplace_back(InputSpec{"cmvagg", ConcreteDataTypeMatcher{gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMV(lane)}, Lifetime::Sporadic});
-  inputSpecs.emplace_back(InputSpec{"cmvorbit", gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitInfo(lane), header::DataHeader::SubSpecificationType{static_cast<unsigned int>(lane)}, Lifetime::Sporadic});
-  inputSpecs.emplace_back(InputSpec{"firstTF", gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{static_cast<unsigned int>(lane)}, Lifetime::Sporadic});
+  std::vector<o2::framework::InputSpec> inputSpecs;
+  inputSpecs.emplace_back(o2::framework::InputSpec{"cmvagg", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMV(lane)}, o2::framework::Lifetime::Sporadic});
+  inputSpecs.emplace_back(o2::framework::InputSpec{"cmvorbit", o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitInfo(lane), header::DataHeader::SubSpecificationType{static_cast<unsigned int>(lane)}, o2::framework::Lifetime::Sporadic});
+  inputSpecs.emplace_back(o2::framework::InputSpec{"firstTF", o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{static_cast<unsigned int>(lane)}, o2::framework::Lifetime::Sporadic});
   if (usePreciseTimestamp) {
-    inputSpecs.emplace_back(InputSpec{"orbitreset", gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitReset(), header::DataHeader::SubSpecificationType{static_cast<unsigned int>(lane)}, Lifetime::Sporadic});
+    inputSpecs.emplace_back(o2::framework::InputSpec{"orbitreset", o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitReset(), header::DataHeader::SubSpecificationType{static_cast<unsigned int>(lane)}, o2::framework::Lifetime::Sporadic});
   }
 
   // Request GRPECS from CCDB so that GRPGeomHelper::getNHBFPerTF() is valid in this (separate) process
@@ -696,21 +693,21 @@ inline DataProcessorSpec getTPCAggregateCMVSpec(const int lane,
                                                                 o2::base::GRPGeomRequest::None, // geometry
                                                                 inputSpecs);
 
-  DataProcessorSpec spec{
+  o2::framework::DataProcessorSpec spec{
     fmt::format("tpc-aggregate-cmv-{:02}", lane).data(),
     inputSpecs,
     outputSpecs,
-    AlgorithmSpec{adaptFromTask<TPCAggregateCMVDevice>(lane, crus, timeframes, sendCCDB, usePreciseTimestamp, nTFsBuffer, ccdbRequest)},
-    Options{{"output-dir", VariantType::String, "/dev/null", {"CMV output directory, must exist (if not /dev/null)"}},
-            {"meta-output-dir", VariantType::String, "/dev/null", {"calibration metadata output directory, must exist (if not /dev/null)"}},
-            {"nthreads-compression", VariantType::Int, 1, {"Number of threads used for CMV per timeframe preprocessing and compression"}},
-            {"use-sparse", VariantType::Bool, false, {"Sparse encoding (skip zero time bins). Alone: raw uint16 values. With --use-compression-varint: varint exact values. With --use-compression-huffman: Huffman exact values"}},
-            {"use-compression-varint", VariantType::Bool, false, {"Delta+zigzag+varint compression (all values). Combined with --use-sparse: sparse positions + varint encoded exact CMV values"}},
-            {"use-compression-huffman", VariantType::Bool, false, {"Huffman encoding. Combined with --use-sparse: sparse positions + Huffman-encoded exact CMV values"}},
-            {"cmv-zero-threshold", VariantType::Float, 0.f, {"Zero out CMV values whose float magnitude is below this threshold after optional integer rounding and before compression; 0 disables"}},
-            {"cmv-round-integers-threshold", VariantType::Int, 0, {"Round values to nearest integer ADC for |v| <= N ADC before compression; 0 disables"}},
-            {"cmv-dynamic-precision-mean", VariantType::Float, 1.f, {"Gaussian centre in |CMV| ADC where the strongest fractional bit trimming is applied"}},
-            {"cmv-dynamic-precision-sigma", VariantType::Float, 0.f, {"Gaussian width in ADC for smooth CMV fractional bit trimming; 0 disables"}}}};
+    o2::framework::AlgorithmSpec{o2::framework::adaptFromTask<TPCAggregateCMVDevice>(lane, crus, timeframes, sendCCDB, usePreciseTimestamp, nTFsBuffer, ccdbRequest)},
+    o2::framework::Options{{"output-dir", o2::framework::VariantType::String, "/dev/null", {"CMV output directory, must exist (if not /dev/null)"}},
+                           {"meta-output-dir", o2::framework::VariantType::String, "/dev/null", {"calibration metadata output directory, must exist (if not /dev/null)"}},
+                           {"nthreads-compression", o2::framework::VariantType::Int, 1, {"Number of threads used for CMV per timeframe preprocessing and compression"}},
+                           {"use-sparse", o2::framework::VariantType::Bool, false, {"Sparse encoding (skip zero time bins). Alone: raw uint16 values. With --use-compression-varint: varint exact values. With --use-compression-huffman: Huffman exact values"}},
+                           {"use-compression-varint", o2::framework::VariantType::Bool, false, {"Delta+zigzag+varint compression (all values). Combined with --use-sparse: sparse positions + varint encoded exact CMV values"}},
+                           {"use-compression-huffman", o2::framework::VariantType::Bool, false, {"Huffman encoding. Combined with --use-sparse: sparse positions + Huffman-encoded exact CMV values"}},
+                           {"cmv-zero-threshold", o2::framework::VariantType::Float, 0.f, {"Zero out CMV values whose float magnitude is below this threshold after optional integer rounding and before compression; 0 disables"}},
+                           {"cmv-round-integers-threshold", o2::framework::VariantType::Int, 0, {"Round values to nearest integer ADC for |v| <= N ADC before compression; 0 disables"}},
+                           {"cmv-dynamic-precision-mean", o2::framework::VariantType::Float, 1.f, {"Gaussian centre in |CMV| ADC where the strongest fractional bit trimming is applied"}},
+                           {"cmv-dynamic-precision-sigma", o2::framework::VariantType::Float, 0.f, {"Gaussian width in ADC for smooth CMV fractional bit trimming; 0 disables"}}}};
   spec.rank = lane;
   return spec;
 }
diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/TPCDistributeCMVSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/TPCDistributeCMVSpec.h
index f3373070ab7bb..af576b2f30a5b 100644
--- a/Detectors/TPC/workflow/include/TPCWorkflow/TPCDistributeCMVSpec.h
+++ b/Detectors/TPC/workflow/include/TPCWorkflow/TPCDistributeCMVSpec.h
@@ -36,10 +36,6 @@
 #include "DetectorsBase/GRPGeomHelper.h"
 #include "CommonDataFormat/Pair.h"
 
-using namespace o2::framework;
-using o2::header::gDataOriginTPC;
-using namespace o2::tpc;
-
 namespace o2::tpc
 {
 
@@ -78,8 +74,8 @@ class TPCDistributeCMVSpec : public o2::framework::Task
       }
     }
 
-    mFilter.emplace_back(InputSpec{"cmvsgroup", ConcreteDataTypeMatcher{gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVGroup()}, Lifetime::Sporadic});
-    mOrbitFilter.emplace_back(InputSpec{"cmvorbit", ConcreteDataTypeMatcher{gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVOrbitInfo()}, Lifetime::Sporadic});
+    mFilter.emplace_back(o2::framework::InputSpec{"cmvsgroup", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVGroup()}, o2::framework::Lifetime::Sporadic});
+    mOrbitFilter.emplace_back(o2::framework::InputSpec{"cmvorbit", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVOrbitInfo()}, o2::framework::Lifetime::Sporadic});
   }
 
   void init(o2::framework::InitContext& ic) final
@@ -97,13 +93,13 @@ class TPCDistributeCMVSpec : public o2::framework::Task
     }
   }
 
-  void finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) final
+  void finaliseCCDB(o2::framework::ConcreteDataMatcher& matcher, void* obj) final
   {
     o2::base::GRPGeomHelper::instance().finaliseCCDB(matcher, obj);
-    if (matcher == ConcreteDataMatcher("CTP", "ORBITRESET", 0)) {
+    if (matcher == o2::framework::ConcreteDataMatcher("CTP", "ORBITRESET", 0)) {
       LOGP(debug, "Updating ORBITRESET");
       std::fill(mSendCCDBOutputOrbitReset.begin(), mSendCCDBOutputOrbitReset.end(), true);
-    } else if (matcher == ConcreteDataMatcher("GLO", "GRPECS", 0)) {
+    } else if (matcher == o2::framework::ConcreteDataMatcher("GLO", "GRPECS", 0)) {
       // check if received object is valid
       if (o2::base::GRPGeomHelper::instance().getGRPECS()->getRun() != 0) {
         LOGP(debug, "Updating GRPECS");
@@ -175,18 +171,18 @@ class TPCDistributeCMVSpec : public o2::framework::Task
 
     if (mSendOutputStartInfo[currentBuffer]) {
       mSendOutputStartInfo[currentBuffer] = false;
-      pc.outputs().snapshot(Output{gDataOriginTPC, getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{currentOutLane}}, mTFStart[currentBuffer]);
+      pc.outputs().snapshot(o2::framework::Output{o2::header::gDataOriginTPC, getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{currentOutLane}}, mTFStart[currentBuffer]);
     }
 
     if (mSendCCDBOutputOrbitReset[currentOutLane] && mSendCCDBOutputGRPECS[currentOutLane]) {
       mSendCCDBOutputOrbitReset[currentOutLane] = false;
       mSendCCDBOutputGRPECS[currentOutLane] = false;
-      pc.outputs().snapshot(Output{gDataOriginTPC, getDataDescriptionCMVOrbitReset(), header::DataHeader::SubSpecificationType{currentOutLane}}, dataformats::Pair<long, int>{o2::base::GRPGeomHelper::instance().getOrbitResetTimeMS(), o2::base::GRPGeomHelper::instance().getNHBFPerTF()});
+      pc.outputs().snapshot(o2::framework::Output{o2::header::gDataOriginTPC, getDataDescriptionCMVOrbitReset(), header::DataHeader::SubSpecificationType{currentOutLane}}, dataformats::Pair<long, int>{o2::base::GRPGeomHelper::instance().getOrbitResetTimeMS(), o2::base::GRPGeomHelper::instance().getNHBFPerTF()});
     }
 
     forwardOrbitInfo(pc, currentBuffer, relTF, currentOutLane);
 
-    for (auto& ref : InputRecordWalker(pc.inputs(), mFilter)) {
+    for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mFilter)) {
       auto const* tpcCRUHeader = o2::framework::DataRefUtils::getHeader<o2::header::DataHeader*>(ref);
       const unsigned int cru = tpcCRUHeader->subSpecification >> 7;
 
@@ -204,7 +200,7 @@ class TPCDistributeCMVSpec : public o2::framework::Task
       // to keep track of processed CRUs
       mProcessedCRUs[currentBuffer][relTF][cru] = true;
 
-      sendOutput(pc, currentOutLane, cru, pc.inputs().get<pmr::vector<uint16_t>>(ref));
+      sendOutput(pc, currentOutLane, cru, pc.inputs().get<o2::pmr::vector<uint16_t>>(ref));
     }
 
     LOGP(detail, "Number of received CRUs for current TF: {} Needed a total number of processed CRUs of: {} Current TF: {}", mProcessedCRU[currentBuffer][relTF], mCRUs.size(), tf);
@@ -223,7 +219,7 @@ class TPCDistributeCMVSpec : public o2::framework::Task
     }
   }
 
-  void endOfStream(o2::framework::EndOfStreamContext& ec) final { ec.services().get<ControlService>().readyToQuit(QuitRequest::Me); }
+  void endOfStream(o2::framework::EndOfStreamContext& ec) final { ec.services().get<o2::framework::ControlService>().readyToQuit(o2::framework::QuitRequest::Me); }
 
   /// Return data description for aggregated CMVs for a given lane
   static header::DataDescription getDataDescriptionCMV(const unsigned int lane)
@@ -267,8 +263,8 @@ class TPCDistributeCMVSpec : public o2::framework::Task
   std::array<int, 2> mStartNTFsDataDrop{0};                                            ///< first relative TF index to check for missing data in each buffer
   long mProcessedTotalData{0};                                                         ///< call counter used to throttle checkIntervalsForMissingData checks
   int mCheckEveryNData{1};                                                             ///< check for missing data every N run() calls (0 → default = mTimeFrames/2)
-  std::vector<InputSpec> mFilter{};                                                    ///< filter for looping over CMVGROUP input data from FLPs
-  std::vector<InputSpec> mOrbitFilter{};                                               ///< filter for CMVORBITINFO input from FLPs
+  std::vector<o2::framework::InputSpec> mFilter{};                                     ///< filter for looping over CMVGROUP input data from FLPs
+  std::vector<o2::framework::InputSpec> mOrbitFilter{};                                ///< filter for CMVORBITINFO input from FLPs
   std::vector<header::DataDescription> mDataDescrOut{};                                ///< per-output-lane CMV data descriptions (CMVAGG0, CMVAGG1, …)
   std::vector<header::DataDescription> mOrbitDescrOut{};                               ///< per-output-lane orbit-info data descriptions (CMVORB0, CMVORB1, …)
   std::array<std::vector<bool>, 2> mOrbitInfoForwarded{};                              ///< tracks whether orbit/BC has been forwarded to the aggregate lane per (buffer, relTF)
@@ -280,12 +276,12 @@ class TPCDistributeCMVSpec : public o2::framework::Task
 
   void sendOutput(o2::framework::ProcessingContext& pc, const unsigned int currentOutLane, const unsigned int cru, o2::pmr::vector<uint16_t> cmvs)
   {
-    pc.outputs().adoptContainer(Output{gDataOriginTPC, mDataDescrOut[currentOutLane], header::DataHeader::SubSpecificationType{cru}}, std::move(cmvs));
+    pc.outputs().adoptContainer(o2::framework::Output{o2::header::gDataOriginTPC, mDataDescrOut[currentOutLane], header::DataHeader::SubSpecificationType{cru}}, std::move(cmvs));
   }
 
   void sendOrbitInfo(o2::framework::ProcessingContext& pc, const unsigned int outLane, const uint64_t orbitInfo)
   {
-    pc.outputs().snapshot(Output{gDataOriginTPC, mOrbitDescrOut[outLane], header::DataHeader::SubSpecificationType{outLane}}, orbitInfo);
+    pc.outputs().snapshot(o2::framework::Output{o2::header::gDataOriginTPC, mOrbitDescrOut[outLane], header::DataHeader::SubSpecificationType{outLane}}, orbitInfo);
   }
 
   void forwardOrbitInfo(o2::framework::ProcessingContext& pc, const bool currentBuffer, const unsigned int relTF, const unsigned int currentOutLane)
@@ -294,7 +290,7 @@ class TPCDistributeCMVSpec : public o2::framework::Task
       return;
     }
 
-    for (auto& ref : InputRecordWalker(pc.inputs(), mOrbitFilter)) {
+    for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mOrbitFilter)) {
       auto const* hdr = o2::framework::DataRefUtils::getHeader<o2::header::DataHeader*>(ref);
       const unsigned int cru = hdr->subSpecification >> 7;
       if (!std::binary_search(mCRUs.begin(), mCRUs.end(), cru)) {
@@ -313,17 +309,17 @@ class TPCDistributeCMVSpec : public o2::framework::Task
 
     if (mSendOutputStartInfo[mBuffer] && (mTFStart[mBuffer] >= 0)) {
       mSendOutputStartInfo[mBuffer] = false;
-      pc.outputs().snapshot(Output{gDataOriginTPC, getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{currentOutLane}}, mTFStart[mBuffer]);
+      pc.outputs().snapshot(o2::framework::Output{o2::header::gDataOriginTPC, getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{currentOutLane}}, mTFStart[mBuffer]);
     }
 
     if (mSendCCDBOutputOrbitReset[currentOutLane] && mSendCCDBOutputGRPECS[currentOutLane]) {
       mSendCCDBOutputOrbitReset[currentOutLane] = false;
       mSendCCDBOutputGRPECS[currentOutLane] = false;
-      pc.outputs().snapshot(Output{gDataOriginTPC, getDataDescriptionCMVOrbitReset(), header::DataHeader::SubSpecificationType{currentOutLane}}, dataformats::Pair<long, int>{o2::base::GRPGeomHelper::instance().getOrbitResetTimeMS(), o2::base::GRPGeomHelper::instance().getNHBFPerTF()});
+      pc.outputs().snapshot(o2::framework::Output{o2::header::gDataOriginTPC, getDataDescriptionCMVOrbitReset(), header::DataHeader::SubSpecificationType{currentOutLane}}, dataformats::Pair<long, int>{o2::base::GRPGeomHelper::instance().getOrbitResetTimeMS(), o2::base::GRPGeomHelper::instance().getNHBFPerTF()});
     }
 
     if (!mOrbitInfoForwarded[mBuffer].empty()) {
-      for (auto& ref : InputRecordWalker(pc.inputs(), mOrbitFilter)) {
+      for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mOrbitFilter)) {
         auto const* hdr = o2::framework::DataRefUtils::getHeader<o2::header::DataHeader*>(ref);
         const unsigned int cru = hdr->subSpecification >> 7;
         if (!std::binary_search(mCRUs.begin(), mCRUs.end(), cru)) {
@@ -334,13 +330,13 @@ class TPCDistributeCMVSpec : public o2::framework::Task
       }
     }
 
-    for (auto& ref : InputRecordWalker(pc.inputs(), mFilter)) {
+    for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mFilter)) {
       auto const* hdr = o2::framework::DataRefUtils::getHeader<o2::header::DataHeader*>(ref);
       const unsigned int cru = hdr->subSpecification >> 7;
       if (!std::binary_search(mCRUs.begin(), mCRUs.end(), cru)) {
         continue;
       }
-      sendOutput(pc, currentOutLane, cru, pc.inputs().get<pmr::vector<uint16_t>>(ref));
+      sendOutput(pc, currentOutLane, cru, pc.inputs().get<o2::pmr::vector<uint16_t>>(ref));
     }
   }
 
@@ -398,7 +394,7 @@ class TPCDistributeCMVSpec : public o2::framework::Task
         for (auto& it : mProcessedCRUs[currentBuffer][iTF]) {
           if (!it.second) {
             it.second = true;
-            sendOutput(pc, outLane, it.first, pmr::vector<uint16_t>());
+            sendOutput(pc, outLane, it.first, o2::pmr::vector<uint16_t>());
           }
         }
 
@@ -417,7 +413,7 @@ class TPCDistributeCMVSpec : public o2::framework::Task
       mNFactorTFs = 0;
       // ToDo: Find better fix. Set oldestForChannel to a very large value so the DPL dispatcher does not block waiting for older TF data that will never arrive
       for (unsigned int ilane = 0; ilane < mOutLanes; ++ilane) {
-        auto& deviceProxy = pc.services().get<FairMQDeviceProxy>();
+        auto& deviceProxy = pc.services().get<o2::framework::FairMQDeviceProxy>();
         auto& state = deviceProxy.getOutputChannelState({static_cast<int>(ilane)});
         size_t oldest = std::numeric_limits<size_t>::max() - 1;
         state.oldestForChannel = {oldest};
@@ -431,18 +427,18 @@ class TPCDistributeCMVSpec : public o2::framework::Task
   }
 };
 
-DataProcessorSpec getTPCDistributeCMVSpec(const int ilane, const std::vector<uint32_t>& crus, const unsigned int timeframes, const unsigned int outlanes, const int firstTF, const bool sendPrecisetimeStamp = false, const int nTFsBuffer = 1)
+o2::framework::DataProcessorSpec getTPCDistributeCMVSpec(const int ilane, const std::vector<uint32_t>& crus, const unsigned int timeframes, const unsigned int outlanes, const int firstTF, const bool sendPrecisetimeStamp = false, const int nTFsBuffer = 1)
 {
-  std::vector<InputSpec> inputSpecs;
-  inputSpecs.emplace_back(InputSpec{"cmvsgroup", ConcreteDataTypeMatcher{gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVGroup()}, Lifetime::Sporadic});
-  inputSpecs.emplace_back(InputSpec{"cmvorbit", ConcreteDataTypeMatcher{gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVOrbitInfo()}, Lifetime::Sporadic});
+  std::vector<o2::framework::InputSpec> inputSpecs;
+  inputSpecs.emplace_back(o2::framework::InputSpec{"cmvsgroup", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVGroup()}, o2::framework::Lifetime::Sporadic});
+  inputSpecs.emplace_back(o2::framework::InputSpec{"cmvorbit", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVOrbitInfo()}, o2::framework::Lifetime::Sporadic});
 
-  std::vector<OutputSpec> outputSpecs;
+  std::vector<o2::framework::OutputSpec> outputSpecs;
   outputSpecs.reserve(3 * outlanes);
   for (unsigned int lane = 0; lane < outlanes; ++lane) {
-    outputSpecs.emplace_back(ConcreteDataTypeMatcher{gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMV(lane)}, Lifetime::Sporadic);
-    outputSpecs.emplace_back(ConcreteDataMatcher{gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitInfo(lane), header::DataHeader::SubSpecificationType{lane}}, Lifetime::Sporadic);
-    outputSpecs.emplace_back(ConcreteDataMatcher{gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{lane}}, Lifetime::Sporadic);
+    outputSpecs.emplace_back(o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMV(lane)}, o2::framework::Lifetime::Sporadic);
+    outputSpecs.emplace_back(o2::framework::ConcreteDataMatcher{o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitInfo(lane), header::DataHeader::SubSpecificationType{lane}}, o2::framework::Lifetime::Sporadic);
+    outputSpecs.emplace_back(o2::framework::ConcreteDataMatcher{o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVFirstTF(), header::DataHeader::SubSpecificationType{lane}}, o2::framework::Lifetime::Sporadic);
   }
 
   // Only lane 0 fetches CCDB orbit-reset/GRPECS objects and broadcasts them to all aggregate lanes, the other distribute lanes do not need them, avoiding redundant CCDB requests
@@ -450,7 +446,7 @@ DataProcessorSpec getTPCDistributeCMVSpec(const int ilane, const std::vector<uin
   if (sendPrecisetimeStamp && (ilane == 0)) {
     fetchCCDB = true;
     for (unsigned int lane = 0; lane < outlanes; ++lane) {
-      outputSpecs.emplace_back(ConcreteDataMatcher{gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitReset(), header::DataHeader::SubSpecificationType{lane}}, Lifetime::Sporadic);
+      outputSpecs.emplace_back(o2::framework::ConcreteDataMatcher{o2::header::gDataOriginTPC, TPCDistributeCMVSpec::getDataDescriptionCMVOrbitReset(), header::DataHeader::SubSpecificationType{lane}}, o2::framework::Lifetime::Sporadic);
     }
   }
 
@@ -463,14 +459,14 @@ DataProcessorSpec getTPCDistributeCMVSpec(const int ilane, const std::vector<uin
                                                                 inputSpecs);
 
   const auto id = fmt::format("tpc-distribute-cmv-{:02}", ilane);
-  DataProcessorSpec spec{
+  o2::framework::DataProcessorSpec spec{
     id.data(),
     inputSpecs,
     outputSpecs,
-    AlgorithmSpec{adaptFromTask<TPCDistributeCMVSpec>(crus, timeframes, nTFsBuffer, outlanes, firstTF, ccdbRequest)},
-    Options{{"drop-data-after-nTFs", VariantType::Int, 0, {"Number of TFs after which to drop the data."}},
-            {"check-data-every-n", VariantType::Int, 0, {"Number of run function called after which to check for missing data (-1 for no checking, 0 for default checking)."}},
-            {"nFactorTFs", VariantType::Int, 1000, {"Number of TFs to skip for sending oldest TF."}}}};
+    o2::framework::AlgorithmSpec{o2::framework::adaptFromTask<TPCDistributeCMVSpec>(crus, timeframes, nTFsBuffer, outlanes, firstTF, ccdbRequest)},
+    o2::framework::Options{{"drop-data-after-nTFs", o2::framework::VariantType::Int, 0, {"Number of TFs after which to drop the data."}},
+                           {"check-data-every-n", o2::framework::VariantType::Int, 0, {"Number of run function called after which to check for missing data (-1 for no checking, 0 for default checking)."}},
+                           {"nFactorTFs", o2::framework::VariantType::Int, 1000, {"Number of TFs to skip for sending oldest TF."}}}};
   spec.rank = ilane;
   return spec;
 }
diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/TPCFLPCMVSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/TPCFLPCMVSpec.h
index 9931c27c9d3fa..d86356234a1c2 100644
--- a/Detectors/TPC/workflow/include/TPCWorkflow/TPCFLPCMVSpec.h
+++ b/Detectors/TPC/workflow/include/TPCWorkflow/TPCFLPCMVSpec.h
@@ -13,8 +13,8 @@
 /// @author Tuba Gündem, tuba.gundem@cern.ch
 /// @brief TPC device for processing CMVs on FLPs
 
-#ifndef O2_TPCFLPIDCSPEC_H
-#define O2_TPCFLPIDCSPEC_H
+#ifndef O2_TPCFLPCMVSPEC_H
+#define O2_TPCFLPCMVSPEC_H
 
 #include <vector>
 #include <unordered_map>
@@ -28,24 +28,27 @@
 #include "Headers/DataHeader.h"
 #include "TPCWorkflow/ProcessingHelpers.h"
 #include "TPCBase/CRU.h"
+#include "DataFormatsTPC/CMV.h"
 #include "TFile.h"
 
-using namespace o2::framework;
-using o2::header::gDataOriginTPC;
-using namespace o2::tpc;
-
 namespace o2::tpc
 {
 
 class TPCFLPCMVDevice : public o2::framework::Task
 {
  public:
-  TPCFLPCMVDevice(const int lane, const std::vector<uint32_t>& crus, const int nTFsBuffer)
-    : mLane{lane}, mCRUs{crus}, mNTFsBuffer{nTFsBuffer} {}
+  TPCFLPCMVDevice(const int lane, const std::vector<uint32_t>& crus, const bool triggerPerFlp, const int nTFsBuffer)
+    : mLane{lane}, mCRUs{crus}, mTriggerPerFLP{triggerPerFlp}, mNTFsBuffer{nTFsBuffer} {}
 
   void init(o2::framework::InitContext& ic) final
   {
     mDumpCMVs = ic.options().get<bool>("dump-cmvs-flp");
+    mEnableTrigger = ic.options().get<bool>("trigger");
+    mTriggerThresholdCMV = ic.options().get<float>("trigger-threshold-cmv");
+    mTriggerThresholdMeanMax = ic.options().get<float>("trigger-threshold-cmvMeanMax");
+    mTriggerThresholdMeanMin = ic.options().get<float>("trigger-threshold-cmvMeanMin");
+    mTriggerTimebinMin = ic.options().get<int>("trigger-threshold-timebinMin");
+    mTriggerTimebinMax = ic.options().get<int>("trigger-threshold-timebinMax");
   }
 
   void run(o2::framework::ProcessingContext& pc) final
@@ -56,7 +59,7 @@ class TPCFLPCMVDevice : public o2::framework::Task
 
     // Capture heartbeatOrbit / heartbeatBC from the first TF in the buffer
     if (mCountTFsForBuffer == 1) {
-      for (auto& ref : InputRecordWalker(pc.inputs(), mOrbitFilter)) {
+      for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mOrbitFilter)) {
         auto const* hdr = o2::framework::DataRefUtils::getHeader<o2::header::DataHeader*>(ref);
         const uint32_t cru = hdr->subSpecification >> 7;
         if (mFirstOrbitBC.find(cru) == mFirstOrbitBC.end()) {
@@ -68,11 +71,23 @@ class TPCFLPCMVDevice : public o2::framework::Task
       }
     }
 
-    for (auto& ref : InputRecordWalker(pc.inputs(), mFilter)) {
+    bool triggered = false;
+    for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mFilter)) {
       auto const* tpcCRUHeader = o2::framework::DataRefUtils::getHeader<o2::header::DataHeader*>(ref);
-      const int cru = tpcCRUHeader->subSpecification >> 7;
+      const uint32_t cru = tpcCRUHeader->subSpecification >> 7;
       auto vecCMVs = pc.inputs().get<o2::pmr::vector<uint16_t>>(ref);
       mCMVs[cru].insert(mCMVs[cru].end(), vecCMVs.begin(), vecCMVs.end());
+
+      const bool cruTriggered = mEnableTrigger && evaluateTrigger(vecCMVs);
+      if (!mTriggerPerFLP) {
+        pc.outputs().snapshot(o2::framework::Output{o2::header::gDataOriginTPC, getDataDescriptionCMVTrigger(), tpcCRUHeader->subSpecification}, cruTriggered);
+      } else {
+        triggered |= cruTriggered;
+      }
+    }
+    if (mTriggerPerFLP) {
+      const header::DataHeader::SubSpecificationType trigSubSpec{mCRUs.front() << 7};
+      pc.outputs().snapshot(o2::framework::Output{o2::header::gDataOriginTPC, getDataDescriptionCMVTrigger(), trigSubSpec}, triggered);
     }
 
     if (mCountTFsForBuffer >= mNTFsBuffer) {
@@ -86,7 +101,7 @@ class TPCFLPCMVDevice : public o2::framework::Task
 
     if (mDumpCMVs) {
       TFile fOut(fmt::format("CMVs_{}_tf_{}.root", mLane, processing_helpers::getCurrentTF(pc)).data(), "RECREATE");
-      for (auto& ref : InputRecordWalker(pc.inputs(), mFilter)) {
+      for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mFilter)) {
         auto const* tpcCRUHeader = o2::framework::DataRefUtils::getHeader<o2::header::DataHeader*>(ref);
         const int cru = tpcCRUHeader->subSpecification >> 7;
         auto vec = pc.inputs().get<std::vector<uint16_t>>(ref);
@@ -103,7 +118,7 @@ class TPCFLPCMVDevice : public o2::framework::Task
         sendOutput(ec.outputs(), cru);
       }
     }
-    ec.services().get<ControlService>().readyToQuit(QuitRequest::Me);
+    ec.services().get<o2::framework::ControlService>().readyToQuit(o2::framework::QuitRequest::Me);
   }
 
   static constexpr header::DataDescription getDataDescriptionCMVGroup() { return header::DataDescription{"CMVGROUP"}; }
@@ -111,21 +126,65 @@ class TPCFLPCMVDevice : public o2::framework::Task
   /// Data description for the packed (orbit<<32|bc) scalar forwarded alongside each CRU's CMVGROUP.
   static constexpr header::DataDescription getDataDescriptionCMVOrbitInfo() { return header::DataDescription{"CMVORBITINFO"}; }
 
+  /// Data description for the per-CRU per-TF trigger flag (empty span = not triggered or disabled; {1} = triggered).
+  static constexpr header::DataDescription getDataDescriptionCMVTrigger() { return header::DataDescription{"CMVTRIGGER"}; }
+
  private:
   const int mLane{};                                                   ///< lane number of processor
   const std::vector<uint32_t> mCRUs{};                                 ///< CRUs to process in this instance
   int mNTFsBuffer{1};                                                  ///< number of TFs to buffer before sending
   bool mDumpCMVs{};                                                    ///< dump CMVs to file for debugging
+  bool mTriggerPerFLP{false};                                          ///< send per-FLP trigger decision aggregated over CRUs
   int mCountTFsForBuffer{0};                                           ///< counts TFs to track when to send output
   std::unordered_map<unsigned int, o2::pmr::vector<uint16_t>> mCMVs{}; ///< buffered raw 16-bit CMV values per CRU
   std::unordered_map<uint32_t, uint64_t> mFirstOrbitBC{};              ///< first packed orbit/BC per CRU for the current buffer window
+  bool mEnableTrigger{false};                                          ///< enable CMV trigger evaluation
+  float mTriggerThresholdCMV{-10.f};                                   ///< CMV value threshold: trigger sequence starts when value drops below this
+  float mTriggerThresholdMeanMax{-40.f};                               ///< upper bound on trigger-sequence mean CMV value
+  float mTriggerThresholdMeanMin{-80.f};                               ///< lower bound on trigger-sequence mean CMV value
+  int mTriggerTimebinMin{4};                                           ///< minimum trigger-sequence length (timebins) to accept
+  int mTriggerTimebinMax{-1};                                          ///< maximum trigger-sequence length (timebins) to accept; -1 disables
 
   /// Filter for CMV float vectors (one CMVVECTOR message per CRU per TF)
-  const std::vector<InputSpec> mFilter = {{"cmvs", ConcreteDataTypeMatcher{gDataOriginTPC, "CMVVECTOR"}, Lifetime::Timeframe}};
+  const std::vector<o2::framework::InputSpec> mFilter = {{"cmvs", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, "CMVVECTOR"}, o2::framework::Lifetime::Timeframe}};
   /// Filter for CMV packet timing info (one CMVORBITS message per CRU per TF, sent by CMVToVectorSpec)
-  const std::vector<InputSpec> mOrbitFilter = {{"cmvorbits", ConcreteDataTypeMatcher{gDataOriginTPC, "CMVORBITS"}, Lifetime::Timeframe}};
+  const std::vector<o2::framework::InputSpec> mOrbitFilter = {{"cmvorbits", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, "CMVORBITS"}, o2::framework::Lifetime::Timeframe}};
+
+  // Scan a CRU's CMV vector for contiguous below-threshold sequences.
+  // Returns true as soon as one sequence satisfies both the length and mean criteria.
+  bool evaluateTrigger(const o2::pmr::vector<uint16_t>& cmvs) const
+  {
+    float seqSum = 0.f;
+    int seqLen = 0;
+
+    auto checkSequence = [&]() -> bool {
+      if (seqLen == 0) {
+        return false;
+      }
+      const float mean = seqSum / seqLen;
+      return (seqLen >= mTriggerTimebinMin) &&
+             (mTriggerTimebinMax < 0 || seqLen <= mTriggerTimebinMax) &&
+             (mean >= mTriggerThresholdMeanMin) &&
+             (mean <= mTriggerThresholdMeanMax);
+    };
+
+    for (const auto raw : cmvs) {
+      const float val = cmv::Data{raw}.getCMVFloat();
+      if (val < mTriggerThresholdCMV) {
+        seqSum += val;
+        ++seqLen;
+      } else {
+        if (checkSequence()) {
+          return true;
+        }
+        seqLen = 0;
+        seqSum = 0.f;
+      }
+    }
+    return checkSequence(); // trailing sequence that reached end of buffer
+  }
 
-  void sendOutput(DataAllocator& output, const uint32_t cru)
+  void sendOutput(o2::framework::DataAllocator& output, const uint32_t cru)
   {
     const header::DataHeader::SubSpecificationType subSpec{cru << 7};
 
@@ -134,39 +193,54 @@ class TPCFLPCMVDevice : public o2::framework::Task
     if (auto it = mFirstOrbitBC.find(cru); it != mFirstOrbitBC.end()) {
       orbitBC = it->second;
     }
-    output.snapshot(Output{gDataOriginTPC, getDataDescriptionCMVOrbitInfo(), subSpec}, orbitBC);
+    output.snapshot(o2::framework::Output{o2::header::gDataOriginTPC, getDataDescriptionCMVOrbitInfo(), subSpec}, orbitBC);
 
-    output.adoptContainer(Output{gDataOriginTPC, getDataDescriptionCMVGroup(), subSpec}, std::move(mCMVs[cru]));
+    output.adoptContainer(o2::framework::Output{o2::header::gDataOriginTPC, getDataDescriptionCMVGroup(), subSpec}, std::move(mCMVs[cru]));
   }
 };
 
-DataProcessorSpec getTPCFLPCMVSpec(const int ilane, const std::vector<uint32_t>& crus, const int nTFsBuffer = 1)
+o2::framework::DataProcessorSpec getTPCFLPCMVSpec(const int ilane, const std::vector<uint32_t>& crus, const bool triggerPerFlp, const int nTFsBuffer = 1)
 {
-  std::vector<OutputSpec> outputSpecs;
-  std::vector<InputSpec> inputSpecs;
-  outputSpecs.reserve(crus.size());
-  inputSpecs.reserve(crus.size());
+  std::vector<o2::framework::OutputSpec> outputSpecs;
+  std::vector<o2::framework::InputSpec> inputSpecs;
+  outputSpecs.reserve(crus.size() * 2 + 1);
+  inputSpecs.reserve(crus.size() * 2);
 
   for (const auto& cru : crus) {
     const header::DataHeader::SubSpecificationType subSpec{cru << 7};
 
     // Inputs from CMVToVectorSpec
-    inputSpecs.emplace_back(InputSpec{"cmvs", gDataOriginTPC, "CMVVECTOR", subSpec, Lifetime::Timeframe});
-    inputSpecs.emplace_back(InputSpec{"cmvorbits", gDataOriginTPC, "CMVORBITS", subSpec, Lifetime::Timeframe});
+    inputSpecs.emplace_back(o2::framework::InputSpec{"cmvs", o2::header::gDataOriginTPC, "CMVVECTOR", subSpec, o2::framework::Lifetime::Timeframe});
+    inputSpecs.emplace_back(o2::framework::InputSpec{"cmvorbits", o2::header::gDataOriginTPC, "CMVORBITS", subSpec, o2::framework::Lifetime::Timeframe});
 
     // Outputs to TPCDistributeCMVSpec
-    outputSpecs.emplace_back(ConcreteDataMatcher{gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVGroup(), subSpec}, Lifetime::Sporadic);
-    outputSpecs.emplace_back(ConcreteDataMatcher{gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVOrbitInfo(), subSpec}, Lifetime::Sporadic);
+    outputSpecs.emplace_back(o2::framework::ConcreteDataMatcher{o2::header::gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVGroup(), subSpec}, o2::framework::Lifetime::Sporadic);
+    outputSpecs.emplace_back(o2::framework::ConcreteDataMatcher{o2::header::gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVOrbitInfo(), subSpec}, o2::framework::Lifetime::Sporadic);
+
+    if (!triggerPerFlp) {
+      outputSpecs.emplace_back(o2::framework::ConcreteDataMatcher{o2::header::gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVTrigger(), subSpec}, o2::framework::Lifetime::Timeframe);
+    }
+  }
+  if (triggerPerFlp) { // Single per-FLP trigger output, subspec keyed on the first CRU
+    const header::DataHeader::SubSpecificationType trigSubSpec{crus.front() << 7};
+    outputSpecs.emplace_back(o2::framework::ConcreteDataMatcher{o2::header::gDataOriginTPC, TPCFLPCMVDevice::getDataDescriptionCMVTrigger(), trigSubSpec}, o2::framework::Lifetime::Timeframe);
   }
 
   const auto id = fmt::format("tpc-flp-cmv-{:02}", ilane);
-  return DataProcessorSpec{
+  return o2::framework::DataProcessorSpec{
     id.data(),
     inputSpecs,
     outputSpecs,
-    AlgorithmSpec{adaptFromTask<TPCFLPCMVDevice>(ilane, crus, nTFsBuffer)},
-    Options{{"dump-cmvs-flp", VariantType::Bool, false, {"Dump CMVs to file"}}}};
+    o2::framework::AlgorithmSpec{o2::framework::adaptFromTask<TPCFLPCMVDevice>(ilane, crus, triggerPerFlp, nTFsBuffer)},
+    o2::framework::Options{
+      {"dump-cmvs-flp", o2::framework::VariantType::Bool, false, {"Dump CMVs to file"}},
+      {"trigger", o2::framework::VariantType::Bool, false, {"Enable CMV trigger evaluation"}},
+      {"trigger-threshold-cmv", o2::framework::VariantType::Float, -10.f, {"CMV threshold: sequence starts when value drops below this (ADC units)"}},
+      {"trigger-threshold-cmvMeanMax", o2::framework::VariantType::Float, -40.f, {"Upper bound on trigger-sequence mean CMV value"}},
+      {"trigger-threshold-cmvMeanMin", o2::framework::VariantType::Float, -80.f, {"Lower bound on trigger-sequence mean CMV value"}},
+      {"trigger-threshold-timebinMin", o2::framework::VariantType::Int, 4, {"Minimum trigger-sequence length in timebins"}},
+      {"trigger-threshold-timebinMax", o2::framework::VariantType::Int, -1, {"Maximum trigger-sequence length in timebins (-1 disables upper bound)"}}}};
 }
 
 } // namespace o2::tpc
-#endif
\ No newline at end of file
+#endif
diff --git a/Detectors/TPC/workflow/src/CMVToVectorSpec.cxx b/Detectors/TPC/workflow/src/CMVToVectorSpec.cxx
index 81ce358d1a809..86cf4ca97aa19 100644
--- a/Detectors/TPC/workflow/src/CMVToVectorSpec.cxx
+++ b/Detectors/TPC/workflow/src/CMVToVectorSpec.cxx
@@ -76,7 +76,6 @@ class CMVToVectorDevice : public o2::framework::Task
   {
     const auto runNumber = processing_helpers::getRunNumber(pc);
     std::vector<InputSpec> filter = {{"check", ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, "RAWDATA"}, Lifetime::Timeframe}};
-    const auto& mapper = Mapper::instance();
 
     // open files if necessary
     if ((mWriteDebug || mWriteDebugOnError) && !mDebugStream) {
@@ -95,10 +94,7 @@ class CMVToVectorDevice : public o2::framework::Task
       mRawOutputFile.open(rawFileName, std::ios::binary);
     }
 
-    uint32_t heartbeatOrbit = 0;
-    uint16_t heartbeatBC = 0;
     uint32_t tfCounter = 0;
-    bool first = true;
     bool hasErrors = false;
 
     for (auto const& ref : InputRecordWalker(pc.inputs(), filter)) {
@@ -149,7 +145,7 @@ class CMVToVectorDevice : public o2::framework::Task
           LOGP(debug, "Processing firstTForbit {:9}, tfCounter {:5}, run {:6}, feeId {:6}, cruID {:3}, link {:2}", dh->firstTForbit, dh->tfCounter, dh->runNumber, feeId, cruID, link);
 
           if (std::find(mCRUs.begin(), mCRUs.end(), cruID) == mCRUs.end()) {
-            LOGP(warning, "CMV CRU {:3} not configured in CRUs, skipping", cruID);
+            // LOGP(debug, "CMV CRU {:3} not configured in CRUs, skipping", cruID);
             continue;
           }
 
@@ -171,7 +167,7 @@ class CMVToVectorDevice : public o2::framework::Task
           cmvVec.reserve(cmvVec.size() + cmv::NTimeBinsPerPacket);
           for (uint32_t tb = 0; tb < cmv::NTimeBinsPerPacket; ++tb) {
             cmvVec.push_back(cmvs.getCMV(tb));
-            // LOGP(debug, "Appended CMV {} for timebin {}, CRU {}, orbit {}, bc {}", cmvs.getCMV(tb), tb, cruID, orbit, bc);
+            // LOGP(debug, "For CRU {}, timebin {}, orbit {}, bc {}, appended CMV {} float: {}", cruID, tb, orbit, bc, cmvs.getCMV(tb), cmvs.getCMVFloat(tb));
           }
         }
       } catch (const std::exception& e) {
@@ -204,7 +200,7 @@ class CMVToVectorDevice : public o2::framework::Task
       }
     }
 
-    hasErrors |= snapshotCMVs(pc.outputs(), tfCounter);
+    hasErrors |= snapshotCMVs(pc.outputs());
 
     if (mWriteDebug || (mWriteDebugOnError && hasErrors)) {
       writeDebugOutput(tfCounter);
@@ -274,7 +270,7 @@ class CMVToVectorDevice : public o2::framework::Task
   std::string mRawOutputFileName;                                  ///< name of the raw output file
 
   //____________________________________________________________________________
-  bool snapshotCMVs(DataAllocator& output, uint32_t tfCounter)
+  bool snapshotCMVs(DataAllocator& output)
   {
     bool hasErrors = false;
 
@@ -321,12 +317,8 @@ class CMVToVectorDevice : public o2::framework::Task
   //____________________________________________________________________________
   void writeDebugOutput(uint32_t tfCounter)
   {
-    const auto& mapper = Mapper::instance();
-
     mDebugStream->GetFile()->cd();
     auto& stream = (*mDebugStream) << "cmvs";
-    uint32_t seen = 0;
-    static uint32_t firstOrbit = std::numeric_limits<uint32_t>::max();
 
     for (auto cru : mCRUs) {
       if (mCMVInfos.find(cru) == mCMVInfos.end()) {
@@ -404,7 +396,7 @@ class CMVToVectorDevice : public o2::framework::Task
   }
 };
 
-o2::framework::DataProcessorSpec getCMVToVectorSpec(const std::string inputSpec, std::vector<uint32_t> const& crus)
+o2::framework::DataProcessorSpec getCMVToVectorSpec(std::string const& inputSpec, std::vector<uint32_t> const& crus)
 {
   using device = o2::tpc::CMVToVectorDevice;
 
diff --git a/Detectors/TPC/workflow/src/TPCScalerSpec.cxx b/Detectors/TPC/workflow/src/TPCScalerSpec.cxx
index 8e2a78d69757b..1df192dd5ec00 100644
--- a/Detectors/TPC/workflow/src/TPCScalerSpec.cxx
+++ b/Detectors/TPC/workflow/src/TPCScalerSpec.cxx
@@ -183,40 +183,45 @@ class TPCScalerSpec : public Task
 
   void buildMap(ProcessingContext& pc)
   {
-    // reference map
-    auto* corrMap = mTPCCorrMapsLoader.getCorrMap();
-
-    // // new correction map
+    const auto lumiMode = mTPCCorrMapsLoader.getLumiScaleMode();
     o2::gpu::TPCFastTransform finalMap;
-    finalMap.cloneFromObject(*corrMap, nullptr);
-    finalMap.setApplyCorrectionOn();
-
-    const auto* corrMapRef = mTPCCorrMapsLoader.getCorrMapRef();
-    const float lumiScale = mTPCCorrMapsLoader.getLumiScale();
     std::vector<std::pair<const o2::gpu::TPCFastSpaceChargeCorrection*, float>> additionalCorrections;
 
-    // if standard scaling is used: map(lumi) = (mean_map - ref_map) * lumiScale + ref_map
-    if (mTPCCorrMapsLoader.getLumiScaleMode() == LumiScaleMode::Linear) {
-      const std::vector<std::pair<const o2::gpu::TPCFastSpaceChargeCorrection*, float>> step0{{&(corrMapRef->getCorrection()), -1.f}};
-      // finalMap = (mean_map - finalMap)
-      TPCFastSpaceChargeCorrectionHelper::instance()->mergeCorrections(finalMap.getCorrection(), 1, step0, true);
-
-      // finalMap = finalMap * lumiScale + ref_map
-      const std::vector<std::pair<const o2::gpu::TPCFastSpaceChargeCorrection*, float>> step1{{&(corrMapRef->getCorrection()), 1.f}};
-      TPCFastSpaceChargeCorrectionHelper::instance()->mergeCorrections(finalMap.getCorrection(), lumiScale, step1, true);
-
-    } else if (mTPCCorrMapsLoader.getLumiScaleMode() == LumiScaleMode::DerivativeMap || mTPCCorrMapsLoader.getLumiScaleMode() == LumiScaleMode::DerivativeMapMC) {
-      additionalCorrections.emplace_back(&(corrMapRef->getCorrection()), lumiScale);
-    }
+    if (lumiMode == LumiScaleMode::NoCorrection) {
+      std::unique_ptr<o2::gpu::TPCFastTransform> dummy(TPCFastTransformHelperO2::instance()->create(0));
+      finalMap.cloneFromObject(*dummy, nullptr);
+      finalMap.setApplyCorrectionOff();
+    } else {
+      auto* corrMap = mTPCCorrMapsLoader.getCorrMap();
+      const auto* corrMapRef = mTPCCorrMapsLoader.getCorrMapRef();
+      finalMap.cloneFromObject(lumiMode == LumiScaleMode::StaticMapOnly && corrMapRef ? *corrMapRef : *corrMap, nullptr);
+      finalMap.setApplyCorrectionOn();
+
+      const float lumiScale = mTPCCorrMapsLoader.getLumiScale();
+
+      // if standard scaling is used: map(lumi) = (mean_map - ref_map) * lumiScale + ref_map
+      if (lumiMode == LumiScaleMode::Linear) {
+        const std::vector<std::pair<const o2::gpu::TPCFastSpaceChargeCorrection*, float>> step0{{&(corrMapRef->getCorrection()), -1.f}};
+        // finalMap = (mean_map - finalMap)
+        TPCFastSpaceChargeCorrectionHelper::instance()->mergeCorrections(finalMap.getCorrection(), 1, step0, true);
+
+        // finalMap = finalMap * lumiScale + ref_map
+        const std::vector<std::pair<const o2::gpu::TPCFastSpaceChargeCorrection*, float>> step1{{&(corrMapRef->getCorrection()), 1.f}};
+        TPCFastSpaceChargeCorrectionHelper::instance()->mergeCorrections(finalMap.getCorrection(), lumiScale, step1, true);
+
+      } else if (lumiMode == LumiScaleMode::DerivativeMap || lumiMode == LumiScaleMode::DerivativeMapMC) {
+        additionalCorrections.emplace_back(&(corrMapRef->getCorrection()), lumiScale);
+      }
 
-    // if mshape map valid
-    if (!mTPCCorrMapsLoader.isCorrMapMShapeDummy()) {
-      LOGP(info, "Adding M-shape correction to the final map with scaling factor {}", mMShapeScalingFac);
-      additionalCorrections.emplace_back(&(mTPCCorrMapsLoader.getCorrMapMShape()->getCorrection()), 1.f);
-    }
+      // if mshape map valid
+      if (!mTPCCorrMapsLoader.isCorrMapMShapeDummy()) {
+        LOGP(info, "Adding M-shape correction to the final map with scaling factor {}", mMShapeScalingFac);
+        additionalCorrections.emplace_back(&(mTPCCorrMapsLoader.getCorrMapMShape()->getCorrection()), 1.f);
+      }
 
-    if (!additionalCorrections.empty()) {
-      TPCFastSpaceChargeCorrectionHelper::instance()->mergeCorrections(finalMap.getCorrection(), 1, additionalCorrections, true);
+      if (!additionalCorrections.empty()) {
+        TPCFastSpaceChargeCorrectionHelper::instance()->mergeCorrections(finalMap.getCorrection(), 1, additionalCorrections, true);
+      }
     }
 
     Output corrMapOutput{header::gDataOriginTPC, "TPCCORRMAP", 0};
diff --git a/Detectors/TPC/workflow/src/tpc-flp-cmv.cxx b/Detectors/TPC/workflow/src/tpc-flp-cmv.cxx
index f41fe5b8fbd15..b7734c5d0b24f 100644
--- a/Detectors/TPC/workflow/src/tpc-flp-cmv.cxx
+++ b/Detectors/TPC/workflow/src/tpc-flp-cmv.cxx
@@ -32,6 +32,7 @@ void customize(std::vector<ConfigParamSpec>& workflowOptions)
     {"time-lanes", VariantType::Int, 1, {"Number of parallel processing lanes (timeframes are split per device)"}},
     {"crus", VariantType::String, cruDefault.c_str(), {"List of CRUs, comma separated ranges, e.g. 0-3,7,9-15"}},
     {"n-TFs-buffer", VariantType::Int, 1, {"Buffer n-TFs before sending output"}},
+    {"trigger-per-flp", VariantType::Bool, false, {"Aggregate triggers of CRUs on FLP to a single trigger"}},
     {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}};
 
   std::swap(workflowOptions, options);
@@ -48,6 +49,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& config)
   const auto nLanes = std::min(static_cast<unsigned long>(config.options().get<int>("lanes")), nCRUs);
   const auto time_lanes = static_cast<unsigned int>(config.options().get<int>("time-lanes"));
   const auto crusPerLane = nCRUs / nLanes + ((nCRUs % nLanes) != 0);
+  const bool triggerPerFLP = config.options().get<bool>("trigger-per-flp");
   const int nTFsBuffer = config.options().get<int>("n-TFs-buffer");
 
   o2::conf::ConfigurableParam::updateFromFile(config.options().get<std::string>("configFile"));
@@ -65,8 +67,8 @@ WorkflowSpec defineDataProcessing(ConfigContext const& config)
     }
     const auto last = std::min(tpcCRUs.end(), first + crusPerLane);
     const std::vector<uint32_t> rangeCRUs(first, last);
-    workflow.emplace_back(timePipeline(getTPCFLPCMVSpec(ilane, rangeCRUs, nTFsBuffer), time_lanes));
+    workflow.emplace_back(timePipeline(getTPCFLPCMVSpec(ilane, rangeCRUs, triggerPerFLP, nTFsBuffer), time_lanes));
   }
 
   return workflow;
-}
\ No newline at end of file
+}
diff --git a/Detectors/TPC/workflow/test/test_cmv-trigger.cxx b/Detectors/TPC/workflow/test/test_cmv-trigger.cxx
new file mode 100644
index 0000000000000..c102a5ae531f4
--- /dev/null
+++ b/Detectors/TPC/workflow/test/test_cmv-trigger.cxx
@@ -0,0 +1,85 @@
+// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+/// @file   test_cmv-trigger.cxx
+/// @author Tuba Gündem, tuba.gundem@cern.ch
+/// @brief  Test workflow: reads CMVTRIGGER packets from tpc-flp-cmv and logs results
+
+#include <vector>
+#include <string>
+#include "Framework/WorkflowSpec.h"
+#include "Framework/ConfigParamSpec.h"
+#include "Framework/Task.h"
+#include "Framework/ControlService.h"
+#include "Framework/Logger.h"
+#include "Framework/DataProcessorSpec.h"
+#include "Framework/InputRecordWalker.h"
+#include "Framework/DataRefUtils.h"
+#include "Headers/DataHeader.h"
+#include "TPCWorkflow/ProcessingHelpers.h"
+#include "TPCWorkflow/TPCFLPCMVSpec.h"
+
+using namespace o2::framework;
+
+void customize(std::vector<ConfigParamSpec>&) {}
+
+#include "Framework/runDataProcessing.h"
+
+namespace o2::tpc
+{
+
+class CMVTriggerDevice : public o2::framework::Task
+{
+ public:
+  void run(o2::framework::ProcessingContext& pc) final
+  {
+    const auto tf = processing_helpers::getCurrentTF(pc);
+
+    for (auto& ref : o2::framework::InputRecordWalker(pc.inputs(), mFilter)) {
+      auto const* hdr = o2::framework::DataRefUtils::getHeader<o2::header::DataHeader*>(ref);
+      const uint32_t firstCRU = hdr->subSpecification >> 7;
+      const bool triggered = pc.inputs().get<bool>(ref);
+      if (triggered) {
+        LOGP(info, "TF {:6} first CRU {:3}: {}", tf, firstCRU, "triggered");
+      }
+    }
+  }
+
+  void endOfStream(o2::framework::EndOfStreamContext& ec) final
+  {
+    ec.services().get<o2::framework::ControlService>().readyToQuit(o2::framework::QuitRequest::Me);
+  }
+
+ private:
+  const std::vector<o2::framework::InputSpec> mFilter = {
+    {"cmvtrigger", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, o2::tpc::TPCFLPCMVDevice::getDataDescriptionCMVTrigger()}, o2::framework::Lifetime::Timeframe}};
+};
+
+o2::framework::DataProcessorSpec getCMVTriggerSpec()
+{
+  std::vector<o2::framework::InputSpec> inputSpecs;
+  inputSpecs.emplace_back(o2::framework::InputSpec{"cmvtrigger", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, o2::tpc::TPCFLPCMVDevice::getDataDescriptionCMVTrigger()}, o2::framework::Lifetime::Timeframe});
+
+  return o2::framework::DataProcessorSpec{
+    "tpc-cmv-trigger",
+    inputSpecs,
+    {},
+    o2::framework::AlgorithmSpec{o2::framework::adaptFromTask<CMVTriggerDevice>()}};
+}
+
+} // namespace o2::tpc
+
+WorkflowSpec defineDataProcessing(ConfigContext const& config)
+{
+  WorkflowSpec workflow;
+  workflow.emplace_back(o2::tpc::getCMVTriggerSpec());
+  return workflow;
+}
diff --git a/Detectors/TRD/calibration/src/DCSProcessor.cxx b/Detectors/TRD/calibration/src/DCSProcessor.cxx
index f110ba844791e..6f719b71e10c3 100644
--- a/Detectors/TRD/calibration/src/DCSProcessor.cxx
+++ b/Detectors/TRD/calibration/src/DCSProcessor.cxx
@@ -382,7 +382,7 @@ bool DCSProcessor::updateGasDPsCCDB()
   }
   std::map<std::string, std::string> md;
   md["responsible"] = "Ole Schmidt";
-  o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSGas, mCcdbGasDPsInfo, "TRD/Calib/DCSDPsGas", md, mGasStartTS, mGasStartTS + 3 * o2::ccdb::CcdbObjectInfo::DAY);
+  o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSGas, mCcdbGasDPsInfo, "TRD/Calib/DCSDPsGas", md, mGasStartTS, mCurrentTS + 14 * o2::ccdb::CcdbObjectInfo::DAY);
 
   return retVal;
 }
@@ -410,7 +410,7 @@ bool DCSProcessor::updateCurrentsDPsCCDB()
   }
   std::map<std::string, std::string> md;
   md["responsible"] = "Ole Schmidt";
-  o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSCurrents, mCcdbCurrentsDPsInfo, "TRD/Calib/DCSDPsI", md, mCurrentsStartTS, mCurrentsStartTS + 3 * o2::ccdb::CcdbObjectInfo::DAY);
+  o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSCurrents, mCcdbCurrentsDPsInfo, "TRD/Calib/DCSDPsI", md, mCurrentsStartTS, mCurrentTS + 14 * o2::ccdb::CcdbObjectInfo::DAY);
 
   return retVal;
 }
@@ -437,7 +437,7 @@ bool DCSProcessor::updateVoltagesDPsCCDB()
   }
   std::map<std::string, std::string> md;
   md["responsible"] = "Ole Schmidt";
-  o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSVoltages, mCcdbVoltagesDPsInfo, "TRD/Calib/DCSDPsU", md, mVoltagesStartTS, mVoltagesStartTS + 7 * o2::ccdb::CcdbObjectInfo::DAY);
+  o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSVoltages, mCcdbVoltagesDPsInfo, "TRD/Calib/DCSDPsU", md, mVoltagesStartTS, mCurrentTS + 14 * o2::ccdb::CcdbObjectInfo::DAY);
 
   return retVal;
 }
@@ -465,7 +465,7 @@ bool DCSProcessor::updateEnvDPsCCDB()
   }
   std::map<std::string, std::string> md;
   md["responsible"] = "Leonardo Barreto";
-  o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSEnv, mCcdbEnvDPsInfo, "TRD/Calib/DCSDPsEnv", md, mEnvStartTS, mEnvStartTS + 3 * o2::ccdb::CcdbObjectInfo::DAY);
+  o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSEnv, mCcdbEnvDPsInfo, "TRD/Calib/DCSDPsEnv", md, mEnvStartTS, mCurrentTS + 14 * o2::ccdb::CcdbObjectInfo::DAY);
 
   return retVal;
 }
@@ -498,7 +498,7 @@ bool DCSProcessor::updateFedChamberStatusDPsCCDB()
   // LB: set start timestamp 30000 miliseconds before DPs are received
   o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSFedChamberStatus, mCcdbFedChamberStatusDPsInfo,
                                                 "TRD/Calib/DCSDPsFedChamberStatus", md, mFedChamberStatusStartTS - 30000,
-                                                mFedChamberStatusStartTS + 3 * o2::ccdb::CcdbObjectInfo::DAY);
+                                                mCurrentTS + 14 * o2::ccdb::CcdbObjectInfo::DAY);
 
   return retVal;
 }
@@ -531,7 +531,7 @@ bool DCSProcessor::updateFedCFGtagDPsCCDB()
   // LB: set start timestamp 30000 seconds before DPs are received
   o2::calibration::Utils::prepareCCDBobjectInfo(mTRDDCSFedCFGtag, mCcdbFedCFGtagDPsInfo,
                                                 "TRD/Calib/DCSDPsFedCFGtag", md, mFedCFGtagStartTS - 30000,
-                                                mFedCFGtagStartTS + 3 * o2::ccdb::CcdbObjectInfo::DAY);
+                                                mCurrentTS + 14 * o2::ccdb::CcdbObjectInfo::DAY);
 
   return retVal;
 }
diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h
index db57e7ec06d4b..be88973561e0a 100644
--- a/GPU/Common/GPUCommonAlgorithm.h
+++ b/GPU/Common/GPUCommonAlgorithm.h
@@ -354,6 +354,11 @@ GPUdi() uint8_t warp_broadcast_FUNC<uint8_t>(uint8_t v, int32_t i)
 #define warp_scan_inclusive_add(v) warp_scan_inclusive_add_FUNC(v)
 #define warp_broadcast(v, i) warp_broadcast_FUNC(v, i)
 
+[[nodiscard]] GPUdi() int32_t work_group_count(bool pred)
+{
+  return work_group_reduce_add((int32_t)pred);
+}
+
 #elif (defined(__CUDACC__) || defined(__HIPCC__))
 // CUDA and HIP work the same way using cub, need just different header
 
@@ -416,6 +421,16 @@ GPUdi() T warp_broadcast_FUNC(T v, int32_t i)
 #endif
 }
 
+[[nodiscard]] GPUdi() bool work_group_any(bool pred)
+{
+  return __syncthreads_or(pred);
+}
+
+[[nodiscard]] GPUdi() uint32_t work_group_count(bool pred)
+{
+  return __syncthreads_count(pred);
+}
+
 #else
 // Trivial implementation for the CPU
 
@@ -449,6 +464,16 @@ GPUdi() T warp_broadcast(T v, int32_t i)
   return v;
 }
 
+[[nodiscard]] GPUdi() bool work_group_any(bool pred)
+{
+  return pred;
+}
+
+[[nodiscard]] GPUdi() uint32_t work_group_count(bool pred)
+{
+  return pred;
+}
+
 #endif
 
 #ifdef GPUCA_ALGORITHM_STD
diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx
index 3d8e749e84147..918b2d459a2d6 100644
--- a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx
+++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx
@@ -128,9 +128,11 @@ void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess*
           tmpClusters[k] = clustersNative->clusters[i][j][k];
           if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionTruncate) {
             GPUTPCCompression::truncateSignificantBitsChargeMax(tmpClusters[k].qMax, param);
-            GPUTPCCompression::truncateSignificantBitsCharge(tmpClusters[k].qTot, param);
             GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaPadPacked, param);
-            GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaTimePacked, param);
+            if (!tmpClusters[k].isSaturated()) [[likely]] {
+              GPUTPCCompression::truncateSignificantBitsCharge(tmpClusters[k].qTot, param);
+              GPUTPCCompression::truncateSignificantBitsWidth(tmpClusters[k].sigmaTimePacked, param);
+            }
           }
         }
         std::sort(tmpClusters.begin(), tmpClusters.end());
diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h
index 4efaa7f33257c..8450c3ee59210 100644
--- a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h
+++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h
@@ -44,7 +44,7 @@ class GPUTPCClusterStatistics
   bool mDecodingError = false;
 
   static constexpr uint32_t P_MAX_QMAX = GPUTPCCompression::P_MAX_QMAX;
-  static constexpr uint32_t P_MAX_QTOT = GPUTPCCompression::P_MAX_QTOT;
+  static constexpr uint32_t P_MAX_QTOT = GPUTPCCompression::P_MAX_SATURATED_QTOT;
   static constexpr uint32_t P_MAX_TIME = GPUTPCCompression::P_MAX_TIME;
   static constexpr uint32_t P_MAX_PAD = GPUTPCCompression::P_MAX_PAD;
   static constexpr uint32_t P_MAX_SIGMA = GPUTPCCompression::P_MAX_SIGMA;
diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h
index 82e44eda6f3cc..5efe3936067b7 100644
--- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h
+++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h
@@ -47,14 +47,15 @@ class GPUTPCCompression : public GPUProcessor
 #endif
 
   static constexpr uint32_t P_MAX_QMAX = 1 << 10;
-  static constexpr uint32_t P_MAX_QTOT = 5 * 5 * P_MAX_QMAX;
+  static constexpr uint32_t P_MAX_REGULAR_QTOT = 5 * 5 * P_MAX_QMAX;
+  static constexpr uint32_t P_MAX_SATURATED_QTOT = 1 << 16; // Need two different limits as saturated clusters use full u16 range for qTot
   static constexpr uint32_t P_MAX_TIME = 1 << 24;
   static constexpr uint32_t P_MAX_PAD = 1 << 16;
   static constexpr uint32_t P_MAX_SIGMA = 1 << 8;
   static constexpr uint32_t P_MAX_FLAGS = 1 << 8;
   static constexpr uint32_t P_MAX_QPT = 1 << 8;
 
-  GPUd() static void truncateSignificantBitsCharge(uint16_t& charge, const GPUParam& param) { truncateSignificantBits(charge, param.rec.tpc.sigBitsCharge, P_MAX_QTOT); }
+  GPUd() static void truncateSignificantBitsCharge(uint16_t& charge, const GPUParam& param) { truncateSignificantBits(charge, param.rec.tpc.sigBitsCharge, P_MAX_REGULAR_QTOT); }
   GPUd() static void truncateSignificantBitsChargeMax(uint16_t& charge, const GPUParam& param) { truncateSignificantBits(charge, param.rec.tpc.sigBitsCharge, P_MAX_QMAX); }
   GPUd() static void truncateSignificantBitsWidth(uint8_t& width, const GPUParam& param) { truncateSignificantBits(width, param.rec.tpc.sigBitsWidth, P_MAX_SIGMA); }
 
diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx
index b98f5c28f57b0..bd42c2a2472d4 100644
--- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx
+++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx
@@ -121,9 +121,11 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step0at
       uint8_t sigmapad = orgCl.sigmaPadPacked, sigmatime = orgCl.sigmaTimePacked;
       if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionTruncate) {
         compressor.truncateSignificantBitsChargeMax(qmax, param);
-        compressor.truncateSignificantBitsCharge(qtot, param);
         compressor.truncateSignificantBitsWidth(sigmapad, param);
-        compressor.truncateSignificantBitsWidth(sigmatime, param);
+        if (!orgCl.isSaturated()) [[likely]] {
+          compressor.truncateSignificantBitsCharge(qtot, param);
+          compressor.truncateSignificantBitsWidth(sigmatime, param);
+        }
       }
       c.qTotA[cidx] = qtot;
       c.qMaxA[cidx] = qmax;
@@ -298,9 +300,11 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1un
         uint8_t sigmapad = orgCl.sigmaPadPacked, sigmatime = orgCl.sigmaTimePacked;
         if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionTruncate) {
           compressor.truncateSignificantBitsChargeMax(qmax, param);
-          compressor.truncateSignificantBitsCharge(qtot, param);
           compressor.truncateSignificantBitsWidth(sigmapad, param);
-          compressor.truncateSignificantBitsWidth(sigmatime, param);
+          if (!orgCl.isSaturated()) [[likely]] {
+            compressor.truncateSignificantBitsCharge(qtot, param);
+            compressor.truncateSignificantBitsWidth(sigmatime, param);
+          }
         }
         c.qTotU[outidx] = qtot;
         c.qMaxU[outidx] = qmax;
diff --git a/GPU/GPUTracking/DataTypes/GPUTPCExtraADC.h b/GPU/GPUTracking/DataTypes/GPUTPCExtraADC.h
new file mode 100644
index 0000000000000..64fc5da833169
--- /dev/null
+++ b/GPU/GPUTracking/DataTypes/GPUTPCExtraADC.h
@@ -0,0 +1,25 @@
+// Copyright 2019-2026 CERN and copyright holders of ALICE O2.
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+/// \file GPUTPCExtraADC.h
+/// \author Felix Weiglhofer
+
+#include "GPUDefConstantsAndSettings.h"
+#include "DataFormatsTPC/Digit.h"
+#include <array>
+#include <vector>
+
+namespace o2::gpu
+{
+struct GPUTPCExtraADC {
+  std::array<std::vector<tpc::Digit>, tpc::constants::MAXSECTOR> digitsBySector;
+};
+} // namespace o2::gpu
diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h
index 43a5f4f79abdc..eb7d67a913ceb 100644
--- a/GPU/GPUTracking/Definitions/GPUSettingsList.h
+++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h
@@ -112,6 +112,9 @@ AddOptionRTC(trackletMinSharedNormFactor, float, 0.f, "", 0, "Max shared defined
 AddOptionRTC(maxTimeBinAboveThresholdIn1000Bin, uint16_t, 500, "", 0, "Except pad from cluster finding if total number of charges in a fragment is above this baseline (disable = 0)")
 AddOptionRTC(maxConsecTimeBinAboveThreshold, uint16_t, 200, "", 0, "Except pad from cluster finding if number of consecutive charges in a fragment is above this baseline (disable = 0)")
 AddOptionRTC(noisyPadSaturationThreshold, uint16_t, 700, "", 0, "Threshold where a timebin is considered saturated, disabling the noisy pad check for that pad")
+AddOptionRTC(hipTailFilter, uint8_t, 0, "", 0, "Enable Highly Ionising Particle tail filter in CheckPadBaseline (0 = disable, 1 = filter tails)")
+AddOptionRTC(hipTailFilterThreshold, uint16_t, 100, "", 0, "Threshold that must be exceeded for a timebin to be counted towards Highly Ionising Particle tail")
+AddOptionRTC(hipTailFilterAlpha, float, 0.5f, "", 0, "Smoothing factor for the exponential Highly Ionising Particle tail filter")
 AddOptionRTC(occupancyMapTimeBins, uint16_t, 16, "", 0, "Number of timebins per histogram bin of occupancy map (0 = disable occupancy map)")
 AddOptionRTC(occupancyMapTimeBinsAverage, uint16_t, 0, "", 0, "Number of timebins +/- to use for the averaging")
 AddOptionRTC(trackFitCovLimit, uint16_t, 1000, "", 0, "Abort fit when y/z cov exceed the limit")
diff --git a/GPU/GPUTracking/Definitions/Parameters/GPUParameters.csv b/GPU/GPUTracking/Definitions/Parameters/GPUParameters.csv
index ef215ba5ca870..823a70b24565b 100644
--- a/GPU/GPUTracking/Definitions/Parameters/GPUParameters.csv
+++ b/GPU/GPUTracking/Definitions/Parameters/GPUParameters.csv
@@ -60,6 +60,8 @@ GPUTPCGMO2Output_output,256,,,,,,,,,,,,,,,256
 GPUTPCStartHitsFinder,256,,"[1024, 2]","[1024, 7]",256,256,256,256,256,512,512,512,,,,608
 GPUTPCStartHitsSorter,256,,"[1024, 5]","[512, 7]",256,256,256,256,256,"[512, 1]","[512, 1]","[512, 1]",,,,608
 GPUTPCCFCheckPadBaseline,576,,"[576, 2]","[576, 2]",,,,,,"[576, 2]",,,,,,"[576, 2]"
+GPUTPCCFHIPTailConnector,256,,256,256,,,,,,256,
+GPUTPCCFHIPClusterizer,256,,256,256,,,,,,256,
 GPUTPCCFChargeMapFiller_fillIndexMap,512,,512,512,,,,,,448,,,,,,448
 GPUTPCCFChargeMapFiller_fillFromDigits,512,,512,512,,,,,,448,,,,,,448
 GPUTPCCFChargeMapFiller_findFragmentStart,512,,512,512,,,,,,448,,,,,,448
diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h
index 9913762ae34df..78a43856f00f1 100644
--- a/GPU/GPUTracking/Global/GPUChainTracking.h
+++ b/GPU/GPUTracking/Global/GPUChainTracking.h
@@ -71,6 +71,7 @@ struct CfFragment;
 class GPUTPCClusterFinder;
 struct GPUSettingsProcessing;
 struct GPUSettingsRec;
+struct GPUTPCExtraADC;
 
 class GPUChainTracking : public GPUChain
 {
@@ -298,13 +299,15 @@ class GPUChainTracking : public GPUChain
   int32_t RunChainFinalize();
   void OutputSanityCheck();
   int32_t RunTPCTrackingSectors_internal();
-  int32_t RunTPCClusterizer_prepare(bool restorePointers);
+  int32_t RunTPCClusterizer_prepare(bool restorePointers, const GPUTPCExtraADC& extraADCs);
 #ifndef GPUCA_RUN2
-  std::pair<uint32_t, uint32_t> RunTPCClusterizer_transferZS(int32_t iSector, const CfFragment& fragment, int32_t lane);
+  std::pair<uint32_t, uint32_t> RunTPCClusterizer_transferZS(int32_t iSector, const CfFragment& fragment, int32_t lane, const GPUTPCExtraADC& extraADCs);
   void RunTPCClusterizer_compactPeaks(GPUTPCClusterFinder& clusterer, GPUTPCClusterFinder& clustererShadow, int32_t stage, bool doGPU, int32_t lane);
   std::pair<uint32_t, uint32_t> TPCClusterizerDecodeZSCount(uint32_t iSector, const CfFragment& fragment);
   std::pair<uint32_t, uint32_t> TPCClusterizerDecodeZSCountUpdate(uint32_t iSector, const CfFragment& fragment);
   void TPCClusterizerEnsureZSOffsets(uint32_t iSector, const CfFragment& fragment);
+  void TPCClusterizerTransferExtraADC(GPUTPCClusterFinder& clusterer, GPUTPCClusterFinder& clustererShadow, int lane, const GPUTPCExtraADC& extraADCs);
+  void TPCClusterizerCheckExtraADCZeros(GPUTPCClusterFinder& clusterer, GPUTPCClusterFinder& clustererShadow, int lane, const GPUTPCExtraADC& extraADCs);
 #endif
   void RunTPCTrackingMerger_MergeBorderTracks(uint8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType);
   void RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType);
diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
index a2a07be7832ca..750cbee7051bf 100644
--- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
+++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
@@ -17,6 +17,7 @@
 #include "GPUChainTrackingDebug.h"
 #include "GPULogging.h"
 #include "GPUO2DataTypes.h"
+#include "GPUTPCExtraADC.h"
 #include "GPUMemorySizeScalers.h"
 #include "GPUTrackingInputProvider.h"
 #include "GPUNewCalibValues.h"
@@ -56,10 +57,13 @@
 
 #include "utils/VcShim.h"
 #include "utils/strtag.h"
-#include <fstream>
+#include "utils/vecpod.h"
 #include <numeric>
+#include <random>
 #include <vector>
 
+// #define INSERT_SATURATED_SIGNALS
+
 using namespace o2::gpu;
 using namespace o2::tpc;
 using namespace o2::tpc::constants;
@@ -155,11 +159,171 @@ void GPUChainTracking::TPCClusterizerEnsureZSOffsets(uint32_t iSector, const CfF
   }
 }
 
+void GPUChainTracking::TPCClusterizerTransferExtraADC(GPUTPCClusterFinder& clusterer, GPUTPCClusterFinder& clustererShadow, int lane, const GPUTPCExtraADC& extraADCs)
+{
+  const int32_t iSector = clusterer.mISector;
+  const auto& fragment = clusterer.mPmemory->fragment;
+  const auto& digits = extraADCs.digitsBySector[iSector];
+
+  if (fragment.index != 0) {
+    return;
+  }
+
+  if (digits.empty()) {
+    return;
+  }
+
+  const size_t chargeMapSize = TPCMapMemoryLayout<PackedCharge>::items(GetProcessingSettings().overrideClusterizerFragmentLen);
+  const size_t chargeMapSizeBytes = chargeMapSize * sizeof(PackedCharge);
+
+  vecpod<uint16_t> chargeMapHostData;
+  chargeMapHostData.resize(chargeMapSize);
+
+  CfArray2D<PackedCharge> chargeMapHost(reinterpret_cast<PackedCharge*>(chargeMapHostData.data()));
+
+  vecpod<CfChargePos> extraPositions;
+  extraPositions.reserve(digits.size());
+
+  GPUMemCpy(RecoStep::TPCClusterFinding, chargeMapHostData.data(), clustererShadow.mPchargeMap, chargeMapSizeBytes, lane, false);
+  SynchronizeStream(lane);
+
+  for (const auto& d : digits) {
+    if (!fragment.contains(d.getTimeStamp())) {
+      continue;
+    }
+
+    CfChargePos pos{(tpccf::Row)d.getRow(), (tpccf::Pad)d.getPad(), (tpccf::TPCFragmentTime)(d.getTimeStamp() - fragment.start)};
+    chargeMapHost[pos] = PackedCharge(d.getChargeFloat());
+
+    extraPositions.push_back(pos);
+  }
+
+  GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPchargeMap, chargeMapHostData.data(), chargeMapSizeBytes, lane, true);
+
+  const size_t nPositions = clusterer.mPmemory->counters.nPositions;
+  const size_t extraPositionsOffset = nPositions - extraPositions.size();
+  GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPpositions + extraPositionsOffset, extraPositions.data(), extraPositions.size() * sizeof(CfChargePos), lane, true);
+}
+
+void GPUChainTracking::TPCClusterizerCheckExtraADCZeros(GPUTPCClusterFinder& clusterer, GPUTPCClusterFinder& clustererShadow, int lane, const GPUTPCExtraADC& extraADCs)
+{
+  const int32_t iSector = clusterer.mISector;
+  const auto& fragment = clusterer.mPmemory->fragment;
+  const auto& digits = extraADCs.digitsBySector[iSector];
+
+  if (fragment.index != 0) {
+    return;
+  }
+
+  if (digits.empty()) {
+    return;
+  }
+
+  const size_t chargeMapSize = TPCMapMemoryLayout<PackedCharge>::items(GetProcessingSettings().overrideClusterizerFragmentLen);
+  const size_t chargeMapSizeBytes = chargeMapSize * sizeof(PackedCharge);
+
+  vecpod<uint16_t> chargeMapHostData;
+  chargeMapHostData.resize(chargeMapSize);
+
+  CfArray2D<PackedCharge> chargeMapHost(reinterpret_cast<PackedCharge*>(chargeMapHostData.data()));
+
+  GPUMemCpy(RecoStep::TPCClusterFinding, chargeMapHostData.data(), clustererShadow.mPchargeMap, chargeMapSizeBytes, lane, false);
+  SynchronizeStream(lane);
+
+  size_t nNonZeroADCs = 0;
+
+  for (const auto& d : digits) {
+    if (!fragment.contains(d.getTimeStamp())) {
+      continue;
+    }
+
+    CfChargePos pos{(tpccf::Row)d.getRow(), (tpccf::Pad)d.getPad(), (tpccf::TPCFragmentTime)(d.getTimeStamp() - fragment.start)};
+
+    auto adc = chargeMapHost[pos].unpack();
+
+    if (adc != 0) {
+      nNonZeroADCs++;
+    }
+  }
+
+  if (nNonZeroADCs > 0) {
+    GPUInfo("Non Zero ADCs: %zu", nNonZeroADCs);
+  } else {
+    GPUInfo("Cleared all extra ADC values!", nNonZeroADCs);
+  }
+}
+
 namespace
 {
 struct TPCCFDecodeScanTmp {
   int32_t zsPtrFirst, zsPageFirst, zsPtrLast, zsPageLast, hasData, pageCounter;
 };
+
+// Additional ADC values must be generated at start of clusterizer
+// This is required, so enough memory is allocated for the charge points
+// And ADCs can be injected by "simply"
+//  -> copying chargeMap + chargePositions to host
+//  -> writing additional adcs to chargeMap + positions
+//  -> copying values to device
+GPUTPCExtraADC GenerateSaturatedSignals(size_t seed = 42)
+{
+  constexpr int32_t MinTailLength = 50;
+  constexpr int32_t MaxTailLength = 200;
+  constexpr int32_t TailWidth = 3; // Assume tails are 3 pads wide at the moment
+
+  constexpr GPUTPCGeometry geo;
+
+  GPUTPCExtraADC adcs;
+
+  const int32_t nHIPs = 50;
+  const int32_t firstTB = 0;                   // Place all HIPs in first fragment for now
+  const int32_t lastTB = 4000 - MaxTailLength; // Don't allow cut off tails at fragment borders
+  const int32_t tailADC = 250;                 // charge should decrease over time, but for now just hardcode ADC above the threshold
+
+  std::mt19937 gen{(uint32_t)seed};
+  std::uniform_int_distribution<> randomRow(0, GPUTPCGeometry::NROWS - 1);
+  std::uniform_int_distribution<> randomTB(firstTB, lastTB);
+  std::uniform_int_distribution<> randomTailLength(MinTailLength, MaxTailLength);
+  // std::normal_distribution<> tailLengthNoise(8, 2.0);
+
+  for (int32_t iHIP = 0; iHIP < nHIPs; iHIP++) {
+
+    const int32_t row = randomRow(gen);
+    const int32_t nPads = geo.NPads(row);
+    std::uniform_int_distribution<> randomPad(0, nPads - 1);
+
+    const int32_t basePad = randomPad(gen);
+    const int32_t baseTb = randomTB(gen);
+
+    auto& digits = adcs.digitsBySector[0];
+
+    const int32_t tailLength = randomTailLength(gen);
+
+    for (int32_t dPad = -TailWidth; dPad <= TailWidth; dPad++) {
+      const int32_t iPad = basePad + dPad;
+      if (iPad < 0 || iPad >= nPads) {
+        continue;
+      }
+
+      for (int32_t dTime = 0; dTime < tailLength; dTime++) {
+        const int32_t iTime = baseTb + dTime;
+
+        if (iTime >= 4000) {
+          break;
+        }
+
+        const auto adc = dTime == 0 && dPad == 0 ? 1023 : tailADC;
+
+        digits.emplace_back(0, adc, row, iPad, iTime);
+      }
+    }
+  }
+
+  GPUInfo("Generated %zu ADCs!", adcs.digitsBySector[0].size());
+
+  return adcs;
+}
+
 } // namespace
 
 std::pair<uint32_t, uint32_t> GPUChainTracking::TPCClusterizerDecodeZSCount(uint32_t iSector, const CfFragment& fragment)
@@ -437,13 +601,16 @@ void GPUChainTracking::RunTPCClusterizer_compactPeaks(GPUTPCClusterFinder& clust
   }
 }
 
-std::pair<uint32_t, uint32_t> GPUChainTracking::RunTPCClusterizer_transferZS(int32_t iSector, const CfFragment& fragment, int32_t lane)
+std::pair<uint32_t, uint32_t> GPUChainTracking::RunTPCClusterizer_transferZS(int32_t iSector, const CfFragment& fragment, int32_t lane, const GPUTPCExtraADC& extraADCs)
 {
   bool doGPU = GetRecoStepsGPU() & RecoStep::TPCClusterFinding;
   if (mCFContext->abandonTimeframe) {
     return {0, 0};
   }
-  const auto& retVal = TPCClusterizerDecodeZSCountUpdate(iSector, fragment);
+  auto retVal = TPCClusterizerDecodeZSCountUpdate(iSector, fragment);
+  if (fragment.index == 0) {
+    retVal.first += extraADCs.digitsBySector[iSector].size();
+  }
   if (doGPU) {
     GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector];
     GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer;
@@ -473,7 +640,7 @@ std::pair<uint32_t, uint32_t> GPUChainTracking::RunTPCClusterizer_transferZS(int
   return retVal;
 }
 
-int32_t GPUChainTracking::RunTPCClusterizer_prepare(bool restorePointers)
+int32_t GPUChainTracking::RunTPCClusterizer_prepare(bool restorePointers, const GPUTPCExtraADC& extraADCs)
 {
   bool doGPU = mRec->GetRecoStepsGPU() & gpudatatypes::RecoStep::TPCClusterFinding;
   if (restorePointers) {
@@ -569,7 +736,7 @@ int32_t GPUChainTracking::RunTPCClusterizer_prepare(bool restorePointers)
   mCFContext->fragmentFirst = CfFragment{std::max<int32_t>(mCFContext->tpcMaxTimeBin + 1, maxFragmentLen), maxFragmentLen};
   for (int32_t iSector = 0; iSector < GetProcessingSettings().nTPCClustererLanes && iSector < NSECTORS; iSector++) {
     if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSector] && mCFContext->zsVersion != -1) {
-      mCFContext->nextPos[iSector] = RunTPCClusterizer_transferZS(iSector, mCFContext->fragmentFirst, GetProcessingSettings().nTPCClustererLanes + iSector);
+      mCFContext->nextPos[iSector] = RunTPCClusterizer_transferZS(iSector, mCFContext->fragmentFirst, GetProcessingSettings().nTPCClustererLanes + iSector, extraADCs);
     }
   }
 
@@ -595,7 +762,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
   mRec->PushNonPersistentMemory(qStr2Tag("TPCCLUST"));
   const auto& threadContext = GetThreadContext();
   const bool doGPU = GetRecoStepsGPU() & RecoStep::TPCClusterFinding;
-  if (RunTPCClusterizer_prepare(mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer)) {
+
+  GPUTPCExtraADC extraADCs;
+#ifdef INSERT_SATURATED_SIGNALS
+  extraADCs = GenerateSaturatedSignals();
+#endif
+
+  if (RunTPCClusterizer_prepare(mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer, extraADCs)) {
     return 1;
   }
   if (GetProcessingSettings().autoAdjustHostThreads && !doGPU) {
@@ -625,7 +798,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
     SetupGPUProcessor(&processors()->tpcClusterer[iSector], true); // Now we allocate
   }
   if (mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer) {
-    RunTPCClusterizer_prepare(true); // Restore some pointers, allocated by the other pipeline, and set to 0 by SetupGPUProcessor (since not allocated in this pipeline)
+    RunTPCClusterizer_prepare(true, extraADCs); // Restore some pointers, allocated by the other pipeline, and set to 0 by SetupGPUProcessor (since not allocated in this pipeline)
   }
 
   if (doGPU && mIOPtrs.tpcZS) {
@@ -769,6 +942,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
   const bool propagateMCLabels = buildNativeHost && GetProcessingSettings().runMC && processors()->ioPtrs.tpcPackedDigits && processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC;
   const bool sortClusters = buildNativeHost && (GetProcessingSettings().deterministicGPUReconstruction || GetProcessingSettings().debugLevel >= 4);
 
+  if (GetProcessingSettings().runMC && (!processors()->ioPtrs.tpcPackedDigits || !processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC)) {
+    GPUWarning("Requested to process MC labels, but no labels present");
+  }
+
   auto* digitsMC = propagateMCLabels ? processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC : nullptr;
 
   mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = mRec->MemoryScalers()->nTPCHits * tpcHitLowOccupancyScalingFactor;
@@ -938,7 +1115,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
             f = mCFContext->fragmentFirst;
           }
           if (nextSector < NSECTORS && mIOPtrs.tpcZS && mCFContext->nPagesSector[nextSector] && mCFContext->zsVersion != -1 && !mCFContext->abandonTimeframe) {
-            mCFContext->nextPos[nextSector] = RunTPCClusterizer_transferZS(nextSector, f, GetProcessingSettings().nTPCClustererLanes + lane);
+            mCFContext->nextPos[nextSector] = RunTPCClusterizer_transferZS(nextSector, f, GetProcessingSettings().nTPCClustererLanes + lane, extraADCs);
           }
         }
         GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector];
@@ -949,9 +1126,8 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
         if (!mIOPtrs.tpcZS) {
           runKernel<GPUTPCCFChargeMapFiller, GPUTPCCFChargeMapFiller::fillFromDigits>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});
         }
-        if (DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererDigits, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) {
-          clusterer.DumpChargeMap(*mDebugFile, "Charges");
-        }
+
+        TPCClusterizerTransferExtraADC(clusterer, clustererShadow, lane, extraADCs);
 
         if (propagateMCLabels) {
           runKernel<GPUTPCCFChargeMapFiller, GPUTPCCFChargeMapFiller::fillIndexMap>({GetGrid(clusterer.mPmemory->counters.nDigitsInFragment, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}});
@@ -960,14 +1136,33 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
         bool checkForNoisyPads = (rec()->GetParam().rec.tpc.maxTimeBinAboveThresholdIn1000Bin > 0) || (rec()->GetParam().rec.tpc.maxConsecTimeBinAboveThreshold > 0);
         checkForNoisyPads &= (rec()->GetParam().rec.tpc.noisyPadsQuickCheck ? fragment.index == 0 : true);
         checkForNoisyPads &= !GetProcessingSettings().disableTPCNoisyPadFilter;
+        // TODO Move hipTailFilter flag to ProcessingSettings?
+        // TODO Add some warning when re enabling pad filter with this flag, so it's not just silently enabled when disabling was requested
+        checkForNoisyPads |= rec()->GetParam().rec.tpc.hipTailFilter;
+
+        if (rec()->GetParam().rec.tpc.hipTailFilter && !doGPU) {
+          GPUError("HIP tail filter enabled, but this is currently not supported on CPU");
+        }
 
         if (checkForNoisyPads) {
+          if (rec()->GetParam().rec.tpc.hipTailFilter) {
+            runKernel<GPUMemClean16>({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPhipTailsByRow, GPUTPCGeometry::NROWS * sizeof(*clustererShadow.mPhipTailsByRow) * GPUTPCCFHIPClusterizer::MaxHIPTailsPerRow);
+            runKernel<GPUMemClean16>({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPnHIPTails, GPUTPCGeometry::NROWS * sizeof(*clustererShadow.mPnHIPTails));
+          }
           const int32_t nBlocks = GPUTPCCFCheckPadBaseline::GetNBlocks(doGPU);
 
           runKernel<GPUTPCCFCheckPadBaseline>({GetGridBlk(nBlocks, lane), {iSector}});
           getKernelTimer<GPUTPCCFCheckPadBaseline>(RecoStep::TPCClusterFinding, iSector, TPC_REAL_PADS_IN_SECTOR * fragment.lengthWithoutOverlap() * sizeof(PackedCharge), false);
         }
 
+        DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererDigits, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile);
+        // Avoid additional sync when also dumping digits
+        const bool debugSyncChargeMap = !(GetProcessingSettings().debugMask & GPUChainTrackingDebugFlags::TPCClustererDigits);
+        // DumpChargeMap should run after noisy pad filter to avoid yet another dump of intermediate data. When chargemap without pad filter is required, disable pad filter instead.
+        DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, debugSyncChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Charges");
+
+        TPCClusterizerCheckExtraADCZeros(clusterer, clustererShadow, lane, extraADCs);
+
         runKernel<GPUTPCCFPeakFinder>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});
         if (DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererPeaks, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) {
           clusterer.DumpPeakMap(*mDebugFile, "Peaks");
@@ -1002,6 +1197,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
         GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector];
         GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer;
 
+        if (clusterer.mPmemory->counters.nPositions == 0) {
+          return;
+        }
+
         if (doGPU) {
           SynchronizeStream(lane);
         }
@@ -1015,156 +1214,183 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
           runKernel<GPUMemClean16>({GetGridAutoStep(lane, RecoStep::TPCClusterFinding), krnlRunRangeNone, {nullptr, waitEvent}}, clustererShadow.mPclusterInRow, GPUTPCGeometry::NROWS * sizeof(*clustererShadow.mPclusterInRow));
         }
 
-        if (clusterer.mPmemory->counters.nClusters == 0) {
-          return;
-        }
-
-        if (GetProcessingSettings().nn.applyNNclusterizer) {
+        const auto nRegularClusters = clusterer.mPmemory->counters.nClusters;
+        if (nRegularClusters != 0) {
+          if (GetProcessingSettings().nn.applyNNclusterizer) {
 #ifdef GPUCA_HAS_ONNX
-          GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[lane];
-          GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[lane] : clustererNN;
-          GPUTPCNNClusterizerHost& nnApplication = nnApplications[lane];
+            GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[lane];
+            GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[lane] : clustererNN;
+            GPUTPCNNClusterizerHost& nnApplication = nnApplications[lane];
 
-          // int withMC = (doGPU && propagateMCLabels);
+            // int withMC = (doGPU && propagateMCLabels);
 
-          if (nn_settings.nnClusterizerApplyCfDeconvolution) {
-            runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true);
-          } else if (clustererNNShadow.mNnClusterizerSetDeconvolutionFlags) {
-            runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, false);
-          }
-
-          // float time_clusterizer = 0, time_fill = 0, time_networks = 0;
-          if (nn_settings.nnClusterizerVerbosity > 2) {
-            LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Starting loop over batched data. clustererNNShadow.mNnClusterizerBatchedMode=" << clustererNNShadow.mNnClusterizerBatchedMode << ", numLoops=" << std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNNShadow.mNnClusterizerBatchedMode) << ", numClusters=" << clusterer.mPmemory->counters.nClusters << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
-          }
-          for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNNShadow.mNnClusterizerBatchedMode); batch++) {
-            if (nn_settings.nnClusterizerVerbosity > 3) {
-              LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Start. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
+            if (nn_settings.nnClusterizerApplyCfDeconvolution) {
+              runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true);
+            } else if (clustererNNShadow.mNnClusterizerSetDeconvolutionFlags) {
+              runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, false);
             }
-            uint batchStart = batch * clustererNNShadow.mNnClusterizerBatchedMode;
-            size_t iSize = CAMath::Min((uint)clustererNNShadow.mNnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart));
 
-            // Filling the data
-            if (mRec->IsGPU() || GetProcessingSettings().nn.nnClusterizerForceGpuInputFill) {
-              // Fills element by element of each input matrix -> better parallelizability, but worse on CPU due to unnecessary computations
-              runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNNGPU>({GetGrid(iSize * clustererNNShadow.mNnClusterizerRowTimeSizeThreads , lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart);
-            } else {
-              // Fills the whole input matrix at once -> better performance on CPU, but worse parallelizability
-              runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNNCPU>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart);
-            }
-            if (nn_settings.nnClusterizerVerbosity > 3) {
-              LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done filling data. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
+            // float time_clusterizer = 0, time_fill = 0, time_networks = 0;
+            if (nn_settings.nnClusterizerVerbosity > 2) {
+              LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Starting loop over batched data. clustererNNShadow.mNnClusterizerBatchedMode=" << clustererNNShadow.mNnClusterizerBatchedMode << ", numLoops=" << std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNNShadow.mNnClusterizerBatchedMode) << ", numClusters=" << clusterer.mPmemory->counters.nClusters << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
             }
-
-            if (clustererNNShadow.mNnClusterizerSetDeconvolutionFlags) {
-              runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishDeconvolutionFlags>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart); // Publishing the deconvolution flags
+            for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNNShadow.mNnClusterizerBatchedMode); batch++) {
               if (nn_settings.nnClusterizerVerbosity > 3) {
-                LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done setting deconvolution flags. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
+                LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Start. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
               }
-            }
-
-            // NN evaluations
-            if(clustererNNShadow.mNnClusterizerUseClassification) {
-              if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane]->Start(); }
-              if (clustererNNShadow.mNnInferenceInputDType == 0) {
-                if (clustererNNShadow.mNnInferenceOutputDType == 0) {
-                  (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_16);
-                } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
-                  (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_32);
-                }
-              } else if (clustererNNShadow.mNnInferenceInputDType == 1) {
-                if (clustererNNShadow.mNnInferenceOutputDType == 0) {
-                  (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_16);
-                } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
-                  (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_32);
-                }
+              uint batchStart = batch * clustererNNShadow.mNnClusterizerBatchedMode;
+              size_t iSize = CAMath::Min((uint)clustererNNShadow.mNnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart));
+
+              // Filling the data
+              if (mRec->IsGPU() || GetProcessingSettings().nn.nnClusterizerForceGpuInputFill) {
+                // Fills element by element of each input matrix -> better parallelizability, but worse on CPU due to unnecessary computations
+                runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNNGPU>({GetGrid(iSize * clustererNNShadow.mNnClusterizerRowTimeSizeThreads , lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart);
+              } else {
+                // Fills the whole input matrix at once -> better performance on CPU, but worse parallelizability
+                runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNNCPU>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart);
               }
-              if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane]->Stop(); } // doGPU || lane<4 -> only for GPU or first 4 CPU lanes (to limit number of concurrent timers). At least gives some statistics for CPU time...
               if (nn_settings.nnClusterizerVerbosity > 3) {
-                LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with NN classification inference. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
+                LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done filling data. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
               }
-            }
-            if (!clustererNNShadow.mNnClusterizerUseCfRegression) {
-              if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 1]->Start(); }
-              if (clustererNNShadow.mNnInferenceInputDType == 0) {
-                if (clustererNNShadow.mNnInferenceOutputDType == 0) {
-                  (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_16);
-                } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
-                  (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_32);
+
+              if (clustererNNShadow.mNnClusterizerSetDeconvolutionFlags) {
+                runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishDeconvolutionFlags>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart); // Publishing the deconvolution flags
+                if (nn_settings.nnClusterizerVerbosity > 3) {
+                  LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done setting deconvolution flags. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
+                }
+              }
+
+              // NN evaluations
+              if(clustererNNShadow.mNnClusterizerUseClassification) {
+                if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane]->Start(); }
+                if (clustererNNShadow.mNnInferenceInputDType == 0) {
+                  if (clustererNNShadow.mNnInferenceOutputDType == 0) {
+                    (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_16);
+                  } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
+                    (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_32);
+                  }
+                } else if (clustererNNShadow.mNnInferenceInputDType == 1) {
+                  if (clustererNNShadow.mNnInferenceOutputDType == 0) {
+                    (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_16);
+                  } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
+                    (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_32);
+                  }
                 }
-              } else if (clustererNNShadow.mNnInferenceInputDType == 1) {
-                if (clustererNNShadow.mNnInferenceOutputDType == 0) {
-                  (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_16);
-                } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
-                  (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_32);
+                if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane]->Stop(); } // doGPU || lane<4 -> only for GPU or first 4 CPU lanes (to limit number of concurrent timers). At least gives some statistics for CPU time...
+                if (nn_settings.nnClusterizerVerbosity > 3) {
+                  LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with NN classification inference. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
                 }
               }
-              if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 1]->Stop(); }
-              if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) {
-                if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 2]->Start(); }
+              if (!clustererNNShadow.mNnClusterizerUseCfRegression) {
+                if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 1]->Start(); }
                 if (clustererNNShadow.mNnInferenceInputDType == 0) {
                   if (clustererNNShadow.mNnInferenceOutputDType == 0) {
-                    (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_16);
+                    (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_16);
                   } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
-                    (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_32);
+                    (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_32);
                   }
                 } else if (clustererNNShadow.mNnInferenceInputDType == 1) {
                   if (clustererNNShadow.mNnInferenceOutputDType == 0) {
-                    (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_16);
+                    (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_16);
                   } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
-                    (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_32);
+                    (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_32);
+                  }
+                }
+                if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 1]->Stop(); }
+                if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) {
+                  if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 2]->Start(); }
+                  if (clustererNNShadow.mNnInferenceInputDType == 0) {
+                    if (clustererNNShadow.mNnInferenceOutputDType == 0) {
+                      (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_16);
+                    } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
+                      (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_32);
+                    }
+                  } else if (clustererNNShadow.mNnInferenceInputDType == 1) {
+                    if (clustererNNShadow.mNnInferenceOutputDType == 0) {
+                      (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_16);
+                    } else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
+                      (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_32);
+                    }
                   }
+                  if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 2]->Stop(); }
+                }
+                if (nn_settings.nnClusterizerVerbosity > 3) {
+                  LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with NN regression inference. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
+                }
+              }
+
+              // Publishing kernels for class labels and regression results
+              // In case classification should not be used, this kernel should still be executed to fill the mOutputDataClass array with default values
+              if (nnApplication.mModelClass.getNumOutputNodes()[0][1] == 1) {
+                runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass1Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels
+              } else {
+                runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass2Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels
+              }
+              if (!clustererNNShadow.mNnClusterizerUseCfRegression) {
+                runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass1Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Publishing class 1 regression results
+                if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) {
+                  runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass2Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Publishing class 2 regression results
                 }
-                if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 2]->Stop(); }
               }
               if (nn_settings.nnClusterizerVerbosity > 3) {
-                LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with NN regression inference. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
+                LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done publishing. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
               }
             }
 
-            // Publishing kernels for class labels and regression results
-            // In case classification should not be used, this kernel should still be executed to fill the mOutputDataClass array with default values
-            if (nnApplication.mModelClass.getNumOutputNodes()[0][1] == 1) {
-              runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass1Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels
-            } else {
-              runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass2Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels
-            }
-            if (!clustererNNShadow.mNnClusterizerUseCfRegression) {
-              runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass1Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Publishing class 1 regression results
-              if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) {
-                runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass2Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Publishing class 2 regression results
+            if (clustererNNShadow.mNnClusterizerUseCfRegression) {
+              if(!nn_settings.nnClusterizerApplyCfDeconvolution) { // If it is already applied don't do it twice, otherwise apply now
+                runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true);
+              }
+              DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMapSplit, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
+              runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::runCfClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, 0); // Running the CF regression kernel - no batching needed: batchStart = 0
+              if (nn_settings.nnClusterizerVerbosity > 3) {
+                LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with CF regression. (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
               }
             }
-            if (nn_settings.nnClusterizerVerbosity > 3) {
-              LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done publishing. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
+#else
+            GPUFatal("Project not compiled with neural network clusterization. Aborting.");
+#endif
+          } else {
+            runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true);
+            DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMapSplit, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
+            runKernel<GPUTPCCFClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0);
+          } // if (GetProcessingSettings().nn.applyNNclusterizer)
+
+          if (doGPU && propagateMCLabels) {
+            TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mScratchId, lane);
+            if (doGPU) {
+              SynchronizeStream(lane);
             }
+            runKernel<GPUTPCCFClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, 1); // Computes MC labels
           }
+        } // if (nRegularClusters != 0) {
 
-          if (clustererNNShadow.mNnClusterizerUseCfRegression) {
-            if(!nn_settings.nnClusterizerApplyCfDeconvolution) { // If it is already applied don't do it twice, otherwise apply now
-              runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true);
-            }
-            DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
-            runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::runCfClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, 0); // Running the CF regression kernel - no batching needed: batchStart = 0
-            if (nn_settings.nnClusterizerVerbosity > 3) {
-              LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with CF regression. (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
-            }
+
+        // TODO: Move this right after CheckPadBaseline once tail zeroing is moved into this kernel.
+        if (rec()->GetParam().rec.tpc.hipTailFilter) {
+          runKernel<GPUTPCCFHIPTailConnector>({GetGridBlk(GPUTPCGeometry::NROWS, lane), {iSector}});
+          runKernel<GPUTPCCFHIPClusterizer>({GetGridBlk(GPUTPCGeometry::NROWS, lane), {iSector}});
+          if (doGPU && (nRegularClusters == 0 || GetProcessingSettings().debugLevel >= 3)) {
+            TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane);
+            SynchronizeStream(lane);
           }
-#else
-          GPUFatal("Project not compiled with neural network clusterization. Aborting.");
-#endif
-        } else {
-          runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true);
-          DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
-          runKernel<GPUTPCCFClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0);
         }
 
-        if (doGPU && propagateMCLabels) {
-          TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mScratchId, lane);
-          if (doGPU) {
-            SynchronizeStream(lane);
+        bool hasClusters = nRegularClusters != 0;
+
+        // Paranoid edge case: If no regular clusters were found, need to still check that no HIP clusters were created
+        // HIPClusterizer kernel doesn't update counters.nClusters, because:
+        // - 64bit atomic support in OpenCL is flaky
+        // - nClusters is only used internally by clusterizer to track #peaks that will probably become clusters,
+        //   so storing the number of HIP clusters there is only asking for trouble anyway
+        if (rec()->GetParam().rec.tpc.hipTailFilter && nRegularClusters == 0) {
+          for (uint32_t row = 0; row < GPUTPCGeometry::NROWS; row++) {
+            hasClusters |= clusterer.mPclusterInRow[row] != 0;
           }
-          runKernel<GPUTPCCFClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, 1); // Computes MC labels
+        }
+
+        if (!hasClusters) {
+          return;
         }
 
         if (GetProcessingSettings().debugLevel >= 3) {
@@ -1173,8 +1399,6 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
 
         TransferMemoryResourcesToHost(RecoStep::TPCClusterFinding, &clusterer, lane);
         laneHasData[lane] = true;
-        // Include clusters in default debug mask, exclude other debug output by default
-        DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererClusters, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off
       });
       mRec->SetNActiveThreadsOuterLoop(1);
     }
@@ -1192,6 +1416,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
 
       if (laneHasData[lane]) {
         anyLaneHasData = true;
+        // Include clusters in default debug mask, exclude other debug output by default.
+        // The cluster buffers are accumulated per sector, so dump them once after all fragments.
+        DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererClusters, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off
         if (buildNativeGPU && GetProcessingSettings().tpccfGatherKernel) {
           runKernel<GPUTPCCFGather>({GetGridBlk(GPUTPCGeometry::NROWS, mRec->NStreams() - 1), {iSector}}, &mInputsShadow->mPclusterNativeBuffer[nClsTotal]);
         }
diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx
index dda15d403407e..6ca50f24351c4 100644
--- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx
+++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx
@@ -16,6 +16,7 @@
 #include "GPUChainTrackingDebug.h"
 #include "GPULogging.h"
 #include "GPUO2DataTypes.h"
+#include "GPUTPCExtraADC.h"
 #include "GPUTrackingInputProvider.h"
 #include "GPUTPCCFChainContext.h"
 #include "TPCClusterDecompressor.h"
@@ -63,7 +64,7 @@ int32_t GPUChainTracking::RunTPCCompression()
   if (mPipelineFinalizationCtx && GetProcessingSettings().doublePipelineClusterizer) {
     SynchronizeEventAndRelease(mEvents->single);
     auto* foreignChain = (GPUChainTracking*)GetNextChainInQueue();
-    foreignChain->RunTPCClusterizer_prepare(false);
+    foreignChain->RunTPCClusterizer_prepare(false, {});
     foreignChain->mCFContext->ptrClusterNativeSave = processorsShadow()->ioPtrs.clustersNative;
   }
 #endif
diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebug.h b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h
index a0be9d833d5a9..39e48f9f14d9a 100644
--- a/GPU/GPUTracking/Global/GPUChainTrackingDebug.h
+++ b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h
@@ -45,7 +45,8 @@ enum GPUChainTrackingDebugFlags : uint32_t {
   TPCClustererPeaks = 1 << 19,
   TPCClustererSuppressedPeaks = 1 << 20,
   TPCClustererChargeMap = 1 << 21,
-  TPCClustererZeroedCharges = 1 << 22
+  TPCClustererChargeMapSplit = 1 << 22,
+  TPCClustererZeroedCharges = 1 << 23
 };
 
 template <class T, class S, typename... Args>
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx
index 204d9d6a8b81a..105df23453624 100644
--- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx
+++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx
@@ -16,14 +16,187 @@
 #include "CfArray2D.h"
 #include "PackedCharge.h"
 #include "clusterFinderDefs.h"
+#include "DataFormatsTPC/ClusterNative.h"
 
 #ifndef GPUCA_GPUCODE
 #include "utils/VcShim.h"
 #endif
 
+#if 0
+#define DPRINT(...) printf(__VA_ARGS__)
+#define DPRINTB(...) \
+  if (iThread == 0)  \
+  printf(__VA_ARGS__)
+#define DPRINTB_IF(test, ...) \
+  if (iThread == 0 && (test)) \
+  printf(__VA_ARGS__)
+#else
+#define DPRINT(...) ((void)0)
+#define DPRINTB(...) ((void)0)
+#define DPRINTB_IF(test, ...) ((void)0)
+#endif
+
 using namespace o2::gpu;
 using namespace o2::gpu::tpccf;
 
+using Kernel = GPUTPCCFCheckPadBaseline;
+
+static GPUdi() HIPTailDescriptor* GetHIPTails(GPUTPCClusterFinder& clusterer, int32_t row)
+{
+  // HIP TAILS: indexing starts at 1, so 0 index indicates no connection
+  return clusterer.mPhipTailsByRow + row * GPUTPCCFHIPClusterizer::MaxHIPTailsPerRow;
+}
+
+static GPUdi() Charge UpdateHIPTailFilter(Charge filteredCharge, Charge charge, Charge alpha)
+{
+  return filteredCharge + alpha * (charge - filteredCharge);
+}
+
+static GPUdi() float HIPTailTimeMean(const HIPTailDescriptor& tail)
+{
+  const float length = tail.tailEnd > tail.tailStart ? float(tail.tailEnd - tail.tailStart) : 1.f;
+  return tail.tailStart + 0.5f * (length - 1.f);
+}
+
+static GPUdi() float HIPTailTimeVariance(const HIPTailDescriptor& tail)
+{
+  const float length = tail.tailEnd > tail.tailStart ? float(tail.tailEnd - tail.tailStart) : 1.f;
+  return (length * length - 1.f) * (1.f / 12.f);
+}
+
+// Collect tails marked for closing across the workgroup using a prefix scan,
+// then cooperatively zero the charge map entries for each closed tail.
+// Caller must set acc.activeHIPTail.end before calling if the tail is open.
+static GPUdi() uint16_t CloseHIPTails(
+  Kernel::GPUSharedMemory& smem,
+  GPUTPCClusterFinder& clusterer,
+  int32_t iThread, int32_t nThreads,
+  int16_t iPadHandle,
+  CfChargePos basePos,
+  CfArray2D<PackedCharge>& chargeMap,
+  Kernel::PadChargeAccu& acc,
+  bool shouldCloseTail)
+{
+  const uint32_t row = basePos.row();
+  const uint16_t nClosedTails = work_group_count(shouldCloseTail);
+
+  auto* nHIPTails = clusterer.mPnHIPTails;
+  auto* hipTails = GetHIPTails(clusterer, row);
+
+  if (nClosedTails > 0) {
+    int16_t iClosedTail = work_group_scan_inclusive_add((int16_t)shouldCloseTail) - 1;
+    const bool shouldStoreTail = shouldCloseTail && acc.activeHIPTail.Length() > 0;
+    uint16_t nStoredTails = work_group_count(shouldStoreTail);
+    int16_t iStoredTail = work_group_scan_inclusive_add((int16_t)shouldStoreTail) - 1;
+
+    // Use exactly one atomic add per closing call to reduce differences in
+    // tail ordering between runs.
+    if (nStoredTails > 0) {
+      if (iThread == 0) {
+        smem.tailStoreBase = CAMath::AtomicAdd(&nHIPTails[row], (uint32_t)nStoredTails);
+      }
+      GPUbarrier();
+    }
+    if (shouldCloseTail) {
+      smem.tailsClosedPad[iClosedTail] = iPadHandle;
+      smem.tailsClosed[iClosedTail] = acc.activeHIPTail;
+      smem.tailsClosedStoreIdx[iClosedTail] = GPUTPCCFHIPTailConnector::MaxHIPTailsPerRow;
+
+      if (shouldStoreTail) {
+        const uint32_t idx = smem.tailStoreBase + iStoredTail + 1;
+        smem.tailsClosedStoreIdx[iClosedTail] = idx;
+        if (idx < GPUTPCCFHIPTailConnector::MaxHIPTailsPerRow) {
+          hipTails[idx] = {0, 0, (uint16_t)iPadHandle,
+                           (uint16_t)acc.activeHIPTail.start, (uint16_t)acc.activeHIPTail.end,
+                           0.f, 0.f};
+        }
+      }
+
+      acc.tailFilterCharge = 0;
+      acc.activeHIPTail.Reset();
+    }
+
+    GPUbarrier();
+  }
+
+  // TODO: performance improvement -> parallelize this loop across tails
+  for (uint16_t iTail = 0; iTail < nClosedTails; iTail++) {
+    const auto tailPad = smem.tailsClosedPad[iTail];
+    const auto tail = smem.tailsClosed[iTail];
+    const uint32_t tailStoreIdx = smem.tailsClosedStoreIdx[iTail];
+
+    Charge qTot = 0.f;
+    Charge qMax = 0.f;
+    for (uint16_t iTime = iThread; iTime < tail.Length(); iTime += nThreads) {
+      const int16_t time = tail.start + iTime;
+      auto pos = basePos.delta({tailPad, time});
+      const Charge q = chargeMap[pos].unpack();
+      qTot += q;
+      qMax = CAMath::Max(qMax, q);
+      chargeMap[pos] = PackedCharge{0};
+    }
+
+    smem.tailQTotScratch[iThread] = qTot;
+    smem.tailQMaxScratch[iThread] = qMax;
+    GPUbarrier();
+    for (uint16_t active = nThreads; active > 1;) {
+      const uint16_t stride = (active + 1) / 2;
+      if (iThread < active - stride) {
+        smem.tailQTotScratch[iThread] += smem.tailQTotScratch[iThread + stride];
+        smem.tailQMaxScratch[iThread] = CAMath::Max(smem.tailQMaxScratch[iThread], smem.tailQMaxScratch[iThread + stride]);
+      }
+      active = stride;
+      GPUbarrier();
+    }
+
+    if (iThread == 0 && tailStoreIdx < GPUTPCCFHIPTailConnector::MaxHIPTailsPerRow) {
+      HIPTailDescriptor& tailDescriptor = hipTails[tailStoreIdx];
+      tailDescriptor.qTot = smem.tailQTotScratch[0];
+      tailDescriptor.qMax = smem.tailQMaxScratch[0];
+    }
+  }
+
+  return nClosedTails;
+}
+
+template <bool CheckHIPTrigger, bool CheckHIPTailEnd>
+static GPUdi() void ScanCachedCharges(Kernel::GPUSharedMemory& smem, uint16_t timeOffset, uint16_t pad, Charge hipTailThreshold, Charge hipTailFilterAlpha, Kernel::PadChargeAccu& acc)
+{
+  for (int32_t i = 0; i < Kernel::NumOfCachedTBs; i++) {
+    const Charge qs = smem.charges[i][pad];
+    const int16_t curTB = timeOffset + i;
+
+    acc.totalCharges += qs > 0;
+    acc.consecCharges = qs > 0 ? acc.consecCharges + 1 : 0;
+    acc.maxConsecCharges = CAMath::Max(acc.consecCharges, acc.maxConsecCharges);
+    acc.maxCharge = CAMath::Max<Charge>(qs, acc.maxCharge);
+
+    if (qs >= hipTailThreshold) {
+      if (acc.aboveThresholdStart < 0) {
+        acc.aboveThresholdStart = curTB;
+      }
+    } else {
+      acc.aboveThresholdStart = -1;
+    }
+
+    if constexpr (CheckHIPTrigger) {
+      if (acc.HIPtb < 0 && qs >= Charge(Kernel::MaxADC)) {
+        acc.HIPtb = acc.aboveThresholdStart; // start of rising edge, not first sat TB
+        smem.tails[pad] = {acc.HIPtb, 0};    // Broadcast HIP start TB to neighboring pads / threads
+      }
+    }
+
+    if constexpr (CheckHIPTailEnd) {
+      if (acc.activeHIPTail.IsOpen()) {
+        acc.tailFilterCharge = UpdateHIPTailFilter(acc.tailFilterCharge, qs, hipTailFilterAlpha);
+        if (acc.tailFilterCharge < hipTailThreshold) {
+          acc.activeHIPTail.end = curTB;
+        }
+      }
+    }
+  }
+}
+
 template <>
 GPUd() void GPUTPCCFCheckPadBaseline::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer)
 {
@@ -49,6 +222,9 @@ GPUd() void GPUTPCCFCheckPadBaseline::CheckBaselineGPU(int32_t nBlocks, int32_t
   }
 
   const CfFragment& fragment = clusterer.mPmemory->fragment;
+  const bool hipFilterOn = clusterer.Param().rec.tpc.hipTailFilter;
+  const Charge hipTailThreshold = clusterer.Param().rec.tpc.hipTailFilterThreshold;
+  const Charge hipTailFilterAlpha = clusterer.Param().rec.tpc.hipTailFilterAlpha;
   CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
 
   constexpr GPUTPCGeometry geo;
@@ -57,45 +233,139 @@ GPUd() void GPUTPCCFCheckPadBaseline::CheckBaselineGPU(int32_t nBlocks, int32_t
   const auto nPads = geo.NPads(iRow);
   const CfChargePos basePos{(Row)iRow, 0, 0};
 
-  int32_t totalCharges = 0;
-  int32_t consecCharges = 0;
-  int32_t maxConsecCharges = 0;
-  Charge maxCharge = 0;
+  PadChargeAccu acc;
 
   const int16_t iPadOffset = iThread % MaxNPadsPerRow;
   const int16_t iTimeOffset = iThread / MaxNPadsPerRow;
   const int16_t iPadHandle = iThread;
   const bool handlePad = iPadHandle < nPads;
 
-  const auto firstTB = fragment.firstNonOverlapTimeBin();
-  const auto lastTB = fragment.lastNonOverlapTimeBin();
+  if (iPadHandle < MaxNPadsPerRow) {
+    smem.tails[iPadHandle] = {-1, -1};
+  }
+  GPUbarrier();
+
+  // Pad filter scans the entire fragments including overlap.
+  // Minimal runtime overhead and prevents headaches later on as
+  // saturated signal in overlap region can create tails in the next fragment
+  // even when cleared in current fragment as they're decoded twice
+  const TPCFragmentTime firstTB = 0;
+  const TPCFragmentTime lastTB = fragment.length;
 
-  for (auto t = firstTB; t < lastTB; t += NumOfCachedTBs) {
+  for (uint16_t t = firstTB; t < lastTB; t += NumOfCachedTBs) {
 
-    const TPCFragmentTime iTime = t + iTimeOffset;
+    bool thisThreadHasTrigger = false;
+    for (uint16_t tt = 0; tt < NumOfCachedTBs; tt += TimebinsPerCacheline) {
+      const TPCFragmentTime iTimeLoad = t + tt + iTimeOffset;
 
-    const CfChargePos pos = basePos.delta({iPadOffset, iTime});
+      const CfChargePos pos = basePos.delta({iPadOffset, iTimeLoad});
 
-    smem.charges[iTimeOffset][iPadOffset] = iTime < lastTB && iPadOffset < nPads ? chargeMap[pos].unpack() : 0;
+      const Charge ql = iTimeLoad < lastTB && iPadOffset < nPads ? chargeMap[pos].unpack() : 0;
+      smem.charges[tt + iTimeOffset][iPadOffset] = ql;
 
-    GPUbarrier();
+      thisThreadHasTrigger |= ql >= Charge(MaxADC);
+    }
+
+    bool hasHIPTrigger = false;
+    if (hipFilterOn) {
+      hasHIPTrigger = work_group_any(thisThreadHasTrigger);
+    } else {
+      // Need a barrier here even if HIP filter is disabled
+      GPUbarrier();
+    }
+
+    acc.HIPtb = -1;
 
     if (handlePad) {
-      for (int32_t i = 0; i < NumOfCachedTBs; i++) {
-        const Charge q = smem.charges[i][iPadHandle];
-        totalCharges += (q > 0);
-        consecCharges = (q > 0) ? consecCharges + 1 : 0;
-        maxConsecCharges = CAMath::Max(consecCharges, maxConsecCharges);
-        maxCharge = CAMath::Max<Charge>(q, maxCharge);
+
+      // TODO: is this really necessary?
+      // Why is the old version so much slower, when we just add short branches to the loop???
+      if (!hasHIPTrigger) [[likely]] {
+        if (!acc.activeHIPTail.IsOpen()) {
+          ScanCachedCharges<false, false>(smem, t, iPadHandle, hipTailThreshold, hipTailFilterAlpha, acc);
+        } else {
+          ScanCachedCharges<false, true>(smem, t, iPadHandle, hipTailThreshold, hipTailFilterAlpha, acc);
+        }
+      } else {
+        if (!acc.activeHIPTail.IsOpen()) {
+          ScanCachedCharges<true, false>(smem, t, iPadHandle, hipTailThreshold, hipTailFilterAlpha, acc);
+        } else {
+          ScanCachedCharges<true, true>(smem, t, iPadHandle, hipTailThreshold, hipTailFilterAlpha, acc);
+        }
       }
     }
 
     GPUbarrier();
-  }
+
+    if (hasHIPTrigger) [[unlikely]] {
+
+      DPRINTB("%d: Trigger!\n", iBlock);
+
+      if (handlePad && acc.HIPtb < 0) {
+
+        // Search neighboring pads for trigger
+        for (int16_t i = -SSClusterPadWidth; i < 0; i++) {
+          const auto p = iPadHandle + i;
+          if (p > -1) {
+            acc.HIPtb = CAMath::Max(smem.tails[p].start, acc.HIPtb);
+          }
+        }
+
+        for (int16_t i = 1; i <= SSClusterPadWidth; i++) {
+          const auto p = iPadHandle + i;
+          if (p < MaxNPadsPerRow) {
+            acc.HIPtb = CAMath::Max(smem.tails[p].start, acc.HIPtb);
+          }
+        }
+      }
+
+      bool shouldCloseTail = acc.HIPtb > -1 && acc.activeHIPTail.HasValue();
+      if (shouldCloseTail && acc.activeHIPTail.IsOpen()) {
+        DPRINT("%d: end = %d\n", iThread, acc.HIPtb);
+        acc.activeHIPTail.end = acc.HIPtb;
+      }
+
+      CloseHIPTails(smem, clusterer, iThread, nThreads, iPadHandle, basePos, chargeMap, acc, shouldCloseTail);
+
+      GPUbarrier();
+
+      if (acc.HIPtb > -1) {
+        DPRINT("%d: start = %d\n", iThread, acc.HIPtb);
+        acc.activeHIPTail.SetOpen(acc.HIPtb);
+        acc.tailFilterCharge = Charge(MaxADC);
+      }
+
+      // Clear smem between iterations to prevent stale entries
+      if (handlePad) {
+        smem.tails[iPadHandle].Reset();
+      }
+
+      GPUbarrier();
+
+    } // if (hipTriggerFound)
+
+  } // for (uint16_t t = firstTB; t < lastTB; t += NumOfCachedTBs)
 
   if (handlePad) {
-    updatePadBaseline(basePos.gpad + iPadHandle, clusterer, totalCharges, maxConsecCharges, maxCharge);
+    updatePadBaseline(basePos.gpad + iPadHandle, clusterer, acc.totalCharges, acc.maxConsecCharges, acc.maxCharge);
+  }
+
+  // --- Close remaining tails
+  const bool shouldCloseTail = acc.activeHIPTail.HasValue();
+
+  // Call `work_group_any` here, instead of always counting.
+  // This is important as `work_group_count` is a lot slower
+  // and has a lot of overhead if no HIPs were found.
+  if (work_group_any(shouldCloseTail)) {
+    if (shouldCloseTail && acc.activeHIPTail.IsOpen()) {
+      acc.activeHIPTail.end = lastTB;
+    }
+
+    [[maybe_unused]] const uint16_t nClosedTails = CloseHIPTails(smem, clusterer, iThread, nThreads, iPadHandle, basePos, chargeMap, acc, shouldCloseTail);
+
+    DPRINTB_IF(nClosedTails > 0, "%d: Close remaining tails (%d)\n", iBlock, nClosedTails);
   }
+
 #endif
 }
 
@@ -172,3 +442,138 @@ GPUd() void GPUTPCCFCheckPadBaseline::updatePadBaseline(int32_t pad, const GPUTP
     clusterer.mPpadIsNoisy[pad] = true;
   }
 }
+
+// ======== HIP Tail Connector Kernel ========
+
+template <>
+GPUd() void GPUTPCCFHIPTailConnector::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer)
+{
+  if (iBlock >= (int32_t)GPUTPCGeometry::NROWS) {
+    return;
+  }
+  const uint32_t row = iBlock;
+
+  const uint32_t nTails = CAMath::Min(clusterer.mPnHIPTails[row], (uint32_t)MaxHIPTailsPerRow - 1);
+
+  // HIP TAILS: indexing starts at 1, so 0 index indicates no connection
+  HIPTailDescriptor* tails = GetHIPTails(clusterer, row);
+
+#ifdef GPUCA_DETERMINISTIC_MODE
+  // Races in tail comparisons and atomic swap can lead to slightly different clusters.
+  // So need a sequential fallback for deterministic mode
+  if (iThread > 0) {
+    return;
+  }
+  nThreads = 1;
+  GPUCommonAlgorithm::sortInBlock(tails + 1, tails + nTails + 1, [](auto&& t1, auto&& t2) {
+    if (t1.pad != t2.pad) {
+      return t1.pad < t2.pad;
+    }
+    return t1.tailStart < t2.tailStart;
+  });
+#endif
+
+  for (uint32_t iTail = iThread + 1; iTail <= nTails; iTail += nThreads) {
+    auto* tail = &tails[iTail];
+
+    // TODO: this is needed because tailStarts may vary due to rising edge
+    // Better approach would be to also track the triggered timebin and match that instead
+    uint16_t overlapWindowStart = tail->tailStart >= 5 ? tail->tailStart - 5 : 0;
+    uint16_t overlapWindowEnd = tail->tailStart + 5;
+
+    for (uint32_t jTail = iTail + 1; jTail <= nTails; jTail++) {
+      auto* tailNext = &tails[jTail];
+      if (tailNext->iPrev > 0) {
+        continue;
+      }
+
+      const bool overlapPad = tailNext->pad >= tail->pad - GPUTPCCFCheckPadBaseline::SSClusterPadWidth && tailNext->pad <= tail->pad + GPUTPCCFCheckPadBaseline::SSClusterPadWidth;
+      const bool overlapTime = tailNext->tailStart >= overlapWindowStart && tailNext->tailStart < overlapWindowEnd;
+
+      if (overlapPad && overlapTime) {
+        if (CAMath::AtomicCAS(&tailNext->iPrev, 0u, iTail)) {
+          tail->iNext = jTail;
+          break;
+        }
+      }
+    }
+  }
+}
+
+// ======== HIP Clusterizer Kernel ========
+
+template <>
+GPUd() void GPUTPCCFHIPClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer)
+{
+  if (iBlock >= (int32_t)GPUTPCGeometry::NROWS) {
+    return;
+  }
+
+  const uint32_t row = iBlock;
+  uint32_t nTails = clusterer.mPnHIPTails[row];
+  nTails = CAMath::Min(nTails, (uint32_t)MaxHIPTailsPerRow - 1);
+
+  HIPTailDescriptor* tails = GetHIPTails(clusterer, row);
+  const auto& fragment = clusterer.mPmemory->fragment;
+
+  for (uint32_t iTail = iThread + 1; iTail <= nTails; iTail += nThreads) {
+
+    auto* tail = &tails[iTail];
+
+    if (tail->iPrev != 0) {
+      continue;
+    }
+
+    float qTot = tail->qTot;
+    float qMax = tail->qMax;
+    const float firstWeight = tail->qTot;
+    const float firstPad = tail->pad;
+    const float firstTime = HIPTailTimeMean(*tail);
+    float padSum = firstWeight * firstPad;
+    float padSqSum = firstWeight * firstPad * firstPad;
+    float timeSum = firstWeight * firstTime;
+
+    uint32_t tailStart = tail->tailStart;
+    uint32_t tailEnd = tail->tailEnd;
+
+    while (tail->iNext != 0) {
+
+      tail = &tails[tail->iNext];
+
+      const float tailWeight = tail->qTot;
+      const float tailPad = tail->pad;
+      const float tailTime = HIPTailTimeMean(*tail);
+      qMax = CAMath::Max(qMax, tail->qMax);
+      qTot += tail->qTot;
+      padSum += tailWeight * tailPad;
+      padSqSum += tailWeight * tailPad * tailPad;
+      timeSum += tailWeight * tailTime;
+      tailStart = CAMath::Min<uint32_t>(tailStart, tail->tailStart);
+      tailEnd = CAMath::Max<uint32_t>(tailEnd, tail->tailEnd);
+    }
+
+    const float weightSum = CAMath::Max(qTot, 1.f);
+    float padMean = padSum / weightSum;
+    float timeMean = timeSum / weightSum; // TODO: Use timebin of saturated signal instead! Time mean is biased for long tails.
+    float padSigma = CAMath::Sqrt(CAMath::Max(0.f, padSqSum / weightSum - padMean * padMean));
+
+    tpc::ClusterNative cn;
+    cn.qMax = qMax;
+    cn.setSaturatedQtot(qTot);
+    cn.setSaturatedTailLength(tailEnd - tailStart);
+    float clusterTime = fragment.start + timeMean - clusterer.Param().rec.tpc.clustersShiftTimebinsClusterizer;
+    cn.setTimeFlags(clusterTime, 0);
+    cn.setPad(padMean);
+    cn.setSigmaPad(padSigma);
+
+    if (cn.qMax >= 1023) {
+      // Cut off clusters where the tail connection failed for some reason
+      // TODO: Deduplicate with GPUTPCCFClusterizer::sortIntoBuckets (can't call cross-kernel).
+      // TODO: Add error reporting for row cluster overflow.
+      uint32_t index = CAMath::AtomicAdd(&clusterer.mPclusterInRow[row], 1u);
+      if (index < clusterer.mNMaxClusterPerRow) {
+        clusterer.mPclusterByRow[clusterer.mNMaxClusterPerRow * row + index] = cn;
+      }
+    }
+  }
+}
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h
index 7638b95ee7f0b..f78f91a548ac9 100644
--- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h
+++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h
@@ -15,6 +15,11 @@
 /// Kernel identifies noisy TPC pads by analyzing charge patterns over time.
 /// A pad is marked noisy if it exceeds thresholds for total or consecutive
 /// time bins with charge, unless the charge exceeds a saturation threshold.
+///
+/// Optionally detects Highly Ionising Particle (HIP) tails: when a saturated
+/// ADC value (1023) is found, the tail region on the triggering pad and its
+/// neighbors is zeroed in the charge map until an exponential charge filter
+/// drops below a configurable threshold.
 
 #ifndef O2_GPU_GPU_TPC_CF_CHECK_PAD_BASELINE_H
 #define O2_GPU_GPU_TPC_CF_CHECK_PAD_BASELINE_H
@@ -29,6 +34,16 @@
 namespace o2::gpu
 {
 
+struct HIPTailDescriptor {
+  uint32_t iPrev;
+  uint32_t iNext;
+  uint16_t pad;
+  uint16_t tailStart;
+  uint16_t tailEnd;
+  float qTot;
+  float qMax;
+};
+
 class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate
 {
 
@@ -39,15 +54,65 @@ class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate
     EntriesPerCacheline = PadsPerCacheline * TimebinsPerCacheline,
     NumOfCachedPads = GPUCA_WARP_SIZE / TimebinsPerCacheline,
     NumCLsPerWarp = GPUCA_WARP_SIZE / EntriesPerCacheline,
-    NumOfCachedTBs = TimebinsPerCacheline,
+    NumOfCachedTBs = TimebinsPerCacheline * 8,
     // Threads index shared memory as [iThread / MaxNPadsPerRow][iThread % MaxNPadsPerRow].
     // Rounding up to a multiple of PadsPerCacheline ensures iThread / MaxNPadsPerRow < NumOfCachedTBs
     // for all threads, avoiding out-of-bounds access.
     MaxNPadsPerRow = CAMath::nextMultipleOf<PadsPerCacheline>(GPUTPCGeometry::MaxNPadsPerRow()),
+
+    MaxADC = 1023,
+
+    NThreads = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFCheckPadBaseline),
+    SSClusterPadWidth = 5,
   };
 
-  struct GPUSharedMemory {
+  union HipTailRange {
+    struct {
+      int16_t start;
+      int16_t end;
+    };
+
+    // Be careful with using default initialized values.
+    // Need default constructor, so can be placed in shared memory.
+    // Might be zero initialized, but invalid tail needs start = end = -1 instead.
+    GPUdDefault() HipTailRange() = default;
+    GPUdi() HipTailRange(int16_t st, int16_t e) : start(st), end(e) {}
+
+    GPUdi() bool HasValue() const { return start > -1; }
+    GPUdi() bool IsOpen() const { return start > -1 && end < 0; }
+
+    GPUdi() void SetOpen(int16_t st)
+    {
+      start = st;
+      end = -1;
+    }
+
+    GPUdi() int16_t Length() const { return end - start; }
+
+    GPUdi() void Reset() { start = end = -1; }
+  };
+
+  struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64<int16_t, NThreads> {
     tpccf::Charge charges[NumOfCachedTBs][MaxNPadsPerRow];
+    HipTailRange tails[MaxNPadsPerRow];
+    uint8_t tailsClosedPad[MaxNPadsPerRow];
+    HipTailRange tailsClosed[MaxNPadsPerRow];
+    uint32_t tailsClosedStoreIdx[MaxNPadsPerRow];
+    tpccf::Charge tailQTotScratch[NThreads];
+    tpccf::Charge tailQMaxScratch[NThreads];
+    uint32_t tailStoreBase;
+  };
+
+  // Accumulated values from scanning cached charges in a pad
+  struct PadChargeAccu {
+    int32_t totalCharges = 0;
+    int32_t consecCharges = 0;
+    int32_t maxConsecCharges = 0;
+    tpccf::Charge maxCharge = 0;
+    int16_t HIPtb = -1;
+    int16_t aboveThresholdStart = -1; // first TB of current above-hipTailThreshold streak; used to extend the tail back over the rising edge before saturation
+    HipTailRange activeHIPTail{-1, -1};
+    tpccf::Charge tailFilterCharge = 0;
   };
 
   typedef GPUTPCClusterFinder processorType;
@@ -79,6 +144,58 @@ class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate
   GPUd() static void updatePadBaseline(int32_t pad, const GPUTPCClusterFinder&, int32_t totalCharges, int32_t consecCharges, tpccf::Charge maxCharge);
 };
 
+class GPUTPCCFHIPTailConnector : public GPUKernelTemplate
+{
+ public:
+  enum {
+    MaxHIPTails = 1 << 15,
+    MaxHIPTailsPerRow = MaxHIPTails,
+  };
+
+  struct GPUSharedMemory {
+  };
+
+  typedef GPUTPCClusterFinder processorType;
+  GPUhdi() static processorType* Processor(GPUConstantMem& processors)
+  {
+    return processors.tpcClusterer;
+  }
+
+  GPUhdi() constexpr static gpudatatypes::RecoStep GetRecoStep()
+  {
+    return gpudatatypes::RecoStep::TPCClusterFinding;
+  }
+
+  template <int32_t iKernel = defaultKernel>
+  GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer);
+};
+
+class GPUTPCCFHIPClusterizer : public GPUKernelTemplate
+{
+ public:
+  enum {
+    MaxHIPTails = GPUTPCCFHIPTailConnector::MaxHIPTails,
+    MaxHIPTailsPerRow = GPUTPCCFHIPTailConnector::MaxHIPTailsPerRow,
+  };
+
+  struct GPUSharedMemory {
+  };
+
+  typedef GPUTPCClusterFinder processorType;
+  GPUhdi() static processorType* Processor(GPUConstantMem& processors)
+  {
+    return processors.tpcClusterer;
+  }
+
+  GPUhdi() constexpr static gpudatatypes::RecoStep GetRecoStep()
+  {
+    return gpudatatypes::RecoStep::TPCClusterFinding;
+  }
+
+  template <int32_t iKernel = defaultKernel>
+  GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer);
+};
+
 } // namespace o2::gpu
 
 #endif
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx
index 7fef277138632..3d1ebbd54490e 100644
--- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx
+++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx
@@ -477,7 +477,10 @@ GPUd() void GPUTPCCFDecodeZSLinkBase::WriteCharge(processorType& clusterer, floa
   CfChargePos pos(padAndRow.getRow(), padAndRow.getPad(), localTime);
   positions[positionOffset] = pos;
 
-  charge *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, padAndRow.getRow(), padAndRow.getPad());
+  // Only apply gain correction if ADC not fully saturated
+  if (charge < 1023.f) {
+    charge *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, padAndRow.getRow(), padAndRow.getPad());
+  }
 
   chargeMap[pos] = PackedCharge(charge);
 }
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h
index 3ad463f469cd6..74b76f6bf7598 100644
--- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h
+++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h
@@ -132,7 +132,7 @@ class GPUTPCCFDecodeZSLink : public GPUTPCCFDecodeZSLinkBase
 {
  public:
   // constants for decoding
-  static inline constexpr int32_t DECODE_BITS = o2::tpc::TPCZSHDRV2::TPC_ZS_NBITS_V34;
+  static inline constexpr int32_t DECODE_BITS = tpc::TPCZSHDRV2::TPC_ZS_NBITS_V34;
   static inline constexpr float DECODE_BITS_FACTOR = 1.f / (1 << (DECODE_BITS - 10));
   static inline constexpr uint32_t DECODE_MASK = (1 << DECODE_BITS) - 1;
 
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx
index d79cdc2333b76..3248185a8be00 100644
--- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx
+++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx
@@ -46,13 +46,20 @@ template <>
 GPUd() void GPUTPCCFMCLabelFlattener::Thread<GPUTPCCFMCLabelFlattener::setRowOffsets>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory&, processorType& clusterer)
 {
 #if !defined(GPUCA_GPUCODE)
-  Row row = get_global_id(0);
+  const Row row = get_global_id(0);
+  const size_t clusterInRow = clusterer.mPclusterInRow[row];
+
+  // Label Flattener assumes 1 label container per cluster,
+  // but HIP clusters don't support MC labels yet and containers are missing for those clusters.
+  // So append empty label container for each HIP cluster.
+  // Note: This assumes that HIP cluster are store behind regular clusters!
+  auto& labels = clusterer.mPlabelsByRow[row].data;
+  labels.resize(std::max(labels.size(), clusterInRow));
 
-  uint32_t clusterInRow = clusterer.mPclusterInRow[row];
   uint32_t labelCount = 0;
 
-  for (uint32_t i = 0; i < clusterInRow; i++) {
-    auto& interim = clusterer.mPlabelsByRow[row].data[i];
+  for (size_t i = 0; i < clusterInRow; i++) {
+    auto& interim = labels[i];
     labelCount += interim.labels.size();
   }
 
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx
index e34163d3803fe..67be936ab4627 100644
--- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx
+++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx
@@ -25,6 +25,7 @@
 
 #include "CfChargePos.h"
 #include "CfArray2D.h"
+#include "GPUTPCCFCheckPadBaseline.h"
 
 using namespace o2::gpu;
 using namespace o2::tpc;
@@ -95,6 +96,10 @@ void* GPUTPCClusterFinder::SetPointersScratch(void* mem)
   if ((mRec->GetRecoStepsGPU() & gpudatatypes::RecoStep::TPCClusterFinding)) {
     computePointerWithAlignment(mem, mPscanBuf, mBufSize * mNBufs);
   }
+  // TODO: Use memory scalers for MaxHIPTails.
+  // NOTE: Always allocate since Param() is not available during size computation.
+  computePointerWithAlignment(mem, mPhipTailsByRow, GPUTPCGeometry::NROWS * GPUTPCCFHIPClusterizer::MaxHIPTailsPerRow);
+  computePointerWithAlignment(mem, mPnHIPTails, GPUTPCGeometry::NROWS);
   return mem;
 }
 
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h
index 4d036c2056cc5..bc49d225133fa 100644
--- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h
+++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h
@@ -46,6 +46,7 @@ namespace o2::gpu
 {
 struct GPUTPCClusterMCInterimArray;
 struct TPCPadGainCalib;
+struct HIPTailDescriptor;
 
 struct CfChargePos;
 
@@ -113,6 +114,8 @@ class GPUTPCClusterFinder : public GPUProcessor
   tpc::ClusterNative* mPclusterByRow = nullptr;
   GPUTPCClusterMCInterimArray* mPlabelsByRow = nullptr;
   int32_t* mPscanBuf = nullptr;
+  HIPTailDescriptor* mPhipTailsByRow = nullptr;
+  uint32_t* mPnHIPTails = nullptr; // one counter per row
   Memory* mPmemory = nullptr;
 
   GPUdi() int32_t* GetScanBuffer(int32_t iBuf) const { return mPscanBuf + iBuf * mBufSize; }
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx
index 1e5030956df01..2b21af6a08bed 100644
--- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx
+++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx
@@ -15,7 +15,6 @@
 #include "GPUTPCClusterFinder.h"
 #include "GPUReconstruction.h"
 #include "CfArray2D.h"
-#include "DataFormatsTPC/Digit.h"
 #include "DataFormatsTPC/ClusterNative.h"
 #include "GPUSettings.h"
 
@@ -155,7 +154,7 @@ void GPUTPCClusterFinder::DumpSuppressedPeaksCompacted(std::ostream& out)
 
 void GPUTPCClusterFinder::DumpClusters(std::ostream& out)
 {
-  out << "\nClusterer - Clusters - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << "\n";
+  out << "\nClusterer - Clusters - Sector " << mISector << " - All Fragments\n";
 
   for (uint32_t i = 0; i < GPUTPCGeometry::NROWS; i++) {
     size_t N = mPclusterInRow[i];
@@ -167,7 +166,13 @@ void GPUTPCClusterFinder::DumpClusters(std::ostream& out)
 
     out << "Row: " << i << ": " << N << "\n";
     for (const auto& cl : sortedCluster) {
-      out << std::hex << cl.timeFlagsPacked << std::dec << " " << cl.padPacked << " " << int32_t{cl.sigmaTimePacked} << " " << int32_t{cl.sigmaPadPacked} << " " << cl.qMax << " " << cl.qTot << "\n";
+      uint32_t qTot = cl.qTot;
+      uint32_t sigmaTime = cl.sigmaTimePacked;
+      if (cl.isSaturated()) {
+        qTot = cl.getSaturatedQtot();
+        sigmaTime = cl.getSaturatedTailLength();
+      }
+      out << std::hex << cl.timeFlagsPacked << std::dec << " " << cl.padPacked << " " << sigmaTime << " " << int32_t{cl.sigmaPadPacked} << " " << cl.qMax << " " << qTot << "\n";
     }
   }
 }
diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake
index 2176ea2dc3804..3041c2b869de2 100644
--- a/GPU/GPUTracking/kernels.cmake
+++ b/GPU/GPUTracking/kernels.cmake
@@ -103,6 +103,8 @@ o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, sortPerSectorRow"      "GPUTP
 o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, countFilteredClusters" "GPUTPCDecompressionKernels"                            LB)
 o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, storeFilteredClusters" "GPUTPCDecompressionKernels"                            LB)
 o2_gpu_add_kernel("GPUTPCCFCheckPadBaseline"                              "= TPCCLUSTERFINDER"                                    LB)
+o2_gpu_add_kernel("GPUTPCCFHIPTailConnector"                              "GPUTPCCFCheckPadBaseline TPCCLUSTERFINDER"             LB)
+o2_gpu_add_kernel("GPUTPCCFHIPClusterizer"                                "GPUTPCCFCheckPadBaseline TPCCLUSTERFINDER"             LB)
 o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillIndexMap"                 "= TPCCLUSTERFINDER"                                    LB)
 o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillFromDigits"               "= TPCCLUSTERFINDER"                                    LB)
 o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, findFragmentStart"            "= TPCCLUSTERFINDER"                                    LB int8_t setPositions)
diff --git a/GPU/GPUTracking/utils/VcShim.h b/GPU/GPUTracking/utils/VcShim.h
index 21a9a6a5c95c2..2bbc1d471bbbb 100644
--- a/GPU/GPUTracking/utils/VcShim.h
+++ b/GPU/GPUTracking/utils/VcShim.h
@@ -19,7 +19,7 @@
 
 #ifndef GPUCA_NO_VC
 
-#include <Vc/Vc>
+#include <Vc/Vc> // IWYU pragma: export
 
 #else
 
diff --git a/GPU/TPCFastTransformation/CorrectionMapsHelper.cxx b/GPU/TPCFastTransformation/CorrectionMapsHelper.cxx
index 7f7deddafe1c8..4bfedc117dec7 100644
--- a/GPU/TPCFastTransformation/CorrectionMapsHelper.cxx
+++ b/GPU/TPCFastTransformation/CorrectionMapsHelper.cxx
@@ -38,6 +38,10 @@ void CorrectionMapsHelper::setCorrMapMShape(std::unique_ptr<TPCFastTransform>&&
 void CorrectionMapsHelper::updateLumiScale(bool report)
 {
   if (!canUseCorrections()) {
+    if (mLumiScaleMode != LumiScaleMode::NoCorrection) {
+      LOGP(warning, "Negative meanLumi={} detected, switching to NoCorrection mode for backward compatibility", mMeanLumi);
+      mLumiScaleMode = LumiScaleMode::NoCorrection;
+    }
     mLumiScale = -1.f;
   } else if ((mLumiScaleMode == LumiScaleMode::DerivativeMap) || (mLumiScaleMode == LumiScaleMode::DerivativeMapMC)) {
     mLumiScale = mMeanLumiRef ? (mInstLumi - mMeanLumi) / mMeanLumiRef : 0.f;
@@ -54,7 +58,40 @@ void CorrectionMapsHelper::updateLumiScale(bool report)
 //________________________________________________________
 void CorrectionMapsHelper::reportScaling()
 {
-  LOGP(info, "Map scaling update: LumiScaleType={} instLumi(CTP)={} instLumi(scaling)={} meanLumiRef={}, meanLumi={} -> LumiScale={} lumiScaleMode={}, M-Shape map valid: {}, M-Shape default: {}",
-       mLumiScaleType == LumiScaleType::NoScaling ? "NoScaling" : (mLumiScaleType == LumiScaleType::CTPLumi ? "LumiCTP" : "TPCScaler"), getInstLumiCTP(), getInstLumi(), getMeanLumiRef(), getMeanLumi(), getLumiScale(),
-       mLumiScaleMode == LumiScaleMode::Linear ? "Linear" : "Derivative", (mCorrMapMShape != nullptr), isCorrMapMShapeDummy());
+  auto lumiTypeName = [](LumiScaleType t) {
+    switch (t) {
+      case LumiScaleType::NoScaling:
+        return "NoScaling";
+      case LumiScaleType::CTPLumi:
+        return "CTPLumi";
+      case LumiScaleType::TPCScaler:
+        return "TPCScaler";
+      default:
+        return "Unknown";
+    }
+  };
+
+  const bool mshapeValid = (mCorrMapMShape != nullptr) && !isCorrMapMShapeDummy();
+
+  if (mLumiScaleMode == LumiScaleMode::NoCorrection) {
+    LOGP(info, "Map scaling update: mode=NoCorrection (corrections disabled, dummy map in use)");
+  } else if (mLumiScaleMode == LumiScaleMode::StaticMapOnly) {
+    LOGP(info, "Map scaling update: mode=StaticMapOnly (static reference map, no lumi scaling), M-Shape correction: {}", mshapeValid ? "applied" : "not applied");
+  } else {
+    auto lumiModeName = [](LumiScaleMode m) {
+      switch (m) {
+        case LumiScaleMode::Linear:
+          return "Linear";
+        case LumiScaleMode::DerivativeMap:
+          return "DerivativeMap";
+        case LumiScaleMode::DerivativeMapMC:
+          return "DerivativeMapMC";
+        default:
+          return "Unknown";
+      }
+    };
+    LOGP(info, "Map scaling update: LumiScaleType={} instLumi(CTP)={} instLumi(scaling)={} meanLumiRef={} meanLumi={} -> LumiScale={} lumiScaleMode={}, M-Shape correction: {}",
+         lumiTypeName(mLumiScaleType), getInstLumiCTP(), getInstLumi(), getMeanLumiRef(), getMeanLumi(), getLumiScale(),
+         lumiModeName(mLumiScaleMode), mshapeValid ? "applied" : "not applied");
+  }
 }
diff --git a/GPU/TPCFastTransformation/CorrectionMapsTypes.h b/GPU/TPCFastTransformation/CorrectionMapsTypes.h
index e239b668ab751..092a2927ebe3e 100644
--- a/GPU/TPCFastTransformation/CorrectionMapsTypes.h
+++ b/GPU/TPCFastTransformation/CorrectionMapsTypes.h
@@ -22,14 +22,18 @@ enum class LumiScaleType : int {
   Unset = -1,    ///< init value
   NoScaling = 0, ///< no scaling, use map as is
   CTPLumi = 1,   ///< use CTP luminosity for scaling
-  TPCScaler = 2  ///< use TPC scaler for scaling
+  TPCScaler = 2, ///< use TPC scaler for scaling
+  Count          ///< sentinel - keep last
 };
 
 enum class LumiScaleMode : int {
-  Unset = -1,         ///< init value
-  Linear = 0,         ///< map(lumi) = (mean_map - referenceMap) * lumiScale + referenceMap
-  DerivativeMap = 1,  ///< map(lumi) = mean_map + lumiScale * (derivativeMap) where derivativeMap = (mean_map_A - mean_map_B)
-  DerivativeMapMC = 2 ///< same DerivativeMap, but for MC
+  Unset = -1,          ///< init value
+  Linear = 0,          ///< map(lumi) = (mean_map - referenceMap) * lumiScale + referenceMap
+  DerivativeMap = 1,   ///< map(lumi) = mean_map + lumiScale * (derivativeMap) where derivativeMap = (mean_map_A - mean_map_B)
+  DerivativeMapMC = 2, ///< same DerivativeMap, but for MC
+  NoCorrection = 3,    ///< no corrections at all
+  StaticMapOnly = 4,   ///< use only static map instead of main map
+  Count                ///< sentinel - keep last
 };
 
 struct CorrectionMapsGloOpts {
diff --git a/log.txt b/log.txt
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/prodtests/full-system-test/aggregator-workflow.sh b/prodtests/full-system-test/aggregator-workflow.sh
index 0dc30df93669d..e8469465d0572 100755
--- a/prodtests/full-system-test/aggregator-workflow.sh
+++ b/prodtests/full-system-test/aggregator-workflow.sh
@@ -155,25 +155,35 @@ if workflow_has_parameter CALIB_PROXIES; then
     if [[ -n ${CALIBDATASPEC_BARREL_SPORADIC:-} ]]; then
       add_W o2-dpl-raw-proxy "--dataspec \"$CALIBDATASPEC_BARREL_SPORADIC\" $(get_proxy_connection barrel_sp input sporadic)" "" 0
     fi
-  elif [[ $AGGREGATOR_TASKS == TPC_IDCBOTH_SAC ]]; then
+  elif [[ $AGGREGATOR_TASKS == TPC_IDCBOTH_SAC || $AGGREGATOR_TASKS == TPC_CMV ]]; then
     if [[ $EPNSYNCMODE != 1 ]]; then
-      echo "ERROR: TPC IDC / SAC calib workflow enabled without EPNSYNCMODE, please note that there will not be input data for it" 1>&2
+      echo "ERROR: TPC IDC / SAC / CMV calib workflow enabled without EPNSYNCMODE, please note that there will not be input data for it" 1>&2
     fi
     CHANNELS_LIST=
     [[ $EPNSYNCMODE == 0 ]] && FLP_ADDRESS="tcp://localhost:29950"
     if [[ -n ${CALIBDATASPEC_TPCIDC_A:-} ]] || [[ -n ${CALIBDATASPEC_TPCIDC_C:-} ]] || [[ -n ${CALIBDATASPEC_TPCCMV:-} ]]; then
-      # define port for FLP
-      : ${TPC_IDC_FLP_PORT:=29950}
+      # define port for FLP and channel prefix
+      TPC_FLP_PORT=
+      TPC_FLP_CHAN_PREFIX=
+      if [[ $AGGREGATOR_TASKS == TPC_CMV ]] && [[ -n ${CALIBDATASPEC_TPCCMV:-} ]]; then
+        TPC_FLP_PORT=29952
+        TPC_FLP_CHAN_PREFIX=tpccmv
+      elif [[ $AGGREGATOR_TASKS == TPC_IDCBOTH_SAC ]] && [[ -n ${CALIBDATASPEC_TPCIDC_A:-} || -n ${CALIBDATASPEC_TPCIDC_C:-} ]]; then
+        TPC_FLP_PORT=29950
+        TPC_FLP_CHAN_PREFIX=tpcidc
+      fi
       # expand FLPs; TPC uses from 001 to 145, but 145 is reserved for SAC
-      if [[ "${GEN_TOPO_DEPLOYMENT_TYPE:-}" == "ALICE_STAGING" ]]; then
-        FLP_ADDRESS="tcp://alio2-cr1-mvs03-ib:${TPC_IDC_FLP_PORT}"
-        CHANNELS_LIST+="type=pull,name=tpcidc_flp,transport=zeromq,address=$FLP_ADDRESS,method=connect,rateLogging=10;"
-      else
-        for flp in $(seq -f "%03g" 1 144); do
-          [[ ! $FLP_IDS =~ (^|,)"$flp"(,|$) ]] && continue
-          [[ $EPNSYNCMODE == 1 ]] && FLP_ADDRESS="tcp://alio2-cr1-flp${flp}-ib:${TPC_IDC_FLP_PORT}"
-          CHANNELS_LIST+="type=pull,name=tpcidc_flp${flp},transport=zeromq,address=$FLP_ADDRESS,method=connect,rateLogging=10;"
-        done
+      if [[ -n $TPC_FLP_PORT ]]; then
+        if [[ "${GEN_TOPO_DEPLOYMENT_TYPE:-}" == "ALICE_STAGING" ]]; then
+          FLP_ADDRESS="tcp://alio2-cr1-mvs03-ib:${TPC_FLP_PORT}"
+          CHANNELS_LIST+="type=pull,name=${TPC_FLP_CHAN_PREFIX}_flp,transport=zeromq,address=$FLP_ADDRESS,method=connect,rateLogging=10;"
+        else
+          for flp in $(seq -f "%03g" 1 144); do
+            [[ ! $FLP_IDS =~ (^|,)"$flp"(,|$) ]] && continue
+            [[ $EPNSYNCMODE == 1 ]] && FLP_ADDRESS="tcp://alio2-cr1-flp${flp}-ib:${TPC_FLP_PORT}"
+            CHANNELS_LIST+="type=pull,name=${TPC_FLP_CHAN_PREFIX}_flp${flp},transport=zeromq,address=$FLP_ADDRESS,method=connect,rateLogging=10;"
+          done
+        fi
       fi
     fi
     if [[ -n ${CALIBDATASPEC_TPCSAC:-} ]]; then
@@ -184,22 +194,25 @@ if workflow_has_parameter CALIB_PROXIES; then
     fi
     if [[ -n $CHANNELS_LIST ]]; then
       DATASPEC_LIST=
-      if [[ -n ${CALIBDATASPEC_TPCIDC_A:-} ]]; then
-        add_semicolon_separated DATASPEC_LIST "\"$CALIBDATASPEC_TPCIDC_A\""
-      fi
-      if [[ -n ${CALIBDATASPEC_TPCIDC_C:-} ]]; then
-        add_semicolon_separated DATASPEC_LIST "\"$CALIBDATASPEC_TPCIDC_C\""
-      fi
-      if [[ -n ${CALIBDATASPEC_TPCCMV:-} ]]; then
-        add_semicolon_separated DATASPEC_LIST "\"$CALIBDATASPEC_TPCCMV\""
-      fi
-      if [[ -n ${CALIBDATASPEC_TPCSAC:-} ]]; then
-        add_semicolon_separated DATASPEC_LIST "\"$CALIBDATASPEC_TPCSAC\""
+      if [[ $AGGREGATOR_TASKS == TPC_CMV ]]; then
+        if [[ -n ${CALIBDATASPEC_TPCCMV:-} ]]; then
+          add_semicolon_separated DATASPEC_LIST "\"$CALIBDATASPEC_TPCCMV\""
+        fi
+      else
+        if [[ -n ${CALIBDATASPEC_TPCIDC_A:-} ]]; then
+          add_semicolon_separated DATASPEC_LIST "\"$CALIBDATASPEC_TPCIDC_A\""
+        fi
+        if [[ -n ${CALIBDATASPEC_TPCIDC_C:-} ]]; then
+          add_semicolon_separated DATASPEC_LIST "\"$CALIBDATASPEC_TPCIDC_C\""
+        fi
+        if [[ -n ${CALIBDATASPEC_TPCSAC:-} ]]; then
+          add_semicolon_separated DATASPEC_LIST "\"$CALIBDATASPEC_TPCSAC\""
+        fi
       fi
       if [[ -z ${O2_TPC_IDC_CMV_IO_THREADS:-} ]]; then
         O2_TPC_IDC_CMV_IO_THREADS=4;
-    fi
-     add_W o2-dpl-raw-proxy "--proxy-name tpcidc --io-threads ${O2_TPC_IDC_CMV_IO_THREADS} --dataspec \"$DATASPEC_LIST\" --sporadic-outputs --channel-config \"$CHANNELS_LIST\" ${TIMEFRAME_SHM_LIMIT+--timeframes-shm-limit} $TIMEFRAME_SHM_LIMIT" "" 0
+      fi
+      add_W o2-dpl-raw-proxy "--proxy-name ${TPC_FLP_CHAN_PREFIX} --io-threads ${O2_TPC_IDC_CMV_IO_THREADS} --dataspec \"$DATASPEC_LIST\" --sporadic-outputs --channel-config \"$CHANNELS_LIST\" ${TIMEFRAME_SHM_LIMIT+--timeframes-shm-limit} $TIMEFRAME_SHM_LIMIT" "" 0
     fi
   elif [[ $AGGREGATOR_TASKS == CALO_TF ]]; then
     if [[ -n ${CALIBDATASPEC_CALO_TF:-} ]]; then
@@ -308,7 +321,10 @@ nTFs=$((1000 * 128 / ${NHBPERTF}))
 nTFs_SAC=$((10000 * 128 / ${NHBPERTF}))
 nBuffer=$((100 * 128 / ${NHBPERTF}))
 nBuffer_cmv=$((50 * 128 / ${NHBPERTF}))
-lanesCMVaggregate=${O2_TPC_CMV_AGGREGATE_NLANES:-8}
+lanesCMVaggregate=${O2_TPC_CMV_AGGREGATE_NLANES:-4}
+lanesCMVdistribute=${O2_TPC_CMV_DISTRIBUTE_NLANES:-2}
+cmvCompression=${O2_TPC_CMV_COMPRESSION:---use-sparse --cmv-zero-threshold 1.0 --cmv-dynamic-precision-mean 1.0 --cmv-dynamic-precision-sigma 8.0 --use-compression-huffman}
+cmvTimeframes=${O2_TPC_CMV_TIMEFRAMES:-4000}
 IDC_DELTA="--disable-IDCDelta true" # off by default
 # deltas are on by default; you need to request explicitly to switch them off;
 if [[ "${DISABLE_IDC_DELTA:-}" == "1" ]]; then IDC_DELTA=""; fi
@@ -316,24 +332,25 @@ if [[ "${ENABLE_IDC_DELTA_FILE:-}" == "1" ]]; then IDC_DELTA+=" --dump-IDCDelta-
 
 if [[ "${DISABLE_IDC_PAD_MAP_WRITING:-}" == 1 ]]; then TPC_WRITING_PAD_STATUS_MAP=""; else TPC_WRITING_PAD_STATUS_MAP="--enableWritingPadStatusMap true"; fi
 
-if ! workflow_has_parameter CALIB_LOCAL_INTEGRATED_AGGREGATOR && [[ $AGGREGATOR_TASKS == TPC_IDCBOTH_SAC || $AGGREGATOR_TASKS == ALL ]]; then
-  if [[ $CALIB_TPC_IDC == 1 ]]; then
-    add_W o2-tpc-idc-distribute "--crus ${crus} --timeframes ${nTFs} --output-lanes ${lanesFactorize} --send-precise-timestamp true --condition-tf-per-query ${nTFs}  --n-TFs-buffer ${nBuffer}"
-    add_W o2-tpc-idc-factorize "--n-TFs-buffer ${nBuffer} --input-lanes ${lanesFactorize} --crus ${crus} --timeframes ${nTFs} --nthreads-grouping ${threadFactorize} --nthreads-IDC-factorization ${threadFactorize} --sendOutputFFT true --enable-CCDB-output true --enablePadStatusMap true ${TPC_WRITING_PAD_STATUS_MAP} --use-precise-timestamp true $IDC_DELTA" "TPCIDCGroupParam.groupPadsSectorEdges=32211"
-    add_W o2-tpc-idc-ft-aggregator "--rangeIDC 200 --inputLanes ${lanesFactorize} --nFourierCoeff 40 --nthreads 8"
-  fi
-  if [[ $CALIB_TPC_CMV == 1 ]]; then
-    if [[ -z ${O2_TPC_CMV_COMPRESSION:-} ]]; then O2_TPC_CMV_COMPRESSION="--use-sparse --cmv-zero-threshold 1.0 --cmv-dynamic-precision-mean 1.0 --cmv-dynamic-precision-sigma 8.0 --use-compression-huffman"; fi
-    if [[ -z ${O2_TPC_CMV_TIMEFRAMES:-} ]]; then O2_TPC_CMV_TIMEFRAMES="2000"; fi
-    add_W o2-tpc-cmv-distribute "--crus ${crus} --lanes 1 --output-lanes ${lanesCMVaggregate} --n-TFs-buffer ${nBuffer_cmv} --timeframes ${O2_TPC_CMV_TIMEFRAMES} --send-precise-timestamp "
-    add_W o2-tpc-cmv-aggregate "--crus ${crus} --input-lanes ${lanesCMVaggregate} --n-TFs-buffer ${nBuffer_cmv} --nthreads-compression 4 --timeframes ${O2_TPC_CMV_TIMEFRAMES} --use-precise-timestamp  ${O2_TPC_CMV_COMPRESSION} --output-dir $CALIB_DIR --meta-output-dir $EPN2EOS_METAFILES_DIR "
-  fi
-  if [[ $CALIB_TPC_SAC == 1 ]]; then
-    add_W o2-tpc-sac-distribute "--timeframes ${nTFs_SAC} --output-lanes 1 "
-    add_W o2-tpc-sac-factorize "--timeframes ${nTFs_SAC} --nthreads-SAC-factorization 4 --input-lanes 1 --compression 2"
-    add_W o2-tpc-idc-ft-aggregator "--rangeIDC 200 --nFourierCoeff 40 --process-SACs true --inputLanes 1"
+if ! workflow_has_parameter CALIB_LOCAL_INTEGRATED_AGGREGATOR; then
+  if [[ $AGGREGATOR_TASKS == TPC_IDCBOTH_SAC || $AGGREGATOR_TASKS == ALL ]]; then
+    if [[ $CALIB_TPC_IDC == 1 ]]; then
+      add_W o2-tpc-idc-distribute "--crus ${crus} --timeframes ${nTFs} --output-lanes ${lanesFactorize} --send-precise-timestamp true --condition-tf-per-query ${nTFs}  --n-TFs-buffer ${nBuffer}"
+      add_W o2-tpc-idc-factorize "--n-TFs-buffer ${nBuffer} --input-lanes ${lanesFactorize} --crus ${crus} --timeframes ${nTFs} --nthreads-grouping ${threadFactorize} --nthreads-IDC-factorization ${threadFactorize} --sendOutputFFT true --enable-CCDB-output true --enablePadStatusMap true ${TPC_WRITING_PAD_STATUS_MAP} --use-precise-timestamp true $IDC_DELTA" "TPCIDCGroupParam.groupPadsSectorEdges=32211"
+      add_W o2-tpc-idc-ft-aggregator "--rangeIDC 200 --inputLanes ${lanesFactorize} --nFourierCoeff 40 --nthreads 8"
+    fi
+    if [[ $CALIB_TPC_SAC == 1 ]]; then
+      add_W o2-tpc-sac-distribute "--timeframes ${nTFs_SAC} --output-lanes 1 "
+      add_W o2-tpc-sac-factorize "--timeframes ${nTFs_SAC} --nthreads-SAC-factorization 4 --input-lanes 1 --compression 2"
+      add_W o2-tpc-idc-ft-aggregator "--rangeIDC 200 --nFourierCoeff 40 --process-SACs true --inputLanes 1"
+    fi
+  elif [[ $AGGREGATOR_TASKS == TPC_CMV || $AGGREGATOR_TASKS == ALL ]]; then
+    if [[ $CALIB_TPC_CMV == 1 ]]; then
+      add_W o2-tpc-cmv-distribute "--crus ${crus} --lanes ${lanesCMVdistribute} --output-lanes ${lanesCMVaggregate} --n-TFs-buffer ${nBuffer_cmv} --timeframes ${cmvTimeframes} --send-precise-timestamp "
+      add_W o2-tpc-cmv-aggregate "--crus ${crus} --input-lanes ${lanesCMVaggregate} --n-TFs-buffer ${nBuffer_cmv} --nthreads-compression 8 --timeframes ${cmvTimeframes} --use-precise-timestamp  ${cmvCompression} --output-dir $CALIB_DIR --meta-output-dir $EPN2EOS_METAFILES_DIR "
+      CCDB_POPULATOR_UPLOAD_PATH=none
+    fi
   fi
-  [[ $AGGREGATOR_TASKS == TPC_IDCBOTH_SAC ]] && [[ $CALIB_TPC_IDC == 0 && $CALIB_TPC_SAC == 0 && $CALIB_TPC_CMV == 1 ]] && CCDB_POPULATOR_UPLOAD_PATH="none"
 fi
 
 # Calo cal
diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh
index 5ab54f9cf4b43..6351f2236d1a1 100755
--- a/prodtests/full-system-test/dpl-workflow.sh
+++ b/prodtests/full-system-test/dpl-workflow.sh
@@ -71,6 +71,50 @@ elif [[ -z ${SYNCRAWMODE:-} ]]; then
   SYNCRAWMODE=0
 fi
 
+# ---------------------------------------------------------------------------------------------------------------------
+# build incoming raw inputs specs
+define_raw_inputs()
+{
+  PROXY_INSPEC="dd:FLP/DISTSUBTIMEFRAME/0"
+  PROXY_IN_N=0
+  for i in ${INPUT_DETECTOR_LIST//,/ }; do
+    if has_detector_flp_processing $i; then
+      case $i in
+        TOF)
+          PROXY_INTYPE="CRAWDATA";;
+        FT0 | FV0 | FDD)
+          PROXY_INTYPE="DIGITSBC/0 DIGITSCH/0";;
+        PHS)
+          PROXY_INTYPE="CELLS CELLTRIGREC";;
+        CPV)
+          PROXY_INTYPE="DIGITS/0 DIGITTRIGREC/0 RAWHWERRORS";;
+        EMC)
+          PROXY_INTYPE="CELLS/0 CELLSTRGR/0 DECODERERR";;
+        CTP)
+          PROXY_INTYPE="LUMI/0 RAWDATA"
+          CTP_CONFIG=" --no-lumi "
+          ;;
+        *)
+          echo Input type for detector $i with FLP processing not defined 1>&2
+          exit 1;;
+      esac
+    else
+      PROXY_INTYPE=RAWDATA
+    fi
+    for j in $PROXY_INTYPE; do
+      PROXY_INNAME="RAWIN$PROXY_IN_N"
+      let PROXY_IN_N=$PROXY_IN_N+1
+      PROXY_INSPEC+=";$PROXY_INNAME:$i/$j"
+    done
+  done
+  # do we have DPL_RAWTFDUMP_TRIGGER trigger (e.g. TPC/CMVTRIGGER)? If so, add its spec
+  if has_detector TPC && [[ -n ${DPL_RAWTFDUMP_TRIGGER:-} ]]; then
+    PROXY_INNAME="RAWIN$PROXY_IN_N"
+    let PROXY_IN_N=$PROXY_IN_N+1
+    PROXY_INSPEC+=";$PROXY_INNAME:${DPL_RAWTFDUMP_TRIGGER}"
+  fi
+}
+
 # ---------------------------------------------------------------------------------------------------------------------
 # Set some individual workflow arguments depending on configuration
 GPU_INPUT=zsraw
@@ -109,6 +153,7 @@ EVE_OPT=" --jsons-folder $EDJSONS_DIR"
 : ${ALPIDE_ERR_DUMPS:=}
 : ${ITSSTAGGERED:=}
 : ${MFTSTAGGERED:=}
+: ${PROXY_INSPEC:=}
 
 [[ -z $ALPIDE_ERR_DUMPS ]] && [[ $EPNSYNCMODE == 1 && $RUNTYPE == "PHYSICS" ]] && ALPIDE_ERR_DUMPS=1 || ALPIDE_ERR_DUMPS=0
 
@@ -483,38 +528,7 @@ if [[ -n $INPUT_DETECTOR_LIST ]]; then
     add_W o2-raw-tf-reader-workflow "--delay $TFDELAY $TFRAWOPT --loop $TFLOOP $NTIMEFRAMES_CMD --input-data ${TFName} ${INPUT_FILE_COPY_CMD+--copy-cmd} ${INPUT_FILE_COPY_CMD:-} --onlyDet $INPUT_DETECTOR_LIST ${TIMEFRAME_SHM_LIMIT+--timeframes-shm-limit} ${TIMEFRAME_SHM_LIMIT:-}"
   elif [[ $EXTINPUT == 1 ]]; then
     PROXY_CHANNEL="name=readout-proxy,type=pull,method=connect,address=ipc://${UDS_PREFIX}${INRAWCHANNAME},transport=shmem,rateLogging=$EPNSYNCMODE"
-    PROXY_INSPEC="dd:FLP/DISTSUBTIMEFRAME/0"
-    PROXY_IN_N=0
-    for i in ${INPUT_DETECTOR_LIST//,/ }; do
-      if has_detector_flp_processing $i; then
-        case $i in
-          TOF)
-            PROXY_INTYPE="CRAWDATA";;
-          FT0 | FV0 | FDD)
-            PROXY_INTYPE="DIGITSBC/0 DIGITSCH/0";;
-          PHS)
-            PROXY_INTYPE="CELLS CELLTRIGREC";;
-          CPV)
-            PROXY_INTYPE="DIGITS/0 DIGITTRIGREC/0 RAWHWERRORS";;
-          EMC)
-            PROXY_INTYPE="CELLS/0 CELLSTRGR/0 DECODERERR";;
-          CTP)
-            PROXY_INTYPE="LUMI/0 RAWDATA"
-            CTP_CONFIG=" --no-lumi "
-            ;;
-          *)
-            echo Input type for detector $i with FLP processing not defined 1>&2
-            exit 1;;
-        esac
-      else
-        PROXY_INTYPE=RAWDATA
-      fi
-      for j in $PROXY_INTYPE; do
-        PROXY_INNAME="RAWIN$PROXY_IN_N"
-        let PROXY_IN_N=$PROXY_IN_N+1
-        PROXY_INSPEC+=";$PROXY_INNAME:$i/$j"
-      done
-    done
+    define_raw_inputs
     [[ -n ${TIMEFRAME_RATE_LIMIT:-} ]] && [[ $TIMEFRAME_RATE_LIMIT != 0 ]] && PROXY_CHANNEL+=";name=metric-feedback,type=pull,method=connect,address=ipc://${UDS_PREFIX}metric-feedback-${O2JOBID:-$NUMAID},transport=shmem,rateLogging=0"
     if [[ $EPNSYNCMODE == 1 ]]; then
       RAWPROXY_CONFIG="--print-input-sizes 1000"
@@ -544,8 +558,18 @@ if [[ -z ${WORKFLOW_DETECTORS_USE_GLOBAL_READER_TRACKS} ]] && [[ -z ${WORKFLOW_D
 fi
 
 # ---------------------------------------------------------------------------------------------------------------------
-# Raw decoder workflows - disabled in async mode
+
 if [[ $CTFINPUT == 0 && $DIGITINPUT == 0 ]]; then
+# Check if raw TF data dump was requested, RAWTF_DUMPRATE must be in %
+  if [[ ${DPL_RAWTFDUMP:-} == 1 ]]; then
+     [[ -z ${PROXY_INSPEC} ]] && define_raw_inputs
+     CONFIG_RAWTFDUMP="--dataspec \"${PROXY_INSPEC}\" --output-dir \"${RAWTF_DIR:-$CTF_DIR}\" --meta-output-dir \"${EPN2EOS_METAFILES_DIR}\" --max-dump-rate ${RAWTF_DUMPRATE:-0.1} "
+     CONFIG_RAWTFDUMP+=" --min-file-size ${RAWTF_MINSIZE:-$CTF_MINSIZE} --max-tf-per-file ${RAWTF_MAX_PER_FILE:-$CTF_MAX_PER_FILE} --mute-warn-period ${RAWTF_MUTE_PERIOD:-200} --max-warn ${RAWTF_MAX_WARN:-5} "
+     [[ -n ${DPL_RAWTFDUMP_TRIGGER:-} ]] && CONFIG_RAWTFDUMP+=" --triggerspec \"DMPTRG:${DPL_RAWTFDUMP_TRIGGER}\" "
+     add_W o2-raw-tf-dump-workflow "$CONFIG_RAWTFDUMP"
+  fi
+
+# Raw decoder workflows - disabled in async mode
   if has_detector TPC && [[ "${TPC_CONVERT_LINKZS_TO_RAW:-}" == "1" ]]; then
     GPU_INPUT=zsonthefly
     RAWTODIGITOPTIONS=