From e531104978cafb172ce683d4d84427795caf78e3 Mon Sep 17 00:00:00 2001
From: Timo Koch <timokoch@uio.no>
Date: Wed, 19 Jul 2023 16:34:37 +0200
Subject: [PATCH] [experimental][assembly] Make assembler/localassembler work

The steps were:
* Add multistage assemblers draft
* Combine residual and jacobian step
* Move computation of split residuals into the localassembler
* Remove isImplicit and cleanup
* Separate local assembler implementations from old impl
* Simplify Dirichlet/constraints handling
* Get rid of the implicit template argument; the implicit/explicit decision is now made at runtime
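
A rough usage sketch of the intended workflow (the time stepping types come from
dumux/experimental/timestepping; the assembler constructor arguments are illustrative
only and not verified against the new headers):

    // illustrative sketch -- the assembler constructor signature is an assumption;
    // problem, gridGeometry, gridVariables, linearSolver, x, t, dt as usual in a DuMux main file
    auto msMethod = std::make_shared<Experimental::MultiStage::ImplicitEuler<Scalar>>();
    using Assembler = Experimental::MultiStageFVAssembler<TypeTag, DiffMethod::numeric>;
    auto assembler = std::make_shared<Assembler>(problem, gridGeometry, gridVariables, msMethod);
    auto solver = std::make_shared<NewtonSolver<Assembler, LinearSolver>>(assembler, linearSolver);
    Experimental::MultiStageTimeStepper<NewtonSolver<Assembler, LinearSolver>> timeStepper(solver, msMethod);
    timeStepper.step(x, t, dt); // assembles and solves all stages of one time step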
---
 .../experimental/assembly/cclocalassembler.hh | 542 +++++++++++++++
 .../assembly/cvfelocalassembler.hh            | 571 ++++++++++++++++
 .../assembly/fvlocalassemblerbase.hh          | 304 +++++++++
 .../assembly/multistagefvassembler.hh         | 469 +++++++++++++
 .../assembly/multistagefvlocaloperator.hh     | 112 ++++
 .../multistagemultidomainfvassembler.hh       | 617 ++++++++++++++++++
 .../assembly/subdomaincclocalassembler.hh     | 378 +++++++++++
 .../assembly/subdomaincvfelocalassembler.hh   | 386 +++++++++++
 .../timestepping/multistagetimestepper.hh     |  30 +-
 9 files changed, 3392 insertions(+), 17 deletions(-)
 create mode 100644 dumux/experimental/assembly/cclocalassembler.hh
 create mode 100644 dumux/experimental/assembly/cvfelocalassembler.hh
 create mode 100644 dumux/experimental/assembly/fvlocalassemblerbase.hh
 create mode 100644 dumux/experimental/assembly/multistagefvassembler.hh
 create mode 100644 dumux/experimental/assembly/multistagefvlocaloperator.hh
 create mode 100644 dumux/experimental/assembly/multistagemultidomainfvassembler.hh
 create mode 100644 dumux/experimental/assembly/subdomaincclocalassembler.hh
 create mode 100644 dumux/experimental/assembly/subdomaincvfelocalassembler.hh

diff --git a/dumux/experimental/assembly/cclocalassembler.hh b/dumux/experimental/assembly/cclocalassembler.hh
new file mode 100644
index 0000000000..ca5149c0f3
--- /dev/null
+++ b/dumux/experimental/assembly/cclocalassembler.hh
@@ -0,0 +1,542 @@
+// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+// vi: set et ts=4 sw=4 sts=4:
+//
+// SPDX-FileCopyrightText: Copyright © DuMux Project contributors, see AUTHORS.md in root folder
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+/*!
+ * \file
+ * \ingroup Assembly
+ * \ingroup CCDiscretization
+ * \brief An assembler for Jacobian and residual contribution per element (cell-centered methods)
+ */
+#ifndef DUMUX_EXPERIMENTAL_CC_LOCAL_ASSEMBLER_HH
+#define DUMUX_EXPERIMENTAL_CC_LOCAL_ASSEMBLER_HH
+
+#include <dune/common/reservedvector.hh>
+#include <dune/grid/common/gridenums.hh> // for GhostEntity
+#include <dune/istl/matrixindexset.hh>
+
+#include <dumux/common/reservedblockvector.hh>
+#include <dumux/common/properties.hh>
+#include <dumux/common/parameters.hh>
+#include <dumux/common/numericdifferentiation.hh>
+#include <dumux/common/numeqvector.hh>
+#include <dumux/assembly/numericepsilon.hh>
+#include <dumux/assembly/diffmethod.hh>
+#include <dumux/assembly/entitycolor.hh>
+#include <dumux/assembly/partialreassembler.hh>
+#include <dumux/discretization/fluxstencil.hh>
+#include <dumux/discretization/cellcentered/elementsolution.hh>
+
+#include <dumux/experimental/assembly/fvlocalassemblerbase.hh>
+
+namespace Dumux::Experimental {
+
+#ifndef DOXYGEN
+namespace Detail::CC {
+
+struct NoOperator
+{
+    template<class... Args>
+    constexpr void operator()(Args&&...) const {}
+};
+
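+// use the given Implementation if it is not void, otherwise fall back to the default local assembler (CRTP)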
+template<class X, class Y>
+using Impl = std::conditional_t<!std::is_same_v<X, void>, X, Y>;
+
+} // end namespace Detail::CC
+#endif // DOXYGEN
+
+/*!
+ * \ingroup Assembly
+ * \ingroup CCDiscretization
+ * \brief A base class for all local cell-centered assemblers
+ * \tparam TypeTag The TypeTag
+ * \tparam Assembler The assembler type
+ * \tparam Implementation The actual implementation
+ */
+template<class TypeTag, class Assembler, class Implementation>
+class CCLocalAssemblerBase : public Experimental::FVLocalAssemblerBase<TypeTag, Assembler, Implementation>
+{
+    using ParentType = Experimental::FVLocalAssemblerBase<TypeTag, Assembler, Implementation>;
+    using GridGeometry = GetPropType<TypeTag, Properties::GridGeometry>;
+    using FVElementGeometry = typename GridGeometry::LocalView;
+    using Element = typename FVElementGeometry::Element;
+    using SubControlVolumeFace = typename GridGeometry::SubControlVolumeFace;
+    using GridView = typename GridGeometry::GridView;
+    using JacobianMatrix = GetPropType<TypeTag, Properties::JacobianMatrix>;
+    using GridVariables = GetPropType<TypeTag, Properties::GridVariables>;
+    using Problem = GetPropType<TypeTag, Properties::Problem>;
+    using ElementVolumeVariables = typename GetPropType<TypeTag, Properties::GridVolumeVariables>::LocalView;
+    using NumEqVector = Dumux::NumEqVector<GetPropType<TypeTag, Properties::PrimaryVariables>>;
+
+public:
+
+    using ParentType::ParentType;
+
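+    /*!
+     * \brief Computes the derivatives with respect to the given element and adds them
+     *        to the global matrix. The element residual is written into the right hand side.
+     */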
+    template <class ResidualVector, class StageParams, class PartialReassembler = DefaultPartialReassembler, class CouplingFunction = Detail::CC::NoOperator>
+    void assembleJacobianAndResidual(JacobianMatrix& jac, ResidualVector& res, GridVariables& gridVariables,
+                                     const StageParams& stageParams, ResidualVector& temporal, ResidualVector& spatial,
+                                     ResidualVector& constrainedDofs,
+                                     const PartialReassembler* partialReassembler = nullptr,
+                                     const CouplingFunction& maybeAssembleCouplingBlocks = {})
+    {
+        this->asImp_().bindLocalViews();
+        const auto globalI = this->fvGeometry().gridGeometry().elementMapper().index(this->element());
+
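+        // evaluate the spatial (flux/source) and temporal (storage) parts of the residual with
+        // unit weights, then combine them using the weights of the current (last) stage; the
+        // local residual keeps the stage weights for the subsequent Jacobian assembly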
+        this->localResidual().spatialWeight(1.0);
+        this->localResidual().temporalWeight(1.0);
+
+        spatial[globalI] = this->evalLocalFluxAndSourceResidual(this->curElemVolVars())[0];
+        temporal[globalI] = this->localResidual().evalStorage(this->fvGeometry(), this->curElemVolVars())[0];
+        res[globalI] = spatial[globalI]*stageParams.spatialWeight(stageParams.size()-1)
+                       + temporal[globalI]*stageParams.temporalWeight(stageParams.size()-1);
+
+        this->localResidual().spatialWeight(stageParams.spatialWeight(stageParams.size()-1));
+        this->localResidual().temporalWeight(stageParams.temporalWeight(stageParams.size()-1));
+
+        if (partialReassembler
+            && partialReassembler->elementColor(globalI) == EntityColor::green)
+        {
+            // assemble the coupling blocks for coupled models (does nothing if not coupled)
+            maybeAssembleCouplingBlocks(res[globalI]);
+        }
+        else
+        {
+            this->asImp_().assembleJacobian(jac, gridVariables, res[globalI]); // forward to the internal implementation
+
+            // assemble the coupling blocks for coupled models (does nothing if not coupled)
+            maybeAssembleCouplingBlocks(res[globalI]);
+        }
+    }
+
+    /*!
+     * \brief Assemble the residual only
+     */
+    template <class ResidualVector>
+    void assembleResidual(ResidualVector& res)
+    {
+        this->asImp_().bindLocalViews();
+        const auto globalI = this->assembler().gridGeometry().elementMapper().index(this->element());
+        res[globalI] = this->asImp_().evalLocalResidual()[0]; // forward to the internal implementation
+
+        using Problem = GetPropType<TypeTag, Properties::Problem>;
+        if constexpr (Problem::enableInternalDirichletConstraints())
+        {
+            const auto applyDirichlet = [&] (const auto& scvI,
+                                             const auto& dirichletValues,
+                                             const auto eqIdx,
+                                             const auto pvIdx)
+            {
+                res[scvI.dofIndex()][eqIdx] = this->curElemVolVars()[scvI].priVars()[pvIdx] - dirichletValues[pvIdx];
+            };
+
+            this->asImp_().enforceInternalDirichletConstraints(applyDirichlet);
+        }
+    }
+
+    /*!
+     * \brief Evaluates the fluxes (element can potentially be a neighbor)
+     */
+    NumEqVector evalFlux(const Element& neighbor,
+                         const SubControlVolumeFace& scvf) const
+    {
+        return this->localResidual().evalFlux(
+            this->asImp_().problem(), neighbor,
+            this->fvGeometry(), this->curElemVolVars(),
+            this->elemFluxVarsCache(), scvf
+        );
+    }
+
+    /*!
+     * \brief Assemble the spatial and temporal (storage) residual contributions separately
+     */
+    template<class SubResidualVector>
+    void assembleCurrentResidual(SubResidualVector& spatialRes, SubResidualVector& temporalRes)
+    {
+        this->asImp_().bindLocalViews();
+        const auto globalI = this->fvGeometry().gridGeometry().elementMapper().index(this->element());
+        spatialRes[globalI] = this->evalLocalFluxAndSourceResidual(this->curElemVolVars())[0];
+        temporalRes[globalI] = this->localResidual().evalStorage(this->fvGeometry(), this->curElemVolVars())[0];
+
+        if constexpr (Problem::enableInternalDirichletConstraints())
+            DUNE_THROW(Dune::NotImplemented, "Not implemented");
+    }
+
+    /*!
+     * \brief Update the coupling context for coupled models.
+     * \note This does nothing per default (not a coupled model).
+     */
+    template<class... Args>
+    void maybeUpdateCouplingContext(Args&&...) {}
+
+    /*!
+     * \brief Update the additional domain derivatives for coupled models.
+     * \note This does nothing per default (not a coupled model).
+     */
+    template<class... Args>
+    void maybeEvalAdditionalDomainDerivatives(Args&&...) {}
+};
+
+/*!
+ * \ingroup Assembly
+ * \ingroup CCDiscretization
+ * \brief An assembler for Jacobian and residual contribution per element (cell-centered methods)
+ * \tparam TypeTag The TypeTag
+ * \tparam Assembler The assembler type
+ * \tparam diffMethod The differentiation method used to compute the residual derivatives
+ * \tparam Implementation The actual implementation via CRTP (defaults to this assembler if void)
+ */
+template<class TypeTag, class Assembler, DiffMethod diffMethod = DiffMethod::numeric, class Implementation = void>
+class CCLocalAssembler;
+
+/*!
+ * \ingroup Assembly
+ * \ingroup CCDiscretization
+ * \brief Cell-centered scheme local assembler using numeric differentiation
+ */
+template<class TypeTag, class Assembler, class Implementation>
+class CCLocalAssembler<TypeTag, Assembler, DiffMethod::numeric, Implementation>
+: public CCLocalAssemblerBase<TypeTag, Assembler,
+                              Detail::CC::Impl<Implementation, CCLocalAssembler<TypeTag, Assembler, DiffMethod::numeric, Implementation>>>
+{
+    using ThisType = CCLocalAssembler<TypeTag, Assembler, DiffMethod::numeric, Implementation>;
+    using ParentType = CCLocalAssemblerBase<TypeTag, Assembler, Detail::CC::Impl<Implementation, ThisType>>;
+    using Scalar = GetPropType<TypeTag, Properties::Scalar>;
+    using NumEqVector = Dumux::NumEqVector<GetPropType<TypeTag, Properties::PrimaryVariables>>;
+    using GridGeometry = GetPropType<TypeTag, Properties::GridGeometry>;
+    using FVElementGeometry = typename GridGeometry::LocalView;
+    using Element = typename FVElementGeometry::Element;
+    using GridVariables = GetPropType<TypeTag, Properties::GridVariables>;
+    using JacobianMatrix = GetPropType<TypeTag, Properties::JacobianMatrix>;
+    using Problem = typename GridVariables::GridVolumeVariables::Problem;
+
+    static constexpr int numEq = GetPropType<TypeTag, Properties::ModelTraits>::numEq();
+    static constexpr int dim = GetPropType<TypeTag, Properties::GridGeometry>::GridView::dimension;
+
+    using FluxStencil = Dumux::FluxStencil<FVElementGeometry>;
+    static constexpr int maxElementStencilSize = GridGeometry::maxElementStencilSize;
+    static constexpr bool enableGridFluxVarsCache = GetPropType<TypeTag, Properties::GridVariables>::GridFluxVariablesCache::cachingEnabled;
+
+public:
+    using ParentType::ParentType;
+
+    using LocalResidual = typename ParentType::LocalResidual;
+    using ElementResidualVector = typename LocalResidual::ElementResidualVector;
+
+    /*!
+     * \brief Computes the derivatives with respect to the given element and adds them
+     *        to the global matrix.
+     */
+    void assembleJacobian(JacobianMatrix& A, GridVariables& gridVariables, const NumEqVector& origResidual)
+    {
+        if (this->isImplicit())
+            assembleJacobianImplicit_(A, gridVariables, origResidual);
+        else
+            assembleJacobianExplicit_(A, gridVariables, origResidual);
+    }
+
+private:
+
+    /*!
+     * \brief Computes the derivatives with respect to the given element and adds them to the global matrix.
+     */
+    void assembleJacobianImplicit_(JacobianMatrix& A, GridVariables& gridVariables, const NumEqVector& origResidual)
+    {
+        //////////////////////////////////////////////////////////////////////////////////////////////////
+        // Calculate derivatives of all dofs in stencil with respect to the dofs in the element. In the //
+        // neighboring elements we do so by computing the derivatives of the fluxes which depend on the //
+        // actual element. In the actual element we evaluate the derivative of the entire residual.     //
+        //////////////////////////////////////////////////////////////////////////////////////////////////
+
+        // get some aliases for convenience
+        const auto& element = this->element();
+        const auto& fvGeometry = this->fvGeometry();
+        const auto& gridGeometry = this->fvGeometry().gridGeometry();
+        auto&& curElemVolVars = this->curElemVolVars();
+        auto&& elemFluxVarsCache = this->elemFluxVarsCache();
+
+        // get stencil information
+        const auto globalI = gridGeometry.elementMapper().index(element);
+        const auto& connectivityMap = gridGeometry.connectivityMap();
+        const auto numNeighbors = connectivityMap[globalI].size();
+
+        // container to store the neighboring elements
+        Dune::ReservedVector<Element, maxElementStencilSize> neighborElements;
+        neighborElements.resize(numNeighbors);
+
+        // assemble the undeflected residual
+        using Residuals = ReservedBlockVector<NumEqVector, maxElementStencilSize>;
+        Residuals origResiduals(numNeighbors + 1); origResiduals = 0.0;
+        origResiduals[0] = origResidual;
+
+        // lambda for convenient evaluation of the fluxes across scvfs in the neighbors
+        // if the neighbor is a ghost we don't want to add anything to its residual
+        // so we return 0 and omit computing the flux
+        auto evalNeighborFlux = [&] (const auto& neighbor, const auto& scvf)
+        {
+            if (neighbor.partitionType() == Dune::GhostEntity)
+                return NumEqVector(0.0);
+            else
+                return this->evalFlux(neighbor, scvf);
+        };
+
+        // get the elements in which we need to evaluate the fluxes
+        // and calculate these in the undeflected state
+        unsigned int j = 1;
+        for (const auto& dataJ : connectivityMap[globalI])
+        {
+            neighborElements[j-1] = gridGeometry.element(dataJ.globalJ);
+            for (const auto scvfIdx : dataJ.scvfsJ)
+                origResiduals[j] += evalNeighborFlux(neighborElements[j-1], fvGeometry.scvf(scvfIdx));
+
+            ++j;
+        }
+
+        // reference to the element's scv (needed later) and corresponding vol vars
+        const auto& scv = fvGeometry.scv(globalI);
+        auto& curVolVars = ParentType::getVolVarAccess(gridVariables.curGridVolVars(), curElemVolVars, scv);
+
+        // save a copy of the original privars and vol vars in order
+        // to restore the original solution after deflection
+        const auto& curSol = this->asImp_().curSol();
+        const auto origPriVars = curSol[globalI];
+        const auto origVolVars = curVolVars;
+
+        // element solution container to be deflected
+        auto elemSol = elementSolution<FVElementGeometry>(origPriVars);
+
+        // derivatives of the neighbors' residuals with respect to the dofs of the current element;
+        // index 0 holds the derivative of the element residual with respect to its own dofs
+        Residuals partialDerivs(numNeighbors + 1);
+
+        for (int pvIdx = 0; pvIdx < numEq; ++pvIdx)
+        {
+            partialDerivs = 0.0;
+
+            auto evalResiduals = [&](Scalar priVar)
+            {
+                Residuals partialDerivsTmp(numNeighbors + 1);
+                partialDerivsTmp = 0.0;
+                // update the volume variables and the flux var cache
+                elemSol[0][pvIdx] = priVar;
+                this->asImp_().maybeUpdateCouplingContext(scv, elemSol, pvIdx);
+                curVolVars.update(elemSol, this->asImp_().problem(), element, scv);
+                elemFluxVarsCache.update(element, fvGeometry, curElemVolVars);
+                if (enableGridFluxVarsCache)
+                    gridVariables.gridFluxVarsCache().updateElement(element, fvGeometry, curElemVolVars);
+
+                // calculate the residual with the deflected primary variables
+                partialDerivsTmp[0] = this->evalLocalResidual()[0];
+
+                // calculate the fluxes in the neighbors with the deflected primary variables
+                for (std::size_t k = 0; k < numNeighbors; ++k)
+                    for (auto scvfIdx : connectivityMap[globalI][k].scvfsJ)
+                        partialDerivsTmp[k+1] += evalNeighborFlux(neighborElements[k], fvGeometry.scvf(scvfIdx));
+
+                return partialDerivsTmp;
+            };
+
+            // derive the residuals numerically
+            static const NumericEpsilon<Scalar, numEq> eps_{this->asImp_().problem().paramGroup()};
+            static const int numDiffMethod = getParamFromGroup<int>(this->asImp_().problem().paramGroup(), "Assembly.NumericDifferenceMethod");
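+            // Assembly.NumericDifferenceMethod: 1 forward, -1 backward, 0 central differences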
+            NumericDifferentiation::partialDerivative(evalResiduals, elemSol[0][pvIdx], partialDerivs, origResiduals,
+                                                      eps_(elemSol[0][pvIdx], pvIdx), numDiffMethod);
+
+            // Correct derivative for ghost elements, i.e. set a 1 for the derivative w.r.t. the
+            // current primary variable and a 0 elsewhere. As we always solve for a delta of the
+            // solution with respect to the initial one, this results in a delta of 0 for ghosts.
+            if (this->elementIsGhost())
+            {
+                partialDerivs[0] = 0.0;
+                partialDerivs[0][pvIdx] = 1.0;
+            }
+
+            // restore the original state of the scv's volume variables
+            curVolVars = origVolVars;
+
+            // restore the current element solution
+            elemSol[0][pvIdx] = origPriVars[pvIdx];
+
+            // restore the undeflected state of the coupling context
+            this->asImp_().maybeUpdateCouplingContext(scv, elemSol, pvIdx);
+
+            // add the current partial derivatives to the global jacobian matrix
+            // no special treatment is needed if globalJ is a ghost because then derivatives have been assembled to 0 above
+            if constexpr (Problem::enableInternalDirichletConstraints())
+            {
+                // check if own element has internal Dirichlet constraint
+                const auto internalDirichletConstraintsOwnElement = this->asImp_().problem().hasInternalDirichletConstraint(this->element(), scv);
+                const auto dirichletValues = this->asImp_().problem().internalDirichlet(this->element(), scv);
+
+                for (int eqIdx = 0; eqIdx < numEq; ++eqIdx)
+                {
+                    if (internalDirichletConstraintsOwnElement[eqIdx])
+                    {
+                        origResiduals[0][eqIdx] = origVolVars.priVars()[eqIdx] - dirichletValues[eqIdx];
+                        A[globalI][globalI][eqIdx][pvIdx] = (eqIdx == pvIdx) ? 1.0 : 0.0;
+                    }
+                    else
+                        A[globalI][globalI][eqIdx][pvIdx] += partialDerivs[0][eqIdx];
+                }
+
+                // off-diagonal entries
+                j = 1;
+                for (const auto& dataJ : connectivityMap[globalI])
+                {
+                    const auto& neighborElement = neighborElements[j-1];
+                    const auto& neighborScv = fvGeometry.scv(dataJ.globalJ);
+                    const auto internalDirichletConstraintsNeighbor = this->asImp_().problem().hasInternalDirichletConstraint(neighborElement, neighborScv);
+
+                    for (int eqIdx = 0; eqIdx < numEq; ++eqIdx)
+                    {
+                        if (internalDirichletConstraintsNeighbor[eqIdx])
+                            A[dataJ.globalJ][globalI][eqIdx][pvIdx] = 0.0;
+                        else
+                            A[dataJ.globalJ][globalI][eqIdx][pvIdx] += partialDerivs[j][eqIdx];
+                    }
+
+                    ++j;
+                }
+            }
+            else // no internal Dirichlet constraints specified
+            {
+                for (int eqIdx = 0; eqIdx < numEq; eqIdx++)
+                {
+                    // the diagonal entries
+                    A[globalI][globalI][eqIdx][pvIdx] += partialDerivs[0][eqIdx];
+
+                    // off-diagonal entries
+                    j = 1;
+                    for (const auto& dataJ : connectivityMap[globalI])
+                        A[dataJ.globalJ][globalI][eqIdx][pvIdx] += partialDerivs[j++][eqIdx];
+                }
+            }
+        }
+
+        // restore original state of the flux vars cache in case of global caching.
+        // This has to be done in order to guarantee that everything is in an undeflected
+        // state before the assembly of another element is called. In the case of local caching
+        // this is obsolete because the elemFluxVarsCache used here goes out of scope after this.
+        // We only have to do this for the last primary variable, for all others the flux var cache
+        // is updated with the correct element volume variables before residual evaluations
+        if (enableGridFluxVarsCache)
+            gridVariables.gridFluxVarsCache().updateElement(element, fvGeometry, curElemVolVars);
+
+        // evaluate additional derivatives that might arise from the coupling (no-op if not coupled)
+        ElementResidualVector orig{origResidual};
+        this->asImp_().maybeEvalAdditionalDomainDerivatives(orig, A, gridVariables);
+    }
+
+    /*!
+     * \brief Computes the derivatives with respect to the given element and adds them
+     *        to the global matrix.
+     */
+    void assembleJacobianExplicit_(JacobianMatrix& A, GridVariables& gridVariables, const NumEqVector& origResidual)
+    {
+        if (this->assembler().isStationaryProblem())
+            DUNE_THROW(Dune::InvalidStateException, "Using explicit jacobian assembler with stationary local residual");
+
+        // assemble the undeflected residual
+        auto storageResidual = origResidual;
+
+        //////////////////////////////////////////////////////////////////////////////////////////////////
+        // Calculate derivatives of all dofs in stencil with respect to the dofs in the element. In the //
+        // neighboring elements all derivatives are zero. For the assembled element only the storage    //
+        // derivatives are non-zero.                                                                    //
+        //////////////////////////////////////////////////////////////////////////////////////////////////
+
+        // get some aliases for convenience
+        const auto& element = this->element();
+        const auto& fvGeometry = this->fvGeometry();
+        const auto& gridGeometry = this->fvGeometry().gridGeometry();
+        auto&& curElemVolVars = this->curElemVolVars();
+
+        // reference to the element's scv (needed later) and corresponding vol vars
+        const auto globalI = gridGeometry.elementMapper().index(element);
+        const auto& scv = fvGeometry.scv(globalI);
+        auto& curVolVars = ParentType::getVolVarAccess(gridVariables.curGridVolVars(), curElemVolVars, scv);
+
+        // save a copy of the original privars and vol vars in order
+        // to restore the original solution after deflection
+        const auto& curSol = this->asImp_().curSol();
+        const auto origPriVars = curSol[globalI];
+        const auto origVolVars = curVolVars;
+
+        // element solution container to be deflected
+        auto elemSol = elementSolution<FVElementGeometry>(origPriVars);
+
+        // for explicit schemes only the storage derivative of the element with respect to its own dofs is non-zero
+        NumEqVector partialDeriv;
+        for (int pvIdx = 0; pvIdx < numEq; ++pvIdx)
+        {
+            // reset derivatives of element dof with respect to itself
+            partialDeriv = 0.0;
+
+            auto evalStorage = [&](Scalar priVar)
+            {
+                // update the volume variables and calculate
+                // the residual with the deflected primary variables
+                elemSol[0][pvIdx] = priVar;
+                curVolVars.update(elemSol, this->asImp_().problem(), element, scv);
+                return this->evalStorage()[0];
+            };
+
+            // for non-ghosts compute the derivative numerically
+            if (!this->elementIsGhost())
+            {
+                static const NumericEpsilon<Scalar, numEq> eps_{this->asImp_().problem().paramGroup()};
+                static const int numDiffMethod = getParamFromGroup<int>(this->asImp_().problem().paramGroup(), "Assembly.NumericDifferenceMethod");
+                NumericDifferentiation::partialDerivative(evalStorage, elemSol[0][pvIdx], partialDeriv, storageResidual,
+                                                          eps_(elemSol[0][pvIdx], pvIdx), numDiffMethod);
+            }
+
+            // for ghost elements we assemble a 1.0 on the diagonal of the deflected primary variable and zero everywhere else
+            // as we always solve for a delta of the solution with respect to the initial solution this
+            // results in a delta of zero for ghosts
+            else partialDeriv[pvIdx] = 1.0;
+
+            // restore the original state of the scv's volume variables
+            curVolVars = origVolVars;
+
+            // restore the current element solution
+            elemSol[0][pvIdx] = origPriVars[pvIdx];
+
+            // add the current partial derivatives to the global jacobian matrix
+            // only diagonal entries for explicit jacobians
+            if constexpr (Problem::enableInternalDirichletConstraints())
+            {
+                // check if own element has internal Dirichlet constraint
+                const auto internalDirichletConstraints = this->asImp_().problem().hasInternalDirichletConstraint(this->element(), scv);
+                const auto dirichletValues = this->asImp_().problem().internalDirichlet(this->element(), scv);
+
+                for (int eqIdx = 0; eqIdx < numEq; ++eqIdx)
+                {
+                    // TODO: we need to set constrained dof flags for these dofs and not modify the residual here
+                    // this function only takes care of the Jacobian
+                    if (internalDirichletConstraints[eqIdx])
+                    {
+                        storageResidual[eqIdx] = origVolVars.priVars()[eqIdx] - dirichletValues[eqIdx];
+                        A[globalI][globalI][eqIdx][pvIdx] = (eqIdx == pvIdx) ? 1.0 : 0.0;
+                    }
+                    else
+                        A[globalI][globalI][eqIdx][pvIdx] += partialDeriv[eqIdx];
+                }
+            }
+            else
+            {
+                for (int eqIdx = 0; eqIdx < numEq; eqIdx++)
+                    A[globalI][globalI][eqIdx][pvIdx] += partialDeriv[eqIdx];
+            }
+        }
+    }
+};
+
+} // end namespace Dumux::Experimental
+
+#endif
diff --git a/dumux/experimental/assembly/cvfelocalassembler.hh b/dumux/experimental/assembly/cvfelocalassembler.hh
new file mode 100644
index 0000000000..7c58d0115a
--- /dev/null
+++ b/dumux/experimental/assembly/cvfelocalassembler.hh
@@ -0,0 +1,571 @@
+// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+// vi: set et ts=4 sw=4 sts=4:
+//
+// SPDX-FileCopyrightText: Copyright © DuMux Project contributors, see AUTHORS.md in root folder
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+/*!
+ * \file
+ * \ingroup Assembly
+ * \ingroup CVFEDiscretization
+ * \brief An assembler for Jacobian and residual contribution per element (CVFE methods)
+ */
+#ifndef DUMUX_EXPERIMENTAL_CVFE_LOCAL_ASSEMBLER_HH
+#define DUMUX_EXPERIMENTAL_CVFE_LOCAL_ASSEMBLER_HH
+
+#include <dune/common/exceptions.hh>
+#include <dune/common/hybridutilities.hh>
+#include <dune/common/reservedvector.hh>
+#include <dune/grid/common/gridenums.hh>
+#include <dune/istl/matrixindexset.hh>
+#include <dune/istl/bvector.hh>
+
+#include <dumux/common/properties.hh>
+#include <dumux/common/parameters.hh>
+#include <dumux/common/numericdifferentiation.hh>
+
+#include <dumux/assembly/numericepsilon.hh>
+#include <dumux/assembly/diffmethod.hh>
+#include <dumux/experimental/assembly/fvlocalassemblerbase.hh>
+#include <dumux/assembly/partialreassembler.hh>
+#include <dumux/assembly/entitycolor.hh>
+
+#include <dumux/discretization/cvfe/elementsolution.hh>
+
+#include <dumux/assembly/volvardeflectionhelper_.hh>
+
+namespace Dumux::Experimental {
+
+#ifndef DOXYGEN
+namespace Detail::CVFE {
+
+struct NoOperator
+{
+    template<class... Args>
+    constexpr void operator()(Args&&...) const {}
+};
+
+template<class X, class Y>
+using Impl = std::conditional_t<!std::is_same_v<X, void>, X, Y>;
+
+} // end namespace Detail::CVFE
+#endif // DOXYGEN
+
+/*!
+ * \ingroup Assembly
+ * \ingroup CVFEDiscretization
+ * \brief A base class for all local CVFE assemblers
+ * \tparam TypeTag The TypeTag
+ * \tparam Assembler The assembler type
+ * \tparam Implementation The actual implementation
+ */
+template<class TypeTag, class Assembler, class Implementation>
+class CVFELocalAssemblerBase : public Experimental::FVLocalAssemblerBase<TypeTag, Assembler, Implementation>
+{
+    using ParentType = Experimental::FVLocalAssemblerBase<TypeTag, Assembler, Implementation>;
+    using JacobianMatrix = GetPropType<TypeTag, Properties::JacobianMatrix>;
+    using GridVariables = GetPropType<TypeTag, Properties::GridVariables>;
+    using Problem = GetPropType<TypeTag, Properties::Problem>;
+    using ElementVolumeVariables = typename GridVariables::GridVolumeVariables::LocalView;
+    using SolutionVector = typename Assembler::SolutionVector;
+
+    static constexpr int numEq = GetPropType<TypeTag, Properties::ModelTraits>::numEq();
+    static constexpr int dim = GetPropType<TypeTag, Properties::GridGeometry>::GridView::dimension;
+
+public:
+
+    using ParentType::ParentType;
+
+    using LocalResidual = typename ParentType::LocalResidual;
+    using ElementResidualVector = typename LocalResidual::ElementResidualVector;
+
+
+    void bindLocalViews()
+    {
+        ParentType::bindLocalViews();
+        this->elemBcTypes().update(this->asImp_().problem(), this->element(), this->fvGeometry());
+    }
+
+    /*!
+     * \brief Computes the derivatives with respect to the given element and adds them
+     *        to the global matrix. The element residual is written into the right hand side.
+     */
+    template <class ResidualVector, class StageParams, class PartialReassembler = DefaultPartialReassembler, class CouplingFunction = Detail::CVFE::NoOperator>
+    void assembleJacobianAndResidual(JacobianMatrix& jac, ResidualVector& res, GridVariables& gridVariables,
+                                     const StageParams& stageParams, ResidualVector& temporal, ResidualVector& spatial,
+                                     ResidualVector& constrainedDofs,
+                                     const PartialReassembler* partialReassembler = nullptr,
+                                     const CouplingFunction& maybeAssembleCouplingBlocks = {})
+    {
+        this->asImp_().bindLocalViews();
+        const auto eIdxGlobal = this->asImp_().problem().gridGeometry().elementMapper().index(this->element());
+
+        this->localResidual().spatialWeight(1.0);
+        this->localResidual().temporalWeight(1.0);
+
+        const auto sWeight = stageParams.spatialWeight(stageParams.size()-1);
+        const auto tWeight = stageParams.temporalWeight(stageParams.size()-1);
+
+        const auto flux = this->evalLocalFluxAndSourceResidual(this->curElemVolVars());
+        const auto storage = this->localResidual().evalStorage(this->fvGeometry(), this->curElemVolVars());
+        ElementResidualVector origResidual(flux.size());
+        origResidual = 0.0;
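+        // accumulate the split (spatial/temporal) contributions per dof as well as the
+        // stage-weighted residual that enters the right hand side and the Jacobian assembly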
+        for (const auto& scv : scvs(this->fvGeometry()))
+        {
+            spatial[scv.dofIndex()] += flux[scv.localDofIndex()];
+            temporal[scv.dofIndex()] += storage[scv.localDofIndex()];
+            origResidual[scv.localDofIndex()] += flux[scv.localDofIndex()]*sWeight + storage[scv.localDofIndex()]*tWeight;
+            res[scv.dofIndex()] += origResidual[scv.localDofIndex()];
+        }
+
+        this->localResidual().spatialWeight(sWeight);
+        this->localResidual().temporalWeight(tWeight);
+
+        if (partialReassembler && partialReassembler->elementColor(eIdxGlobal) == EntityColor::green)
+        {
+            // assemble the coupling blocks for coupled models (does nothing if not coupled)
+            maybeAssembleCouplingBlocks(origResidual);
+        }
+        else if (!this->elementIsGhost())
+        {
+            this->asImp_().assembleJacobian(jac, gridVariables, origResidual, partialReassembler); // forward to the internal implementation
+
+            // assemble the coupling blocks for coupled models (does nothing if not coupled)
+            maybeAssembleCouplingBlocks(origResidual);
+        }
+        else
+        {
+            // Treatment of ghost elements
+            assert(this->elementIsGhost());
+
+            // handle dofs per codimension
+            const auto& gridGeometry = this->asImp_().problem().gridGeometry();
+            Dune::Hybrid::forEach(std::make_integer_sequence<int, dim>{}, [&](auto d)
+            {
+                constexpr int codim = dim - d;
+                const auto& localCoeffs = gridGeometry.feCache().get(this->element().type()).localCoefficients();
+                for (int idx = 0; idx < localCoeffs.size(); ++idx)
+                {
+                    const auto& localKey = localCoeffs.localKey(idx);
+
+                    // skip if we are not handling this codim right now
+                    if (localKey.codim() != codim)
+                        continue;
+
+                    // do not change the non-ghost entities
+                    auto entity = this->element().template subEntity<codim>(localKey.subEntity());
+                    if (entity.partitionType() == Dune::InteriorEntity || entity.partitionType() == Dune::BorderEntity)
+                        continue;
+
+                    // WARNING: this only works if the mapping from codim+subEntity to
+                    // global dofIndex is unique (one dof per entity of this codim).
+                    // For more general mappings, we should use a proper local-global mapping here.
+                    // For example through dune-functions.
+                    const auto dofIndex = gridGeometry.dofMapper().index(entity);
+
+                    // this might be a vector-valued dof
+                    using BlockType = typename JacobianMatrix::block_type;
+                    BlockType &J = jac[dofIndex][dofIndex];
+                    for (int j = 0; j < BlockType::rows; ++j)
+                        J[j][j] = 1.0;
+
+                    // set residual for the ghost dof
+                    res[dofIndex] = 0;
+                    constrainedDofs[dofIndex] = 1;
+                }
+            });
+        }
+
+        auto applyDirichlet = [&] (const auto& scvI,
+                                   const auto& dirichletValues,
+                                   const auto eqIdx,
+                                   const auto pvIdx)
+        {
+            res[scvI.dofIndex()][eqIdx] = this->curElemVolVars()[scvI].priVars()[pvIdx] - dirichletValues[pvIdx];
+            constrainedDofs[scvI.dofIndex()][eqIdx] = 1;
+
+            auto& row = jac[scvI.dofIndex()];
+            for (auto col = row.begin(); col != row.end(); ++col)
+                row[col.index()][eqIdx] = 0.0;
+
+            jac[scvI.dofIndex()][scvI.dofIndex()][eqIdx][pvIdx] = 1.0;
+
+            // if a periodic dof has Dirichlet values also apply the same Dirichlet values to the other dof
+            if (this->asImp_().problem().gridGeometry().dofOnPeriodicBoundary(scvI.dofIndex()))
+            {
+                const auto periodicDof = this->asImp_().problem().gridGeometry().periodicallyMappedDof(scvI.dofIndex());
+                res[periodicDof][eqIdx] = this->curElemVolVars()[scvI].priVars()[pvIdx] - dirichletValues[pvIdx];
+                constrainedDofs[periodicDof][eqIdx] = 1;
+                const auto end = jac[periodicDof].end();
+                for (auto it = jac[periodicDof].begin(); it != end; ++it)
+                    (*it) = periodicDof != it.index() ? 0.0 : 1.0;
+            }
+        };
+
+        this->asImp_().enforceDirichletConstraints(applyDirichlet);
+    }
+
+    /*!
+     * \brief Computes the derivatives with respect to the given element and adds them
+     *        to the global matrix.
+     */
+    void assembleJacobian(JacobianMatrix& jac, GridVariables& gridVariables)
+    {
+        this->asImp_().bindLocalViews();
+        this->asImp_().assembleJacobian(jac, gridVariables); // forward to the internal implementation
+
+        auto applyDirichlet = [&] (const auto& scvI,
+                                   const auto& dirichletValues,
+                                   const auto eqIdx,
+                                   const auto pvIdx)
+        {
+            auto& row = jac[scvI.dofIndex()];
+            for (auto col = row.begin(); col != row.end(); ++col)
+                row[col.index()][eqIdx] = 0.0;
+
+            jac[scvI.dofIndex()][scvI.dofIndex()][eqIdx][pvIdx] = 1.0;
+        };
+
+        this->asImp_().enforceDirichletConstraints(applyDirichlet);
+    }
+
+    /*!
+     * \brief Assemble the residual only
+     */
+    template <class ResidualVector>
+    void assembleResidual(ResidualVector& res)
+    {
+        this->asImp_().bindLocalViews();
+        const auto residual = this->evalLocalResidual();
+
+        for (const auto& scv : scvs(this->fvGeometry()))
+            res[scv.dofIndex()] += residual[scv.localDofIndex()];
+
+        auto applyDirichlet = [&] (const auto& scvI,
+                                   const auto& dirichletValues,
+                                   const auto eqIdx,
+                                   const auto pvIdx)
+        {
+            res[scvI.dofIndex()][eqIdx] = this->curElemVolVars()[scvI].priVars()[pvIdx] - dirichletValues[pvIdx];
+        };
+
+        this->asImp_().enforceDirichletConstraints(applyDirichlet);
+    }
+
+    /*!
+     * \brief Assemble the spatial and temporal (storage) residual contributions separately
+     */
+    template<class ResidualVector>
+    void assembleCurrentResidual(ResidualVector& spatialRes, ResidualVector& temporalRes)
+    {
+        this->asImp_().bindLocalViews();
+        const auto flux = this->evalLocalFluxAndSourceResidual(this->curElemVolVars());
+        const auto storage = this->localResidual().evalStorage(this->fvGeometry(), this->curElemVolVars());
+        for (const auto& scv : scvs(this->fvGeometry()))
+        {
+            spatialRes[scv.dofIndex()] += flux[scv.localDofIndex()];
+            temporalRes[scv.dofIndex()] += storage[scv.localDofIndex()];
+        }
+    }
+
+    //! Enforce Dirichlet constraints
+    template<typename ApplyFunction>
+    void enforceDirichletConstraints(const ApplyFunction& applyDirichlet)
+    {
+        // enforce Dirichlet boundary conditions
+        this->asImp_().evalDirichletBoundaries(applyDirichlet);
+        // take care of internal Dirichlet constraints (if enabled)
+        this->asImp_().enforceInternalDirichletConstraints(applyDirichlet);
+    }
+
+    /*!
+     * \brief Evaluates Dirichlet boundaries
+     */
+    template< typename ApplyDirichletFunctionType >
+    void evalDirichletBoundaries(ApplyDirichletFunctionType applyDirichlet)
+    {
+        // enforce Dirichlet boundaries by overwriting partial derivatives with 1 or 0
+        // and set the residual to (privar - dirichletvalue)
+        if (this->elemBcTypes().hasDirichlet())
+        {
+            for (const auto& scvI : scvs(this->fvGeometry()))
+            {
+                const auto bcTypes = this->elemBcTypes().get(this->fvGeometry(), scvI);
+                if (bcTypes.hasDirichlet())
+                {
+                    const auto dirichletValues = this->asImp_().problem().dirichlet(this->element(), scvI);
+
+                    // set the Dirichlet conditions in residual and jacobian
+                    for (int eqIdx = 0; eqIdx < numEq; ++eqIdx)
+                    {
+                        if (bcTypes.isDirichlet(eqIdx))
+                        {
+                            const auto pvIdx = bcTypes.eqToDirichletIndex(eqIdx);
+                            assert(0 <= pvIdx && pvIdx < numEq);
+                            applyDirichlet(scvI, dirichletValues, eqIdx, pvIdx);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /*!
+     * \brief Update the coupling context for coupled models.
+     * \note This does nothing per default (not a coupled model).
+     */
+    template<class... Args>
+    void maybeUpdateCouplingContext(Args&&...) {}
+
+    /*!
+     * \brief Update the additional domain derivatives for coupled models.
+     * \note This does nothing per default (not a coupled model).
+     */
+    template<class... Args>
+    void maybeEvalAdditionalDomainDerivatives(Args&&...) {}
+};
+
+/*!
+ * \ingroup Assembly
+ * \ingroup CVFEDiscretization
+ * \brief An assembler for Jacobian and residual contribution per element (CVFE methods)
+ * \tparam TypeTag The TypeTag
+ * \tparam Assembler The assembler type
+ * \tparam diffMethod The differentiation method used to compute the residual derivatives
+ * \tparam Implementation The actual implementation via CRTP (defaults to this assembler if void)
+ */
+template<class TypeTag, class Assembler, DiffMethod diffMethod = DiffMethod::numeric, class Implementation = void>
+class CVFELocalAssembler;
+
+/*!
+ * \ingroup Assembly
+ * \ingroup CVFEDiscretization
+ * \brief Control volume finite element local assembler using numeric differentiation
+ */
+template<class TypeTag, class Assembler, class Implementation>
+class CVFELocalAssembler<TypeTag, Assembler, DiffMethod::numeric, Implementation>
+: public CVFELocalAssemblerBase<TypeTag, Assembler,
+                                Detail::CVFE::Impl<Implementation, CVFELocalAssembler<TypeTag, Assembler, DiffMethod::numeric, Implementation>>>
+{
+    using ThisType = CVFELocalAssembler<TypeTag, Assembler, DiffMethod::numeric, Implementation>;
+    using ParentType = CVFELocalAssemblerBase<TypeTag, Assembler, Detail::CVFE::Impl<Implementation, ThisType>>;
+    using Scalar = GetPropType<TypeTag, Properties::Scalar>;
+    using GridVariables = GetPropType<TypeTag, Properties::GridVariables>;
+    using VolumeVariables = GetPropType<TypeTag, Properties::VolumeVariables>;
+    using JacobianMatrix = GetPropType<TypeTag, Properties::JacobianMatrix>;
+
+    static constexpr int numEq = GetPropType<TypeTag, Properties::ModelTraits>::numEq();
+    static constexpr int dim = GetPropType<TypeTag, Properties::GridGeometry>::GridView::dimension;
+
+    static constexpr bool enableGridFluxVarsCache
+        = GridVariables::GridFluxVariablesCache::cachingEnabled;
+    static constexpr bool solutionDependentFluxVarsCache
+        = GridVariables::GridFluxVariablesCache::FluxVariablesCache::isSolDependent;
+
+public:
+
+    using LocalResidual = typename ParentType::LocalResidual;
+    using ElementResidualVector = typename LocalResidual::ElementResidualVector;
+    using ParentType::ParentType;
+
+    template <class PartialReassembler = DefaultPartialReassembler>
+    void assembleJacobian(JacobianMatrix& A, GridVariables& gridVariables,
+                          const ElementResidualVector& origResiduals,
+                          const PartialReassembler* partialReassembler = nullptr)
+    {
+        if (this->isImplicit())
+            assembleJacobianImplicit_(A, gridVariables, origResiduals, partialReassembler);
+        else
+            assembleJacobianExplicit_(A, gridVariables, origResiduals, partialReassembler);
+    }
+
+private:
+    /*!
+     * \brief Computes the derivatives with respect to the given element and adds them
+     *        to the global matrix.
+     */
+    template <class PartialReassembler = DefaultPartialReassembler>
+    void assembleJacobianImplicit_(JacobianMatrix& A, GridVariables& gridVariables,
+                                   const ElementResidualVector& origResiduals,
+                                   const PartialReassembler* partialReassembler = nullptr)
+    {
+        // get some aliases for convenience
+        const auto& element = this->element();
+        const auto& fvGeometry = this->fvGeometry();
+        const auto& curSol = this->asImp_().curSol();
+
+        auto&& curElemVolVars = this->curElemVolVars();
+        auto&& elemFluxVarsCache = this->elemFluxVarsCache();
+
+        //////////////////////////////////////////////////////////////////////////////////////////////////
+        //                                                                                              //
+        // Calculate derivatives of all dofs in stencil with respect to the dofs in the element. In the //
+        // neighboring elements we do so by computing the derivatives of the fluxes which depend on the //
+        // actual element. In the actual element we evaluate the derivative of the entire residual.     //
+        //                                                                                              //
+        //////////////////////////////////////////////////////////////////////////////////////////////////
+
+        // determines whether all volVars in the stencil have to be updated or whether it is
+        // enough to update only the volVars of the scv whose associated dof has been deflected
+        static const bool updateAllVolVars = getParamFromGroup<bool>(
+            this->asImp_().problem().paramGroup(), "Assembly.BoxVolVarsDependOnAllElementDofs", false
+        );
+
+        // create the element solution
+        auto elemSol = elementSolution(element, curSol, fvGeometry.gridGeometry());
+
+        // create the vector storing the partial derivatives
+        ElementResidualVector partialDerivs(fvGeometry.numScv());
+
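+        // the deflection helper stores the original volume variables so they can be restored after
+        // each deflected evaluation; depending on updateAllVolVars it re-updates all scvs of the
+        // element or only the scv whose dof is currently deflected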
+        Dumux::Detail::VolVarsDeflectionHelper deflectionHelper(
+            [&] (const auto& scv) -> VolumeVariables& {
+                return this->getVolVarAccess(gridVariables.curGridVolVars(), curElemVolVars, scv);
+            },
+            fvGeometry,
+            updateAllVolVars
+        );
+
+        // calculation of the derivatives
+        for (const auto& scv : scvs(fvGeometry))
+        {
+            // dof index and corresponding actual pri vars
+            const auto dofIdx = scv.dofIndex();
+            deflectionHelper.setCurrent(scv);
+
+            // calculate derivatives w.r.t. the privars at the dof at hand
+            for (int pvIdx = 0; pvIdx < numEq; pvIdx++)
+            {
+                partialDerivs = 0.0;
+
+                auto evalResiduals = [&](Scalar priVar)
+                {
+                    // update the volume variables and compute element residual
+                    elemSol[scv.localDofIndex()][pvIdx] = priVar;
+                    deflectionHelper.deflect(elemSol, scv, this->asImp_().problem());
+                    if constexpr (solutionDependentFluxVarsCache)
+                    {
+                        elemFluxVarsCache.update(element, fvGeometry, curElemVolVars);
+                        if constexpr (enableGridFluxVarsCache)
+                            gridVariables.gridFluxVarsCache().updateElement(element, fvGeometry, curElemVolVars);
+                    }
+                    this->asImp_().maybeUpdateCouplingContext(scv, elemSol, pvIdx);
+                    return this->evalLocalResidual();
+                };
+
+                // derive the residuals numerically
+                static const NumericEpsilon<Scalar, numEq> eps_{this->asImp_().problem().paramGroup()};
+                static const int numDiffMethod = getParamFromGroup<int>(this->asImp_().problem().paramGroup(), "Assembly.NumericDifferenceMethod");
+                NumericDifferentiation::partialDerivative(evalResiduals, elemSol[scv.localDofIndex()][pvIdx], partialDerivs, origResiduals,
+                                                          eps_(elemSol[scv.localDofIndex()][pvIdx], pvIdx), numDiffMethod);
+
+                // update the global stiffness matrix with the current partial derivatives
+                for (const auto& scvJ : scvs(fvGeometry))
+                {
+                    // don't add derivatives for green dofs
+                    if (!partialReassembler
+                        || partialReassembler->dofColor(scvJ.dofIndex()) != EntityColor::green)
+                    {
+                        for (int eqIdx = 0; eqIdx < numEq; eqIdx++)
+                        {
+                            // A[i][col][eqIdx][pvIdx] is the rate of change of
+                            // the residual of equation 'eqIdx' at dof 'i'
+                            // depending on the primary variable 'pvIdx' at dof
+                            // 'col'.
+                            A[scvJ.dofIndex()][dofIdx][eqIdx][pvIdx] += partialDerivs[scvJ.localDofIndex()][eqIdx];
+                        }
+                    }
+                }
+
+                // restore the original state of the scv's volume variables
+                deflectionHelper.restore(scv);
+
+                // restore the original element solution
+                elemSol[scv.localDofIndex()][pvIdx] = curSol[scv.dofIndex()][pvIdx];
+                this->asImp_().maybeUpdateCouplingContext(scv, elemSol, pvIdx);
+            }
+        }
+
+        // restore original state of the flux vars cache in case of global caching.
+        // In the case of local caching this is obsolete because the elemFluxVarsCache used here goes out of scope after this.
+        if constexpr (enableGridFluxVarsCache)
+            gridVariables.gridFluxVarsCache().updateElement(element, fvGeometry, curElemVolVars);
+
+        // evaluate additional derivatives that might arise from the coupling (no-op if not coupled)
+        this->asImp_().maybeEvalAdditionalDomainDerivatives(origResiduals, A, gridVariables);
+    }
+
+    /*!
+     * \brief Computes the derivatives with respect to the given element and adds them
+     *        to the global matrix.
+     */
+    template <class PartialReassembler = DefaultPartialReassembler>
+    void assembleJacobianExplicit_(JacobianMatrix& A, GridVariables& gridVariables,
+                                   const ElementResidualVector& origResiduals,
+                                   const PartialReassembler* partialReassembler = nullptr)
+    {
+        if (partialReassembler)
+            DUNE_THROW(Dune::NotImplemented, "partial reassembly for explicit time discretization");
+
+        // get some aliases for convenience
+        const auto& element = this->element();
+        const auto& fvGeometry = this->fvGeometry();
+        const auto& curSol = this->asImp_().curSol();
+        auto&& curElemVolVars = this->curElemVolVars();
+
+        // create the element solution
+        auto elemSol = elementSolution(element, curSol, fvGeometry.gridGeometry());
+
+        // create the vector storing the partial derivatives
+        ElementResidualVector partialDerivs(fvGeometry.numScv());
+
+        // calculation of the derivatives
+        for (const auto& scv : scvs(fvGeometry))
+        {
+            // dof index and corresponding actual pri vars
+            const auto dofIdx = scv.dofIndex();
+            auto& curVolVars = this->getVolVarAccess(gridVariables.curGridVolVars(), curElemVolVars, scv);
+            const VolumeVariables origVolVars(curVolVars);
+
+            // calculate derivatives w.r.t. the privars at the dof at hand
+            for (int pvIdx = 0; pvIdx < numEq; pvIdx++)
+            {
+                partialDerivs = 0.0;
+
+                auto evalStorage = [&](Scalar priVar)
+                {
+                    elemSol[scv.localDofIndex()][pvIdx] = priVar;
+                    curVolVars.update(elemSol, this->asImp_().problem(), element, scv);
+                    return this->evalStorage();
+                };
+
+                // derive the residuals numerically
+                static const NumericEpsilon<Scalar, numEq> eps_{this->asImp_().problem().paramGroup()};
+                static const int numDiffMethod = getParamFromGroup<int>(this->asImp_().problem().paramGroup(), "Assembly.NumericDifferenceMethod");
+                NumericDifferentiation::partialDerivative(evalStorage, elemSol[scv.localDofIndex()][pvIdx], partialDerivs, origResiduals,
+                                                          eps_(elemSol[scv.localDofIndex()][pvIdx], pvIdx), numDiffMethod);
+
+                // update the global stiffness matrix with the current partial derivatives
+                for (int eqIdx = 0; eqIdx < numEq; eqIdx++)
+                {
+                    // A[i][col][eqIdx][pvIdx] is the rate of change of
+                    // the residual of equation 'eqIdx' at dof 'i'
+                    // depending on the primary variable 'pvIdx' at dof
+                    // 'col'.
+                    A[dofIdx][dofIdx][eqIdx][pvIdx] += partialDerivs[scv.localDofIndex()][eqIdx];
+                }
+
+                // restore the original state of the scv's volume variables
+                curVolVars = origVolVars;
+
+                // restore the original element solution
+                elemSol[scv.localDofIndex()][pvIdx] = curSol[scv.dofIndex()][pvIdx];
+            }
+        }
+    }
+};
+
+} // end namespace Dumux::Experimental
+
+#endif
diff --git a/dumux/experimental/assembly/fvlocalassemblerbase.hh b/dumux/experimental/assembly/fvlocalassemblerbase.hh
new file mode 100644
index 0000000000..18f59cdf5e
--- /dev/null
+++ b/dumux/experimental/assembly/fvlocalassemblerbase.hh
@@ -0,0 +1,304 @@
+// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+// vi: set et ts=4 sw=4 sts=4:
+//
+// SPDX-FileCopyrightText: Copyright © DuMux Project contributors, see AUTHORS.md in root folder
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+/*!
+ * \file
+ * \ingroup Assembly
+ * \copydoc Dumux::FVLocalAssemblerBase
+ */
+#ifndef DUMUX_EXPERIMENTAL_FV_LOCAL_ASSEMBLER_BASE_HH
+#define DUMUX_EXPERIMENTAL_FV_LOCAL_ASSEMBLER_BASE_HH
+
+#include <dune/common/reservedvector.hh>
+#include <dune/grid/common/gridenums.hh> // for GhostEntity
+#include <dune/istl/matrixindexset.hh>
+
+#include <dumux/common/reservedblockvector.hh>
+#include <dumux/common/properties.hh>
+#include <dumux/common/parameters.hh>
+#include <dumux/discretization/extrusion.hh>
+#include <dumux/assembly/diffmethod.hh>
+
+namespace Dumux::Experimental {
+
+/*!
+ * \ingroup Assembly
+ * \brief A base class for all local assemblers
+ * \tparam TypeTag The TypeTag
+ * \tparam Assembler The assembler type
+ * \tparam Implementation The assembler implementation
+ */
+template<class TypeTag, class Assembler, class Implementation>
+class FVLocalAssemblerBase
+{
+    using Problem = GetPropType<TypeTag, Properties::Problem>;
+    using GridView = typename GetPropType<TypeTag, Properties::GridGeometry>::GridView;
+    using Scalar = GetPropType<TypeTag, Properties::Scalar>;
+    using JacobianMatrix = GetPropType<TypeTag, Properties::JacobianMatrix>;
+    using GridVariables = GetPropType<TypeTag, Properties::GridVariables>;
+    using SolutionVector = typename Assembler::SolutionVector;
+    using ElementBoundaryTypes = GetPropType<TypeTag, Properties::ElementBoundaryTypes>;
+    using FVElementGeometry = typename GetPropType<TypeTag, Properties::GridGeometry>::LocalView;
+    using SubControlVolume = typename FVElementGeometry::SubControlVolume;
+    using SubControlVolumeFace = typename FVElementGeometry::SubControlVolumeFace;
+    using GridVolumeVariables = GetPropType<TypeTag, Properties::GridVolumeVariables>;
+    using ElementVolumeVariables = typename GetPropType<TypeTag, Properties::GridVolumeVariables>::LocalView;
+    using VolumeVariables = GetPropType<TypeTag, Properties::VolumeVariables>;
+    using ElementFluxVariablesCache = typename GetPropType<TypeTag, Properties::GridFluxVariablesCache>::LocalView;
+    using Element = typename GridView::template Codim<0>::Entity;
+    static constexpr auto numEq = GetPropType<TypeTag, Properties::ModelTraits>::numEq();
+
+public:
+    using LocalResidual = std::decay_t<decltype(std::declval<Assembler>().localResidual())>;
+    using ElementResidualVector = typename LocalResidual::ElementResidualVector;
+
+    /*!
+     * \brief The constructor. Delegates to the general constructor.
+     */
+    explicit FVLocalAssemblerBase(const Assembler& assembler,
+                                  const Element& element,
+                                  const SolutionVector& curSol)
+    : FVLocalAssemblerBase(assembler,
+                           element,
+                           curSol,
+                           localView(assembler.gridGeometry()),
+                           localView(assembler.gridVariables().curGridVolVars()),
+                           localView(assembler.gridVariables().gridFluxVarsCache()),
+                           assembler.localResidual(),
+                           element.partitionType() == Dune::GhostEntity,
+                           assembler.isImplicit())
+    {}
+
+    /*!
+     * \brief The constructor. General version explicitly expecting each argument.
+     */
+    explicit FVLocalAssemblerBase(const Assembler& assembler,
+                                  const Element& element,
+                                  const SolutionVector& curSol,
+                                  const FVElementGeometry& fvGeometry,
+                                  const ElementVolumeVariables& curElemVolVars,
+                                  const ElementFluxVariablesCache& elemFluxVarsCache,
+                                  const LocalResidual& localResidual,
+                                  const bool elementIsGhost,
+                                  const bool isImplicit)
+    : assembler_(assembler)
+    , element_(element)
+    , curSol_(curSol)
+    , fvGeometry_(fvGeometry)
+    , curElemVolVars_(curElemVolVars)
+    , elemFluxVarsCache_(elemFluxVarsCache)
+    , localOperator_(localResidual)
+    , elementIsGhost_(elementIsGhost)
+    , isImplicit_(isImplicit)
+    {}
+
+    /*!
+     * \brief Convenience function to evaluate the complete local residual for the current element. Automatically chooses the appropriate
+     *        element volume variables.
+     */
+    ElementResidualVector evalLocalResidual() const
+    {
+        if (elementIsGhost())
+            return ElementResidualVector(0.0);
+
+        return evalLocalResidual(curElemVolVars());
+    }
+
+    /*!
+     * \brief Evaluates the complete local residual for the current element.
+     * \param elemVolVars The element volume variables
+     */
+    ElementResidualVector evalLocalResidual(const ElementVolumeVariables& elemVolVars) const
+    {
+        if (!assembler().isStationaryProblem())
+        {
+            ElementResidualVector residual = evalLocalFluxAndSourceResidual(elemVolVars);
+            residual += evalStorage();
+            return residual;
+        }
+        else
+            return evalLocalFluxAndSourceResidual(elemVolVars);
+    }
+
+    /*!
+     * \brief Convenience function to evaluate the flux and source terms (i.e., the terms without a time derivative)
+     *        of the local residual for the current element. Automatically chooses the appropriate
+     *        element volume variables.
+     */
+    ElementResidualVector evalLocalFluxAndSourceResidual() const
+    {
+        return evalLocalFluxAndSourceResidual(curElemVolVars());
+    }
+
+    /*!
+     * \brief Evaluates the flux and source terms (i.e., the terms without a time derivative)
+     *        of the local residual for the current element.
+     *
+     * \param elemVolVars The element volume variables
+     */
+    ElementResidualVector evalLocalFluxAndSourceResidual(const ElementVolumeVariables& elemVolVars) const
+    {
+        return localOperator_.evalFluxAndSource(element_, fvGeometry_, elemVolVars, elemFluxVarsCache_, elemBcTypes_);
+    }
+
+    /*!
+     * \brief Convenience function to evaluate the storage term (i.e., the term with a time derivative)
+     *        of the local residual for the current element. Automatically chooses the appropriate
+     *        element volume variables.
+     */
+    ElementResidualVector evalStorage() const
+    {
+        return localOperator_.evalStorage(fvGeometry_, curElemVolVars_);
+    }
+
+    /*!
+     * \brief Convenience function to bind and prepare all relevant variables required for the
+     *        evaluation of the local residual.
+     */
+    void bindLocalViews()
+    {
+        // get some references for convenience
+        const auto& element = this->element();
+        const auto& curSol = this->curSol();
+        auto&& fvGeometry = this->fvGeometry();
+        auto&& curElemVolVars = this->curElemVolVars();
+        auto&& elemFluxVarsCache = this->elemFluxVarsCache();
+
+        // bind the caches
+        fvGeometry.bind(element);
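+        // if the spatial operator carries (numerically) zero weight in this stage, only the
+        // storage term is evaluated and element-local volume variables suffice;
+        // otherwise bind the full stencil and the flux variables cache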
+        if (std::abs(this->localResidual().spatialWeight()) < 1e-6)
+            curElemVolVars.bindElement(element, fvGeometry, curSol);
+        else
+        {
+            curElemVolVars.bind(element, fvGeometry, curSol);
+            elemFluxVarsCache.bind(element, fvGeometry, curElemVolVars);
+        }
+    }
+
+    /*!
+     * \brief Enforces Dirichlet constraints if enabled in the problem
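+     * \note applyDirichlet is expected to be callable as applyDirichlet(scv, dirichletValues, eqIdx, pvIdx)
+     *       and to adjust the local residual and Jacobian entries of the constrained equation
+     *       (sketch of the expected contract; the concrete handling is up to the derived assembler)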
+     */
+    template<typename ApplyFunction, class P = Problem, typename std::enable_if_t<P::enableInternalDirichletConstraints(), int> = 0>
+    void enforceInternalDirichletConstraints(const ApplyFunction& applyDirichlet)
+    {
+        // enforce Dirichlet constraints strongly by overwriting partial derivatives with 1 or 0
+        // and set the residual to (privar - dirichletvalue)
+        for (const auto& scvI : scvs(this->fvGeometry()))
+        {
+            const auto internalDirichletConstraints = asImp_().problem().hasInternalDirichletConstraint(this->element(), scvI);
+            if (internalDirichletConstraints.any())
+            {
+                const auto dirichletValues = asImp_().problem().internalDirichlet(this->element(), scvI);
+                // set the Dirichlet conditions in residual and jacobian
+                for (int eqIdx = 0; eqIdx < internalDirichletConstraints.size(); ++eqIdx)
+                    if (internalDirichletConstraints[eqIdx])
+                        applyDirichlet(scvI, dirichletValues, eqIdx, eqIdx);
+            }
+        }
+    }
+
+    template<typename ApplyFunction, class P = Problem, typename std::enable_if_t<!P::enableInternalDirichletConstraints(), int> = 0>
+    void enforceInternalDirichletConstraints(const ApplyFunction& applyDirichlet)
+    {}
+
+    //! The problem
+    const Problem& problem() const
+    { return assembler_.problem(); }
+
+    //! The assembler
+    const Assembler& assembler() const
+    { return assembler_; }
+
+    //! The current element
+    const Element& element() const
+    { return element_; }
+
+    //! Returns if element is a ghost entity
+    bool elementIsGhost() const
+    { return elementIsGhost_; }
+
+    //! The current solution
+    const SolutionVector& curSol() const
+    { return curSol_; }
+
+    //! The element-local view on the finite volume geometry
+    FVElementGeometry& fvGeometry()
+    { return fvGeometry_; }
+
+    //! The current element volume variables
+    ElementVolumeVariables& curElemVolVars()
+    { return curElemVolVars_; }
+
+    //! The element flux variables cache
+    ElementFluxVariablesCache& elemFluxVarsCache()
+    { return elemFluxVarsCache_; }
+
+    //! The local residual for the current element
+    LocalResidual& localResidual()
+    { return localOperator_; }
+
+    //! The element's boundary types
+    ElementBoundaryTypes& elemBcTypes()
+    { return elemBcTypes_; }
+
+    //! The finite volume geometry
+    const FVElementGeometry& fvGeometry() const
+    { return fvGeometry_; }
+
+    //! The current element volume variables
+    const ElementVolumeVariables& curElemVolVars() const
+    { return curElemVolVars_; }
+
+    //! The element flux variables cache
+    const ElementFluxVariablesCache& elemFluxVarsCache() const
+    { return elemFluxVarsCache_; }
+
+    //! The element's boundary types
+    const ElementBoundaryTypes& elemBcTypes() const
+    { return elemBcTypes_; }
+
+    //! The local residual for the current element
+    const LocalResidual& localResidual() const
+    { return localOperator_; }
+
+    //! If the time stepping scheme is implicit
+    bool isImplicit() const
+    { return isImplicit_; }
+
+protected:
+    Implementation &asImp_()
+    { return *static_cast<Implementation*>(this); }
+
+    const Implementation &asImp_() const
+    { return *static_cast<const Implementation*>(this); }
+
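+    //! Access the volume variables of an scv: returns the element-local copy if grid-wide
+    //! volume variables caching is disabled, and the corresponding grid cache entry otherwise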
+    template<class T = TypeTag, typename std::enable_if_t<!GetPropType<T, Properties::GridVariables>::GridVolumeVariables::cachingEnabled, int> = 0>
+    VolumeVariables& getVolVarAccess(GridVolumeVariables& gridVolVars, ElementVolumeVariables& elemVolVars, const SubControlVolume& scv)
+    { return elemVolVars[scv]; }
+
+    template<class T = TypeTag, typename std::enable_if_t<GetPropType<T, Properties::GridVariables>::GridVolumeVariables::cachingEnabled, int> = 0>
+    VolumeVariables& getVolVarAccess(GridVolumeVariables& gridVolVars, ElementVolumeVariables& elemVolVars, const SubControlVolume& scv)
+    { return gridVolVars.volVars(scv); }
+
+private:
+
+    const Assembler& assembler_; //!< reference to the assembler instance
+    const Element& element_; //!< the element whose residual is assembled
+    const SolutionVector& curSol_; //!< the current solution
+
+    FVElementGeometry fvGeometry_;
+    ElementVolumeVariables curElemVolVars_;
+    ElementFluxVariablesCache elemFluxVarsCache_;
+    ElementBoundaryTypes elemBcTypes_;
+
+    LocalResidual localOperator_; //!< the local residual evaluating the equations per element
+    bool elementIsGhost_; //!< whether the element's partitionType is ghost
+    bool isImplicit_; //!< whether the time stepping scheme is implicit
+};
+
+} // end namespace Dumux::Experimental
+
+#endif
diff --git a/dumux/experimental/assembly/multistagefvassembler.hh b/dumux/experimental/assembly/multistagefvassembler.hh
new file mode 100644
index 0000000000..252c670ebf
--- /dev/null
+++ b/dumux/experimental/assembly/multistagefvassembler.hh
@@ -0,0 +1,469 @@
+// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+// vi: set et ts=4 sw=4 sts=4:
+//
+// SPDX-FileCopyrightInfo: Copyright © DuMux Project contributors, see AUTHORS.md in root folder
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+/*!
+ * \file
+ * \ingroup Assembly
+ * \brief A linear system assembler (residual and Jacobian) for finite volume schemes
+ */
+#ifndef DUMUX_EXPERIMENTAL_MULTISTAGE_FV_ASSEMBLER_HH
+#define DUMUX_EXPERIMENTAL_MULTISTAGE_FV_ASSEMBLER_HH
+
+#include <iostream>
+#include <vector>
+#include <deque>
+#include <type_traits>
+#include <memory>
+
+#include <dune/istl/matrixindexset.hh>
+
+#include <dumux/common/properties.hh>
+#include <dumux/common/parameters.hh>
+#include <dumux/common/gridcapabilities.hh>
+#include <dumux/common/typetraits/vector.hh>
+
+#include <dumux/discretization/method.hh>
+#include <dumux/linear/parallelhelpers.hh>
+#include <dumux/linear/dunevectors.hh>
+
+#include <dumux/assembly/coloring.hh>
+#include <dumux/assembly/jacobianpattern.hh>
+#include <dumux/assembly/diffmethod.hh>
+
+#include <dumux/parallel/multithreading.hh>
+#include <dumux/parallel/parallel_for.hh>
+
+#include <dumux/experimental/assembly/cvfelocalassembler.hh>
+#include <dumux/experimental/assembly/cclocalassembler.hh>
+#include <dumux/experimental/assembly/multistagefvlocaloperator.hh>
+
+#include <dumux/experimental/timestepping/multistagemethods.hh>
+#include <dumux/experimental/timestepping/multistagetimestepper.hh>
+
+namespace Dumux::Experimental::Detail {
+
+template<class DiscretizationMethod>
+struct LocalAssemblerChooser;
+
+template<class DM>
+struct LocalAssemblerChooser<DiscretizationMethods::CVFE<DM>>
+{
+    template<class TypeTag, class Impl, DiffMethod diffMethod>
+    using type = Experimental::CVFELocalAssembler<TypeTag, Impl, diffMethod>;
+};
+
+template<>
+struct LocalAssemblerChooser<DiscretizationMethods::CCMpfa>
+{
+    template<class TypeTag, class Impl, DiffMethod diffMethod>
+    using type = Experimental::CCLocalAssembler<TypeTag, Impl, diffMethod>;
+};
+
+template<>
+struct LocalAssemblerChooser<DiscretizationMethods::CCTpfa>
+{
+    template<class TypeTag, class Impl, DiffMethod diffMethod>
+    using type = Experimental::CCLocalAssembler<TypeTag, Impl, diffMethod>;
+};
+
+template<class TypeTag, class Impl, DiffMethod diffMethod>
+using LocalAssemblerChooser_t = typename LocalAssemblerChooser<
+    typename GetPropType<TypeTag, Properties::GridGeometry>::DiscretizationMethod
+>::template type<TypeTag, Impl, diffMethod>;
+
+} // end namespace Dumux::Experimental::Detail
+
+namespace Dumux::Experimental {
+
+/*!
+ * \ingroup Assembly
+ * \brief A linear system assembler (residual and Jacobian) for finite volume schemes (box, tpfa, mpfa, ...)
+ * \tparam TypeTag The TypeTag
+ * \tparam diffMethod The differentiation method used to compute the derivatives of the residual
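+ *
+ * \note Minimal usage sketch (illustrative; `problem`, `gridGeometry`, `gridVariables`,
+ *       `timeMethod` and `prevSol` are assumed to be set up elsewhere):
+ * \code
+ * auto assembler = std::make_shared<MultiStageFVAssembler<TypeTag, DiffMethod::numeric>>(
+ *     problem, gridGeometry, gridVariables, timeMethod, prevSol
+ * );
+ * assembler->setLinearSystem();
+ * // typically driven by an Experimental::MultiStageTimeStepper via
+ * // prepareStage(...), assembleJacobianAndResidual(...) and clearStages()
+ * \endcode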
+ */
+template<class TypeTag, DiffMethod diffMethod>
+class MultiStageFVAssembler
+{
+    using GridGeo = GetPropType<TypeTag, Properties::GridGeometry>;
+    using GridView = typename GridGeo::GridView;
+    using LocalResidual = GetPropType<TypeTag, Properties::LocalResidual>;
+    using Element = typename GridView::template Codim<0>::Entity;
+    using ElementSeed = typename GridView::Grid::template Codim<0>::EntitySeed;
+
+    static constexpr bool isBox = GridGeo::discMethod == DiscretizationMethods::box;
+
+    using ThisType = MultiStageFVAssembler<TypeTag, diffMethod>;
+    using LocalAssembler = typename Detail::LocalAssemblerChooser_t<TypeTag, ThisType, diffMethod>;
+
+public:
+    using Scalar = GetPropType<TypeTag, Properties::Scalar>;
+    using StageParams = Experimental::MultiStageParams<Scalar>;
+    using JacobianMatrix = GetPropType<TypeTag, Properties::JacobianMatrix>;
+    using SolutionVector = GetPropType<TypeTag, Properties::SolutionVector>;
+    using ResidualType = typename Dumux::Detail::NativeDuneVectorType<SolutionVector>::type;
+
+    using GridVariables = GetPropType<TypeTag, Properties::GridVariables>;
+
+    using GridGeometry = GridGeo;
+    using Problem = GetPropType<TypeTag, Properties::Problem>;
+
+    /*!
+     * \brief The constructor for instationary problems
+     * \note the grid variables might be temporarily changed during assembly (if caching is enabled);
+     *       it is, however, guaranteed that the state after assembly is the same as before
+     */
+    MultiStageFVAssembler(std::shared_ptr<const Problem> problem,
+                          std::shared_ptr<const GridGeometry> gridGeometry,
+                          std::shared_ptr<GridVariables> gridVariables,
+                          std::shared_ptr<const Experimental::MultiStageMethod<Scalar>> msMethod,
+                          const SolutionVector& prevSol)
+    : timeSteppingMethod_(msMethod)
+    , problem_(problem)
+    , gridGeometry_(gridGeometry)
+    , gridVariables_(gridVariables)
+    , prevSol_(&prevSol)
+    {
+        enableMultithreading_ = SupportsColoring<typename GridGeometry::DiscretizationMethod>::value
+            && Grid::Capabilities::supportsMultithreading(gridGeometry_->gridView())
+            && !Multithreading::isSerial()
+            && getParam<bool>("Assembly.Multithreading", true);
+
+        maybeComputeColors_();
+    }
+
+    /*!
+     * \brief Assembles the global Jacobian of the residual
+     *        and the residual for the current solution.
+     */
+    template<class PartialReassembler = DefaultPartialReassembler>
+    void assembleJacobianAndResidual(const SolutionVector& curSol, const PartialReassembler* partialReassembler = nullptr)
+    {
+        resetJacobian_(partialReassembler);
+
+        resetResidual_();
+        spatialOperatorEvaluations_.back() = 0.0;
+        temporalOperatorEvaluations_.back() = 0.0;
+
+        if (stageParams_->size() != spatialOperatorEvaluations_.size())
+            DUNE_THROW(Dune::InvalidStateException, "Wrong number of residuals");
+
+        assemble_([&](const Element& element)
+        {
+            LocalAssembler localAssembler(*this, element, curSol);
+            localAssembler.assembleJacobianAndResidual(
+                *jacobian_, *residual_, *gridVariables_,
+                *stageParams_,
+                temporalOperatorEvaluations_.back(),
+                spatialOperatorEvaluations_.back(),
+                constrainedDofs_,
+                partialReassembler
+            );
+        });
+
+        // assemble the full residual for the time integration stage
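+        // For a multi-stage method the full residual of stage n reads (sketch; weights from MultiStageParams):
+        //   R_n = sum_{k=0}^{n} [ temporalWeight(k) * T(x_k) + spatialWeight(k) * S(x_k) ]
+        // Only the k = n contributions depend on the current solution and were assembled above;
+        // the k < n contributions are constant within this stage and are accumulated below.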
+        auto constantResidualComponent = (*residual_);
+        constantResidualComponent = 0.0;
+        for (std::size_t k = 0; k < stageParams_->size()-1; ++k)
+        {
+            if (!stageParams_->skipTemporal(k))
+                constantResidualComponent.axpy(stageParams_->temporalWeight(k), temporalOperatorEvaluations_[k]);
+            if (!stageParams_->skipSpatial(k))
+                constantResidualComponent.axpy(stageParams_->spatialWeight(k), spatialOperatorEvaluations_[k]);
+        }
+
+        // masked summation of the constant residual component onto this stage's residual component
+        for (std::size_t i = 0; i < constantResidualComponent.size(); ++i)
+            for (std::size_t ii = 0; ii < constantResidualComponent[i].size(); ++ii)
+                (*residual_)[i][ii] += constrainedDofs_[i][ii] > 0.5 ? 0.0 : constantResidualComponent[i][ii];
+    }
+
+    //! Assembles only the residual (not implemented for the multi-stage assembler)
+    void assembleResidual(const SolutionVector& curSol)
+    { DUNE_THROW(Dune::NotImplemented, "residual"); }
+
+    /*!
+     * \brief Creates the Jacobian and residual objects managed by this assembler
+     *        (use this version if you don't need to access them outside this class)
+     */
+    void setLinearSystem()
+    {
+        jacobian_ = std::make_shared<JacobianMatrix>();
+        jacobian_->setBuildMode(JacobianMatrix::random);
+        residual_ = std::make_shared<ResidualType>();
+
+        setResidualSize_(*residual_);
+        setJacobianPattern_();
+    }
+
+    /*!
+     * \brief Resizes jacobian and residual and recomputes colors
+     */
+    void updateAfterGridAdaption()
+    {
+        setResidualSize_(*residual_);
+        setJacobianPattern_();
+        maybeComputeColors_();
+    }
+
+    //! Returns the number of degrees of freedom
+    std::size_t numDofs() const
+    { return gridGeometry_->numDofs(); }
+
+    //! The problem
+    const Problem& problem() const
+    { return *problem_; }
+
+    //! The global finite volume geometry
+    const GridGeometry& gridGeometry() const
+    { return *gridGeometry_; }
+
+    //! The gridview
+    const GridView& gridView() const
+    { return gridGeometry().gridView(); }
+
+    //! The global grid variables
+    GridVariables& gridVariables()
+    { return *gridVariables_; }
+
+    //! The global grid variables
+    const GridVariables& gridVariables() const
+    { return *gridVariables_; }
+
+    //! The jacobian matrix
+    JacobianMatrix& jacobian()
+    { return *jacobian_; }
+
+    //! The residual vector (rhs)
+    ResidualType& residual()
+    { return *residual_; }
+
+    //! The solution of the previous time step
+    const SolutionVector& prevSol() const
+    { return *prevSol_; }
+
+    /*!
+     * \brief Create a local residual object (used by the local assembler)
+     */
+    MultiStageFVLocalOperator<LocalResidual> localResidual() const
+    { return { LocalResidual{problem_.get(), nullptr} }; }
+
+    /*!
+     * \brief Update the grid variables
+     */
+    void updateGridVariables(const SolutionVector &cursol)
+    { gridVariables().update(cursol); }
+
+    /*!
+     * \brief Reset the gridVariables
+     */
+    void resetTimeStep(const SolutionVector &cursol)
+    { gridVariables().resetTimeStep(cursol); }
+
+    void clearStages()
+    {
+        spatialOperatorEvaluations_.clear();
+        temporalOperatorEvaluations_.clear();
+        stageParams_.reset();
+    }
+
+    template<class StageParams>
+    void prepareStage(SolutionVector& x, StageParams params)
+    {
+        stageParams_ = std::move(params);
+        const auto curStage = stageParams_->size() - 1;
+
+        // in the first stage, also assemble the old residual
+        if (curStage == 1)
+        {
+            // update time in variables?
+            setProblemTime_(*problem_, stageParams_->timeAtStage(curStage));
+
+            resetResidual_(); // residual resized and zero
+            spatialOperatorEvaluations_.push_back(*residual_);
+            temporalOperatorEvaluations_.push_back(*residual_);
+
+            // assemble stage 0 residuals
+            assemble_([&](const auto& element)
+            {
+                LocalAssembler localAssembler(*this, element, *prevSol_);
+                localAssembler.localResidual().spatialWeight(1.0);
+                localAssembler.localResidual().temporalWeight(1.0);
+                localAssembler.assembleCurrentResidual(spatialOperatorEvaluations_.back(), temporalOperatorEvaluations_.back());
+            });
+        }
+
+        // update time in variables?
+        setProblemTime_(*problem_, stageParams_->timeAtStage(curStage));
+
+        resetResidual_(); // residual resized and zero
+        spatialOperatorEvaluations_.push_back(*residual_);
+        temporalOperatorEvaluations_.push_back(*residual_);
+    }
+
+    //! TODO get rid of this (called by Newton but shouldn't be necessary)
+    bool isStationaryProblem() const
+    { return false; }
+
+    bool isImplicit() const
+    { return timeSteppingMethod_->implicit(); }
+
+private:
+    /*!
+     * \brief Resizes the Jacobian and sets its sparsity pattern.
+     */
+    void setJacobianPattern_()
+    {
+        // resize the jacobian
+        const auto numDofs = this->numDofs();
+        jacobian_->setSize(numDofs, numDofs);
+
+        // create occupation pattern of the jacobian
+        if (timeSteppingMethod_->implicit())
+            getJacobianPattern<true>(gridGeometry()).exportIdx(*jacobian_);
+        else
+            getJacobianPattern<false>(gridGeometry()).exportIdx(*jacobian_);
+    }
+
+    //! Resizes the residual
+    void setResidualSize_(ResidualType& res)
+    { res.resize(numDofs()); }
+
+    //! Computes the colors
+    void maybeComputeColors_()
+    {
+        if (enableMultithreading_)
+            elementSets_ = computeColoring(gridGeometry()).sets;
+    }
+
+    // reset the residual vector to 0.0
+    void resetResidual_()
+    {
+        if(!residual_)
+        {
+            residual_ = std::make_shared<ResidualType>();
+            setResidualSize_(*residual_);
+        }
+
+        setResidualSize_(constrainedDofs_);
+
+        (*residual_) = 0.0;
+        constrainedDofs_ = 0.0;
+    }
+
+    // reset the Jacobian matrix to 0.0
+    template <class PartialReassembler = DefaultPartialReassembler>
+    void resetJacobian_(const PartialReassembler *partialReassembler = nullptr)
+    {
+        if(!jacobian_)
+        {
+            jacobian_ = std::make_shared<JacobianMatrix>();
+            jacobian_->setBuildMode(JacobianMatrix::random);
+            setJacobianPattern_();
+        }
+
+        if (partialReassembler)
+            partialReassembler->resetJacobian(*this);
+        else
+            *jacobian_ = 0.0;
+    }
+
+    /*!
+     * \brief Calls the given element-wise assembly kernel for all elements of the grid
+     * \note Handles exceptions for parallel runs
+     * \throws NumericalProblem on all processes if an exception is thrown during assembly
+     */
+    template<typename AssembleElementFunc>
+    void assemble_(AssembleElementFunc&& assembleElement) const
+    {
+        // a state that will be checked on all processes
+        bool succeeded = false;
+
+        // try assembling using the local assembly function
+        try
+        {
+            if (enableMultithreading_)
+            {
+                assert(elementSets_.size() > 0);
+
+                // make this element loop run in parallel
+                // for this we have to color the elements so that we don't get
+                // race conditions when writing into the global matrix
+                // each color can be assembled using multiple threads
+                for (const auto& elements : elementSets_)
+                {
+                    Dumux::parallelFor(elements.size(), [&](const std::size_t i)
+                    {
+                        const auto element = gridView().grid().entity(elements[i]);
+                        assembleElement(element);
+                    });
+                }
+            }
+            else
+                for (const auto& element : elements(gridView()))
+                    assembleElement(element);
+
+            // if we get here, everything worked well on this process
+            succeeded = true;
+        }
+        // throw exception if a problem occurred
+        catch (NumericalProblem &e)
+        {
+            std::cout << "rank " << gridView().comm().rank()
+                      << " caught an exception while assembling:" << e.what()
+                      << "\n";
+            succeeded = false;
+        }
+
+        // make sure everything worked well on all processes
+        if (gridView().comm().size() > 1)
+            succeeded = gridView().comm().min(succeeded);
+
+        // if not succeeded rethrow the error on all processes
+        if (!succeeded)
+            DUNE_THROW(NumericalProblem, "A process did not succeed in linearizing the system");
+    }
+
+    // TODO make this nicer with an is_detected trait in a common location
+    template<class P>
+    void setProblemTime_(const P& p, const Scalar t)
+    { setProblemTimeImpl_(p, t, 0); }
+
+    template<class P>
+    auto setProblemTimeImpl_(const P& p, const Scalar t, int) -> decltype(p.setTime(0))
+    { p.setTime(t); }
+
+    template<class P>
+    void setProblemTimeImpl_(const P& p, const Scalar t, long)
+    {}
+
+    std::shared_ptr<const Experimental::MultiStageMethod<Scalar>> timeSteppingMethod_;
+    std::vector<ResidualType> spatialOperatorEvaluations_;
+    std::vector<ResidualType> temporalOperatorEvaluations_;
+    ResidualType constrainedDofs_;
+    std::shared_ptr<const StageParams> stageParams_;
+
+    //! pointer to the problem to be solved
+    std::shared_ptr<const Problem> problem_;
+
+    //! the finite volume geometry of the grid
+    std::shared_ptr<const GridGeometry> gridGeometry_;
+
+    //! the variables container for the grid
+    std::shared_ptr<GridVariables> gridVariables_;
+
+    //! an observing pointer to the previous solution for instationary problems
+    const SolutionVector* prevSol_ = nullptr;
+
+    //! shared pointers to the jacobian matrix and residual
+    std::shared_ptr<JacobianMatrix> jacobian_;
+    std::shared_ptr<ResidualType> residual_;
+
+    //! element sets for parallel assembly
+    bool enableMultithreading_ = false;
+    std::deque<std::vector<ElementSeed>> elementSets_;
+};
+
+} // end namespace Dumux::Experimental
+
+#endif
diff --git a/dumux/experimental/assembly/multistagefvlocaloperator.hh b/dumux/experimental/assembly/multistagefvlocaloperator.hh
new file mode 100644
index 0000000000..9fa58b3922
--- /dev/null
+++ b/dumux/experimental/assembly/multistagefvlocaloperator.hh
@@ -0,0 +1,112 @@
+// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+// vi: set et ts=4 sw=4 sts=4:
+//
+// SPDX-FileCopyrightInfo: Copyright © DuMux Project contributors, see AUTHORS.md in root folder
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+/*!
+ * \file
+ * \ingroup Assembly
+ * \brief A local operator wrapper for multi-stage time stepping schemes
+ */
+#ifndef DUMUX_EXPERIMENTAL_MULTISTAGE_FV_LOCAL_OPERATOR_HH
+#define DUMUX_EXPERIMENTAL_MULTISTAGE_FV_LOCAL_OPERATOR_HH
+
+#include <cmath>
+#include <type_traits>
+
+#include <dumux/discretization/extrusion.hh>
+
+namespace Dumux::Experimental {
+
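+/*!
+ * \ingroup Assembly
+ * \brief Wraps a local residual (the local operator) and scales its temporal and spatial
+ *        contributions with the weights of the current time integration stage
+ * \tparam LocalOperator the wrapped local residual/operator type
+ */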
+template<class LocalOperator>
+class MultiStageFVLocalOperator
+{
+    using ElementOperatorResultVector = typename LocalOperator::ElementResidualVector;
+public:
+    // compatibility
+    using ElementResidualVector = ElementOperatorResultVector;
+
+    MultiStageFVLocalOperator(const LocalOperator& op)
+    : op_(op)
+    , spatialWeight_(1.0)
+    , temporalWeight_(1.0)
+    {}
+
+    // discretization-agnostic interface (apart from FV)
+    template<class FVGeometry, class ElemVolVars>
+    ElementOperatorResultVector evalStorage(
+        const FVGeometry& fvGeometry,
+        const ElemVolVars& elemVolVars
+    ) const {
+        ElementOperatorResultVector result(fvGeometry.numScv());
+
+        if (std::abs(temporalWeight_) > 1e-6)
+        {
+            for (const auto& scv : scvs(fvGeometry))
+                result[scv.localDofIndex()] +=
+                    op_.computeStorage(op_.problem(), scv, elemVolVars[scv])
+                    * elemVolVars[scv].extrusionFactor()
+                    * Extrusion_t<typename FVGeometry::GridGeometry>::volume(fvGeometry, scv)
+                    * temporalWeight_;
+        }
+
+        return result;
+    }
+
+    // discretization-agnostic interface (apart from FV)
+    template<class FVGeometry, class ElemVolVars, class ElemFluxVars, class ElemBCTypes>
+    ElementOperatorResultVector evalFluxAndSource(
+        const typename FVGeometry::Element&, // not needed, here for compatibility
+        const FVGeometry& fvGeometry,
+        const ElemVolVars& elemVolVars,
+        const ElemFluxVars& elemFluxVarsCache,
+        const ElemBCTypes& bcTypes
+    ) const {
+        ElementOperatorResultVector result(fvGeometry.numScv());
+
+        if (std::abs(spatialWeight_) > 1e-6)
+        {
+            result = op_.evalFluxAndSource(fvGeometry.element(), fvGeometry, elemVolVars, elemFluxVarsCache, bcTypes);
+            for (const auto& scv : scvs(fvGeometry))
+                result[scv.localDofIndex()] *= spatialWeight_;
+        }
+
+        return result;
+    }
+
+    // interface allowing for optimization when computing the cell-centered finite volume Jacobian
+    template<class Problem, class FVGeometry, class ElemVolVars, class ElemFluxVars>
+    auto evalFlux(
+        const Problem&, // not needed
+        const typename FVGeometry::Element& element, // can be neighbor
+        const FVGeometry& fvGeometry,
+        const ElemVolVars& elemVolVars,
+        const ElemFluxVars& elemFluxVarsCache,
+        const typename FVGeometry::SubControlVolumeFace& scvf
+    ) const {
+        using NumEqVector = std::decay_t<decltype(op_.evalFlux(op_.problem(), element, fvGeometry, elemVolVars, elemFluxVarsCache, scvf))>;
+        NumEqVector result(0.0);
+        if (std::abs(spatialWeight_) > 1e-6)
+        {
+            result = op_.evalFlux(op_.problem(), element, fvGeometry, elemVolVars, elemFluxVarsCache, scvf);
+            result *= spatialWeight_;
+        }
+        return result;
+    }
+
+    void spatialWeight(double w) { spatialWeight_ = w; }
+    double spatialWeight() const { return spatialWeight_; }
+
+    void temporalWeight(double w) { temporalWeight_ = w; }
+    double temporalWeight() const { return temporalWeight_; }
+
+    const auto& problem() const
+    { return op_.problem(); }
+
+    // some old interface (TODO: get rid of this)
+    // (stationary is also the wrong word by the way, systems with zero
+    // time derivative are in a steady-state but not necessarily stationary/not-moving at all)
+    // can we decide this somehow from the temporal weight?
+    bool isStationary() const
+    { return false; }
+
+private:
+    LocalOperator op_;
+    double spatialWeight_, temporalWeight_; // TODO: get correct type
+};
+
+} // end namespace Dumux::Experimental
+
+#endif
diff --git a/dumux/experimental/assembly/multistagemultidomainfvassembler.hh b/dumux/experimental/assembly/multistagemultidomainfvassembler.hh
new file mode 100644
index 0000000000..03c370f61b
--- /dev/null
+++ b/dumux/experimental/assembly/multistagemultidomainfvassembler.hh
@@ -0,0 +1,617 @@
+// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+// vi: set et ts=4 sw=4 sts=4:
+//
+// SPDX-FileCopyrightInfo: Copyright © DuMux Project contributors, see AUTHORS.md in root folder
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+/*!
+ * \file
+ * \ingroup MultiDomain
+ * \ingroup Assembly
+ * \brief A linear system assembler (residual and Jacobian) for finite volume schemes
+ *        with multiple domains
+ */
+#ifndef DUMUX_EXPERIMENTAL_MULTISTAGE_MULTIDOMAIN_FV_ASSEMBLER_HH
+#define DUMUX_EXPERIMENTAL_MULTISTAGE_MULTIDOMAIN_FV_ASSEMBLER_HH
+
+#include <iostream>
+#include <vector>
+#include <type_traits>
+#include <tuple>
+#include <memory>
+
+#include <dune/common/hybridutilities.hh>
+#include <dune/istl/matrixindexset.hh>
+
+#include <dumux/common/exceptions.hh>
+#include <dumux/common/properties.hh>
+#include <dumux/common/typetraits/utility.hh>
+#include <dumux/common/parameters.hh>
+#include <dumux/common/gridcapabilities.hh>
+#include <dumux/common/timeloop.hh>
+
+#include <dumux/discretization/method.hh>
+#include <dumux/assembly/diffmethod.hh>
+#include <dumux/assembly/jacobianpattern.hh>
+#include <dumux/linear/parallelhelpers.hh>
+#include <dumux/parallel/multithreading.hh>
+
+#include <dumux/multidomain/couplingjacobianpattern.hh>
+#include <dumux/multidomain/assemblerview.hh>
+
+#include <dumux/experimental/assembly/subdomaincclocalassembler.hh>
+#include <dumux/experimental/assembly/subdomaincvfelocalassembler.hh>
+#include <dumux/experimental/assembly/multistagefvlocaloperator.hh>
+
+#include <dumux/experimental/timestepping/multistagemethods.hh>
+#include <dumux/experimental/timestepping/multistagetimestepper.hh>
+
+namespace Dumux::Grid::Capabilities {
+
+namespace Detail {
+// helper for multi-domain models
+template<class T, std::size_t... I>
+bool allGridsSupportsMultithreadingImpl(const T& gridGeometries, std::index_sequence<I...>)
+{
+    return (... && supportsMultithreading(std::get<I>(gridGeometries)->gridView()));
+}
+} // end namespace Detail
+
+// helper for multi-domain models (all grids have to support multithreading)
+template<class... GG>
+bool allGridsSupportsMultithreading(const std::tuple<GG...>& gridGeometries)
+{
+    return Detail::allGridsSupportsMultithreadingImpl<std::tuple<GG...>>(gridGeometries, std::make_index_sequence<sizeof...(GG)>());
+}
+
+} // end namespace Dumux::Grid::Capabilities
+
+namespace Dumux {
+
+/*!
+ * \ingroup MultiDomain
+ * \ingroup Assembly
+ * \brief Type trait that is specialized for coupling manager supporting multithreaded assembly
+ * \note A coupling manager implementation that wants to enable multithreaded assembly has to specialize this trait
+ */
+template<class CM>
+struct CouplingManagerSupportsMultithreadedAssembly : public std::false_type
+{};
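+
+// A coupling manager opts in by specializing the trait, e.g. (sketch; `MyCouplingManager`
+// is a hypothetical coupling manager type):
+//
+//   template<class MDTraits>
+//   struct CouplingManagerSupportsMultithreadedAssembly<MyCouplingManager<MDTraits>>
+//   : public std::true_type {};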
+
+} // end namespace Dumux
+
+namespace Dumux::Experimental {
+
+/*!
+ * \ingroup MultiDomain
+ * \ingroup Assembly
+ * \brief A linear system assembler (residual and Jacobian) for finite volume schemes (box, tpfa, mpfa, ...)
+ *        with multiple domains
+ * \tparam MDTraits the multidomain traits
+ * \tparam diffMethod the differentiation method used to compute the derivatives of the residual
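+ *
+ * \note Usage mirrors MultiStageFVAssembler (multistagefvassembler.hh): the constructor takes
+ *       tuples of problems, grid geometries and grid variables plus a coupling manager,
+ *       a multi-stage method and the previous solution; the assembler is then driven via
+ *       prepareStage(...), assembleJacobianAndResidual(...) and clearStages()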
+ */
+template<class MDTraits, class CMType, DiffMethod diffMethod>
+class MultiStageMultiDomainFVAssembler
+{
+    template<std::size_t id>
+    using SubDomainTypeTag = typename MDTraits::template SubDomain<id>::TypeTag;
+
+public:
+    using Traits = MDTraits;
+
+    using Scalar = typename MDTraits::Scalar;
+    using StageParams = Experimental::MultiStageParams<Scalar>;
+
+    //! TODO get rid of this GetPropType
+    template<std::size_t id>
+    using LocalResidual = GetPropType<SubDomainTypeTag<id>, Properties::LocalResidual>;
+
+    template<std::size_t id>
+    using GridVariables = typename MDTraits::template SubDomain<id>::GridVariables;
+
+    template<std::size_t id>
+    using GridGeometry = typename MDTraits::template SubDomain<id>::GridGeometry;
+
+    template<std::size_t id>
+    using Problem = typename MDTraits::template SubDomain<id>::Problem;
+
+    using JacobianMatrix = typename MDTraits::JacobianMatrix;
+    using SolutionVector = typename MDTraits::SolutionVector;
+    using ResidualType = typename MDTraits::ResidualVector;
+
+    using CouplingManager = CMType;
+
+private:
+
+    using ProblemTuple = typename MDTraits::template TupleOfSharedPtrConst<Problem>;
+    using GridGeometryTuple = typename MDTraits::template TupleOfSharedPtrConst<GridGeometry>;
+    using GridVariablesTuple = typename MDTraits::template TupleOfSharedPtr<GridVariables>;
+
+    using TimeLoop = TimeLoopBase<Scalar>;
+    using ThisType = MultiStageMultiDomainFVAssembler<MDTraits, CouplingManager, diffMethod>;
+
+    template<std::size_t id>
+    using SubDomainAssemblerView = MultiDomainAssemblerSubDomainView<ThisType, id>;
+
+    template<class DiscretizationMethod, std::size_t id>
+    struct SubDomainAssemblerType;
+
+    template<std::size_t id>
+    struct SubDomainAssemblerType<DiscretizationMethods::CCTpfa, id>
+    {
+        using type = Experimental::SubDomainCCLocalAssembler<id, SubDomainTypeTag<id>, SubDomainAssemblerView<id>, diffMethod>;
+    };
+
+    template<std::size_t id>
+    struct SubDomainAssemblerType<DiscretizationMethods::CCMpfa, id>
+    {
+        using type = Experimental::SubDomainCCLocalAssembler<id, SubDomainTypeTag<id>, SubDomainAssemblerView<id>, diffMethod>;
+    };
+
+    template<std::size_t id, class DM>
+    struct SubDomainAssemblerType<DiscretizationMethods::CVFE<DM>, id>
+    {
+        using type = Experimental::SubDomainCVFELocalAssembler<id, SubDomainTypeTag<id>, SubDomainAssemblerView<id>, diffMethod>;
+    };
+
+    template<std::size_t id>
+    using SubDomainAssembler = typename SubDomainAssemblerType<typename GridGeometry<id>::DiscretizationMethod, id>::type;
+
+public:
+    /*!
+     * \brief The constructor for instationary problems
+     * \note the grid variables might be temporarily changed during assembly (if caching is enabled);
+     *       it is, however, guaranteed that the state after assembly is the same as before
+     */
+    MultiStageMultiDomainFVAssembler(ProblemTuple problem,
+                                     GridGeometryTuple gridGeometry,
+                                     GridVariablesTuple gridVariables,
+                                     std::shared_ptr<CouplingManager> couplingManager,
+                                     std::shared_ptr<const Experimental::MultiStageMethod<Scalar>> msMethod,
+                                     const SolutionVector& prevSol)
+    : couplingManager_(couplingManager)
+    , timeSteppingMethod_(msMethod)
+    , problemTuple_(std::move(problem))
+    , gridGeometryTuple_(std::move(gridGeometry))
+    , gridVariablesTuple_(std::move(gridVariables))
+    , prevSol_(&prevSol)
+    {
+        std::cout << "Instantiated assembler for an instationary problem." << std::endl;
+
+        enableMultithreading_ = CouplingManagerSupportsMultithreadedAssembly<CouplingManager>::value
+            && Grid::Capabilities::allGridsSupportsMultithreading(gridGeometryTuple_)
+            && !Multithreading::isSerial()
+            && getParam<bool>("Assembly.Multithreading", true);
+
+        maybeComputeColors_();
+    }
+
+    /*!
+     * \brief Assembles the global Jacobian of the residual
+     *        and the residual for the current solution.
+     */
+    void assembleJacobianAndResidual(const SolutionVector& curSol)
+    {
+        resetJacobian_();
+
+        resetResidual_();
+        spatialOperatorEvaluations_.back() = 0.0;
+        temporalOperatorEvaluations_.back() = 0.0;
+
+        if (stageParams_->size() != spatialOperatorEvaluations_.size())
+            DUNE_THROW(Dune::InvalidStateException, "Wrong number of residuals");
+
+        using namespace Dune::Hybrid;
+        forEach(std::make_index_sequence<JacobianMatrix::N()>(), [&](const auto domainId)
+        {
+            // assemble the spatial and temporal residual of the current time step and the Jacobian
+            // w.r.t to the current solution (the current solution on the current stage)
+            auto& jacRow = (*jacobian_)[domainId];
+            auto& spatial = spatialOperatorEvaluations_.back()[domainId];
+            auto& temporal = temporalOperatorEvaluations_.back()[domainId];
+
+            assemble_(domainId, [&](const auto& element)
+            {
+                MultiDomainAssemblerSubDomainView view{*this, domainId};
+                SubDomainAssembler<domainId()> subDomainAssembler(view, element, curSol, *couplingManager_);
+                subDomainAssembler.assembleJacobianAndResidual(
+                    jacRow, (*residual_)[domainId],
+                    gridVariablesTuple_,
+                    *stageParams_, temporal, spatial,
+                    constrainedDofs_[domainId]
+                );
+            });
+
+            // assemble the full residual for the time integration stage
+            auto constantResidualComponent = (*residual_)[domainId];
+            constantResidualComponent = 0.0;
+            for (std::size_t k = 0; k < stageParams_->size()-1; ++k)
+            {
+                if (!stageParams_->skipTemporal(k))
+                    constantResidualComponent.axpy(stageParams_->temporalWeight(k), temporalOperatorEvaluations_[k][domainId]);
+                if (!stageParams_->skipSpatial(k))
+                    constantResidualComponent.axpy(stageParams_->spatialWeight(k), spatialOperatorEvaluations_[k][domainId]);
+            }
+
+            // masked summation of the constant residual component onto this stage's residual component
+            for (std::size_t i = 0; i < constantResidualComponent.size(); ++i)
+                for (std::size_t ii = 0; ii < constantResidualComponent[i].size(); ++ii)
+                    (*residual_)[domainId][i][ii] += constrainedDofs_[domainId][i][ii] > 0.5 ? 0.0 : constantResidualComponent[i][ii];
+        });
+    }
+
+    //! Assembles only the residual (not implemented for the multi-stage assembler)
+    void assembleResidual(const SolutionVector& curSol)
+    { DUNE_THROW(Dune::NotImplemented, "residual"); }
+
+    /*!
+     * \brief Creates the Jacobian and residual objects managed by this assembler
+     *        (use this version if you don't need to access them outside this class)
+     */
+    void setLinearSystem()
+    {
+        jacobian_ = std::make_shared<JacobianMatrix>();
+        residual_ = std::make_shared<ResidualType>();
+
+        setJacobianBuildMode_(*jacobian_);
+        setJacobianPattern_(*jacobian_);
+        setResidualSize_(*residual_);
+    }
+
+    /*!
+     * \brief Updates the grid variables with the given solution
+     */
+    void updateGridVariables(const SolutionVector& curSol)
+    {
+        using namespace Dune::Hybrid;
+        forEach(integralRange(Dune::Hybrid::size(gridVariablesTuple_)), [&](const auto domainId)
+        { this->gridVariables(domainId).update(curSol[domainId]); });
+    }
+
+    /*!
+     * \brief Resets the grid variables to the last time step
+     */
+    void resetTimeStep(const SolutionVector& curSol)
+    {
+        using namespace Dune::Hybrid;
+        forEach(integralRange(Dune::Hybrid::size(gridVariablesTuple_)), [&](const auto domainId)
+        { this->gridVariables(domainId).resetTimeStep(curSol[domainId]); });
+    }
+
+    //! the number of dof locations of domain i
+    template<std::size_t i>
+    std::size_t numDofs(Dune::index_constant<i> domainId) const
+    { return std::get<domainId>(gridGeometryTuple_)->numDofs(); }
+
+    //! the problem of domain i
+    template<std::size_t i>
+    const auto& problem(Dune::index_constant<i> domainId) const
+    { return *std::get<domainId>(problemTuple_); }
+
+    //! the finite volume grid geometry of domain i
+    template<std::size_t i>
+    const auto& gridGeometry(Dune::index_constant<i> domainId) const
+    { return *std::get<domainId>(gridGeometryTuple_); }
+
+    //! the grid view of domain i
+    template<std::size_t i>
+    const auto& gridView(Dune::index_constant<i> domainId) const
+    { return gridGeometry(domainId).gridView(); }
+
+    //! the grid variables of domain i
+    template<std::size_t i>
+    GridVariables<i>& gridVariables(Dune::index_constant<i> domainId)
+    { return *std::get<domainId>(gridVariablesTuple_); }
+
+    //! the grid variables of domain i
+    template<std::size_t i>
+    const GridVariables<i>& gridVariables(Dune::index_constant<i> domainId) const
+    { return *std::get<domainId>(gridVariablesTuple_); }
+
+    //! the coupling manager
+    const CouplingManager& couplingManager() const
+    { return *couplingManager_; }
+
+    //! the full Jacobian matrix
+    JacobianMatrix& jacobian()
+    { return *jacobian_; }
+
+    //! the full residual vector
+    ResidualType& residual()
+    { return *residual_; }
+
+    //! the solution before time integration
+    const SolutionVector& prevSol() const
+    { return *prevSol_; }
+
+    /*!
+     * \brief Create a local residual object (used by the local assembler)
+     */
+    template<std::size_t i>
+    MultiStageFVLocalOperator<LocalResidual<i>> localResidual(Dune::index_constant<i> domainId) const
+    { return { LocalResidual<i>{std::get<domainId>(problemTuple_).get(), nullptr} }; }
+
+    void clearStages()
+    {
+        spatialOperatorEvaluations_.clear();
+        temporalOperatorEvaluations_.clear();
+        stageParams_.reset();
+    }
+
+    template<class StageParams>
+    void prepareStage(SolutionVector& x, StageParams params)
+    {
+        stageParams_ = std::move(params);
+        const auto curStage = stageParams_->size() - 1;
+
+        // in the first stage, also assemble the old residual
+        if (curStage == 1)
+        {
+            // update time in variables?
+            using namespace Dune::Hybrid;
+            forEach(std::make_index_sequence<JacobianMatrix::N()>(), [&](const auto domainId)
+            {
+                setProblemTime_(*std::get<domainId>(problemTuple_), stageParams_->timeAtStage(curStage));
+            });
+
+            resetResidual_(); // residual resized and zero
+            spatialOperatorEvaluations_.push_back(*residual_);
+            temporalOperatorEvaluations_.push_back(*residual_);
+
+            // assemble stage 0 residuals
+            using namespace Dune::Hybrid;
+            forEach(std::make_index_sequence<JacobianMatrix::N()>(), [&](const auto domainId)
+            {
+                auto& spatial = spatialOperatorEvaluations_.back()[domainId];
+                auto& temporal = temporalOperatorEvaluations_.back()[domainId];
+                assemble_(domainId, [&](const auto& element)
+                {
+                    MultiDomainAssemblerSubDomainView view{*this, domainId};
+                    SubDomainAssembler<domainId()> subDomainAssembler(view, element, x, *couplingManager_);
+                    subDomainAssembler.localResidual().spatialWeight(1.0);
+                    subDomainAssembler.localResidual().temporalWeight(1.0);
+                    subDomainAssembler.assembleCurrentResidual(spatial, temporal);
+                });
+            });
+        }
+
+        // update time in variables?
+        using namespace Dune::Hybrid;
+        forEach(std::make_index_sequence<JacobianMatrix::N()>(), [&](const auto domainId)
+        {
+            setProblemTime_(*std::get<domainId>(problemTuple_), stageParams_->timeAtStage(curStage));
+        });
+
+        resetResidual_(); // residual resized and zero
+        spatialOperatorEvaluations_.push_back(*residual_);
+        temporalOperatorEvaluations_.push_back(*residual_);
+    }
+
+    //! TODO get rid of this (called by Newton but shouldn't be necessary)
+    bool isStationaryProblem() const
+    { return false; }
+
+    bool isImplicit() const
+    { return timeSteppingMethod_->implicit(); }
+
+protected:
+    //! the coupling manager coupling the sub domains
+    std::shared_ptr<CouplingManager> couplingManager_;
+
+private:
+    /*!
+     * \brief Sets the jacobian build mode
+     */
+    void setJacobianBuildMode_(JacobianMatrix& jac) const
+    {
+        using namespace Dune::Hybrid;
+        forEach(std::make_index_sequence<JacobianMatrix::N()>(), [&](const auto i)
+        {
+            forEach(jac[i], [&](auto& jacBlock)
+            {
+                using BlockType = std::decay_t<decltype(jacBlock)>;
+                if (jacBlock.buildMode() == BlockType::BuildMode::unknown)
+                    jacBlock.setBuildMode(BlockType::BuildMode::random);
+                else if (jacBlock.buildMode() != BlockType::BuildMode::random)
+                    DUNE_THROW(Dune::NotImplemented, "Only BCRS matrices with random build mode are supported at the moment");
+            });
+        });
+    }
+
+    /*!
+     * \brief Sets the jacobian sparsity pattern.
+     */
+    void setJacobianPattern_(JacobianMatrix& jac) const
+    {
+        using namespace Dune::Hybrid;
+        forEach(std::make_index_sequence<JacobianMatrix::N()>(), [&](const auto domainI)
+        {
+            forEach(integralRange(Dune::Hybrid::size(jac[domainI])), [&](const auto domainJ)
+            {
+                const auto pattern = this->getJacobianPattern_(domainI, domainJ);
+                pattern.exportIdx(jac[domainI][domainJ]);
+            });
+        });
+    }
+
+    /*!
+     * \brief Resizes the residual
+     */
+    void setResidualSize_(ResidualType& res) const
+    {
+        using namespace Dune::Hybrid;
+        forEach(integralRange(Dune::Hybrid::size(res)), [&](const auto domainId)
+        { res[domainId].resize(this->numDofs(domainId)); });
+    }
+
+    // reset the residual vector to 0.0
+    void resetResidual_()
+    {
+        if(!residual_)
+        {
+            residual_ = std::make_shared<ResidualType>();
+            setResidualSize_(*residual_);
+        }
+
+        setResidualSize_(constrainedDofs_);
+
+        (*residual_) = 0.0;
+        constrainedDofs_ = 0.0;
+    }
+
+    // reset the jacobian matrix to 0.0
+    void resetJacobian_()
+    {
+        if(!jacobian_)
+        {
+            jacobian_ = std::make_shared<JacobianMatrix>();
+            setJacobianBuildMode_(*jacobian_);
+            setJacobianPattern_(*jacobian_);
+        }
+
+       (*jacobian_)  = 0.0;
+    }
+
+    //! Computes the colors
+    void maybeComputeColors_()
+    {
+        if constexpr (CouplingManagerSupportsMultithreadedAssembly<CouplingManager>::value)
+            if (enableMultithreading_)
+                couplingManager_->computeColorsForAssembly();
+    }
+
+    template<std::size_t i, class SubRes>
+    void assembleResidual_(Dune::index_constant<i> domainId, SubRes& subRes,
+                           const SolutionVector& curSol)
+    {
+        DUNE_THROW(Dune::InvalidStateException, "Separate residual assembly is not implemented");
+    }
+
+    /*!
+     * \brief Calls the given element-wise assembly kernel for all elements of the given sub-domain's grid
+     * \note Handles exceptions for parallel runs
+     * \throws NumericalProblem on all processes if an exception is thrown during assembly
+     */
+    template<std::size_t i, class AssembleElementFunc>
+    void assemble_(Dune::index_constant<i> domainId, AssembleElementFunc&& assembleElement) const
+    {
+        // a state that will be checked on all processes
+        bool succeeded = false;
+
+        // try assembling using the local assembly function
+        try
+        {
+            if constexpr (CouplingManagerSupportsMultithreadedAssembly<CouplingManager>::value)
+            {
+                if (enableMultithreading_)
+                {
+                    couplingManager_->assembleMultithreaded(
+                        domainId, std::forward<AssembleElementFunc>(assembleElement)
+                    );
+                    return;
+                }
+            }
+
+            // fallback for coupling managers that don't support multithreaded assembly (yet)
+            // or if multithreaded assembly is disabled
+            // let the local assembler add the element contributions
+            for (const auto& element : elements(gridView(domainId)))
+                assembleElement(element);
+
+            // if we get here, everything worked well on this process
+            succeeded = true;
+        }
+        // throw exception if a problem occurred
+        catch (NumericalProblem &e)
+        {
+            std::cout << "rank " << gridView(domainId).comm().rank()
+                      << " caught an exception while assembling:" << e.what()
+                      << "\n";
+            succeeded = false;
+        }
+
+        // make sure everything worked well on all processes
+        if (gridView(domainId).comm().size() > 1)
+            succeeded = gridView(domainId).comm().min(succeeded);
+
+        // if not succeeded rethrow the error on all processes
+        if (!succeeded)
+            DUNE_THROW(NumericalProblem, "A process did not succeed in linearizing the system");
+    }
+
+    // get diagonal block pattern
+    template<std::size_t i, std::size_t j, typename std::enable_if_t<(i==j), int> = 0>
+    Dune::MatrixIndexSet getJacobianPattern_(Dune::index_constant<i> domainI,
+                                             Dune::index_constant<j> domainJ) const
+    {
+        const auto& gg = gridGeometry(domainI);
+        if (timeSteppingMethod_->implicit())
+        {
+            auto pattern = getJacobianPattern<true>(gg);
+            couplingManager_->extendJacobianPattern(domainI, pattern);
+            return pattern;
+        }
+        else
+        {
+            auto pattern = getJacobianPattern<false>(gg);
+            couplingManager_->extendJacobianPattern(domainI, pattern);
+            return pattern;
+        }
+    }
+
+    // get coupling block pattern
+    template<std::size_t i, std::size_t j, typename std::enable_if_t<(i!=j), int> = 0>
+    Dune::MatrixIndexSet getJacobianPattern_(Dune::index_constant<i> domainI,
+                                             Dune::index_constant<j> domainJ) const
+    {
+        if (timeSteppingMethod_->implicit())
+            return getCouplingJacobianPattern<true>(*couplingManager_,
+                domainI, gridGeometry(domainI),
+                domainJ, gridGeometry(domainJ)
+            );
+        else
+            return getCouplingJacobianPattern<false>(*couplingManager_,
+                domainI, gridGeometry(domainI),
+                domainJ, gridGeometry(domainJ)
+            );
+    }
+
+    // TODO make this nicer with an is_detected trait in a common location
+    template<class P>
+    void setProblemTime_(const P& p, const Scalar t)
+    { setProblemTimeImpl_(p, t, 0); }
+
+    template<class P>
+    auto setProblemTimeImpl_(const P& p, const Scalar t, int) -> decltype(p.setTime(0))
+    { p.setTime(t); }
+
+    template<class P>
+    void setProblemTimeImpl_(const P& p, const Scalar t, long)
+    {}
+
+    std::shared_ptr<const Experimental::MultiStageMethod<Scalar>> timeSteppingMethod_;
+    std::vector<ResidualType> spatialOperatorEvaluations_;
+    std::vector<ResidualType> temporalOperatorEvaluations_;
+    ResidualType constrainedDofs_;
+    std::shared_ptr<const StageParams> stageParams_;
+
+    //! pointer to the problem to be solved
+    ProblemTuple problemTuple_;
+
+    //! the finite volume geometry of the grid
+    GridGeometryTuple gridGeometryTuple_;
+
+    //! the variables container for the grid
+    GridVariablesTuple gridVariablesTuple_;
+
+    //! Pointer to the previous solution
+    const SolutionVector* prevSol_;
+
+    //! shared pointers to the jacobian matrix and residual
+    std::shared_ptr<JacobianMatrix> jacobian_;
+    std::shared_ptr<ResidualType> residual_;
+
+    //! if multithreaded assembly is enabled
+    bool enableMultithreading_ = false;
+};
+
+} // end namespace Dumux::Experimental
+
+#endif
diff --git a/dumux/experimental/assembly/subdomaincclocalassembler.hh b/dumux/experimental/assembly/subdomaincclocalassembler.hh
new file mode 100644
index 0000000000..0fdf96dd69
--- /dev/null
+++ b/dumux/experimental/assembly/subdomaincclocalassembler.hh
@@ -0,0 +1,378 @@
+// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+// vi: set et ts=4 sw=4 sts=4:
+//
+// SPDX-FileCopyrightInfo: Copyright © DuMux Project contributors, see AUTHORS.md in root folder
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+/*!
+ * \file
+ * \ingroup Assembly
+ * \ingroup CCDiscretization
+ * \ingroup MultiDomain
+ * \brief A multidomain local assembler for Jacobian and residual contribution per element (cell-centered methods)
+ */
+#ifndef DUMUX_EXPERIMENTAL_SUBDOMAIN_CC_LOCAL_ASSEMBLER_HH
+#define DUMUX_EXPERIMENTAL_SUBDOMAIN_CC_LOCAL_ASSEMBLER_HH
+
+#include <dune/common/indices.hh>
+#include <dune/common/reservedvector.hh>
+#include <dune/common/hybridutilities.hh>
+#include <dune/grid/common/gridenums.hh> // for GhostEntity
+#include <dune/istl/matrixindexset.hh>
+
+#include <dumux/common/reservedblockvector.hh>
+#include <dumux/common/properties.hh>
+#include <dumux/common/parameters.hh>
+#include <dumux/common/numericdifferentiation.hh>
+#include <dumux/common/numeqvector.hh>
+#include <dumux/discretization/elementsolution.hh>
+#include <dumux/discretization/extrusion.hh>
+#include <dumux/assembly/numericepsilon.hh>
+#include <dumux/assembly/diffmethod.hh>
+
+#include <dumux/experimental/assembly/cclocalassembler.hh>
+
+namespace Dumux::Experimental {
+
+/*!
+ * \ingroup Assembly
+ * \ingroup CCDiscretization
+ * \ingroup MultiDomain
+ * \brief A base class for all multidomain local assemblers
+ * \tparam id the id of the sub domain
+ * \tparam TypeTag the TypeTag
+ * \tparam Assembler the assembler type
+ * \tparam Implementation the actual assembler implementation
+ */
+template<std::size_t id, class TypeTag, class Assembler, class Implementation, DiffMethod dm>
+class SubDomainCCLocalAssemblerBase : public Experimental::CCLocalAssembler<TypeTag, Assembler, dm, Implementation>
+{
+    using ParentType = Experimental::CCLocalAssembler<TypeTag, Assembler, dm, Implementation>;
+
+    using Problem = GetPropType<TypeTag, Properties::Problem>;
+    using LocalResidualValues = Dumux::NumEqVector<GetPropType<TypeTag, Properties::PrimaryVariables>>;
+    using JacobianMatrix = GetPropType<TypeTag, Properties::JacobianMatrix>;
+    using SolutionVector = typename Assembler::SolutionVector;
+    using ElementBoundaryTypes = GetPropType<TypeTag, Properties::ElementBoundaryTypes>;
+
+    using GridVariables = GetPropType<TypeTag, Properties::GridVariables>;
+    using GridVolumeVariables = typename GridVariables::GridVolumeVariables;
+    using ElementVolumeVariables = typename GridVolumeVariables::LocalView;
+    using ElementFluxVariablesCache = typename GridVariables::GridFluxVariablesCache::LocalView;
+    using Scalar = typename GridVariables::Scalar;
+
+    using GridGeometry = typename GridVariables::GridGeometry;
+    using FVElementGeometry = typename GridGeometry::LocalView;
+    using SubControlVolume = typename GridGeometry::SubControlVolume;
+    using SubControlVolumeFace = typename GridGeometry::SubControlVolumeFace;
+    using Extrusion = Extrusion_t<GridGeometry>;
+    using GridView = typename GridGeometry::GridView;
+    using Element = typename GridView::template Codim<0>::Entity;
+
+    using CouplingManager = typename Assembler::CouplingManager;
+    using ElementResidualVector = typename ParentType::LocalResidual::ElementResidualVector;
+
+public:
+    //! export the domain id of this sub-domain
+    static constexpr auto domainId = typename Dune::index_constant<id>();
+    //! the local residual type of this domain
+    using LocalResidual = GetPropType<TypeTag, Properties::LocalResidual>;
+    //! pull up constructor of parent class
+    using ParentType::ParentType;
+
+    //! the constructor
+    explicit SubDomainCCLocalAssemblerBase(const Assembler& assembler,
+                                           const Element& element,
+                                           const SolutionVector& curSol,
+                                           CouplingManager& couplingManager)
+    : ParentType(assembler,
+                 element,
+                 curSol,
+                 localView(assembler.gridGeometry(domainId)),
+                 localView(assembler.gridVariables(domainId).curGridVolVars()),
+                 localView(assembler.gridVariables(domainId).gridFluxVarsCache()),
+                 assembler.localResidual(domainId),
+                 (element.partitionType() == Dune::GhostEntity),
+                 assembler.isImplicit())
+    , couplingManager_(couplingManager)
+    {}
+
+    /*!
+     * \brief Computes the derivatives with respect to the given element and adds them
+     *        to the global matrix. The element residual is written into the right hand side.
+     */
+    template<class JacobianMatrixRow, class SubResidualVector, class GridVariablesTuple, class StageParams>
+    void assembleJacobianAndResidual(JacobianMatrixRow& jacRow, SubResidualVector& res, GridVariablesTuple& gridVariables,
+                                     const StageParams& stageParams, SubResidualVector& temporal, SubResidualVector& spatial,
+                                     SubResidualVector& constrainedDofs)
+    {
+        auto assembleCouplingBlocks = [&](const auto& residual)
+        {
+            // for the coupling blocks
+            using namespace Dune::Hybrid;
+            forEach(integralRange(Dune::Hybrid::size(jacRow)), [&](auto&& i)
+            {
+                if (std::decay_t<decltype(i)>{} != id)
+                    this->assembleJacobianCoupling(i, jacRow, residual, gridVariables);
+            });
+        };
+
+        // the coupled model does not support partial reassembly yet
+        const DefaultPartialReassembler* noReassembler = nullptr;
+        ParentType::assembleJacobianAndResidual(
+            jacRow[domainId], res,
+            *std::get<domainId>(gridVariables),
+            stageParams, temporal, spatial,
+            constrainedDofs,
+            noReassembler,
+            assembleCouplingBlocks
+        );
+    }
+
+    /*!
+     * \brief Assemble the entries in a coupling block of the jacobian.
+     *        There is no coupling block between a domain and itself.
+     */
+    template<std::size_t otherId, class JacRow, class GridVariables>
+    void assembleJacobianCoupling(Dune::index_constant<otherId> domainJ, JacRow& jacRow,
+                                  const LocalResidualValues& res, GridVariables& gridVariables)
+    {
+        if constexpr (id != otherId)
+            this->asImp_().assembleJacobianCoupling(domainJ, jacRow[domainJ], res, *std::get<domainId>(gridVariables));
+    }
+
+    /*!
+     * \brief Prepares all local views necessary for local assembly.
+     */
+    void bindLocalViews()
+    {
+        // get some references for convenience
+        const auto& element = this->element();
+        const auto& curSol = this->curSol(domainId);
+        auto&& fvGeometry = this->fvGeometry();
+        auto&& curElemVolVars = this->curElemVolVars();
+        auto&& elemFluxVarsCache = this->elemFluxVarsCache();
+
+        // bind the caches (the coupling context is bound first because the element volume variables of coupled models may depend on it)
+        couplingManager_.bindCouplingContext(domainId, element, this->assembler());
+        fvGeometry.bind(element);
+        curElemVolVars.bind(element, fvGeometry, curSol);
+        elemFluxVarsCache.bind(element, fvGeometry, curElemVolVars);
+    }
+
+    //! return reference to the subdomain problem
+    template<std::size_t i = domainId>
+    const Problem& problem(Dune::index_constant<i> dId = domainId) const
+    { return this->assembler().problem(domainId); }
+
+    //! return reference to the subdomain solution
+    template<std::size_t i = domainId>
+    const auto& curSol(Dune::index_constant<i> dId = domainId) const
+    { return ParentType::curSol()[dId]; }
+
+    //! return reference to the coupling manager
+    CouplingManager& couplingManager()
+    { return couplingManager_; }
+
+private:
+    CouplingManager& couplingManager_; //!< the coupling manager
+};
+
+/*!
+ * \ingroup Assembly
+ * \ingroup CCDiscretization
+ * \ingroup MultiDomain
+ * \brief The cell-centered scheme multidomain local assembler
+ * \tparam id the id of the sub domain
+ * \tparam TypeTag the TypeTag
+ * \tparam Assembler the assembler type
+ * \tparam DM the numeric differentiation method
+ */
+template<std::size_t id, class TypeTag, class Assembler, DiffMethod DM = DiffMethod::numeric>
+class SubDomainCCLocalAssembler;
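+
+// A hedged per-element usage sketch: the multistage multidomain assembler is expected to
+// drive this local assembler roughly as follows (`assembler`, `element`, `curSol`, `cm`,
+// `jacRow`, `res`, `gridVars`, `stageParams`, `temporal`, `spatial` and `constrainedDofs`
+// are illustrative names).
+//
+//     SubDomainCCLocalAssembler</*id=*/0, TypeTag, Assembler> localAssembler(
+//         assembler, element, curSol, cm);
+//     localAssembler.bindLocalViews();
+//     localAssembler.assembleJacobianAndResidual(jacRow, res, gridVars, stageParams,
+//                                                temporal, spatial, constrainedDofs);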
+
+/*!
+ * \ingroup Assembly
+ * \ingroup CCDiscretization
+ * \ingroup MultiDomain
+ * \brief Cell-centered scheme multidomain local assembler using numeric differentiation
+ */
+template<std::size_t id, class TypeTag, class Assembler>
+class SubDomainCCLocalAssembler<id, TypeTag, Assembler, DiffMethod::numeric>
+: public SubDomainCCLocalAssemblerBase<id, TypeTag, Assembler,
+            SubDomainCCLocalAssembler<id, TypeTag, Assembler, DiffMethod::numeric>, DiffMethod::numeric>
+{
+    using ThisType = SubDomainCCLocalAssembler<id, TypeTag, Assembler, DiffMethod::numeric>;
+    using ParentType = SubDomainCCLocalAssemblerBase<id, TypeTag, Assembler, ThisType, DiffMethod::numeric>;
+    using Problem = GetPropType<TypeTag, Properties::Problem>;
+
+    using Scalar = GetPropType<TypeTag, Properties::Scalar>;
+    using LocalResidualValues = Dumux::NumEqVector<GetPropType<TypeTag, Properties::PrimaryVariables>>;
+
+    using GridGeometry = GetPropType<TypeTag, Properties::GridGeometry>;
+    using FVElementGeometry = typename GridGeometry::LocalView;
+    using SubControlVolume = typename GridGeometry::SubControlVolume;
+    using GridView = typename GridGeometry::GridView;
+    using Element = typename GridView::template Codim<0>::Entity;
+
+    enum { numEq = GetPropType<TypeTag, Properties::ModelTraits>::numEq() };
+    enum { dim = GridView::dimension };
+
+    static constexpr bool enableGridFluxVarsCache = GetPropType<TypeTag, Properties::GridVariables>::GridFluxVariablesCache::cachingEnabled;
+    static constexpr bool enableGridVolVarsCache = GetPropType<TypeTag, Properties::GridVariables>::GridVolumeVariables::cachingEnabled;
+    static constexpr int maxElementStencilSize = GridGeometry::maxElementStencilSize;
+    static constexpr auto domainI = Dune::index_constant<id>();
+
+public:
+    using ParentType::ParentType;
+    using ElementResidualVector = typename ParentType::LocalResidual::ElementResidualVector;
+
+    /*!
+     * \brief Update the coupling context for coupled models.
+     */
+    template<class ElemSol>
+    void maybeUpdateCouplingContext(const SubControlVolume& scv, ElemSol& elemSol, const int pvIdx)
+    {
+        if (this->assembler().isImplicit())
+            this->couplingManager().updateCouplingContext(domainI, *this, domainI, scv.dofIndex(), elemSol[0], pvIdx);
+    }
+
+    /*!
+     * \brief Update the additional domain derivatives for coupled models.
+     */
+    template<class JacobianMatrixDiagBlock, class GridVariables>
+    void maybeEvalAdditionalDomainDerivatives(const ElementResidualVector& origResiduals, JacobianMatrixDiagBlock& A, GridVariables& gridVariables)
+    {
+        if (this->assembler().isImplicit())
+            this->couplingManager().evalAdditionalDomainDerivatives(domainI, *this, origResiduals, A, gridVariables);
+    }
+
+    /*!
+     * \brief Computes the derivatives with respect to the given element and adds them
+     *        to the global matrix.
+     */
+    template<std::size_t otherId, class JacobianBlock, class GridVariables>
+    void assembleJacobianCoupling(Dune::index_constant<otherId> domainJ, JacobianBlock& A,
+                                  const LocalResidualValues& res, GridVariables& gridVariables)
+    {
+        if (!this->assembler().isImplicit())
+            return;
+
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////
+        // Calculate derivatives of all dofs in the element with respect to all dofs in the coupling stencil. //
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+        // get some aliases for convenience
+        const auto& element = this->element();
+        const auto& fvGeometry = this->fvGeometry();
+        const auto& gridGeometry = fvGeometry.gridGeometry();
+        auto&& curElemVolVars = this->curElemVolVars();
+        auto&& elemFluxVarsCache = this->elemFluxVarsCache();
+
+        // get stencil information
+        const auto globalI = gridGeometry.elementMapper().index(element);
+        const auto& stencil = this->couplingManager().couplingStencil(domainI, element, domainJ);
+        const auto& curSolJ = this->curSol(domainJ);
+
+        // make sure the flux variables cache does not contain any artifacts from prior differentiation
+        elemFluxVarsCache.update(element, fvGeometry, curElemVolVars);
+
+        // convenience lambda for call to update self
+        auto updateCoupledVariables = [&] ()
+        {
+            // Update ourself after the context has been modified. Depending on the
+            // type of caching, other objects might have to be updated. All ifs can be optimized away.
+            if (enableGridFluxVarsCache)
+            {
+                if (enableGridVolVarsCache)
+                {
+                    this->couplingManager().updateCoupledVariables(domainI, *this, gridVariables.curGridVolVars(), gridVariables.gridFluxVarsCache());
+                    this->couplingManager().updateCoupledVariables(domainI, *this, gridVariables.curGridVolVars(), elemFluxVarsCache);
+                }
+                else
+                {
+                    this->couplingManager().updateCoupledVariables(domainI, *this, curElemVolVars, gridVariables.gridFluxVarsCache());
+                    this->couplingManager().updateCoupledVariables(domainI, *this, curElemVolVars, elemFluxVarsCache);
+                }
+            }
+            else
+            {
+                if (enableGridVolVarsCache)
+                    this->couplingManager().updateCoupledVariables(domainI, *this, gridVariables.curGridVolVars(), elemFluxVarsCache);
+                else
+                    this->couplingManager().updateCoupledVariables(domainI, *this, curElemVolVars, elemFluxVarsCache);
+            }
+        };
+
+        for (const auto& globalJ : stencil)
+        {
+            // undeflected primary variables and the copy that will be deflected
+            const auto origPriVarsJ = curSolJ[globalJ];
+            auto priVarsJ = origPriVarsJ;
+
+            const auto origResidual = this->couplingManager().evalCouplingResidual(domainI, *this, domainJ, globalJ)[0];
+
+            for (int pvIdx = 0; pvIdx < JacobianBlock::block_type::cols; ++pvIdx)
+            {
+                auto evalCouplingResidual = [&](Scalar priVar)
+                {
+                    // update the volume variables and the flux var cache
+                    priVarsJ[pvIdx] = priVar;
+                    this->couplingManager().updateCouplingContext(domainI, *this, domainJ, globalJ, priVarsJ, pvIdx);
+                    updateCoupledVariables();
+                    return this->couplingManager().evalCouplingResidual(domainI, *this, domainJ, globalJ)[0];
+                };
+
+                // derive the residuals numerically
+                LocalResidualValues partialDeriv(0.0);
+                const auto& paramGroup = this->assembler().problem(domainJ).paramGroup();
+                static const int numDiffMethod = getParamFromGroup<int>(paramGroup, "Assembly.NumericDifferenceMethod");
+                static const auto epsCoupl = this->couplingManager().numericEpsilon(domainJ, paramGroup);
+                NumericDifferentiation::partialDerivative(evalCouplingResidual, priVarsJ[pvIdx], partialDeriv, origResidual,
+                                                          epsCoupl(priVarsJ[pvIdx], pvIdx), numDiffMethod);
+
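+                // partialDeriv now approximates the derivative of the residual of element I
+                // (dof globalI) with respect to primary variable pvIdx at dof globalJ of
+                // domain J; forward, backward or central differences are used depending on
+                // the value of numDiffMethod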
+                // add the current partial derivatives to the global jacobian matrix
+                if constexpr (Problem::enableInternalDirichletConstraints())
+                {
+                    const auto& scv = fvGeometry.scv(globalI);
+                    const auto internalDirichletConstraints = this->problem().hasInternalDirichletConstraint(this->element(), scv);
+                    if (internalDirichletConstraints.none())
+                    {
+                        for (int eqIdx = 0; eqIdx < numEq; eqIdx++)
+                            A[globalI][globalJ][eqIdx][pvIdx] += partialDeriv[eqIdx];
+                    }
+                    else
+                    {
+                        for (int eqIdx = 0; eqIdx < numEq; ++eqIdx)
+                        {
+                            if (internalDirichletConstraints[eqIdx])
+                                A[globalI][globalJ][eqIdx][pvIdx] = 0.0;
+                            else
+                                A[globalI][globalJ][eqIdx][pvIdx] += partialDeriv[eqIdx];
+                        }
+                    }
+                }
+                else
+                {
+                    for (int eqIdx = 0; eqIdx < numEq; eqIdx++)
+                        A[globalI][globalJ][eqIdx][pvIdx] += partialDeriv[eqIdx];
+                }
+
+                // restore the current element solution
+                priVarsJ[pvIdx] = origPriVarsJ[pvIdx];
+
+                // restore the undeflected state of the coupling context
+                this->couplingManager().updateCouplingContext(domainI, *this, domainJ, globalJ, priVarsJ, pvIdx);
+            }
+
+            // restore original state of the flux vars cache and/or vol vars in case of global caching.
+            // This has to be done in order to guarantee that the undeflected residual computation done
+            // above is correct when jumping to the next coupled dof of domainJ.
+            updateCoupledVariables();
+        }
+    }
+};
+
+} // end namespace Dumux::Experimental
+
+#endif
diff --git a/dumux/experimental/assembly/subdomaincvfelocalassembler.hh b/dumux/experimental/assembly/subdomaincvfelocalassembler.hh
new file mode 100644
index 0000000000..9a995e4c2e
--- /dev/null
+++ b/dumux/experimental/assembly/subdomaincvfelocalassembler.hh
@@ -0,0 +1,386 @@
+// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+// vi: set et ts=4 sw=4 sts=4:
+//
+// SPDX-FileCopyrightInfo: Copyright © DuMux Project contributors, see AUTHORS.md in root folder
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+/*!
+ * \file
+ * \ingroup Assembly
+ * \ingroup CVFEDiscretization
+ * \ingroup MultiDomain
+ * \brief An assembler for Jacobian and residual contribution per element (CVFE methods) for multidomain problems
+ */
+#ifndef DUMUX_EXPERIMENTAL_SUBDOMAIN_CVFE_LOCAL_ASSEMBLER_HH
+#define DUMUX_EXPERIMENTAL_SUBDOMAIN_CVFE_LOCAL_ASSEMBLER_HH
+
+#include <type_traits>
+
+#include <dune/common/reservedvector.hh>
+#include <dune/common/indices.hh>
+#include <dune/common/hybridutilities.hh>
+#include <dune/grid/common/gridenums.hh> // for GhostEntity
+#include <dune/istl/matrixindexset.hh>
+
+#include <dumux/common/reservedblockvector.hh>
+#include <dumux/common/properties.hh>
+#include <dumux/common/parameters.hh>
+#include <dumux/common/numericdifferentiation.hh>
+#include <dumux/common/numeqvector.hh>
+#include <dumux/assembly/numericepsilon.hh>
+#include <dumux/assembly/diffmethod.hh>
+#include <dumux/discretization/extrusion.hh>
+
+#include <dumux/experimental/assembly/cvfelocalassembler.hh>
+
+namespace Dumux::Experimental {
+
+/*!
+ * \ingroup Assembly
+ * \ingroup CVFEDiscretization
+ * \ingroup MultiDomain
+ * \brief A base class for all CVFE subdomain local assemblers
+ * \tparam id the id of the sub domain
+ * \tparam TypeTag the TypeTag
+ * \tparam Assembler the assembler type
+ * \tparam Implementation the actual implementation type
+ */
+template<std::size_t id, class TypeTag, class Assembler, class Implementation, DiffMethod dm>
+class SubDomainCVFELocalAssemblerBase : public Experimental::CVFELocalAssembler<TypeTag, Assembler, dm, Implementation>
+{
+    using ParentType = Experimental::CVFELocalAssembler<TypeTag, Assembler, dm, Implementation>;
+
+    using Problem = GetPropType<TypeTag, Properties::Problem>;
+    using LocalResidualValues = Dumux::NumEqVector<GetPropType<TypeTag, Properties::PrimaryVariables>>;
+    using JacobianMatrix = GetPropType<TypeTag, Properties::JacobianMatrix>;
+    using SolutionVector = typename Assembler::SolutionVector;
+    using ElementBoundaryTypes = GetPropType<TypeTag, Properties::ElementBoundaryTypes>;
+
+    using GridVariables = GetPropType<TypeTag, Properties::GridVariables>;
+    using GridVolumeVariables = typename GridVariables::GridVolumeVariables;
+    using ElementVolumeVariables = typename GridVolumeVariables::LocalView;
+    using ElementFluxVariablesCache = typename GridVariables::GridFluxVariablesCache::LocalView;
+    using Scalar = typename GridVariables::Scalar;
+
+    using GridGeometry = typename GridVariables::GridGeometry;
+    using FVElementGeometry = typename GridGeometry::LocalView;
+    using SubControlVolume = typename GridGeometry::SubControlVolume;
+    using SubControlVolumeFace = typename GridGeometry::SubControlVolumeFace;
+    using Extrusion = Extrusion_t<GridGeometry>;
+    using GridView = typename GridGeometry::GridView;
+    using Element = typename GridView::template Codim<0>::Entity;
+
+    using CouplingManager = typename Assembler::CouplingManager;
+
+    static constexpr auto numEq = GetPropType<TypeTag, Properties::ModelTraits>::numEq();
+
+public:
+    //! export the domain id of this sub-domain
+    static constexpr auto domainId = typename Dune::index_constant<id>();
+    //! pull up constructor of parent class
+    using ParentType::ParentType;
+    //! export element residual vector type
+    using ElementResidualVector = typename ParentType::LocalResidual::ElementResidualVector;
+
+    //! the constructor
+    explicit SubDomainCVFELocalAssemblerBase(
+        const Assembler& assembler,
+        const Element& element,
+        const SolutionVector& curSol,
+        CouplingManager& couplingManager
+    )
+    : ParentType(assembler,
+                 element,
+                 curSol,
+                 localView(assembler.gridGeometry(domainId)),
+                 localView(assembler.gridVariables(domainId).curGridVolVars()),
+                 localView(assembler.gridVariables(domainId).gridFluxVarsCache()),
+                 assembler.localResidual(domainId),
+                 (element.partitionType() == Dune::GhostEntity),
+                 assembler.isImplicit())
+    , couplingManager_(couplingManager)
+    {}
+
+    /*!
+     * \brief Computes the derivatives with respect to the given element and adds them
+     *        to the global matrix. The element residual is written into the right hand side.
+     */
+    template<class JacobianMatrixRow, class SubResidualVector, class GridVariablesTuple, class StageParams>
+    void assembleJacobianAndResidual(JacobianMatrixRow& jacRow, SubResidualVector& res, GridVariablesTuple& gridVariables,
+                                     const StageParams& stageParams, SubResidualVector& temporal, SubResidualVector& spatial,
+                                     SubResidualVector& constrainedDofs)
+    {
+        auto assembleCouplingBlocks = [&](const auto& residual)
+        {
+            // assemble the coupling blocks
+            using namespace Dune::Hybrid;
+            forEach(integralRange(Dune::Hybrid::size(jacRow)), [&](auto&& i)
+            {
+                if constexpr (std::decay_t<decltype(i)>{} != id)
+                    this->assembleJacobianCoupling(i, jacRow, residual, gridVariables);
+            });
+        };
+
+        // the coupled model does not support partial reassembly yet
+        const DefaultPartialReassembler* noReassembler = nullptr;
+        ParentType::assembleJacobianAndResidual(
+            jacRow[domainId], res,
+            *std::get<domainId>(gridVariables),
+            stageParams, temporal, spatial,
+            constrainedDofs,
+            noReassembler,
+            assembleCouplingBlocks
+        );
+    }
+
+    /*!
+     * \brief Assemble the entries in a coupling block of the jacobian.
+     *        There is no coupling block between a domain and itself.
+     */
+    template<std::size_t otherId, class JacRow, class GridVariables>
+    void assembleJacobianCoupling(Dune::index_constant<otherId> domainJ, JacRow& jacRow,
+                                  const ElementResidualVector& res, GridVariables& gridVariables)
+    {
+        if constexpr (id != otherId)
+            this->asImp_().assembleJacobianCoupling(domainJ, jacRow[domainJ], res, *std::get<domainId>(gridVariables));
+    }
+
+    /*!
+     * \brief Prepares all local views necessary for local assembly.
+     */
+    void bindLocalViews()
+    {
+        // get some references for convenience
+        const auto& element = this->element();
+        const auto& curSol = this->curSol(domainId);
+        auto&& fvGeometry = this->fvGeometry();
+        auto&& curElemVolVars = this->curElemVolVars();
+        auto&& elemFluxVarsCache = this->elemFluxVarsCache();
+
+        // bind the caches
+        couplingManager_.bindCouplingContext(domainId, element, this->assembler());
+        fvGeometry.bind(element);
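+        // if the spatial weight of the current stage is (numerically) zero, only the
+        // temporal (storage) contribution is assembled, which needs element-local
+        // volume variables only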
+        if (std::abs(this->localResidual().spatialWeight()) < 1e-6)
+            curElemVolVars.bindElement(element, fvGeometry, curSol);
+        else
+        {
+            curElemVolVars.bind(element, fvGeometry, curSol);
+            elemFluxVarsCache.bind(element, fvGeometry, curElemVolVars);
+        }
+
+        this->elemBcTypes().update(problem(), this->element(), this->fvGeometry());
+    }
+
+    //! return reference to the underlying problem
+    template<std::size_t i = domainId>
+    const Problem& problem(Dune::index_constant<i> dId = domainId) const
+    { return this->assembler().problem(domainId); }
+
+    //! return reference to the subdomain solution
+    template<std::size_t i = domainId>
+    const auto& curSol(Dune::index_constant<i> dId = domainId) const
+    { return ParentType::curSol()[dId]; }
+
+    //! return reference to the coupling manager
+    CouplingManager& couplingManager()
+    { return couplingManager_; }
+
+private:
+    CouplingManager& couplingManager_; //!< the coupling manager
+};
+
+/*!
+ * \ingroup Assembly
+ * \ingroup CVFEDiscretization
+ * \ingroup MultiDomain
+ * \brief The CVFE scheme multidomain local assembler
+ * \tparam id the id of the sub domain
+ * \tparam TypeTag the TypeTag
+ * \tparam Assembler the assembler type
+ * \tparam DM the numeric differentiation method
+ */
+template<std::size_t id, class TypeTag, class Assembler, DiffMethod DM = DiffMethod::numeric>
+class SubDomainCVFELocalAssembler;
+
+/*!
+ * \ingroup Assembly
+ * \ingroup CVFEDiscretization
+ * \ingroup MultiDomain
+ * \brief CVFE scheme multi domain local assembler using numeric differentiation
+ */
+template<std::size_t id, class TypeTag, class Assembler>
+class SubDomainCVFELocalAssembler<id, TypeTag, Assembler, DiffMethod::numeric>
+: public SubDomainCVFELocalAssemblerBase<id, TypeTag, Assembler,
+             SubDomainCVFELocalAssembler<id, TypeTag, Assembler, DiffMethod::numeric>, DiffMethod::numeric>
+{
+    using ThisType = SubDomainCVFELocalAssembler<id, TypeTag, Assembler, DiffMethod::numeric>;
+    using ParentType = SubDomainCVFELocalAssemblerBase<id, TypeTag, Assembler, ThisType, DiffMethod::numeric>;
+    using Scalar = GetPropType<TypeTag, Properties::Scalar>;
+    using VolumeVariables = GetPropType<TypeTag, Properties::VolumeVariables>;
+
+    using ElementVolumeVariables = typename GetPropType<TypeTag, Properties::GridVolumeVariables>::LocalView;
+    using GridGeometry = GetPropType<TypeTag, Properties::GridGeometry>;
+    using FVElementGeometry = typename GridGeometry::LocalView;
+    using SubControlVolume = typename GridGeometry::SubControlVolume;
+    using GridView = typename GridGeometry::GridView;
+    using Element = typename GridView::template Codim<0>::Entity;
+    using Problem = GetPropType<TypeTag, Properties::Problem>;
+
+    static constexpr int numEq = GetPropType<TypeTag, Properties::ModelTraits>::numEq();
+    static constexpr int dim = GridView::dimension;
+
+    static constexpr bool enableGridFluxVarsCache = GetPropType<TypeTag, Properties::GridVariables>::GridFluxVariablesCache::cachingEnabled;
+    static constexpr bool enableGridVolVarsCache = GetPropType<TypeTag, Properties::GridVariables>::GridVolumeVariables::cachingEnabled;
+    static constexpr auto domainI = Dune::index_constant<id>();
+
+public:
+    using ParentType::ParentType;
+    //! export element residual vector type
+    using ElementResidualVector = typename ParentType::LocalResidual::ElementResidualVector;
+
+    /*!
+     * \brief Update the coupling context for coupled models.
+     */
+    template<class ElemSol>
+    void maybeUpdateCouplingContext(const SubControlVolume& scv, ElemSol& elemSol, const int pvIdx)
+    {
+        if (this->assembler().isImplicit())
+            this->couplingManager().updateCouplingContext(domainI, *this, domainI, scv.dofIndex(), elemSol[scv.localDofIndex()], pvIdx);
+    }
+
+    /*!
+     * \brief Update the additional domain derivatives for coupled models.
+     */
+    template<class JacobianMatrixDiagBlock, class GridVariables>
+    void maybeEvalAdditionalDomainDerivatives(const ElementResidualVector& origResiduals, JacobianMatrixDiagBlock& A, GridVariables& gridVariables)
+    {
+        if (this->assembler().isImplicit())
+            this->couplingManager().evalAdditionalDomainDerivatives(domainI, *this, origResiduals, A, gridVariables);
+    }
+
+    /*!
+     * \brief Computes the derivatives with respect to the given element and adds them
+     *        to the global matrix.
+     */
+    template<std::size_t otherId, class JacobianBlock, class GridVariables>
+    void assembleJacobianCoupling(Dune::index_constant<otherId> domainJ, JacobianBlock& A,
+                                  const ElementResidualVector& res, GridVariables& gridVariables)
+    {
+        if (!this->assembler().isImplicit())
+            return;
+
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////
+        // Calculate derivatives of all dofs in the element with respect to all dofs in the coupling stencil. //
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+        // get some aliases for convenience
+        const auto& element = this->element();
+        const auto& fvGeometry = this->fvGeometry();
+        auto&& curElemVolVars = this->curElemVolVars();
+        auto&& elemFluxVarsCache = this->elemFluxVarsCache();
+
+        // convenience lambda for call to update self
+        auto updateCoupledVariables = [&] ()
+        {
+            // Update ourself after the context has been modified. Depending on the
+            // type of caching, other objects might have to be updated. All ifs can be optimized away.
+            if constexpr (enableGridFluxVarsCache)
+            {
+                if constexpr (enableGridVolVarsCache)
+                    this->couplingManager().updateCoupledVariables(domainI, *this, gridVariables.curGridVolVars(), gridVariables.gridFluxVarsCache());
+                else
+                    this->couplingManager().updateCoupledVariables(domainI, *this, curElemVolVars, gridVariables.gridFluxVarsCache());
+            }
+            else
+            {
+                if constexpr (enableGridVolVarsCache)
+                    this->couplingManager().updateCoupledVariables(domainI, *this, gridVariables.curGridVolVars(), elemFluxVarsCache);
+                else
+                    this->couplingManager().updateCoupledVariables(domainI, *this, curElemVolVars, elemFluxVarsCache);
+            }
+        };
+
+        // get element stencil information
+        const auto& stencil = this->couplingManager().couplingStencil(domainI, element, domainJ);
+        const auto& curSolJ = this->curSol(domainJ);
+        for (const auto globalJ : stencil)
+        {
+            // undeflected primary variables and the copy that will be deflected
+            const auto origPriVarsJ = curSolJ[globalJ];
+            auto priVarsJ = origPriVarsJ;
+
+            // the undeflected coupling residual
+            const auto origResidual = this->couplingManager().evalCouplingResidual(domainI, *this, domainJ, globalJ);
+
+            for (int pvIdx = 0; pvIdx < JacobianBlock::block_type::cols; ++pvIdx)
+            {
+                auto evalCouplingResidual = [&](Scalar priVar)
+                {
+                    priVarsJ[pvIdx] = priVar;
+                    this->couplingManager().updateCouplingContext(domainI, *this, domainJ, globalJ, priVarsJ, pvIdx);
+                    updateCoupledVariables();
+                    return this->couplingManager().evalCouplingResidual(domainI, *this, domainJ, globalJ);
+                };
+
+                // derive the residuals numerically
+                ElementResidualVector partialDerivs(fvGeometry.numScv());
+
+                const auto& paramGroup = this->assembler().problem(domainJ).paramGroup();
+                static const int numDiffMethod = getParamFromGroup<int>(paramGroup, "Assembly.NumericDifferenceMethod");
+                static const auto epsCoupl = this->couplingManager().numericEpsilon(domainJ, paramGroup);
+
+                NumericDifferentiation::partialDerivative(evalCouplingResidual, origPriVarsJ[pvIdx], partialDerivs, origResidual,
+                                                          epsCoupl(origPriVarsJ[pvIdx], pvIdx), numDiffMethod);
+
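+                // each entry partialDerivs[scv.localDofIndex()] now approximates the derivative
+                // of that sub-control volume's residual with respect to priVarsJ[pvIdx]; the
+                // entries are distributed into the coupling block A below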
+                // update the global stiffness matrix with the current partial derivatives
+                for (const auto& scv : scvs(fvGeometry))
+                {
+                    for (int eqIdx = 0; eqIdx < numEq; eqIdx++)
+                    {
+                        // A[i][col][eqIdx][pvIdx] is the rate of change of
+                        // the residual of equation 'eqIdx' at dof 'i'
+                        // depending on the primary variable 'pvIdx' at dof
+                        // 'col'.
+                        A[scv.dofIndex()][globalJ][eqIdx][pvIdx] += partialDerivs[scv.localDofIndex()][eqIdx];
+
+                        // If the dof is coupled by a Dirichlet condition,
+                        // set the derived value only once (i.e. overwrite existing values).
+                        if (this->elemBcTypes().hasDirichlet())
+                        {
+                            const auto bcTypes = this->elemBcTypes().get(fvGeometry, scv);
+                            if (bcTypes.isCouplingDirichlet(eqIdx))
+                                A[scv.dofIndex()][globalJ][eqIdx][pvIdx] = partialDerivs[scv.localDofIndex()][eqIdx];
+                            else if (bcTypes.isDirichlet(eqIdx))
+                                A[scv.dofIndex()][globalJ][eqIdx][pvIdx] = 0.0;
+                        }
+
+                        // enforce internal Dirichlet constraints
+                        if constexpr (Problem::enableInternalDirichletConstraints())
+                        {
+                            const auto internalDirichletConstraints = this->problem().hasInternalDirichletConstraint(this->element(), scv);
+                            if (internalDirichletConstraints[eqIdx])
+                                A[scv.dofIndex()][globalJ][eqIdx][pvIdx] = 0.0;
+                        }
+
+                    }
+                }
+
+                // restore the current element solution
+                priVarsJ[pvIdx] = origPriVarsJ[pvIdx];
+
+                // restore the undeflected state of the coupling context
+                this->couplingManager().updateCouplingContext(domainI, *this, domainJ, globalJ, priVarsJ, pvIdx);
+            }
+
+            // Restore original state of the flux vars cache and/or vol vars.
+            // This has to be done in case they depend on variables of domainJ before
+            // we continue with the numeric derivative w.r.t the next globalJ. Otherwise,
+            // the next "origResidual" will be incorrect.
+            updateCoupledVariables();
+        }
+    }
+};
+
+} // end namespace Dumux::Experimental
+
+#endif
diff --git a/dumux/experimental/timestepping/multistagetimestepper.hh b/dumux/experimental/timestepping/multistagetimestepper.hh
index 95341c0afa..643b487190 100644
--- a/dumux/experimental/timestepping/multistagetimestepper.hh
+++ b/dumux/experimental/timestepping/multistagetimestepper.hh
@@ -16,11 +16,10 @@
 #include <vector>
 #include <cmath>
 
-namespace Dumux::Experimental {
+#include <dumux/io/format.hh> // for Fmt::format used in the constructor output below
+#include <dumux/experimental/timestepping/multistagemethods.hh>
 
-//! forward declaration
-template<class Scalar>
-class MultiStageMethod;
+namespace Dumux::Experimental {
 
 //! Data object for the parameters of a given stage
 template<class Scalar>
@@ -39,7 +37,7 @@ public:
         for (std::size_t k = 0; k < size_; ++k)
         {
             auto& p = params_[k];
-            p.alpha = m.temporalWeight(i, k);
+            p.alpha = m.temporalWeight(i, k);///dt;
             p.betaDt = m.spatialWeight(i, k)*dt;
             p.timeAtStage = t + m.timeStepWeight(k)*dt;
             p.dtFraction = m.timeStepWeight(k);
@@ -92,7 +90,7 @@ template<class PDESolver>
 class MultiStageTimeStepper
 {
     using Variables = typename PDESolver::Variables;
-    using Scalar = typename Variables::Scalar;
+    using Scalar = double;//typename Variables::Scalar;
     using StageParams = MultiStageParams<Scalar>;
 
 public:
@@ -107,7 +105,11 @@ public:
                           std::shared_ptr<const MultiStageMethod<Scalar>> msMethod)
     : pdeSolver_(pdeSolver)
     , msMethod_(msMethod)
-    {}
+    {
+        std::cout << "Initialize time stepper with method " << msMethod_->name()
+                  << Fmt::format(" ({} stage{})", msMethod_->numStages(), (msMethod_->numStages() > 1 ? "s" : ""))
+                  << std::endl;
+    }
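+
+    // Construction and use (hedged sketch): `solver`, `vars` and `method` are illustrative,
+    // `method` being any implementation of MultiStageMethod<Scalar>, and a
+    // step(vars, t, dt) interface is assumed.
+    //
+    //     MultiStageTimeStepper<MySolver> timeStepper(solver, method);
+    //     timeStepper.step(vars, /*t=*/0.0, /*dt=*/0.1);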
 
     /*!
      * \brief Advance one time step of the given time loop
@@ -124,11 +126,11 @@ public:
 
         for (auto stageIdx = 1UL; stageIdx <= msMethod_->numStages(); ++stageIdx)
         {
-            // extract parameters for this stage from the time stepping method
-            auto stageParams = std::make_shared<StageParams>(*msMethod_, stageIdx, t, dt);
-
             // prepare the assembler for this stage
-            pdeSolver_->assembler().prepareStage(vars, stageParams);
+            pdeSolver_->assembler().prepareStage(
+                vars,
+                std::make_shared<StageParams>(*msMethod_, stageIdx, t, dt)
+            );
 
             // assemble & solve
             pdeSolver_->solve(vars);
@@ -138,12 +140,6 @@ public:
         pdeSolver_->assembler().clearStages();
     }
 
-    /*!
-     * \brief Set/change the time step method
-     */
-    void setMethod(std::shared_ptr<const MultiStageMethod<Scalar>> msMethod)
-    { msMethod_ = msMethod; }
-
 private:
     std::shared_ptr<PDESolver> pdeSolver_;
     std::shared_ptr<const MultiStageMethod<Scalar>> msMethod_;
-- 
GitLab