diff --git a/dumux/assembly/staggeredlocalassembler.hh b/dumux/assembly/staggeredlocalassembler.hh
index a725718f766d574b8cbaaee0fd2ce6dfb43f27ea..13ac1307d6d0c4160dd98f4584cf95ca37eb1a3f 100644
--- a/dumux/assembly/staggeredlocalassembler.hh
+++ b/dumux/assembly/staggeredlocalassembler.hh
@@ -397,7 +397,7 @@ static void dCCdFace_(Assembler& assembler,
            priVars[pvIdx] += eps;
 
            faceSolution[globalJ][pvIdx] += eps;
-           faceVars.update(scvfJ, faceSolution);
+           faceVars.update(faceSolution, problem, element, fvGeometry, scvfJ);
 
            const auto deflectedResidual = localResidual.evalCellCenter(problem, element, fvGeometry,
                                           prevElemVolVars, curElemVolVars,
@@ -526,7 +526,7 @@ static void dFacedFace_(Assembler& assembler,
                const Scalar eps = numericEpsilon(faceSolution[globalJ][pvIdx], faceIdx, faceIdx);
 
                faceSolution[globalJ][pvIdx] += eps;
-               faceVars.update(scvf, faceSolution);
+               faceVars.update(faceSolution, problem, element, fvGeometry, scvf);
 
                const auto deflectedResidual = localResidual.evalFace(problem, element, fvGeometry, scvf,
                                               prevElemVolVars, curElemVolVars,
diff --git a/dumux/discretization/staggered/facesolution.hh b/dumux/discretization/staggered/facesolution.hh
index 386dbbece0b4e68ffcfccdaea76ac439b5d06919..d36ea605aa5dd38704e9f01a6c16e3e82e6f8ccd 100644
--- a/dumux/discretization/staggered/facesolution.hh
+++ b/dumux/discretization/staggered/facesolution.hh
@@ -52,8 +52,8 @@ public:
         const auto& connectivityMap = fvGridGeometry.connectivityMap();
         const auto& stencil = connectivityMap(faceIdx, faceIdx, scvf.index());
 
-        facePriVars_.clear();
-        map_.clear();
+        facePriVars_.reserve(stencil.size());
+        map_.reserve(stencil.size());
 
         for(const auto dofJ : stencil)
         {
@@ -85,7 +85,6 @@ private:
 
     std::vector<FacePrimaryVariables> facePriVars_;
     std::vector<unsigned int> map_;
-
 };
 
 } // end namespace
diff --git a/dumux/discretization/staggered/freeflow/facevariables.hh b/dumux/discretization/staggered/freeflow/facevariables.hh
index 3cb1a10f675d9d5eeb19733feef4011b42c52e9a..f696873cfeda070a0f5712b7d064b12cb22819e0 100644
--- a/dumux/discretization/staggered/freeflow/facevariables.hh
+++ b/dumux/discretization/staggered/freeflow/facevariables.hh
@@ -37,35 +37,75 @@ template<class TypeTag>
 class StaggeredFaceVariables
 {
     using Scalar = typename GET_PROP_TYPE(TypeTag, Scalar);
+    using Problem = typename GET_PROP_TYPE(TypeTag, Problem);
     using GridView = typename GET_PROP_TYPE(TypeTag, GridView);
     using FacePrimaryVariables = typename GET_PROP_TYPE(TypeTag, FacePrimaryVariables);
     using SubControlVolumeFace = typename GET_PROP_TYPE(TypeTag, SubControlVolumeFace);
+    using FVElementGeometry = typename GET_PROP_TYPE(TypeTag, FVElementGeometry);
 
     static constexpr int dimWorld = GridView::dimensionworld;
     static constexpr int numPairs = (dimWorld == 2) ? 2 : 4;
 
-public:
+    using GlobalPosition = Dune::FieldVector<Scalar, dimWorld>;
+    using Element = typename GridView::template Codim<0>::Entity;
 
-    void update(const FacePrimaryVariables &facePrivars)
-    {
-        velocitySelf_ = facePrivars;
-    }
+    using DofTypeIndices = typename GET_PROP(TypeTag, DofTypeIndices);
+    typename DofTypeIndices::CellCenterIdx cellCenterIdx;
+    typename DofTypeIndices::FaceIdx faceIdx;
+
+public:
 
     template<class SolVector>
-    void update(const SubControlVolumeFace& scvf, const SolVector& faceSol)
+    void update(const SolVector& faceSol,
+                const Problem& problem,
+                const Element& element,
+                const FVElementGeometry& fvGeometry,
+                const SubControlVolumeFace& scvf)
     {
         velocitySelf_ = faceSol[scvf.dofIndex()];
         velocityOpposite_ = faceSol[scvf.dofIndexOpposingFace()];
 
-        for(int i = 0; i < scvf.pairData().size(); ++i)
+        // lambda to conveniently create a ghost face which is outside the domain, parallel to the scvf of interest
+        auto makeGhostFace = [](const auto& pos)
         {
-            velocityNormalInside_[i] = faceSol[scvf.pairData(i).normalPair.first];
+            return SubControlVolumeFace(pos, std::vector<unsigned int>{0,0});
+        };
 
-            if(scvf.pairData(i).normalPair.second >= 0)
-                velocityNormalOutside_[i] = faceSol[scvf.pairData(i).normalPair.second];
+        // lambda to check whether there is a parallel face neighbor
+        auto hasParallelNeighbor = [](const auto& subFaceData)
+        {
+            return subFaceData.outerParallelFaceDofIdx >= 0;
+        };
+
+        // lambda to check whether there is a normal face neighbor
+        auto hasNormalNeighbor = [](const auto& subFaceData)
+        {
+            return subFaceData.normalPair.second >= 0;
+        };
 
-            if(scvf.pairData(i).outerParallelFaceDofIdx >= 0)
-                velocityParallel_[i] = faceSol[scvf.pairData(i).outerParallelFaceDofIdx];
+        // handle all sub faces
+        for(int i = 0; i < scvf.pairData().size(); ++i)
+        {
+            const auto& subFaceData = scvf.pairData(i);
+
+            // treat the velocities normal to the face
+            velocityNormalInside_[i] = faceSol[subFaceData.normalPair.first];
+
+            if(hasNormalNeighbor(subFaceData))
+            {
+                velocityNormalOutside_[i] = faceSol[subFaceData.normalPair.second];
+            }
+            else
+            {
+                const auto& normalFace = fvGeometry.scvf(scvf.insideScvIdx(), subFaceData.localNormalFaceIdx);
+                const auto normalDirIdx = normalFace.directionIndex();
+                velocityNormalOutside_[i] = problem.dirichlet(element, makeGhostFace(subFaceData.virtualOuterNormalFaceDofPos))[faceIdx][normalDirIdx];
+            }
+
+            // treat the velocity parallel to the face
+            velocityParallel_[i] = hasParallelNeighbor(subFaceData) ?
+                                   velocityParallel_[i] = faceSol[subFaceData.outerParallelFaceDofIdx] :
+                                   problem.dirichlet(element, makeGhostFace(subFaceData.virtualOuterParallelFaceDofPos))[faceIdx][scvf.directionIndex()];
         }
     }
 
@@ -95,6 +135,7 @@ public:
     }
 
 private:
+
     Scalar velocitySelf_;
     Scalar velocityOpposite_;
     std::array<Scalar, numPairs> velocityParallel_;
diff --git a/dumux/discretization/staggered/globalfacevariables.hh b/dumux/discretization/staggered/globalfacevariables.hh
index 3f8edb66db81e367e72621e29fa37ca495303446..22985ad413ce474ffc010df5f65004ba174834b1 100644
--- a/dumux/discretization/staggered/globalfacevariables.hh
+++ b/dumux/discretization/staggered/globalfacevariables.hh
@@ -65,7 +65,7 @@ public:
 
             for(auto&& scvf : scvfs(fvGeometry))
             {
-                faceVariables_[scvf.index()].update(scvf, faceSol);
+                faceVariables_[scvf.index()].update(faceSol, problem_(), element, fvGeometry, scvf);
             }
         }
     }
diff --git a/dumux/freeflow/staggered/fluxvariables.hh b/dumux/freeflow/staggered/fluxvariables.hh
index e8e8bb68690e0a7e82bf440f7c5d1b4c86e9312f..a85555cdc4279fbe4201b78ae96dbff8f178247b 100644
--- a/dumux/freeflow/staggered/fluxvariables.hh
+++ b/dumux/freeflow/staggered/fluxvariables.hh
@@ -74,6 +74,7 @@ class FreeFlowFluxVariablesImpl<TypeTag, false>
     using Stencil = std::vector<IndexType>;
 
     using ElementFaceVariables = typename GET_PROP_TYPE(TypeTag, ElementFaceVariables);
+    using FaceVariables = typename GET_PROP_TYPE(TypeTag, FaceVariables);
 
     static constexpr bool navierStokes = GET_PROP_VALUE(TypeTag, EnableInertiaTerms);
 
@@ -288,42 +289,25 @@ public:
 
 private:
 
-  template<class FaceVars>
   FacePrimaryVariables computeAdvectivePartOfTangentialMomentumFlux_(const Problem& problem,
                                                                      const Element& element,
                                                                      const SubControlVolumeFace& scvf,
                                                                      const SubControlVolumeFace& normalFace,
                                                                      const ElementVolumeVariables& elemVolVars,
-                                                                     const FaceVars& faceVars,
+                                                                     const FaceVariables& faceVars,
                                                                      const int localSubFaceIdx)
   {
       const Scalar transportingVelocity = faceVars.velocityNormalInside(localSubFaceIdx);
       const auto insideScvIdx = normalFace.insideScvIdx();
       const auto outsideScvIdx = normalFace.outsideScvIdx();
 
-      // lambda to conveniently create a ghost face which is outside the domain, parallel to the scvf of interest
-    //   auto makeGhostFace = [insideScvIdx] (const GlobalPosition& pos)
-    //   {
-    //       return SubControlVolumeFace(pos, std::vector<unsigned int>{insideScvIdx,insideScvIdx});
-    //   };
-
       const bool innerElementIsUpstream = ( sign(normalFace.outerNormalScalar()) == sign(transportingVelocity) );
 
       const auto& upVolVars = innerElementIsUpstream ? elemVolVars[insideScvIdx] : elemVolVars[outsideScvIdx];
 
-      Scalar transportedVelocity(0.0);
-
-      if(innerElementIsUpstream)
-          transportedVelocity = faceVars.velocitySelf();
-      else
-      {
-          const int outerDofIdx = scvf.pairData(localSubFaceIdx).outerParallelFaceDofIdx;
-          if(outerDofIdx >= 0)
-              transportedVelocity = faceVars.velocityParallel(localSubFaceIdx);
-          else // this is the case when the outer parallal dof would lie outside the domain TODO: discuss which one is better
-            //   transportedVelocity = problem.dirichlet(makeGhostFace(subFaceData.virtualOuterParallelFaceDofPos))[faceIdx][scvf.directionIndex()];
-              transportedVelocity = problem.dirichlet(element, scvf)[faceIdx][scvf.directionIndex()];
-      }
+      const Scalar transportedVelocity = innerElementIsUpstream ?
+                                         faceVars.velocitySelf() :
+                                         faceVars.velocityParallel(localSubFaceIdx);
 
       const Scalar momentum = upVolVars.density() * transportedVelocity;
       const int sgn = sign(normalFace.outerNormalScalar());
@@ -331,41 +315,28 @@ private:
       return transportingVelocity * momentum * sgn * normalFace.area() * 0.5;
   }
 
-  template<class FaceVars>
   FacePrimaryVariables computeDiffusivePartOfTangentialMomentumFlux_(const Problem& problem,
                                                                      const Element& element,
                                                                      const SubControlVolumeFace& scvf,
                                                                      const SubControlVolumeFace& normalFace,
                                                                      const ElementVolumeVariables& elemVolVars,
-                                                                     const FaceVars& faceVars,
+                                                                     const FaceVariables& faceVars,
                                                                      const int localSubFaceIdx)
   {
       FacePrimaryVariables tangentialDiffusiveFlux(0.0);
 
-      const auto normalDirIdx = normalFace.directionIndex();
       const auto insideScvIdx = normalFace.insideScvIdx();
       const auto outsideScvIdx = normalFace.outsideScvIdx();
 
       const auto& insideVolVars = elemVolVars[insideScvIdx];
       const auto& outsideVolVars = elemVolVars[outsideScvIdx];
 
-      // lambda to conveniently create a ghost face which is outside the domain, parallel to the scvf of interest
-      auto makeGhostFace = [insideScvIdx] (const GlobalPosition& pos)
-      {
-          return SubControlVolumeFace(pos, std::vector<unsigned int>{insideScvIdx,insideScvIdx});
-      };
-
       // the averaged viscosity at the face normal to our face of interest (where we assemble the face residual)
       const Scalar muAvg = (insideVolVars.viscosity() + outsideVolVars.viscosity()) * 0.5;
 
       // the normal derivative
-      const int outerNormalVelocityIdx = scvf.pairData(localSubFaceIdx).normalPair.second;
-
       const Scalar innerNormalVelocity = faceVars.velocityNormalInside(localSubFaceIdx);
-
-      const Scalar outerNormalVelocity = outerNormalVelocityIdx >= 0 ?
-                                  faceVars.velocityNormalOutside(localSubFaceIdx) :
-                                  problem.dirichlet(element, makeGhostFace(scvf.pairData(localSubFaceIdx).virtualOuterNormalFaceDofPos))[faceIdx][normalDirIdx];
+      const Scalar outerNormalVelocity = faceVars.velocityNormalOutside(localSubFaceIdx);
 
       const Scalar normalDeltaV = scvf.normalInPosCoordDir() ?
                                     (outerNormalVelocity - innerNormalVelocity) :
@@ -377,10 +348,7 @@ private:
       // the parallel derivative
       const Scalar innerParallelVelocity = faceVars.velocitySelf();
 
-      const int outerParallelFaceDofIdx = scvf.pairData(localSubFaceIdx).outerParallelFaceDofIdx;
-      const Scalar outerParallelVelocity = outerParallelFaceDofIdx >= 0 ?
-                                           faceVars.velocityParallel(localSubFaceIdx) :
-                                           problem.dirichlet(element, makeGhostFace(scvf.pairData(localSubFaceIdx).virtualOuterParallelFaceDofPos))[faceIdx][scvf.directionIndex()];
+      const Scalar outerParallelVelocity = faceVars.velocityParallel(localSubFaceIdx);
 
       const Scalar parallelDeltaV = normalFace.normalInPosCoordDir() ?
                                    (outerParallelVelocity - innerParallelVelocity) :