diff --git a/dumux/common/volumevariables.hh b/dumux/common/volumevariables.hh
new file mode 100644
index 0000000000000000000000000000000000000000..2b74ee8f019c1c92315341d1b000d6e342326e27
--- /dev/null
+++ b/dumux/common/volumevariables.hh
@@ -0,0 +1,67 @@
+// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+// vi: set et ts=4 sw=4 sts=4:
+/*****************************************************************************
+ *   See the file COPYING for full copying permissions.                      *
+ *                                                                           *
+ *   This program is free software: you can redistribute it and/or modify    *
+ *   it under the terms of the GNU General Public License as published by    *
+ *   the Free Software Foundation, either version 3 of the License, or       *
+ *   (at your option) any later version.                                     *
+ *                                                                           *
+ *   This program is distributed in the hope that it will be useful,         *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of          *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the            *
+ *   GNU General Public License for more details.                            *
+ *                                                                           *
+ *   You should have received a copy of the GNU General Public License       *
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.   *
+ *****************************************************************************/
+/*!
+ * \file
+ * \ingroup Common
+ * \brief Basic volume variables for finite volume methods
+ */
+#ifndef DUMUX_COMMON_BASIC_VOLUME_VARIABLES_HH
+#define DUMUX_COMMON_BASIC_VOLUME_VARIABLES_HH
+
+#include <memory>
+
+namespace Dumux {
+
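+/*!
+ * \ingroup Common
+ * \brief The simplest volume variables implementation: it only stores
+ *        the primary variables of the control volume it is associated with.
+ */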
+template <class Traits>
+class BasicVolumeVariables
+{
+    using Scalar = typename Traits::PrimaryVariables::value_type;
+public:
+    //! export the type used for the primary variables
+    using PrimaryVariables = typename Traits::PrimaryVariables;
+
+    /*!
+     * \brief Update all quantities for a given control volume
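+     *
+     * \param elemSol the element solution (the primary variables at all element dofs)
+     * \param problem the problem to be solved (unused here)
+     * \param element the grid element (unused here)
+     * \param scv the sub control volume these volume variables are associated with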
+     */
+    template<class ElementSolution, class Problem, class Element, class SubControlVolume>
+    void update(const ElementSolution& elemSol,
+                const Problem& problem,
+                const Element& element,
+                const SubControlVolume& scv)
+    {
+        priVars_ = elemSol[scv.indexInElement()];
+    }
+
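+    //! return a single primary variable by its index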
+    Scalar priVar(const int pvIdx) const
+    { return priVars_[pvIdx]; }
+
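+    //! return the complete vector of primary variables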
+    const PrimaryVariables& priVars() const
+    { return priVars_; }
+
+    // for compatibility with more general models
+    Scalar extrusionFactor() const
+    { return 1.0; }
+
+private:
+    PrimaryVariables priVars_;
+};
+
+} // end namespace Dumux
+
+#endif
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index e6ae059341089865f4e747a175ba6b324157cfa7..fbf20b2458099cde5fd04af2561380804f40aedb 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_subdirectory(2pinfiltration)
 add_subdirectory(1ptracer)
 add_subdirectory(biomineralization)
+add_subdirectory(diffusion)
 add_subdirectory(embedded_network_1d3d)
 add_subdirectory(shallowwaterfriction)
 add_subdirectory(freeflowchannel)
diff --git a/examples/README.md b/examples/README.md
index 5d73eb799f25e29ec452d03b0d92f1fe321148c7..beb935118d8db8a5d64f7bcedeb2a8c74f17f499 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -5,7 +5,29 @@ Click on the respective example to get to a detailed documentation of the exampl
 For each example in this overview, the model equations and discretization method are described in words
 and the DuMu<sup>x</sup> name of the model is given in parenthesis: e.g. (`OneP`) / (`CCTpfaModel`).
 
-### [:open_file_folder: Example 1: One-phase flow and tracer transport](1ptracer/README.md)
+### [:open_file_folder: Example 1: Diffusion equation](diffusion/README.md)
+<table><tr><td>
+
+In this example we create a diffusion equation model and then simulate a diffusive process.
+
+You learn how to
+
+* set up a new, simple model equation (the diffusion equation)
+* read parameters from a configuration file
+* create a type tag and specialize properties for it
+* generate a randomly distributed initial field (with MPI parallelism)
+* solve a time-dependent diffusion problem in parallel
+
+__Model equations:__ A diffusion equation model fully developed and contained within the example<br />
+__Discretization method:__ Vertex-centered finite volumes / control-volume finite elements (Lagrange, P1) (`BoxModel`)
+
+</td>
+<td width="35%"><a href="diffusion/README.md">
+<figure><img src="diffusion/img/diffusion.gif" alt="diffusion"/></figure>
+</a></td>
+</tr></table>
+
+### [:open_file_folder: Example 2: One-phase flow and tracer transport](1ptracer/README.md)
 
 <table><tr><td>
 
@@ -29,7 +51,7 @@ __Discretization method:__ Cell-centered finite volumes with two-point flux appr
 </a></td>
 </tr></table>
 
-### [:open_file_folder: Example 2: Two-phase flow with infiltration and adaptive grid](2pinfiltration/README.md)
+### [:open_file_folder: Example 3: Two-phase flow with infiltration and adaptive grid](2pinfiltration/README.md)
 
 <table><tr><td>
 
@@ -47,14 +69,13 @@ You learn how to
 
 __Model equations:__ Immiscible two-phase flow Darcy equations in porous media (`TwoP`)<br />
 __Discretization method:__ Cell-centered finite volumes with two-point flux approximation (`CCTpfaModel`)
-
 </td>
 <td width="35%"><a href="2pinfiltration/README.md">
 <figure><img src="2pinfiltration/img/test_2p_pointsource_adaptive.png" alt="2p result"/></figure>
 </a></td>
 </tr></table>
 
-### [:open_file_folder: Example 3: Shallow water model](shallowwaterfriction/README.md)
+### [:open_file_folder: Example 4: Shallow water model](shallowwaterfriction/README.md)
 
 <table><tr><td>
 
@@ -74,7 +95,7 @@ __Discretization method:__ Cell-centered finite volumes with Riemann solver (`CC
 </a></td>
 </tr></table>
 
-### [:open_file_folder: Example 4: Freeflow channel](freeflowchannel/README.md)
+### [:open_file_folder: Example 5: Freeflow channel](freeflowchannel/README.md)
 
 <table><tr><td>
 
@@ -94,7 +115,7 @@ __Discretization method:__ Finite volumes with staggered grid arrangement (`Stag
 </a></td>
 </tr></table>
 
-### [:open_file_folder: Example 5: One-phase flow with rotation-symmetric solution](1protationsymmetry/README.md)
+### [:open_file_folder: Example 6: One-phase flow with rotation-symmetric solution](1protationsymmetry/README.md)
 
 <table><tr><td>
 
@@ -115,7 +136,7 @@ __Discretization method:__ Vertex-centered finite volumes / control-volume finit
 </a></td>
 </tr></table>
 
-### [:open_file_folder: Example 6: Biomineralization](biomineralization/README.md)
+### [:open_file_folder: Example 7: Biomineralization](biomineralization/README.md)
 
 <table><tr><td>
 
@@ -140,7 +161,7 @@ __Discretization method:__ Vertex-centered finite volumes / control-volume finit
 </a></td>
 </tr></table>
 
-### [:open_file_folder: Example 7: Lid-driven cavity](liddrivencavity/README.md)
+### [:open_file_folder: Example 8: Lid-driven cavity](liddrivencavity/README.md)
 
 <table><tr><td>
 
@@ -161,7 +182,7 @@ __Discretization method:__ Finite volumes with staggered grid arrangement (`Stag
 </a></td>
 </tr></table>
 
-### [:open_file_folder: Example 8: Permeability estimation using a pore-network model](porenetwork_upscaling/README.md)
+### [:open_file_folder: Example 9: Permeability estimation using a pore-network model](porenetwork_upscaling/README.md)
 
 <table><tr><td>
 
@@ -182,7 +203,7 @@ __Discretization method:__ Pore-network (`PoreNetworkModel`)
 </a></td>
 </tr></table>
 
-### [:open_file_folder: Example 9: Embedded network 1D-3D model (tissue perfusion)](embedded_network_1d3d/README.md)
+### [:open_file_folder: Example 10: Embedded network 1D-3D model (tissue perfusion)](embedded_network_1d3d/README.md)
 
 <table><tr><td>
 
diff --git a/examples/diffusion/.doc_config b/examples/diffusion/.doc_config
new file mode 100644
index 0000000000000000000000000000000000000000..41c2bd836b90be017f3c3c2e63db165b4af5784e
--- /dev/null
+++ b/examples/diffusion/.doc_config
@@ -0,0 +1,27 @@
+{
+    "README.md": [
+        "doc/_intro.md"
+    ],
+
+    "doc/model.md" : [
+        "doc/_model.md",
+        "model.hh"
+    ],
+
+    "doc/main.md" : [
+        "doc/_main.md",
+        "main.cc"
+    ],
+
+    "navigation" : {
+        "mainpage" : "README.md",
+        "subpages" : [
+            "doc/model.md",
+            "doc/main.md"
+        ],
+        "subtitles" : [
+            "Model implementation",
+            "Main program"
+        ]
+    }
+}
diff --git a/examples/diffusion/CMakeLists.txt b/examples/diffusion/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cb8baf078ebc701cf3dcaa103b5e7f3bf3c7f45c
--- /dev/null
+++ b/examples/diffusion/CMakeLists.txt
@@ -0,0 +1,29 @@
+dune_symlink_to_source_files(FILES "params.input")
+
+# To add an example, it would be enough to have
+# dumux_add_test(NAME example_diffusion SOURCES main.cc)
+# But we also want automated testing to make sure that the example keeps compiling
+# and produces the same result, so here is a regression test:
+dumux_add_test(NAME example_diffusion
+               LABELS example
+               SOURCES main.cc
+               COMMAND ${CMAKE_SOURCE_DIR}/bin/testing/runtest.py
+               CMD_ARGS --script fuzzy
+                        --files ${CMAKE_SOURCE_DIR}/test/references/example_diffusion-reference.vtu
+                                ${CMAKE_CURRENT_BINARY_DIR}/diffusion-00050.vtu
+                        --command "${CMAKE_CURRENT_BINARY_DIR}/example_diffusion params.input
+                                   -Grid.Cells \"6 6\" -Problem.DiffusionCoefficient 0.001")
+
+# We add a parallel test to make sure the example also works in parallel
+dumux_add_test(NAME example_diffusion_parallel
+               LABELS example
+               CMAKE_GUARD MPI_FOUND
+               TARGET example_diffusion
+               COMMAND ${CMAKE_SOURCE_DIR}/bin/testing/runtest.py
+               CMD_ARGS --script fuzzy
+                        --files ${CMAKE_SOURCE_DIR}/test/references/example_diffusion_parallel_p0-reference.vtu
+                                ${CMAKE_CURRENT_BINARY_DIR}/s0002-p0000-diffusion_parallel-00050.vtu
+                                ${CMAKE_SOURCE_DIR}/test/references/example_diffusion_parallel_p1-reference.vtu
+                                ${CMAKE_CURRENT_BINARY_DIR}/s0002-p0001-diffusion_parallel-00050.vtu
+                        --command "${MPIEXEC} -np 2 ${CMAKE_CURRENT_BINARY_DIR}/example_diffusion params.input
+                                   -Grid.Cells \"6 6\" -Problem.DiffusionCoefficient 0.001 -Problem.Name diffusion_parallel")
diff --git a/examples/diffusion/README.md b/examples/diffusion/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c733e89b42237d090f66fba25a08b60a3a817e5d
--- /dev/null
+++ b/examples/diffusion/README.md
@@ -0,0 +1,161 @@
+<!-- Important: This file has been automatically generated by generate_example_docs.py. Do not edit this file directly! -->
+
+# Diffusion equation
+
+In this example, we create an application solving the diffusion equation.
+This example is implemented in three files: [`model.hh`](model.hh), [`params.input`](params.input), and [`main.cc`](main.cc).
+The [`model.hh`](model.hh) header implements a simple self-contained diffusion equation model
+to be used with a control-volume finite element discretization like the Box method.
+The [`main.cc`](main.cc) source file contains the main program and [`params.input`](params.input) contains
+runtime parameters. The executable is configured in [`CMakeLists.txt`](CMakeLists.txt) and created with CMake.
+The source code will be discussed in Part 1 and Part 2 after an introduction to the equations and the problem description.
+
+__The main points illustrated in this example are__
+
+* setting up a simple model
+* creating a random initial solution in parallel
+* solving a time-dependent diffusion problem
+* using a linear PDE solver instance
+* reusing the Jacobian matrix for all time steps
+
+__Results__. The simulation result will look something like this:
+
+<figure>
+    <center>
+        <img src="img/diffusion.gif" alt="diffusion"/>
+        <figcaption> <b> Fig.1 </b> - Evolution of an initially random concentration field according to the diffusion equation.</figcaption>
+    </center>
+</figure>
+
+__Table of contents__. This description is structured as follows:
+
+[TOC]
+
+## Equation and problem description
+
+The scalar diffusion equation on a domain $\Omega \subset \mathbb{R}^2$ and time interval $(0,T]$
+with boundary $\partial\Omega = \Gamma_D \cup \Gamma_N$ composed of Dirichlet and Neumann boundaries,
+where for a position $x \in \Omega$ and time $t$, $c(x,t)$ denotes the unknown evolving concentration field, reads
+
+```math
+\begin{aligned}
+\frac{\partial c}{\partial t} - \nabla \cdot (D \nabla c) &= 0    && \mathrm{in}\; \Omega \times (0,T] \\
+                                                        c &= c_D  && \mathrm{on}\; \Gamma_D \times (0,T] \\
+                         -D \nabla c \cdot \boldsymbol{n} &= g_N  && \mathrm{on}\; \Gamma_N \times (0,T] \\
+                                                        c &= c^0  && \mathrm{for}\; t = 0,
+\end{aligned}
+```
+
+with diffusion coefficient $D$, Dirichlet data $c_D$, Neumann data $g_N$,
+and $\boldsymbol{n}$ denoting the outward-oriented unit normal vector on $\partial\Omega$.
+In this example, we will use homogeneous Neumann boundary conditions
+on all of $\partial\Omega$ such that $\Gamma_D = \emptyset$ and $g_N = 0$.
+The initial concentration field $c^0$ is a random field (the discrete control volume
+averages $c^0_{h,B}$ are sampled from a uniform distribution, $c^0_{h,B} \sim U(0,1)$).
+
+We partition $\Omega$ into control volumes $B$ (sometimes called "boxes").
+Integrating our equation over a control volume $B$ and applying the divergence theorem yields
+
+```math
+\begin{aligned}
+\int_B \frac{\partial c}{\partial t} \,\mathrm{d}V - \int_{\partial B} D \nabla c \cdot \boldsymbol{n} \,\mathrm{d}A &= 0.
+\end{aligned}
+```
+
+We discretize in time using an implicit Euler method
+
+```math
+\begin{aligned}
+\int_B \frac{c^{n+1}-c^{n}}{\Delta t} \,\mathrm{d}V - \int_{\partial B} D \nabla c^{n+1} \cdot \boldsymbol{n} \,\mathrm{d}A &= 0.
+\end{aligned}
+```
+
+For the Box method, the control volumes are centered at grid vertices. We partition the control volume boundary
+into sub control volume faces $\sigma$ such that each $\sigma$ belongs to exactly one grid element. The discrete solution
+$c_{h}$ is a piece-wise linear polynomial, and its restriction on an element $K$, denoted by $c_{h,K}$,
+is represented by a linear basis such that
+
+```math
+c_{h,K}(x) = \sum\limits_{B \in \mathcal{B}_K} c_{h,B} N_B(x)
+```
+
+where $\mathcal{B}_K$ is the set of control volumes intersecting element $K$ and $N_B$ are the basis functions associated
+with the vertex corresponding to the center of control volume $B$. Insertion of this ansatz yields the discrete equation
+
+```math
+\begin{aligned}
+\vert B \vert \frac{c_B^{n+1}-c_B^{n}}{\Delta t} - \sum_{\sigma \in \Sigma_{B}} \left[ D \nabla c_h^{n+1} \cdot \boldsymbol{n}_{B,\sigma} \vert \sigma \vert \right] &= 0,
+\end{aligned}
+```
+
+for each control volume $B$, where $\Sigma_{B}$ is the set of sub control volume faces on $\partial B$ and $\boldsymbol{n}_{B,\sigma}$ is the outward-oriented
+unit normal vector on $\partial B$. When assembling the discrete equation, we take an element-centered view. This means that we iterate
+over all elements $K$ in the mesh and for each element add the partial contributions
+
+```math
+\begin{aligned}
+\vert B \cap K \vert \frac{c_B^{n+1}-c_B^{n}}{\Delta t} - \sum_{\sigma \in \Sigma_{BK}} \left[ D \sum\limits_{B \in \mathcal{B}_K} \left\lbrace c_{h,B}^{n+1} \nabla N_B(x) \cdot \boldsymbol{n}_{B,\sigma} \vert \sigma \vert \right\rbrace \right] &= 0,
+\end{aligned}
+```
+
+to the control volume residual of control volume $B$, where $B \cap K$ is the sub control volume given by the intersection of $B$ and $K$
+and $\Sigma_{BK} \subset \Sigma_{B}$ is the set of sub control volume faces in $K$ belonging to $\partial B$.
+Computing the element-local contributions is described in Part 1.
+
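+To make the implicit Euler step concrete, here is a minimal, self-contained
+1D analogue in plain C++ (a sketch for illustration only, not DuMux code:
+cell-centered finite differences with zero-flux boundaries and a direct
+tridiagonal solve, whereas this example uses the 2D Box scheme together with
+DuMux's assembler and iterative solvers):
+
+```cpp
+#include <cstdio>
+#include <vector>
+
+// One implicit Euler step for dc/dt - D d^2c/dx^2 = 0 on a uniform 1D grid
+// with zero-flux boundaries: solve (I - dt D L) c^{n+1} = c^n, where L is
+// the discrete Laplacian, using the Thomas algorithm (tridiagonal solve)
+std::vector<double> implicitEulerStep(std::vector<double> c, double D, double dx, double dt)
+{
+    const int n = c.size();
+    const double r = D*dt/(dx*dx);
+
+    // tridiagonal matrix coefficients; a zero-flux boundary removes one neighbor
+    std::vector<double> lower(n, -r), diag(n, 1.0 + 2.0*r), upper(n, -r);
+    diag.front() = diag.back() = 1.0 + r;
+
+    // forward elimination
+    for (int i = 1; i < n; ++i)
+    {
+        const double m = lower[i]/diag[i-1];
+        diag[i] -= m*upper[i-1];
+        c[i] -= m*c[i-1];
+    }
+
+    // back substitution (c is overwritten with the new solution c^{n+1})
+    c[n-1] /= diag[n-1];
+    for (int i = n-2; i >= 0; --i)
+        c[i] = (c[i] - upper[i]*c[i+1])/diag[i];
+
+    return c;
+}
+
+int main()
+{
+    // 100 cells on the unit interval, D = 1e-4, dt = 0.1 (as in this example)
+    std::vector<double> c(100);
+    for (int i = 0; i < 100; ++i)
+        c[i] = (i % 7)/7.0; // rough, non-smooth initial data
+
+    for (int step = 0; step < 50; ++step)
+        c = implicitEulerStep(c, 1e-4, 0.01, 0.1);
+
+    std::printf("c[50] after 50 steps: %g\n", c[50]);
+}
+```
+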
+### Model parameters
+
+* $\Omega = [0,1]\times[0,1]$ (unit square)
+* End time $T = 5$ and time step size $\Delta t = 0.1$
+* $100 \times 100$ cells (structured Cartesian grid)
+* $D = 0.0001$
+* $c^0_{h,B} \sim U(0,1)$
+
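+These values are read from the parameter tree at run time, so a matching
+[`params.input`](params.input) might look as follows (a sketch based on the
+parameter names used by this example; the shipped file may differ in details):
+
+```ini
+[TimeLoop]
+TEnd = 5 # end time
+Dt = 0.1 # time step size
+
+[Grid]
+UpperRight = 1 1 # unit square domain
+Cells = 100 100 # number of cells per direction
+
+[Problem]
+Name = diffusion
+DiffusionCoefficient = 1e-4
+```
+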
+### Running the example in parallel
+
+By default Dumux will try to speed up the assembly by using shared memory parallelism if a suitable
+backend has been found on your system (one of TBB, OpenMP, Kokkos, C++ parallel algorithms).
+You can limit the number of threads by prepending your executable with `DUMUX_NUM_THREADS=<number>`.
+If you also want to use distributed memory parallelism with MPI (which currently
+works better for the linear solvers), run the executable with your MPI environment.
+Each MPI process will use multi-threading if `DUMUX_NUM_THREADS` is larger than $1$.
+
+Running the example with four MPI processes (distributed memory parallelism)
+each with two threads (shared memory parallelism):
+
+```sh
+DUMUX_NUM_THREADS=2 mpirun -np 4 ./example_diffusion
+```
+
+You can set the parameter `Grid.Overlap` to some non-zero integer in [`params.input`](params.input)
+to turn the domain decomposition into an overlapping decomposition where
+`Grid.Overlap` specifies the number of grid cells in the overlap between processes.
+This can help to increase the convergence speed of the linear solver.
+
+__Tip__: To see the number of linear solver iterations, you can increase the solver verbosity level
+by adding to [`params.input`](params.input) the lines:
+
+```ini
+[LinearSolver]
+Verbosity = 1
+```
+
+or alternatively, run (here for an overlap size of $3$ elements):
+
+```sh
+DUMUX_NUM_THREADS=2 mpirun -np 4 ./example_diffusion -LinearSolver.Verbosity 1 -Grid.Overlap 3
+```
+
+# Implementation
+
+For the code implementation see Part 1 and Part 2.
+
+## Part 1: Model implementation
+
+| [:arrow_right: Click to continue with part 1 of the documentation](doc/model.md) |
+|---:|
+
+
+## Part 2: Main program
+
+| [:arrow_right: Click to continue with part 2 of the documentation](doc/main.md) |
+|---:|
diff --git a/examples/diffusion/doc/_intro.md b/examples/diffusion/doc/_intro.md
new file mode 100644
index 0000000000000000000000000000000000000000..b899853eb0b9f756c630f1c827a5e5889c9ed3cb
--- /dev/null
+++ b/examples/diffusion/doc/_intro.md
@@ -0,0 +1,148 @@
+# Diffusion equation
+
+In this example, we create an application solving the diffusion equation.
+This example is implemented in three files: [`model.hh`](model.hh), [`params.input`](params.input), and [`main.cc`](main.cc).
+The [`model.hh`](model.hh) header implements a simple self-contained diffusion equation model
+to be used with a control-volume finite element discretization like the Box method.
+The [`main.cc`](main.cc) source file contains the main program and [`params.input`](params.input) contains
+runtime parameters. The executable is configured in [`CMakeLists.txt`](CMakeLists.txt) and created with CMake.
+The source code will be discussed in Part 1 and Part 2 after an introduction to the equations and the problem description.
+
+__The main points illustrated in this example are__
+
+* setting up a simple model
+* creating a random initial solution in parallel
+* solving a time-dependent diffusion problem
+* using a linear PDE solver instance
+* reusing the Jacobian matrix for all time steps
+
+__Results__. The simulation result will look something like this:
+
+<figure>
+    <center>
+        <img src="img/diffusion.gif" alt="diffusion"/>
+        <figcaption> <b> Fig.1 </b> - Evolution of an initially random concentration field according to the diffusion equation.</figcaption>
+    </center>
+</figure>
+
+__Table of contents__. This description is structured as follows:
+
+[TOC]
+
+## Equation and problem description
+
+The scalar diffusion equation on a domain $\Omega \subset \mathbb{R}^2$ and time interval $(0,T]$
+with boundary $\partial\Omega = \Gamma_D \cup \Gamma_N$ composed of Dirichlet and Neumann boundaries,
+where for a position $x \in \Omega$ and time $t$, $c(x,t)$ denotes the unknown evolving concentration field, reads
+
+```math
+\begin{aligned}
+\frac{\partial c}{\partial t} - \nabla \cdot (D \nabla c) &= 0    && \mathrm{in}\; \Omega \times (0,T] \\
+                                                        c &= c_D  && \mathrm{on}\; \Gamma_D \times (0,T] \\
+                         -D \nabla c \cdot \boldsymbol{n} &= g_N  && \mathrm{on}\; \Gamma_N \times (0,T] \\
+                                                        c &= c^0  && \mathrm{for}\; t = 0,
+\end{aligned}
+```
+
+with diffusion coefficient $D$, Dirichlet data $c_D$, Neumann data $g_N$,
+and $\boldsymbol{n}$ denoting the outward-oriented unit normal vector on $\partial\Omega$.
+In this example, we will use homogeneous Neumann boundary conditions
+on all of $\partial\Omega$ such that $\Gamma_D = \emptyset$ and $g_N = 0$.
+The initial concentration field $c^0$ is a random field (the discrete control volume
+averages $c^0_{h,B}$ are sampled from a uniform distribution, $c^0_{h,B} \sim U(0,1)$).
+
+We partition $\Omega$ into control volumes $B$ (sometimes called "boxes").
+Integrating our equation over a control volume $B$ and applying the divergence theorem yields
+
+```math
+\begin{aligned}
+\int_B \frac{\partial c}{\partial t} \,\mathrm{d}V - \int_{\partial B} D \nabla c \cdot \boldsymbol{n} \,\mathrm{d}A &= 0.
+\end{aligned}
+```
+
+We discretize in time using an implicit Euler method
+
+```math
+\begin{aligned}
+\int_B \frac{c^{n+1}-c^{n}}{\Delta t} \,\mathrm{d}V - \int_{\partial B} D \nabla c^{n+1} \cdot \boldsymbol{n} \,\mathrm{d}A &= 0.
+\end{aligned}
+```
+
+For the Box method, the control volumes are centered at grid vertices. We partition the control volume boundary
+into sub control volume faces $\sigma$ such that each $\sigma$ belongs to exactly one grid element. The discrete solution
+$c_{h}$ is a piece-wise linear polynomial, and its restriction on an element $K$, denoted by $c_{h,K}$,
+is represented by a linear basis such that
+
+```math
+c_{h,K}(x) = \sum\limits_{B \in \mathcal{B}_K} c_{h,B} N_B(x)
+```
+
+where $\mathcal{B}_K$ is the set of control volumes intersecting element $K$ and $N_B$ are the basis functions associated
+with the vertex corresponding to the center of control volume $B$. Insertion of this ansatz yields the discrete equation
+
+```math
+\begin{aligned}
+\vert B \vert \frac{c_B^{n+1}-c_B^{n}}{\Delta t} - \sum_{\sigma \in \Sigma_{B}} \left[ D \nabla c_h^{n+1} \cdot \boldsymbol{n}_{B,\sigma} \vert \sigma \vert \right] &= 0,
+\end{aligned}
+```
+
+for each control volume $B$, where $\Sigma_{B}$ is the set of sub control volume faces on $\partial B$ and $\boldsymbol{n}_{B,\sigma}$ is the outward-oriented
+unit normal vector on $\partial B$. When assembling the discrete equation, we take an element-centered view. This means that we iterate
+over all elements $K$ in the mesh and for each element add the partial contributions
+
+```math
+\begin{aligned}
+\vert B \cap K \vert \frac{c_B^{n+1}-c_B^{n}}{\Delta t} - \sum_{\sigma \in \Sigma_{BK}} \left[ D \sum\limits_{B \in \mathcal{B}_K} \left\lbrace c_{h,B}^{n+1} \nabla N_B(x) \cdot \boldsymbol{n}_{B,\sigma} \vert \sigma \vert \right\rbrace \right] &= 0,
+\end{aligned}
+```
+
+to the control volume residual of control volume $B$, where $B \cap K$ is the sub control volume given by the intersection of $B$ and $K$
+and $\Sigma_{BK} \subset \Sigma_{B}$ is the set of sub control volume faces in $K$ belonging to $\partial B$.
+Computing the element-local contributions is described in Part 1.
+
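+To make the implicit Euler step concrete, here is a minimal, self-contained
+1D analogue in plain C++ (a sketch for illustration only, not DuMux code:
+cell-centered finite differences with zero-flux boundaries and a direct
+tridiagonal solve, whereas this example uses the 2D Box scheme together with
+DuMux's assembler and iterative solvers):
+
+```cpp
+#include <cstdio>
+#include <vector>
+
+// One implicit Euler step for dc/dt - D d^2c/dx^2 = 0 on a uniform 1D grid
+// with zero-flux boundaries: solve (I - dt D L) c^{n+1} = c^n, where L is
+// the discrete Laplacian, using the Thomas algorithm (tridiagonal solve)
+std::vector<double> implicitEulerStep(std::vector<double> c, double D, double dx, double dt)
+{
+    const int n = c.size();
+    const double r = D*dt/(dx*dx);
+
+    // tridiagonal matrix coefficients; a zero-flux boundary removes one neighbor
+    std::vector<double> lower(n, -r), diag(n, 1.0 + 2.0*r), upper(n, -r);
+    diag.front() = diag.back() = 1.0 + r;
+
+    // forward elimination
+    for (int i = 1; i < n; ++i)
+    {
+        const double m = lower[i]/diag[i-1];
+        diag[i] -= m*upper[i-1];
+        c[i] -= m*c[i-1];
+    }
+
+    // back substitution (c is overwritten with the new solution c^{n+1})
+    c[n-1] /= diag[n-1];
+    for (int i = n-2; i >= 0; --i)
+        c[i] = (c[i] - upper[i]*c[i+1])/diag[i];
+
+    return c;
+}
+
+int main()
+{
+    // 100 cells on the unit interval, D = 1e-4, dt = 0.1 (as in this example)
+    std::vector<double> c(100);
+    for (int i = 0; i < 100; ++i)
+        c[i] = (i % 7)/7.0; // rough, non-smooth initial data
+
+    for (int step = 0; step < 50; ++step)
+        c = implicitEulerStep(c, 1e-4, 0.01, 0.1);
+
+    std::printf("c[50] after 50 steps: %g\n", c[50]);
+}
+```
+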
+### Model parameters
+
+* $\Omega = [0,1]\times[0,1]$ (unit square)
+* End time $T = 5$ and time step size $\Delta t = 0.1$
+* $100 \times 100$ cells (structured Cartesian grid)
+* $D = 0.0001$
+* $c^0_{h,B} \sim U(0,1)$
+
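+These values are read from the parameter tree at run time, so a matching
+[`params.input`](params.input) might look as follows (a sketch based on the
+parameter names used by this example; the shipped file may differ in details):
+
+```ini
+[TimeLoop]
+TEnd = 5 # end time
+Dt = 0.1 # time step size
+
+[Grid]
+UpperRight = 1 1 # unit square domain
+Cells = 100 100 # number of cells per direction
+
+[Problem]
+Name = diffusion
+DiffusionCoefficient = 1e-4
+```
+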
+### Running the example in parallel
+
+By default Dumux will try to speed up the assembly by using shared memory parallelism if a suitable
+backend has been found on your system (one of TBB, OpenMP, Kokkos, C++ parallel algorithms).
+You can limit the number of threads by prepending your executable with `DUMUX_NUM_THREADS=<number>`.
+If you also want to use distributed memory parallelism with MPI (which currently
+works better for the linear solvers), run the executable with your MPI environment.
+Each MPI process will use multi-threading if `DUMUX_NUM_THREADS` is larger than $1$.
+
+Running the example with four MPI processes (distributed memory parallelism)
+each with two threads (shared memory parallelism):
+
+```sh
+DUMUX_NUM_THREADS=2 mpirun -np 4 ./example_diffusion
+```
+
+You can set the parameter `Grid.Overlap` to some non-zero integer in [`params.input`](params.input)
+to turn the domain decomposition into an overlapping decomposition where
+`Grid.Overlap` specifies the number of grid cells in the overlap between processes.
+This can help to increase the convergence speed of the linear solver.
+
+__Tip__: To see the number of linear solver iterations, you can increase the solver verbosity level
+by adding to [`params.input`](params.input) the lines:
+
+```ini
+[LinearSolver]
+Verbosity = 1
+```
+
+or alternatively, run (here for an overlap size of $3$ elements):
+
+```sh
+DUMUX_NUM_THREADS=2 mpirun -np 4 ./example_diffusion -LinearSolver.Verbosity 1 -Grid.Overlap 3
+```
+
+# Implementation
+
+For the code implementation see Part 1 and Part 2.
diff --git a/examples/diffusion/doc/_main.md b/examples/diffusion/doc/_main.md
new file mode 100644
index 0000000000000000000000000000000000000000..a5e58e79849017eb71736e21b4d9d06e01254c7d
--- /dev/null
+++ b/examples/diffusion/doc/_main.md
@@ -0,0 +1 @@
+# Diffusion equation main program
diff --git a/examples/diffusion/doc/_model.md b/examples/diffusion/doc/_model.md
new file mode 100644
index 0000000000000000000000000000000000000000..6f465c4d9706034e266fdec12564aa52463410e5
--- /dev/null
+++ b/examples/diffusion/doc/_model.md
@@ -0,0 +1 @@
+# Diffusion equation model definition
diff --git a/examples/diffusion/doc/main.md b/examples/diffusion/doc/main.md
new file mode 100644
index 0000000000000000000000000000000000000000..28d3bd539ab5ae4faf80e587e086cb8d4ef1a25c
--- /dev/null
+++ b/examples/diffusion/doc/main.md
@@ -0,0 +1,426 @@
+<!-- Important: This file has been automatically generated by generate_example_docs.py. Do not edit this file directly! -->
+
+
+| [:arrow_left: Back to the main documentation](../README.md) | [:arrow_left: Go back to part 1](model.md) |
+|---|---:|
+
+# Diffusion equation main program
+
+
+In the file `main.cc`, we use the previously defined model to
+set up the simulation. The setup consists of four steps:
+1. Define the problem class, setting the boundary conditions and the diffusion coefficient
+2. Configure the property system, reusing the model defined in Part 1
+3. Define a function for setting the random initial condition
+4. Write the main program defining all steps of the simulation
+
+__Table of contents__
+
+[TOC]
+
+We start in `main.cc` with the necessary header includes:
+<details><summary> Click to show includes</summary>
+
+```cpp
+
+#include <config.h>
+```
+
+Include the header containing std::true_type and std::false_type
+
+```cpp
+#include <type_traits>
+```
+
+Include the headers useful for the random initial solution
+
+```cpp
+#include <random>
+#include <dumux/common/random.hh>
+```
+
+As the Dune grid interface implementation we will use
+the structured parallel grid manager YaspGrid
+
+```cpp
+#include <dune/grid/yaspgrid.hh>
+```
+
+Common includes for problem and main
+
+```cpp
+#include <dumux/common/initialize.hh>
+#include <dumux/common/properties.hh>
+#include <dumux/common/parameters.hh>
+#include <dumux/common/numeqvector.hh>
+#include <dumux/common/boundarytypes.hh>
+#include <dumux/common/fvproblem.hh>
+
+// VTK output functionality
+#include <dumux/io/vtkoutputmodule.hh>
+#include <dumux/io/grid/gridmanager_yasp.hh>
+
+// the discretization method
+#include <dumux/discretization/box.hh>
+
+// assembly and solver
+#include <dumux/linear/linearsolvertraits.hh>
+#include <dumux/linear/linearalgebratraits.hh>
+#include <dumux/linear/istlsolvers.hh>
+#include <dumux/linear/pdesolver.hh>
+#include <dumux/assembly/fvassembler.hh>
+```
+
+
+Finally, we include the model defined in Part 1.
+
+```cpp
+#include "model.hh"
+```
+
+</details>
+
+## 1. The problem class
+
+The problem class implements the boundary conditions. It also provides
+an interface that is used by the local residual (see Part 1) to obtain the diffusion
+coefficient. The value is read from the parameter configuration tree.
+
+<details open>
+<summary><b>Click to hide/show the file documentation</b> (or inspect the [source code](../main.cc))</summary>
+
+
+```cpp
+namespace Dumux {
+template<class TypeTag>
+class DiffusionTestProblem : public FVProblem<TypeTag>
+{
+```
+
+<details><summary> Click to show boilerplate code</summary>
+
+```cpp
+
+    using ParentType = FVProblem<TypeTag>;
+    using GridGeometry = GetPropType<TypeTag, Properties::GridGeometry>;
+    using GlobalPosition = typename GridGeometry::LocalView::Element::Geometry::GlobalCoordinate;
+    using Scalar = GetPropType<TypeTag, Properties::Scalar>;
+    using PrimaryVariables = GetPropType<TypeTag, Properties::PrimaryVariables>;
+    using NumEqVector = Dumux::NumEqVector<PrimaryVariables>;
+    using BoundaryTypes = Dumux::BoundaryTypes<GetPropType<TypeTag, Properties::ModelTraits>::numEq()>;
+```
+
+</details>
+In the constructor, we read the diffusion coefficient constant from the
+parameter tree (which is initialized with the content of `params.input`).
+
+```cpp
+public:
+    DiffusionTestProblem(std::shared_ptr<const GridGeometry> gridGeometry)
+    : ParentType(gridGeometry)
+    {
+        diffusionCoefficient_ = getParam<Scalar>("Problem.DiffusionCoefficient");
+    }
+```
+
+As boundary conditions, we specify Neumann everywhere. This means we
+have to prescribe a flux at each boundary sub control volume face.
+
+```cpp
+    BoundaryTypes boundaryTypesAtPos(const GlobalPosition& globalPos) const
+    {
+        BoundaryTypes values;
+        values.setAllNeumann();
+        return values;
+    }
+```
+
+We prescribe zero flux over all of the boundary
+
+```cpp
+    NumEqVector neumannAtPos(const GlobalPosition& globalPos) const
+    { return { 0.0 }; }
+```
+
+The diffusion coefficient interface is used in the local residual (see Part 1).
+We can name this interface however we want as long as we adapt the calling site
+in the `LocalResidual` class in `model.hh`.
+
+```cpp
+    Scalar diffusionCoefficient() const
+    { return diffusionCoefficient_; }
+
+private:
+    Scalar diffusionCoefficient_;
+};
+} // end namespace Dumux
+```
+
+
+</details>
+
+## 2. Property tag and specializations
+
+We create a new tag `DiffusionTest` that inherits all properties
+specialized for the tag `DiffusionModel` (we created this in Part 1)
+and the tag `BoxModel` which provides types relevant for the spatial
+discretization scheme (see [dumux/discretization/box.hh](https://git.iws.uni-stuttgart.de/dumux-repositories/dumux/-/blob/master/dumux/discretization/box.hh)).
+
+Here we choose a short form of specializing properties. The property
+system also recognizes an alias (`using`) with the property name being
+a member of the specified type tag. Note that we could also use the same mechanism
+as in Part 1, for example:
+```cpp
+template<class TypeTag>
+struct Scalar<TypeTag, TTag::DiffusionTest>
+{ using type = double; };
+```
+which has the same effect as having an alias `Scalar = double;`
+as member of the type tag `DiffusionTest`.
+This mechanism allows for terser code.
+In case both definitions are present for the same type tag, the explicit
+specialization (long form) takes precedence.
+
+
+<details open>
+<summary><b>Click to hide/show the file documentation</b> (or inspect the [source code](../main.cc))</summary>
+
+
+```cpp
+namespace Dumux::Properties::TTag {
+
+struct DiffusionTest
+{
+    using InheritsFrom = std::tuple<DiffusionModel, BoxModel>;
+
+    using Scalar = double;
+    using Grid = Dune::YaspGrid<2>;
+
+    template<class TypeTag>
+    using Problem = DiffusionTestProblem<TypeTag>;
+
+    using EnableGridVolumeVariablesCache = std::true_type;
+    using EnableGridFluxVariablesCache = std::true_type;
+    using EnableGridGeometryCache = std::true_type;
+};
+
+} // end namespace Dumux::Properties::TTag
+```
+
+
+</details>
+
+## 3. Creating the initial solution
+
+We want to initialize the entries of the solution vector $c_{h,B}$
+with a random field such that $c_{h,B} \sim U(0,1)$. When running
+with MPI in parallel this requires communication. With just one
+processor (`gg.gridView().comm().size() == 1`), we can just iterate
+over all entries of the solution vector and assign a uniformly
+distributed random number. However, with multiple processes, each
+process's solution vector only contains the subset of the degrees of
+freedom living on that process. Moreover, some degrees of freedom exist
+on multiple processes. Each process generates its own random
+numbers, which means we might generate different numbers for the
+same degree of freedom. Therefore, we need to communicate to make the
+values consistent. The idea is that each process adds its rank number
+(processes are numbered starting from 0) to its values. Then we take the
+minimum over all processes, which returns the value of the process
+with the lowest rank. Afterwards, we subtract the added rank offset
+(since the values are in $[0,1)$, the offset is just the integer part).
+
+
+<details open>
+<summary><b>Click to hide/show the file documentation</b> (or inspect the [source code](../main.cc))</summary>
+
+
+```cpp
+template<class SolutionVector, class GridGeometry>
+SolutionVector createInitialSolution(const GridGeometry& gg)
+{
+    SolutionVector sol(gg.numDofs());
+
+    // Generate a random number and add the processor offset.
+    // For a sequential run, `rank` is always `0`.
+    std::mt19937 gen(0); // seed is 0 for deterministic results
+    Dumux::SimpleUniformDistribution<> dis(0.0, 1.0);
+
+    const auto rank = gg.gridView().comm().rank();
+    for (int n = 0; n < sol.size(); ++n)
+        sol[n] = dis(gen) + rank;
+
+    // We take the value of the process with the minimum rank
+    // and subtract the rank offset
+    if (gg.gridView().comm().size() > 1)
+    {
+        Dumux::VectorCommDataHandleMin<
+            typename GridGeometry::VertexMapper,
+            SolutionVector,
+            GridGeometry::GridView::dimension
+        > minHandle(gg.vertexMapper(), sol);
+        gg.gridView().communicate(minHandle, Dune::All_All_Interface, Dune::ForwardCommunication);
+
+        // remove processor offset
+        for (int n = 0; n < sol.size(); ++n)
+            sol[n][0] -= std::floor(sol[n][0]);
+    }
+
+    return sol;
+}
+```
+
+
+</details>
+
+## 4. The main program
+
+
+<details open>
+<summary><b>Click to hide/show the file documentation</b> (or inspect the [source code](../main.cc))</summary>
+
+
+```cpp
+int main(int argc, char** argv)
+{
+    using namespace Dumux;
+```
+
+First, we initialize MPI and the multithreading backend.
+This convenience function takes care that everything is set up in the right order and
+has to be called for every Dumux simulation. `Dumux::initialize` also respects
+the environment variable `DUMUX_NUM_THREADS` to restrict the number of cores available
+for multi-threaded code parts (for example the assembly).
+
+```cpp
+    Dumux::initialize(argc, argv);
+```
+
+We initialize the parameter tree including the command line arguments.
+By default, this will read all parameters from the configuration file `params.input`
+if such a file exists. Then it will look for command line arguments. For example
+`./example_diffusion -TimeLoop.TEnd 10` will set the end time to $10$ seconds.
+Command line arguments overwrite settings in the parameter file.
+
+```cpp
+    Parameters::init(argc, argv);
+```
+
+We specify an alias for the model type tag.
+We will configure the assembler with this type tag, for which
+we specialized all the properties above and in the model definition (Part 1).
+We can extract type information through properties specialized for the type tag
+using `GetPropType`.
+
+```cpp
+    using TypeTag = Properties::TTag::DiffusionTest;
+
+    using Scalar = GetPropType<TypeTag, Properties::Scalar>;
+    using Grid = GetPropType<TypeTag, Properties::Grid>;
+    using GridGeometry = GetPropType<TypeTag, Properties::GridGeometry>;
+    using Problem = GetPropType<TypeTag, Properties::Problem>;
+    using SolutionVector = GetPropType<TypeTag, Properties::SolutionVector>;
+    using GridVariables = GetPropType<TypeTag, Properties::GridVariables>;
+```
+
+Next, we initialize the grid. Grid parameters are taken from the group
+`[Grid]` in the input file if it exists. You can also pass any other parameter
+group (e.g. `"MyGroup"`) to `init` and then it will look in `[MyGroup.Grid]`.
+
+```cpp
+    GridManager<Grid> gridManager;
+    gridManager.init();
+```
+
+We create the finite volume grid geometry from the (leaf) grid view,
+the problem for the boundary conditions, a solution vector and a grid variables instance.
+
+```cpp
+    auto gridGeometry = std::make_shared<GridGeometry>(gridManager.grid().leafGridView());
+    auto problem = std::make_shared<Problem>(gridGeometry);
+    auto sol = createInitialSolution<SolutionVector>(*gridGeometry);
+    auto gridVariables = std::make_shared<GridVariables>(problem, gridGeometry);
+    gridVariables->init(sol);
+```
+
+We initialize the VTK output module and write out the initial concentration field.
+
+```cpp
+    VtkOutputModule<GridVariables, SolutionVector> vtkWriter(*gridVariables, sol, problem->name());
+    vtkWriter.addVolumeVariable([](const auto& vv){ return vv.priVar(0); }, "c");
+    vtkWriter.write(0.0);
+```
+
+We instantiate the time loop using the start and end time as well as
+the time step size from the parameter tree (`params.input`).
+
+```cpp
+    auto timeLoop = std::make_shared<CheckPointTimeLoop<Scalar>>(
+        getParam<Scalar>("TimeLoop.TStart", 0.0),
+        getParam<Scalar>("TimeLoop.Dt"),
+        getParam<Scalar>("TimeLoop.TEnd")
+    );
+```
+
+Next, we choose the type of assembler, linear solver and PDE solver
+and construct instances of these classes.
+
+```cpp
+    using Assembler = FVAssembler<TypeTag, DiffMethod::numeric>;
+    using LinearSolver = SSORCGIstlSolver<LinearSolverTraits<GridGeometry>, LinearAlgebraTraitsFromAssembler<Assembler>>;
+    using Solver = Dumux::LinearPDESolver<Assembler, LinearSolver>;
+
+    auto oldSol = sol; // copy the vector to store state of previous time step
+    auto assembler = std::make_shared<Assembler>(problem, gridGeometry, gridVariables, timeLoop, oldSol);
+    auto linearSolver = std::make_shared<LinearSolver>(gridGeometry->gridView(), gridGeometry->dofMapper());
+    Solver solver(assembler, linearSolver);
+```
+
+We tell the assembler to create the system matrix and vector objects. Then we
+assemble the system matrix once and tell the solver to reuse it in every time step.
+Since we have a linear problem, the matrix is not going to change.
+
+```cpp
+    assembler->setLinearSystem();
+    assembler->assembleJacobian(sol);
+    solver.reuseMatrix();
+```
+
+The time loop is where most of the actual computations happen.
+We assemble and solve the linear system of equations, update the solution,
+write the solution to a VTK file and continue until we reach the
+final simulation time.
+
+
+```cpp
+    timeLoop->start(); do
+    {
+        // assemble & solve
+        solver.solve(sol);
+
+        // make the new solution the old solution
+        oldSol = sol;
+        gridVariables->advanceTimeStep();
+
+        // advance the time loop to the next step
+        timeLoop->advanceTimeStep();
+
+        // write VTK output (writes out the concentration field)
+        vtkWriter.write(timeLoop->time());
+
+        // report statistics of this time step
+        timeLoop->reportTimeStep();
+
+    } while (!timeLoop->finished());
+
+    timeLoop->finalize(gridGeometry->gridView().comm());
+
+    return 0;
+}
+```
+
+
+</details>
+
+
+| [:arrow_left: Back to the main documentation](../README.md) | [:arrow_left: Go back to part 1](model.md) |
+|---|---:|
+
diff --git a/examples/diffusion/doc/model.md b/examples/diffusion/doc/model.md
new file mode 100644
index 0000000000000000000000000000000000000000..8bbda20e8add11e2769e189f929267326f07379b
--- /dev/null
+++ b/examples/diffusion/doc/model.md
@@ -0,0 +1,245 @@
+<!-- Important: This file has been automatically generated by generate_example_docs.py. Do not edit this file directly! -->
+
+
+| [:arrow_left: Back to the main documentation](../README.md) | [:arrow_right: Continue with part 2](main.md) |
+|---|---:|
+
+# Diffusion equation model definition
+
+
+In the file `model.hh`, we define the model equations and
+set all default model properties. The setup consists of three steps:
+1. Create a model type tag (used to specialize properties)
+2. Define the local residual class implementing the discrete equation
+3. Specialize important properties of the model such that Dumux knows how to assemble the system matrix
+
+__Table of contents__
+
+[TOC]
+
+We start in `model.hh` with the necessary header includes:
+<details><summary> Click to show includes</summary>
+
+```cpp
+
+#include <dumux/common/math.hh>
+#include <dumux/common/properties.hh>
+#include <dumux/common/numeqvector.hh>
+#include <dumux/common/volumevariables.hh>
+#include <dumux/discretization/method.hh>
+```
+
+</details>
+
+## 1. Property Tag
+
+The property tag is simply an empty struct with the name `DiffusionModel`
+
+
+<details open>
+<summary><b>Click to hide/show the file documentation</b> (or inspect the [source code](../model.hh))</summary>
+
+
+```cpp
+namespace Dumux::Properties::TTag {
+//! The diffusion model tag that we can specialize properties for
+struct DiffusionModel {};
+} // end namespace Dumux::Properties::TTag
+```
+
+
+</details>
+
+## 2. The local (element-wise) residual
+
+The local residual assembles the contribution to the residual for
+all degrees of freedom associated with an element. Here, we use the
+Box method, which is based on $P_1$ basis functions (piece-wise linears),
+and the degrees of freedom are located on the nodes. Each node is associated with
+exactly one sub control volume (`scv`) per element and several ($2$ in $\mathbb{R}^2$)
+sub control volume faces (`scvf`). In the local residual, we can implement the
+contribution for one `scv` (storage and source terms) or one `scvf` (flux terms).
+
+Let's have a look at the class implementation.
+
+
+<details open>
+<summary><b>Click to hide/show the file documentation</b> (or inspect the [source code](../model.hh))</summary>
+
+
+The class `DiffusionModelLocalResidual` inherits from something called `BaseLocalResidual`.
+This base class differs depending on the chosen discretization scheme. For the box method
+(which is a control-volume finite element scheme) used in this example, the property
+`BaseLocalResidual` is specialized to `CVFELocalResidual<TypeTag>`
+in [dumux/discretization/box.hh](https://git.iws.uni-stuttgart.de/dumux-repositories/dumux/-/blob/master/dumux/discretization/box.hh).
+Since this local residual only works with control-volume finite element schemes due to
+the flux implementation, we could have also chosen to inherit directly from `CVFELocalResidual<TypeTag>`.
+
+```cpp
+namespace Dumux {
+template<class TypeTag>
+class DiffusionModelLocalResidual
+: public GetPropType<TypeTag, Properties::BaseLocalResidual>
+{
+```
+
+
+**Storage term:** Evaluate the rate of change of all conserved quantities
+
+```cpp
+    NumEqVector computeStorage(const Problem& problem,
+                               const SubControlVolume& scv,
+                               const VolumeVariables& volVars) const
+    {
+        NumEqVector storage;
+        storage[Indices::massBalanceEqIdx] = volVars.priVar(Indices::concentrationIdx);
+        return storage;
+    }
+```
+
+**Flux term:** Evaluate the fluxes over a face of a sub control volume.
+Here we evaluate the (integrated) flux
+
+```math
+F_{K,\sigma} = -D \sum_{B \in \mathcal{B}_K} c_{h,B} \nabla N_B \cdot\boldsymbol{n} \vert \sigma \vert
+```
+
+
+```cpp
+    NumEqVector computeFlux(const Problem& problem,
+                            const Element& element,
+                            const FVElementGeometry& fvGeometry,
+                            const ElementVolumeVariables& elemVolVars,
+                            const SubControlVolumeFace& scvf,
+                            const ElementFluxVariablesCache& elemFluxVarsCache) const
+    {
+        static_assert(DiscretizationMethods::isCVFE<typename GridGeometry::DiscretizationMethod>,
+            "This local residual is hard-coded to control-volume finite element schemes");
+
+        // Compute ∇c at the integration point of this sub control volume face.
+        const auto& fluxVarCache = elemFluxVarsCache[scvf];
+        Dune::FieldVector<Scalar, dimWorld> gradConcentration(0.0);
+        for (const auto& scv : scvs(fvGeometry))
+        {
+            const auto& volVars = elemVolVars[scv];
+            // v.axpy(a, w) means v <- v + a*w
+            gradConcentration.axpy(
+                volVars.priVar(Indices::concentrationIdx),
+                fluxVarCache.gradN(scv.indexInElement())
+            );
+        }
+
+        NumEqVector flux;
+
+        // Compute the flux with `vtmv` (vector transposed times matrix times vector), i.e. -n^T D ∇c A.
+        // The diffusion coefficient comes from the `problem` (see Part 2 of the example).
+        flux[Indices::massBalanceEqIdx] = -1.0*vtmv(
+            scvf.unitOuterNormal(), problem.diffusionCoefficient(), gradConcentration
+        )*scvf.area();
+
+        return flux;
+    }
+};
+} // end namespace Dumux
+```
+
+
+</details>
+
+## 3. The model properties
+
+By specializing properties for our type tag `DiffusionModel`,
+every other class that knows about the type tag (this will be
+for example the assembler or the problem), can extract the
+type information that we specify here.
+
+Note that these types can be overwritten for specific problem
+definitions if this is needed (we will show this on the next page).
+
+
+<details open>
+<summary><b>Click to hide/show the file documentation</b> (or inspect the [source code](../model.hh))</summary>
+
+
+```cpp
+namespace Dumux::Properties {
+```
+
+The type of the local residual is the class defined above
+
+```cpp
+template<class TypeTag>
+struct LocalResidual<TypeTag, TTag::DiffusionModel>
+{ using type = DiffusionModelLocalResidual<TypeTag>; };
+```
+
+The default scalar type is `double`, i.e.
+we compute with double-precision floating point numbers.
+
+```cpp
+template<class TypeTag>
+struct Scalar<TypeTag, TTag::DiffusionModel>
+{ using type = double; };
+```
+
+The model traits specify some information about our equation system.
+Here we have just one equation. We still specify indices so that, in the
+places where we access primary variables, we can do so with a named variable.
+
+```cpp
+template<class TypeTag>
+struct ModelTraits<TypeTag, TTag::DiffusionModel>
+{
+    struct type
+    {
+        struct Indices
+        {
+            static constexpr int concentrationIdx = 0;
+            static constexpr int massBalanceEqIdx = 0;
+        };
+
+        static constexpr int numEq() { return 1; }
+    };
+};
+```
+
+The primary variable vector has entries of type `Scalar` and is
+as large as the number of equations (here 1), but we keep the code general.
+
+```cpp
+template<class TypeTag>
+struct PrimaryVariables<TypeTag, TTag::DiffusionModel>
+{
+    using type = Dune::FieldVector<
+        GetPropType<TypeTag, Properties::Scalar>,
+        GetPropType<TypeTag, Properties::ModelTraits>::numEq()
+    >;
+};
+```
+
+The `BasicVolumeVariables` are the simplest class of volume variables.
+They only store one instance of `PrimaryVariables` for the
+degree of freedom (here: vertex dof) that they are attached to.
+
+```cpp
+template<class TypeTag>
+struct VolumeVariables<TypeTag, TTag::DiffusionModel>
+{
+    struct Traits
+    {
+        using PrimaryVariables
+            = GetPropType<TypeTag, Properties::PrimaryVariables>;
+    };
+    using type = BasicVolumeVariables<Traits>;
+};
+
+} // end namespace Dumux::Properties
+```
+
+
+</details>
+
+
+| [:arrow_left: Back to the main documentation](../README.md) | [:arrow_right: Continue with part 2](main.md) |
+|---|---:|
+
diff --git a/examples/diffusion/img/diffusion.gif b/examples/diffusion/img/diffusion.gif
new file mode 100644
index 0000000000000000000000000000000000000000..e0aecd597a2e3c7c84b85fae961915ccafb1f6e6
Binary files /dev/null and b/examples/diffusion/img/diffusion.gif differ
diff --git a/examples/diffusion/main.cc b/examples/diffusion/main.cc
new file mode 100644
index 0000000000000000000000000000000000000000..fb80e86507353b649afc9a898c5e105636503af5
--- /dev/null
+++ b/examples/diffusion/main.cc
@@ -0,0 +1,339 @@
+// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+// vi: set et ts=4 sw=4 sts=4:
+/*****************************************************************************
+ *   See the file COPYING for full copying permissions.                      *
+ *                                                                           *
+ *   This program is free software: you can redistribute it and/or modify    *
+ *   it under the terms of the GNU General Public License as published by    *
+ *   the Free Software Foundation, either version 3 of the License, or       *
+ *   (at your option) any later version.                                     *
+ *                                                                           *
+ *   This program is distributed in the hope that it will be useful,         *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of          *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the            *
+ *   GNU General Public License for more details.                            *
+ *                                                                           *
+ *   You should have received a copy of the GNU General Public License       *
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.   *
+ *****************************************************************************/
+
+// In the file `main.cc`, we use the previously defined model to
+// set up the simulation. The setup consists of four steps:
+// 1. Define the problem class, setting the boundary conditions and the diffusion coefficient
+// 2. Configure the property system, reusing the model defined in Part 1
+// 3. Define a function for setting the random initial condition
+// 4. Write the main program defining all steps of the simulation
+//
+// __Table of contents__
+//
+// [TOC]
+//
+// We start in `main.cc` with the necessary header includes:
+// [[details]] includes
+#include <config.h>
+
+// Include the header containing std::true_type and std::false_type
+#include <type_traits>
+
+// Include the headers useful for the random initial solution
+#include <random>
+#include <dumux/common/random.hh>
+
+// As the Dune grid interface implementation we will use
+// the structured parallel grid manager YaspGrid
+#include <dune/grid/yaspgrid.hh>
+
+// Common includes for problem and main
+// [[codeblock]]
+#include <dumux/common/initialize.hh>
+#include <dumux/common/properties.hh>
+#include <dumux/common/parameters.hh>
+#include <dumux/common/numeqvector.hh>
+#include <dumux/common/boundarytypes.hh>
+#include <dumux/common/fvproblem.hh>
+
+// VTK output functionality
+#include <dumux/io/vtkoutputmodule.hh>
+#include <dumux/io/grid/gridmanager_yasp.hh>
+
+// the discretization method
+#include <dumux/discretization/box.hh>
+
+// assembly and solver
+#include <dumux/linear/linearsolvertraits.hh>
+#include <dumux/linear/linearalgebratraits.hh>
+#include <dumux/linear/istlsolvers.hh>
+#include <dumux/linear/pdesolver.hh>
+#include <dumux/assembly/fvassembler.hh>
+// [[/codeblock]]
+//
+// Finally, we include the model defined in Part 1.
+#include "model.hh"
+// [[/details]]
+
+//
+// ## 1. The problem class
+//
+// The problem class implements the boundary conditions. It also provides
+// an interface that is used by the local residual (see Part 1) to obtain the diffusion
+// coefficient. The value is read from the parameter configuration tree.
+// [[content]]
+namespace Dumux {
+template<class TypeTag>
+class DiffusionTestProblem : public FVProblem<TypeTag>
+{
+    // [[details]] boilerplate code
+    using ParentType = FVProblem<TypeTag>;
+    using GridGeometry = GetPropType<TypeTag, Properties::GridGeometry>;
+    using GlobalPosition = typename GridGeometry::LocalView::Element::Geometry::GlobalCoordinate;
+    using Scalar = GetPropType<TypeTag, Properties::Scalar>;
+    using PrimaryVariables = GetPropType<TypeTag, Properties::PrimaryVariables>;
+    using NumEqVector = Dumux::NumEqVector<PrimaryVariables>;
+    using BoundaryTypes = Dumux::BoundaryTypes<GetPropType<TypeTag, Properties::ModelTraits>::numEq()>;
+    // [[/details]]
+    // In the constructor, we read the diffusion coefficient constant from the
+    // parameter tree (which is initialized with the content of `params.input`).
+public:
+    DiffusionTestProblem(std::shared_ptr<const GridGeometry> gridGeometry)
+    : ParentType(gridGeometry)
+    {
+        diffusionCoefficient_ = getParam<Scalar>("Problem.DiffusionCoefficient");
+    }
+
+    // As boundary conditions, we specify Neumann everywhere. This means we
+    // have to prescribe a flux at each boundary sub control volume face.
+    BoundaryTypes boundaryTypesAtPos(const GlobalPosition& globalPos) const
+    {
+        BoundaryTypes values;
+        values.setAllNeumann();
+        return values;
+    }
+
+    // We prescribe zero flux over all of the boundary
+    NumEqVector neumannAtPos(const GlobalPosition& globalPos) const
+    { return { 0.0 }; }
+
+    // The diffusion coefficient interface is used in the local residual (see Part 1).
+    // We can name this interface however we want as long as we adapt the calling site
+    // in the `LocalResidual` class in `model.hh`.
+    Scalar diffusionCoefficient() const
+    { return diffusionCoefficient_; }
+
+private:
+    Scalar diffusionCoefficient_;
+};
+} // end namespace Dumux
+// [[/content]]
+
+//
+// ## 2. Property tag and specializations
+//
+// We create a new tag `DiffusionTest` that inherits all properties
+// specialized for the tag `DiffusionModel` (we created this in Part 1)
+// and the tag `BoxModel` which provides types relevant for the spatial
+// discretization scheme (see [dumux/discretization/box.hh](https://git.iws.uni-stuttgart.de/dumux-repositories/dumux/-/blob/master/dumux/discretization/box.hh)).
+//
+// Here we choose a short form of specializing properties. The property
+// system also recognizes an alias (`using`) with the property name being
+// a member of the specified type tag. Note that we could also use the same mechanism
+// as in Part 1, for example:
+// ```cpp
+// template<class TypeTag>
+// struct Scalar<TypeTag, TTag::DiffusionTest>
+// { using type = double; };
+// ```
+// which has the same effect as having an alias `Scalar = double;`
+// as member of the type tag `DiffusionTest`.
+// This mechanism allows for terser code.
+// In case both definitions are present for the same type tag, the explicit
+// specialization (long form) takes precedence.
+//
+// [[content]]
+namespace Dumux::Properties::TTag {
+
+struct DiffusionTest
+{
+    using InheritsFrom = std::tuple<DiffusionModel, BoxModel>;
+
+    using Scalar = double;
+    using Grid = Dune::YaspGrid<2>;
+
+    template<class TypeTag>
+    using Problem = DiffusionTestProblem<TypeTag>;
+
+    using EnableGridVolumeVariablesCache = std::true_type;
+    using EnableGridFluxVariablesCache = std::true_type;
+    using EnableGridGeometryCache = std::true_type;
+};
+
+} // end namespace Dumux::Properties::TTag
+// [[/content]]
+
+//
+// ## 3. Creating the initial solution
+//
+// We want to initialize the entries of the solution vector $c_{h,B}$
+// with a random field such that $c_{h,B} \sim U(0,1)$. When running
+// with MPI in parallel, this requires communication. With just one
+// process (`gg.gridView().comm().size() == 1`), we can simply iterate
+// over all entries of the solution vector and assign a uniformly
+// distributed random number. With multiple processes, however, each
+// process's solution vector only contains the subset of degrees of
+// freedom living on that process. Moreover, some degrees of freedom
+// exist on multiple processes. Since each process generates its own
+// random numbers, we might generate different numbers for the same
+// degree of freedom. Therefore, we need to communicate. The idea is
+// that each process adds its rank (processes are numbered starting
+// from 0) to its values. Then we take the minimum over all processes,
+// which, for each degree of freedom, yields the value of the process
+// with the lowest rank. Afterwards, we subtract the added rank offset.
+//
+// [[content]]
+// [[codeblock]]
+template<class SolutionVector, class GridGeometry>
+SolutionVector createInitialSolution(const GridGeometry& gg)
+{
+    SolutionVector sol(gg.numDofs());
+
+    // Generate a random number and add the processor rank as an offset.
+    // In a sequential run, `rank` is always `0`.
+    std::mt19937 gen(0); // seed is 0 for deterministic results
+    Dumux::SimpleUniformDistribution<> dis(0.0, 1.0);
+
+    const auto rank = gg.gridView().comm().rank();
+    for (int n = 0; n < sol.size(); ++n)
+        sol[n] = dis(gen) + rank;
+
+    // For each degree of freedom, we take the value of the process with
+    // the minimum rank and then subtract the rank offset again
+    if (gg.gridView().comm().size() > 1)
+    {
+        Dumux::VectorCommDataHandleMin<
+            typename GridGeometry::VertexMapper,
+            SolutionVector,
+            GridGeometry::GridView::dimension
+        > minHandle(gg.vertexMapper(), sol);
+        gg.gridView().communicate(minHandle, Dune::All_All_Interface, Dune::ForwardCommunication);
+
+        // remove the rank offset: each entry is now u + r, with u in [0,1) and
+        // r the minimum rank owning this dof, so subtracting the floor leaves u
+        for (int n = 0; n < sol.size(); ++n)
+            sol[n][0] -= std::floor(sol[n][0]);
+    }
+
+    return sol;
+}
+// [[/codeblock]]
+// [[/content]]
+//
+// ## 4. The main program
+//
+// [[content]]
+int main(int argc, char** argv)
+{
+    using namespace Dumux;
+
+    // First, we initialize MPI and the multithreading backend.
+    // This convenience function ensures that everything is set up in the right
+    // order and has to be called for every Dumux simulation. `Dumux::initialize`
+    // also respects the environment variable `DUMUX_NUM_THREADS` to restrict the
+    // number of cores available to multi-threaded code parts (for example, the assembly).
+    Dumux::initialize(argc, argv);
+
+    // We initialize the parameter tree, including command line arguments.
+    // By default, this reads all parameters from the configuration file `params.input`
+    // if such a file exists. Afterwards, command line arguments are parsed. For example,
+    // `./example_diffusion -TimeLoop.TEnd 10` sets the end time to $10$ seconds.
+    // Command line arguments overwrite settings in the parameter file.
+    Parameters::init(argc, argv);
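+    // As a small usage sketch (parameter name hypothetical), parameters can be
+    // read anywhere after this call, optionally with a fallback default value:
+    // ```cpp
+    // const auto verbose = getParam<bool>("MyGroup.Verbose", false);
+    // ```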
+
+    // We specify an alias for the model type tag. We configure the assembler
+    // with this type tag, for which we specialized all relevant properties above
+    // and in the model definition (Part 1). Type information can then be extracted
+    // from properties specialized for this type tag using `GetPropType`.
+    using TypeTag = Properties::TTag::DiffusionTest;
+
+    using Scalar = GetPropType<TypeTag, Properties::Scalar>;
+    using Grid = GetPropType<TypeTag, Properties::Grid>;
+    using GridGeometry = GetPropType<TypeTag, Properties::GridGeometry>;
+    using Problem = GetPropType<TypeTag, Properties::Problem>;
+    using SolutionVector = GetPropType<TypeTag, Properties::SolutionVector>;
+    using GridVariables = GetPropType<TypeTag, Properties::GridVariables>;
+
+    // Next, we initialize the grid. Grid parameters are taken from the input file
+    // from the group `[Grid]` if it exists. You can also pass any other parameter
+    // group (e.g. `"MyGroup"`) to `init` and then it will look in `[MyGroup.Grid]`.
+    GridManager<Grid> gridManager;
+    gridManager.init();
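+    // A short sketch of the parameter group mechanism (group name hypothetical):
+    // ```cpp
+    // gridManager.init("MyGroup"); // looks for parameters in [MyGroup.Grid]
+    // ```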
+
+    // We create the finite volume grid geometry from the (leaf) grid view,
+    // the problem for the boundary conditions, a solution vector, and a grid variables instance.
+    auto gridGeometry = std::make_shared<GridGeometry>(gridManager.grid().leafGridView());
+    auto problem = std::make_shared<Problem>(gridGeometry);
+    auto sol = createInitialSolution<SolutionVector>(*gridGeometry);
+    auto gridVariables = std::make_shared<GridVariables>(problem, gridGeometry);
+    gridVariables->init(sol);
+
+    // We initialize the VTK output module and write out the initial concentration field
+    VtkOutputModule<GridVariables, SolutionVector> vtkWriter(*gridVariables, sol, problem->name());
+    vtkWriter.addVolumeVariable([](const auto& vv){ return vv.priVar(0); }, "c");
+    vtkWriter.write(0.0);
+
+    // We instantiate the time loop using the start and end times as well as
+    // the time step size from the parameter tree (`params.input`)
+    auto timeLoop = std::make_shared<CheckPointTimeLoop<Scalar>>(
+        getParam<Scalar>("TimeLoop.TStart", 0.0),
+        getParam<Scalar>("TimeLoop.Dt"),
+        getParam<Scalar>("TimeLoop.TEnd")
+    );
+
+    // Next, we choose the type of assembler, linear solver and PDE solver
+    // and construct instances of these classes.
+    using Assembler = FVAssembler<TypeTag, DiffMethod::numeric>;
+    using LinearSolver = SSORCGIstlSolver<LinearSolverTraits<GridGeometry>, LinearAlgebraTraitsFromAssembler<Assembler>>;
+    using Solver = Dumux::LinearPDESolver<Assembler, LinearSolver>;
+
+    auto oldSol = sol; // copy the vector to store state of previous time step
+    auto assembler = std::make_shared<Assembler>(problem, gridGeometry, gridVariables, timeLoop, oldSol);
+    auto linearSolver = std::make_shared<LinearSolver>(gridGeometry->gridView(), gridGeometry->dofMapper());
+    Solver solver(assembler, linearSolver);
+
+    // We tell the assembler to create the system matrix and vector objects. Then we
+    // assemble the system matrix once and tell the solver to reuse it in every time step.
+    // Since the problem is linear, the matrix is not going to change.
+    assembler->setLinearSystem();
+    assembler->assembleJacobian(sol);
+    solver.reuseMatrix();
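+    // Note that reusing the matrix is only valid because the equations are linear
+    // and the time step size stays constant here: the storage term is scaled by
+    // the inverse time step size, so with an adaptive step size the matrix would
+    // have to be reassembled.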
+
+    // The time loop is where most of the actual computations happen.
+    // We assemble and solve the linear system of equations, update the solution,
+    // write the solution to a VTK file, and continue until we reach the
+    // final simulation time.
+    //
+    // [[codeblock]]
+    timeLoop->start(); do
+    {
+        // assemble & solve
+        solver.solve(sol);
+
+        // make the new solution the old solution
+        oldSol = sol;
+        gridVariables->advanceTimeStep();
+
+        // advance the time loop to the next step
+        timeLoop->advanceTimeStep();
+
+        // write VTK output (writes out the concentration field)
+        vtkWriter.write(timeLoop->time());
+
+        // report statistics of this time step
+        timeLoop->reportTimeStep();
+
+    } while (!timeLoop->finished());
+
+    timeLoop->finalize(gridGeometry->gridView().comm());
+
+    return 0;
+}
+// [[/codeblock]]
+// [[/content]]
diff --git a/examples/diffusion/model.hh b/examples/diffusion/model.hh
new file mode 100644
index 0000000000000000000000000000000000000000..fcf7220f7290fa90f33a82305dcdbdc2fc428c66
--- /dev/null
+++ b/examples/diffusion/model.hh
@@ -0,0 +1,237 @@
+// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+// vi: set et ts=4 sw=4 sts=4:
+/*****************************************************************************
+ *   See the file COPYING for full copying permissions.                      *
+ *                                                                           *
+ *   This program is free software: you can redistribute it and/or modify    *
+ *   it under the terms of the GNU General Public License as published by    *
+ *   the Free Software Foundation, either version 3 of the License, or       *
+ *   (at your option) any later version.                                     *
+ *                                                                           *
+ *   This program is distributed in the hope that it will be useful,         *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of          *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the            *
+ *   GNU General Public License for more details.                            *
+ *                                                                           *
+ *   You should have received a copy of the GNU General Public License       *
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>.   *
+ *****************************************************************************/
+
+#ifndef DUMUX_EXAMPLES_DIFFUSION_MODEL_HH
+#define DUMUX_EXAMPLES_DIFFUSION_MODEL_HH
+
+// In the file `model.hh`, we define the model equations and
+// set all default model properties. The setup consists of three steps:
+// 1. Create a model type tag (used to specialize properties)
+// 2. Define the local residual class implementing the discrete equation
+// 3. Specialize important properties of the model such that Dumux knows how to assemble the system matrix
+//
+// __Table of contents__
+//
+// [TOC]
+//
+// We start in `model.hh` with the necessary header includes:
+// [[details]] includes
+#include <dumux/common/math.hh>
+#include <dumux/common/properties.hh>
+#include <dumux/common/numeqvector.hh>
+#include <dumux/common/volumevariables.hh>
+#include <dumux/discretization/method.hh>
+// [[/details]]
+//
+// ## 1. Property Tag
+//
+// The property tag is simply an empty struct with the name `DiffusionModel`
+//
+// [[content]]
+// [[codeblock]]
+namespace Dumux::Properties::TTag {
+//! The diffusion model tag that we can specialize properties for
+struct DiffusionModel {};
+} // end namespace Dumux::Properties::TTag
+// [[/codeblock]]
+// [[/content]]
+//
+// ## 2. The local (element-wise) residual
+//
+// The local residual assembles the contribution to the residual for
+// all degrees of freedom associated with an element. Here, we use the
+// box method, which is based on $P_1$ basis functions (piece-wise linears),
+// so the degrees of freedom sit on the grid nodes (vertices). Within each
+// element, every node is associated with exactly one sub control volume (`scv`)
+// and several ($2$ in $\mathbb{R}^2$) sub control volume faces (`scvf`). In the
+// local residual, we implement the contribution for one `scv` (storage and
+// source terms) or one `scvf` (flux terms).
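+//
+// As a rough sketch (names are placeholders, not part of this file) of how
+// element-local geometry is typically traversed in DuMux:
+// ```cpp
+// auto fvGeometry = localView(gridGeometry);
+// fvGeometry.bind(element);
+// for (const auto& scv : scvs(fvGeometry))
+//     { /* storage and source contributions */ }
+// for (const auto& scvf : scvfs(fvGeometry))
+//     { /* flux contributions */ }
+// ```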
+//
+// Let's have a look at the class implementation.
+//
+// [[content]]
+//
+// The class `DiffusionModelLocalResidual` inherits from something called `BaseLocalResidual`.
+// This base class differs depending on the chosen discretization scheme. For the box method
+// (which is a control-volume finite element scheme) used in this example, the property
+// `BaseLocalResidual` is specialized to `CVFELocalResidual<TypeTag>`
+// in [dumux/discretization/box.hh](https://git.iws.uni-stuttgart.de/dumux-repositories/dumux/-/blob/master/dumux/discretization/box.hh).
+// Since this local residual only works with control-volume finite element schemes due to
+// the flux implementation, we could have also inherited directly from `CVFELocalResidual<TypeTag>`.
+namespace Dumux {
+template<class TypeTag>
+class DiffusionModelLocalResidual
+: public GetPropType<TypeTag, Properties::BaseLocalResidual>
+{
+    // [[exclude]]
+    // the base local residual is selected depending on the chosen discretization scheme
+    using ParentType = GetPropType<TypeTag, Properties::BaseLocalResidual>;
+    using Scalar = GetPropType<TypeTag, Properties::Scalar>;
+    using Problem = GetPropType<TypeTag, Properties::Problem>;
+    using NumEqVector = Dumux::NumEqVector<GetPropType<TypeTag, Properties::PrimaryVariables>>;
+
+    using GridVariables = GetPropType<TypeTag, Properties::GridVariables>;
+    using VolumeVariables = typename GridVariables::GridVolumeVariables::VolumeVariables;
+    using ElementVolumeVariables = typename GridVariables::GridVolumeVariables::LocalView;
+    using ElementFluxVariablesCache = typename GridVariables::GridFluxVariablesCache::LocalView;
+
+    using GridGeometry = GetPropType<TypeTag, Properties::GridGeometry>;
+    using FVElementGeometry = typename GridGeometry::LocalView;
+    using SubControlVolume = typename GridGeometry::SubControlVolume;
+    using SubControlVolumeFace = typename GridGeometry::SubControlVolumeFace;
+    using GridView = typename GridGeometry::GridView;
+    using Element = typename GridView::template Codim<0>::Entity;
+
+    using ModelTraits = GetPropType<TypeTag, Properties::ModelTraits>;
+    using Indices = typename ModelTraits::Indices;
+    static constexpr int dimWorld = GridView::dimensionworld;
+
+public:
+    using ParentType::ParentType;
+    // [[/exclude]]
+    //
+    // **Storage term:** Evaluate the rate of change of all conserved quantities.
+    // [[codeblock]]
+    NumEqVector computeStorage(const Problem& problem,
+                               const SubControlVolume& scv,
+                               const VolumeVariables& volVars) const
+    {
+        NumEqVector storage;
+        storage[Indices::massBalanceEqIdx] = volVars.priVar(Indices::concentrationIdx);
+        return storage;
+    }
+    // [[/codeblock]]
+
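+    // Note: `computeStorage` returns the conserved quantity per volume; the base
+    // local residual takes care of the time discretization, scaling this term with
+    // the sub control volume's volume and the inverse time step size.
+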
+    // **Flux term:** Evaluate the fluxes over a face of a sub control volume.
+    // Here we evaluate the (integrated) flux
+    //
+    // ```math
+    // F_{K,\sigma} = -D \sum_{B \in \mathcal{B}_K} c_{h,B} \nabla N_B \cdot \boldsymbol{n} \vert \sigma \vert,
+    // ```
+    //
+    // where $D$ is the diffusion coefficient, $\mathcal{B}_K$ the set of nodes of
+    // element $K$, $N_B$ the basis function and $c_{h,B}$ the discrete solution
+    // value associated with node $B$, $\boldsymbol{n}$ the unit outer normal,
+    // and $\vert \sigma \vert$ the area of the face $\sigma$.
+    //
+    // [[codeblock]]
+    NumEqVector computeFlux(const Problem& problem,
+                            const Element& element,
+                            const FVElementGeometry& fvGeometry,
+                            const ElementVolumeVariables& elemVolVars,
+                            const SubControlVolumeFace& scvf,
+                            const ElementFluxVariablesCache& elemFluxVarsCache) const
+    {
+        static_assert(DiscretizationMethods::isCVFE<typename GridGeometry::DiscretizationMethod>,
+            "This local residual is hard-coded to control-volume finite element schemes");
+
+        // Compute ∇c at the integration point of this sub control volume face.
+        const auto& fluxVarCache = elemFluxVarsCache[scvf];
+        Dune::FieldVector<Scalar, dimWorld> gradConcentration(0.0);
+        for (const auto& scv : scvs(fvGeometry))
+        {
+            const auto& volVars = elemVolVars[scv];
+            // v.axpy(a, w) means v <- v + a*w
+            gradConcentration.axpy(
+                volVars.priVar(Indices::concentrationIdx),
+                fluxVarCache.gradN(scv.indexInElement())
+            );
+        }
+
+        NumEqVector flux;
+
+        // Compute the flux with `vtmv` (vector transposed times matrix times vector),
+        // i.e. -n^T D ∇c |σ|, where |σ| = `scvf.area()` is the face area.
+        // The diffusion coefficient comes from the `problem` (see Part 2 of the example).
+        flux[Indices::massBalanceEqIdx] = -1.0*vtmv(
+            scvf.unitOuterNormal(), problem.diffusionCoefficient(), gradConcentration
+        )*scvf.area();
+
+        return flux;
+    }
+};
+} // end namespace Dumux
+// [[/codeblock]]
+// [[/content]]
+//
+// ## 3. The model properties
+//
+// By specializing properties for our type tag `DiffusionModel`,
+// every other class that knows about the type tag (this will be
+// for example the assembler or the problem), can extract the
+// type information that we specify here.
+//
+// Note that these types can be overwritten for specific problem
+// definitions if this is needed (we will show this on the next page).
+//
+// [[content]]
+namespace Dumux::Properties {
+
+// The type of the local residual is the class defined above
+template<class TypeTag>
+struct LocalResidual<TypeTag, TTag::DiffusionModel>
+{ using type = DiffusionModelLocalResidual<TypeTag>; };
+
+// The default scalar type is `double`:
+// we compute with double-precision floating point numbers
+template<class TypeTag>
+struct Scalar<TypeTag, TTag::DiffusionModel>
+{ using type = double; };
+
+// The model traits specify some information about our equation system.
+// Here, we have just one equation. We still specify indices so that, wherever
+// we access primary variables or equations, we can do so via named variables.
+template<class TypeTag>
+struct ModelTraits<TypeTag, TTag::DiffusionModel>
+{
+    struct type
+    {
+        struct Indices
+        {
+            static constexpr int concentrationIdx = 0;
+            static constexpr int massBalanceEqIdx = 0;
+        };
+
+        static constexpr int numEq() { return 1; }
+    };
+};
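+
+// With these named indices, code like the local residual above can write
+// `volVars.priVar(Indices::concentrationIdx)` instead of using a bare index `0`.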
+
+// The primary variable vector has entries of type `Scalar` and is
+// as large as the number of equations (here: 1), but we keep the definition general.
+template<class TypeTag>
+struct PrimaryVariables<TypeTag, TTag::DiffusionModel>
+{
+    using type = Dune::FieldVector<
+        GetPropType<TypeTag, Properties::Scalar>,
+        GetPropType<TypeTag, Properties::ModelTraits>::numEq()
+    >;
+};
+
+// The `BasicVolumeVariables` are the simplest class of volume variables.
+// They only store one instance of `PrimaryVariables` for the
+// degree of freedom (here: vertex dof) that they are attached to.
+template<class TypeTag>
+struct VolumeVariables<TypeTag, TTag::DiffusionModel>
+{
+    struct Traits
+    {
+        using PrimaryVariables
+            = GetPropType<TypeTag, Properties::PrimaryVariables>;
+    };
+    using type = BasicVolumeVariables<Traits>;
+};
+
+} // end namespace Dumux::Properties
+// [[/content]]
+// [[exclude]]
+#endif
+// [[/exclude]]
diff --git a/examples/diffusion/params.input b/examples/diffusion/params.input
new file mode 100644
index 0000000000000000000000000000000000000000..7fb645d26a677d06f1b637387ecfe8f15f14bc14
--- /dev/null
+++ b/examples/diffusion/params.input
@@ -0,0 +1,13 @@
+[TimeLoop]
+TEnd = 5.0
+Dt = 0.1
+
+[Grid]
+LowerLeft = 0 0
+UpperRight = 1 1
+Cells = 100 100
+Overlap = 0
+
+[Problem]
+Name = diffusion
+DiffusionCoefficient = 0.0001
diff --git a/test/references/example_diffusion-reference.vtu b/test/references/example_diffusion-reference.vtu
new file mode 100644
index 0000000000000000000000000000000000000000..169c52e3c660c4cfe8f0826cee9c927969feb766
--- /dev/null
+++ b/test/references/example_diffusion-reference.vtu
@@ -0,0 +1,66 @@
+<?xml version="1.0"?>
+<VTKFile type="UnstructuredGrid" version="0.1" byte_order="LittleEndian">
+  <UnstructuredGrid>
+    <Piece NumberOfCells="36" NumberOfPoints="49">
+      <PointData Scalars="c">
+        <DataArray type="Float32" Name="c" NumberOfComponents="1" format="ascii">
+          0.573644 0.594092 0.709256 0.498368 0.674927 0.61297 0.741987 0.615632 0.593599 0.452598 0.69915 0.477316
+          0.53875 0.41387 0.720189 0.302494 0.751592 0.392863 0.425794 0.517298 0.702481 0.653806 0.501159 0.476217
+          0.514364 0.429345 0.763388 0.774239 0.286512 0.388567 0.2551 0.561912 0.2726 0.487823 0.757131 0.805291
+          0.667609 0.310247 0.716878 0.767391 0.81094 0.548676 0.790928 0.732083 0.4973 0.571787 0.768192 0.682043
+          0.356806
+        </DataArray>
+      </PointData>
+      <CellData Scalars="process rank">
+        <DataArray type="Float32" Name="process rank" NumberOfComponents="1" format="ascii">
+          0 0 0 0 0 0 0 0 0 0 0 0
+          0 0 0 0 0 0 0 0 0 0 0 0
+          0 0 0 0 0 0 0 0 0 0 0 0
+        </DataArray>
+      </CellData>
+      <Points>
+        <DataArray type="Float32" Name="Coordinates" NumberOfComponents="3" format="ascii">
+          0 0 0 0.166667 0 0 0 0.166667 0 0.166667 0.166667 0
+          0.333333 0 0 0.333333 0.166667 0 0.5 0 0 0.5 0.166667 0
+          0.666667 0 0 0.666667 0.166667 0 0.833333 0 0 0.833333 0.166667 0
+          1 0 0 1 0.166667 0 0 0.333333 0 0.166667 0.333333 0
+          0.333333 0.333333 0 0.5 0.333333 0 0.666667 0.333333 0 0.833333 0.333333 0
+          1 0.333333 0 0 0.5 0 0.166667 0.5 0 0.333333 0.5 0
+          0.5 0.5 0 0.666667 0.5 0 0.833333 0.5 0 1 0.5 0
+          0 0.666667 0 0.166667 0.666667 0 0.333333 0.666667 0 0.5 0.666667 0
+          0.666667 0.666667 0 0.833333 0.666667 0 1 0.666667 0 0 0.833333 0
+          0.166667 0.833333 0 0.333333 0.833333 0 0.5 0.833333 0 0.666667 0.833333 0
+          0.833333 0.833333 0 1 0.833333 0 0 1 0 0.166667 1 0
+          0.333333 1 0 0.5 1 0 0.666667 1 0 0.833333 1 0
+          1 1 0
+        </DataArray>
+      </Points>
+      <Cells>
+        <DataArray type="Int32" Name="connectivity" NumberOfComponents="1" format="ascii">
+          0 1 3 2 1 4 5 3 4 6 7 5
+          6 8 9 7 8 10 11 9 10 12 13 11
+          2 3 15 14 3 5 16 15 5 7 17 16
+          7 9 18 17 9 11 19 18 11 13 20 19
+          14 15 22 21 15 16 23 22 16 17 24 23
+          17 18 25 24 18 19 26 25 19 20 27 26
+          21 22 29 28 22 23 30 29 23 24 31 30
+          24 25 32 31 25 26 33 32 26 27 34 33
+          28 29 36 35 29 30 37 36 30 31 38 37
+          31 32 39 38 32 33 40 39 33 34 41 40
+          35 36 43 42 36 37 44 43 37 38 45 44
+          38 39 46 45 39 40 47 46 40 41 48 47
+        </DataArray>
+        <DataArray type="Int32" Name="offsets" NumberOfComponents="1" format="ascii">
+          4 8 12 16 20 24 28 32 36 40 44 48
+          52 56 60 64 68 72 76 80 84 88 92 96
+          100 104 108 112 116 120 124 128 132 136 140 144
+        </DataArray>
+        <DataArray type="UInt8" Name="types" NumberOfComponents="1" format="ascii">
+          9 9 9 9 9 9 9 9 9 9 9 9
+          9 9 9 9 9 9 9 9 9 9 9 9
+          9 9 9 9 9 9 9 9 9 9 9 9
+        </DataArray>
+      </Cells>
+    </Piece>
+  </UnstructuredGrid>
+</VTKFile>
diff --git a/test/references/example_diffusion_parallel_p0-reference.vtu b/test/references/example_diffusion_parallel_p0-reference.vtu
new file mode 100644
index 0000000000000000000000000000000000000000..2a1f5d044b092813ee5a12e0d174b80d7fc605ad
--- /dev/null
+++ b/test/references/example_diffusion_parallel_p0-reference.vtu
@@ -0,0 +1,49 @@
+<?xml version="1.0"?>
+<VTKFile type="UnstructuredGrid" version="0.1" byte_order="LittleEndian">
+  <UnstructuredGrid>
+    <Piece NumberOfCells="18" NumberOfPoints="28">
+      <PointData Scalars="c">
+        <DataArray type="Float32" Name="c" NumberOfComponents="1" format="ascii">
+          0.600795 0.628791 0.619313 0.751779 0.708185 0.607276 0.7937 0.767298 0.486278 0.603874 0.617154 0.463888
+          0.463939 0.407136 0.694774 0.240767 0.77346 0.411375 0.434334 0.460175 0.724304 0.715433 0.555488 0.526637
+          0.606071 0.523224 0.790582 0.739017
+        </DataArray>
+      </PointData>
+      <CellData Scalars="process rank">
+        <DataArray type="Float32" Name="process rank" NumberOfComponents="1" format="ascii">
+          0 0 0 0 0 0 0 0 0 0 0 0
+          0 0 0 0 0 0
+        </DataArray>
+      </CellData>
+      <Points>
+        <DataArray type="Float32" Name="Coordinates" NumberOfComponents="3" format="ascii">
+          0 0 0 0.166667 0 0 0 0.166667 0 0.166667 0.166667 0
+          0.333333 0 0 0.333333 0.166667 0 0.5 0 0 0.5 0.166667 0
+          0 0.333333 0 0.166667 0.333333 0 0.333333 0.333333 0 0.5 0.333333 0
+          0 0.5 0 0.166667 0.5 0 0.333333 0.5 0 0.5 0.5 0
+          0 0.666667 0 0.166667 0.666667 0 0.333333 0.666667 0 0.5 0.666667 0
+          0 0.833333 0 0.166667 0.833333 0 0.333333 0.833333 0 0.5 0.833333 0
+          0 1 0 0.166667 1 0 0.333333 1 0 0.5 1 0
+        </DataArray>
+      </Points>
+      <Cells>
+        <DataArray type="Int32" Name="connectivity" NumberOfComponents="1" format="ascii">
+          0 1 3 2 1 4 5 3 4 6 7 5
+          2 3 9 8 3 5 10 9 5 7 11 10
+          8 9 13 12 9 10 14 13 10 11 15 14
+          12 13 17 16 13 14 18 17 14 15 19 18
+          16 17 21 20 17 18 22 21 18 19 23 22
+          20 21 25 24 21 22 26 25 22 23 27 26
+        </DataArray>
+        <DataArray type="Int32" Name="offsets" NumberOfComponents="1" format="ascii">
+          4 8 12 16 20 24 28 32 36 40 44 48
+          52 56 60 64 68 72
+        </DataArray>
+        <DataArray type="UInt8" Name="types" NumberOfComponents="1" format="ascii">
+          9 9 9 9 9 9 9 9 9 9 9 9
+          9 9 9 9 9 9
+        </DataArray>
+      </Cells>
+    </Piece>
+  </UnstructuredGrid>
+</VTKFile>
diff --git a/test/references/example_diffusion_parallel_p1-reference.vtu b/test/references/example_diffusion_parallel_p1-reference.vtu
new file mode 100644
index 0000000000000000000000000000000000000000..d762cb3a3370eb4c44d0a331059269d8f42b78ad
--- /dev/null
+++ b/test/references/example_diffusion_parallel_p1-reference.vtu
@@ -0,0 +1,49 @@
+<?xml version="1.0"?>
+<VTKFile type="UnstructuredGrid" version="0.1" byte_order="LittleEndian">
+  <UnstructuredGrid>
+    <Piece NumberOfCells="18" NumberOfPoints="28">
+      <PointData Scalars="c">
+        <DataArray type="Float32" Name="c" NumberOfComponents="1" format="ascii">
+          0.7937 0.659834 0.767298 0.772924 0.709495 0.608217 0.782586 0.755256 0.463888 0.597787 0.617481 0.473542
+          0.240767 0.370738 0.694056 0.275299 0.460175 0.363768 0.432353 0.475921 0.526637 0.689418 0.553917 0.530383
+          0.739017 0.517034 0.789881 0.751804
+        </DataArray>
+      </PointData>
+      <CellData Scalars="process rank">
+        <DataArray type="Float32" Name="process rank" NumberOfComponents="1" format="ascii">
+          1 1 1 1 1 1 1 1 1 1 1 1
+          1 1 1 1 1 1
+        </DataArray>
+      </CellData>
+      <Points>
+        <DataArray type="Float32" Name="Coordinates" NumberOfComponents="3" format="ascii">
+          0.5 0 0 0.666667 0 0 0.5 0.166667 0 0.666667 0.166667 0
+          0.833333 0 0 0.833333 0.166667 0 1 0 0 1 0.166667 0
+          0.5 0.333333 0 0.666667 0.333333 0 0.833333 0.333333 0 1 0.333333 0
+          0.5 0.5 0 0.666667 0.5 0 0.833333 0.5 0 1 0.5 0
+          0.5 0.666667 0 0.666667 0.666667 0 0.833333 0.666667 0 1 0.666667 0
+          0.5 0.833333 0 0.666667 0.833333 0 0.833333 0.833333 0 1 0.833333 0
+          0.5 1 0 0.666667 1 0 0.833333 1 0 1 1 0
+        </DataArray>
+      </Points>
+      <Cells>
+        <DataArray type="Int32" Name="connectivity" NumberOfComponents="1" format="ascii">
+          0 1 3 2 1 4 5 3 4 6 7 5
+          2 3 9 8 3 5 10 9 5 7 11 10
+          8 9 13 12 9 10 14 13 10 11 15 14
+          12 13 17 16 13 14 18 17 14 15 19 18
+          16 17 21 20 17 18 22 21 18 19 23 22
+          20 21 25 24 21 22 26 25 22 23 27 26
+        </DataArray>
+        <DataArray type="Int32" Name="offsets" NumberOfComponents="1" format="ascii">
+          4 8 12 16 20 24 28 32 36 40 44 48
+          52 56 60 64 68 72
+        </DataArray>
+        <DataArray type="UInt8" Name="types" NumberOfComponents="1" format="ascii">
+          9 9 9 9 9 9 9 9 9 9 9 9
+          9 9 9 9 9 9
+        </DataArray>
+      </Cells>
+    </Piece>
+  </UnstructuredGrid>
+</VTKFile>