diff --git a/examples/README.md b/examples/README.md
index 0a954f37ab07da2a335c13024294f72de264e235..50b47011eeeba901709d82b575e03fdb5cfedad0 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -5,7 +5,29 @@ Click on the respective example to get to a detailed documentation of the exampl
 For each example in this overview, the model equations and discretization method are described in words
 and the DuMu<sup>x</sup> name of the model is given in parenthesis: e.g. (`OneP`) / (`CCTpfaModel`).
 
-### [:open_file_folder: Example 1: One-phase flow and tracer transport](1ptracer/README.md)
+### [:open_file_folder: Example 1: Diffusion equation](diffusion/README.md)
+<table><tr><td>
+
+In this example we create a diffusion equation model and then simulate a diffusive process.
+
+You learn how to
+
+* set up a new simple model equation (a diffusion equation)
+* read parameters from a configuration file
+* create a type tag and specialize properties for it
+* generate a randomly distributed initial field (with MPI parallelism)
+* solve a time-dependent diffusion problem in parallel
+
+__Model equations:__ A diffusion equation model fully developed and contained within the example<br />
+__Discretization method:__ Vertex-centered finite volumes / control-volume finite elements (Lagrange, P1) (`BoxModel`)
+
+</td>
+<td width="35%"><a href="diffusion/README.md">
+<figure><img src="diffusion/img/diffusion.gif" alt="diffusion"/></figure></td>
+</a></td>
+</tr></table>
+
+### [:open_file_folder: Example 2: One-phase flow and tracer transport](1ptracer/README.md)
 
 <table><tr><td>
 
@@ -29,7 +51,7 @@ __Discretization method:__ Cell-centered finite volumes with two-point flux appr
 </a></td>
 </tr></table>
 
-### [:open_file_folder: Example 2: Two-phase flow with infiltration and adaptive grid](2pinfiltration/README.md)
+### [:open_file_folder: Example 3: Two-phase flow with infiltration and adaptive grid](2pinfiltration/README.md)
 
 <table><tr><td>
 
@@ -47,14 +69,13 @@ You learn how to
 
 __Model equations:__ Immiscible two-phase flow Darcy equations in porous media (`TwoP`)<br />
 __Discretization method:__ Cell-centered finite volumes with two-point flux approximation (`CCTpfaModel`)
-0
 </td>
 <td width="35%"><a href="2pinfiltration/README.md">
 <figure><img src="2pinfiltration/img/test_2p_pointsource_adaptive.png" alt="2p result"/></figure>
 </a></td>
 </tr></table>
 
-### [:open_file_folder: Example 3: Shallow water model](shallowwaterfriction/README.md)
+### [:open_file_folder: Example 4: Shallow water model](shallowwaterfriction/README.md)
 
 <table><tr><td>
 
@@ -74,7 +95,7 @@ __Discretization method:__ Cell-centered finite volumes with Riemann solver (`CC
 </a></td>
 </tr></table>
 
-### [:open_file_folder: Example 4: Freeflow channel](freeflowchannel/README.md)
+### [:open_file_folder: Example 5: Freeflow channel](freeflowchannel/README.md)
 
 <table><tr><td>
 
@@ -94,7 +115,7 @@ __Discretization method:__ Finite volumes with staggered grid arrangement (`Stag
 </a></td>
 </tr></table>
 
-### [:open_file_folder: Example 5: One-phase flow with rotation-symmetric solution](1protationsymmetry/README.md)
+### [:open_file_folder: Example 6: One-phase flow with rotation-symmetric solution](1protationsymmetry/README.md)
 
 <table><tr><td>
 
@@ -115,7 +136,7 @@ __Discretization method:__ Vertex-centered finite volumes / control-volume finit
 </a></td>
 </tr></table>
 
-### [:open_file_folder: Example 6: Biomineralization](biomineralization/README.md)
+### [:open_file_folder: Example 7: Biomineralization](biomineralization/README.md)
 
 <table><tr><td>
 
@@ -140,7 +161,7 @@ __Discretization method:__ Vertex-centered finite volumes / control-volume finit
 </a></td>
 </tr></table>
 
-### [:open_file_folder: Example 7: Lid-driven cavity](liddrivencavity/README.md)
+### [:open_file_folder: Example 8: Lid-driven cavity](liddrivencavity/README.md)
 
 <table><tr><td>
 
@@ -161,7 +182,7 @@ __Discretization method:__ Finite volumes with staggered grid arrangement (`Stag
 </a></td>
 </tr></table>
 
-### [:open_file_folder: Example 8: Permeability estimation using a pore-network model](porenetwork_upscaling/README.md)
+### [:open_file_folder: Example 9: Permeability estimation using a pore-network model](porenetwork_upscaling/README.md)
 
 <table><tr><td>
 
@@ -182,7 +203,7 @@ __Discretization method:__ Pore-network (`PoreNetworkModel`)
 </a></td>
 </tr></table>
 
-### [:open_file_folder: Example 9: Embedded network 1D-3D model (tissue perfusion)](embedded_network_1d3d/README.md)
+### [:open_file_folder: Example 10: Embedded network 1D-3D model (tissue perfusion)](embedded_network_1d3d/README.md)
 
 <table><tr><td>
 
@@ -203,13 +224,3 @@ __Discretization method:__ Cell-centered finite volumes with two-point flux appr
 <figure><img src="embedded_network_1d3d/img/network.png" alt="blood vessel network"/></figure></td>
 </a></td>
 </tr></table>
-
-### [:open_file_folder: Example 10: A random concentration field diffusing](diffusion/README.md)
-<table><tr><td>
-In this example a 2D domain is initialized with a random concentration field that is then subject to diffusion while neglecting convection.
-
-</td>
-<td width="35%"><a href="diffusion/README.md">
-<figure><img src="diffusion/img/diffusion.gif" alt="diffusion"/></figure></td>
-</a></td>
-</tr></table>
diff --git a/examples/diffusion/.doc_config b/examples/diffusion/.doc_config
index 9fba5d44d2942283b157f013b5a535b78b1aad98..41c2bd836b90be017f3c3c2e63db165b4af5784e 100644
--- a/examples/diffusion/.doc_config
+++ b/examples/diffusion/.doc_config
@@ -1,5 +1,27 @@
 {
-    "README.md" : [
+    "README.md": [
         "doc/_intro.md"
-    ]
+    ],
+
+    "doc/model.md" : [
+        "doc/_model.md",
+        "model.hh"
+    ],
+
+    "doc/main.md" : [
+        "doc/_main.md",
+        "main.cc"
+    ],
+
+    "navigation" : {
+        "mainpage" : "README.md",
+        "subpages" : [
+            "doc/model.md",
+            "doc/main.md"
+        ],
+        "subtitles" : [
+            "Model implementation",
+            "Main program"
+        ]
+    }
 }
diff --git a/examples/diffusion/README.md b/examples/diffusion/README.md
index e31ae16231c2427039b44c62a3fa7a49aa4475bf..d54937c4e16e64e59fa8b14a10393212affde86b 100644
--- a/examples/diffusion/README.md
+++ b/examples/diffusion/README.md
@@ -1,26 +1,125 @@
 <!-- Important: This file has been automatically generated by generate_example_docs.py. Do not edit this file directly! -->
 
-# Simple diffusion example
+# Diffusion equation
 
-In this example a random concentration field is subject to diffusion only
+This example is implemented in three files: `model.hh`, `params.input`, and `main.cc`.
+The `model.hh` header implements a simple self-contained diffusion equation model
+to be used with a control-volume finite element discretization like the Box method.
 
 __The main points illustrated in this example are__
-* TODO
 
-__Table of contents__. This description is structured as follows:
+* Setting up a simple model
+* Creating a random initial solution in parallel
+* Solving a time-dependent diffusion problem
+* Using the linear PDE solver
+* Reusing the Jacobian for all time steps
 
-[[_TOC_]]
+[TOC]
 
-## Problem set-up
+## Equation and problem description
 
-TODO
+The scalar diffusion equation on a domain $\Omega \subset \mathbb{R}^2$
+with boundary $\partial\Omega = \Gamma_D \cup \Gamma_N$ composed of Dirichlet and Neumann boundaries
+reads
 
-## Model description
+```math
+\begin{aligned}
+\frac{\partial c}{\partial t} - \nabla \cdot (D \nabla c) &= 0 \quad \mathrm{in}\; \Omega \\
+                                                        c &= c_D  \quad \mathrm{on}\; \Gamma_D \\
+                         -D \nabla c \cdot \boldsymbol{n} &= g_N    \quad \mathrm{on}\; \Gamma_N \\
+\end{aligned}
+```
 
-TODO
+with the unknown concentration $c(x,t)$, diffusion coefficient $D$, Dirichlet data $c_D$, Neumann data $g_N$,
+and $\boldsymbol{n}$ denoting the outward-oriented unit normal vector on $\partial\Omega$.
+In this example we will use homogeneous Neumann boundary conditions
+on all of $\partial\Omega$ such that $\Gamma_D = \emptyset$ and $g_N = 0$.
 
-### The simple diffusion model
+We partition $\Omega$ into control volumes $B$ (sometimes called "boxes"). Integrating our equation over a control volume $B$
+and applying the divergence theorem yields
 
-TODO
+```math
+\begin{aligned}
+\int_B \frac{\partial c}{\partial t} \mathrm{d}V - \int_{\partial B} D (\nabla c \cdot \boldsymbol{n}) \mathrm{d}A &= 0.
+\end{aligned}
+```
 
+We discretize in time using an implicit Euler method
 
+```math
+\begin{aligned}
+\int_B \frac{c^{n+1}-c^{n}}{\Delta t} \mathrm{d}V - \int_{\partial B} D (\nabla c^{n+1} \cdot \boldsymbol{n}) \mathrm{d}A &= 0.
+\end{aligned}
+```
+The initial data is given by $c^0 := c(x, 0)$. In our example, $c^0$ is a random field, where the control volume
+averages $c^0_{h,B}$ are sampled from a uniform distribution, $c^0_{h,B} \sim U(0,1)$.
+
+For the Box method, the control volumes are centered at grid vertices. We partition the control volume boundary
+into sub control volume faces $\sigma$ such that each $\sigma$ belongs to exactly one grid element. On each element $K$,
+the discrete solution is represented by linear basis functions as
+
+```math
+c_h(x) = \sum\limits_{B \in \mathcal{B}_K} c_{h,B} N_B(x)
+```
+
+where $\mathcal{B}_K$ is the set of control volumes intersecting element $K$ and $N_B$ are the basis functions associated
+with the vertex corresponding to the center of control volume $B$. Inserting this ansatz yields the discrete equation
+
+```math
+\begin{aligned}
+\vert B \vert \frac{c_B^{n+1}-c_B^{n}}{\Delta t} - \sum_{\sigma \in \Sigma_{BK}} \left[ D \sum_{B' \in \mathcal{B}_K} c^{n+1}_{h,B'} \nabla N_{B'} \cdot \boldsymbol{n}_{B,\sigma} \vert \sigma \vert \right] &= 0,
+\end{aligned}
+```
+
+where $\Sigma_{BK}$ is the set of sub control volume faces which belong to $B$ and $K$, and $\boldsymbol{n}_{B,\sigma}$ is the outward-oriented
+unit normal vector on $\partial B$.
+
+### Model parameters
+
+* $\Omega = [0,1]\times[0,1]$ (unit square)
+* $T = [0,5]$ with time step size $\Delta t = 0.1$
+* $100 \times 100$ cells (structured Cartesian grid)
+* $D = 0.0001$
+* $c^0_{h,B} \sim U(0,1)$
+
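+As a sketch, a `params.input` consistent with these values could look as follows
+(illustrative, not the verbatim file shipped with the example; the grid keys follow
+the usual conventions of the YaspGrid grid manager and the problem name is assumed):
+
+```ini
+[TimeLoop]
+TStart = 0    # start time (optional, defaults to 0)
+Dt = 0.1      # time step size
+TEnd = 5      # end time
+
+[Grid]
+UpperRight = 1 1    # unit square (the lower-left corner defaults to the origin)
+Cells = 100 100     # number of cells per direction
+
+[Problem]
+Name = diffusion               # assumed name, used for the output files
+DiffusionCoefficient = 1e-4    # D
+```
+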
+### Simulation result
+
+The simulation result will look something like this.
+
+<figure><img src="img/diffusion.gif" alt="diffusion"/></figure>
+
+### Running the example in parallel
+
+By default Dumux will try to speed up the assembly by using shared memory parallelism if a suitable
+backend has been found on your system (one of TBB, OpenMP, Kokkos, C++ parallel algorithms).
+You can limit the number of threads by prepending your executable with `DUMUX_NUM_THREADS=<number>`.
+If you also want to use distributed memory parallelism with MPI (which currently works better for the solvers),
+run the executable with your MPI environment. Each MPI process will use multi-threading if
+`DUMUX_NUM_THREADS` is larger than $1$.
+
+Running the example with four MPI processes (distributed memory parallelism)
+each with two threads (shared memory parallelism):
+
+```sh
+DUMUX_NUM_THREADS=2 mpirun -np 4 ./example_diffusion
+```
+
+You can set the parameter `Grid.Overlap` to some non-zero integer in `params.input`
+to turn the domain decomposition into an overlapping decomposition where
+`Grid.Overlap` specifies the number of grid cells in the overlap between processes.
+This can help to increase the convergence speed of the linear solver.
+
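+For example, a one-cell overlap could be configured in the `[Grid]` group of
+`params.input` like this (a sketch; the value is illustrative):
+
+```ini
+[Grid]
+Overlap = 1    # number of overlapping grid cells between neighboring processes
+```
+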
+# Implementation
+
+For the code implementation see Part 1 and Part 2.
+
+## Part 1: Model implementation
+
+| [:arrow_right: Click to continue with part 1 of the documentation](doc/model.md) |
+|---:|
+
+
+## Part 2: Main program
+
+| [:arrow_right: Click to continue with part 2 of the documentation](doc/main.md) |
+|---:|
diff --git a/examples/diffusion/doc/_intro.md b/examples/diffusion/doc/_intro.md
index a9a249b81474d10f52c77e19b850440f16b2e2d4..5ca71136bd313a8ff721785681b24784ef940aa4 100644
--- a/examples/diffusion/doc/_intro.md
+++ b/examples/diffusion/doc/_intro.md
@@ -1,24 +1,112 @@
-# Simple diffusion example
+# Diffusion equation
 
-In this example a random concentration field is subject to diffusion only
+This example is implemented in three files: `model.hh`, `params.input`, and `main.cc`.
+The `model.hh` header implements a simple self-contained diffusion equation model
+to be used with a control-volume finite element discretization like the Box method.
 
 __The main points illustrated in this example are__
-* TODO
 
-__Table of contents__. This description is structured as follows:
+* Setting up a simple model
+* Creating a random initial solution in parallel
+* Solving a time-dependent diffusion problem
+* Using the linear PDE solver
+* Reusing the Jacobian for all time steps
 
-[[_TOC_]]
+[TOC]
 
-## Problem set-up
+## Equation and problem description
 
-TODO
+The scalar diffusion equation on a domain $\Omega \subset \mathbb{R}^2$
+with boundary $\partial\Omega = \Gamma_D \cup \Gamma_N$ composed of Dirichlet and Neumann boundaries
+reads
 
-## Model description
+```math
+\begin{aligned}
+\frac{\partial c}{\partial t} - \nabla \cdot (D \nabla c) &= 0 \quad \mathrm{in}\; \Omega \\
+                                                        c &= c_D  \quad \mathrm{on}\; \Gamma_D \\
+                         -D \nabla c \cdot \boldsymbol{n} &= g_N    \quad \mathrm{on}\; \Gamma_N \\
+\end{aligned}
+```
 
-TODO
+with the unknown concentration $c(x,t)$, diffusion coefficient $D$, Dirichlet data $c_D$, Neumann data $g_N$,
+and $\boldsymbol{n}$ denoting the outward-oriented unit normal vector on $\partial\Omega$.
+In this example we will use homogeneous Neumann boundary conditions
+on all of $\partial\Omega$ such that $\Gamma_D = \emptyset$ and $g_N = 0$.
 
-### The simple diffusion model
+We partition $\Omega$ into control volumes $B$ (sometimes called "boxes"). Integrating our equation over a control volume $B$
+and applying the divergence theorem yields
 
-TODO
+```math
+\begin{aligned}
+\int_B \frac{\partial c}{\partial t} \mathrm{d}V - \int_{\partial B} D (\nabla c \cdot \boldsymbol{n}) \mathrm{d}A &= 0.
+\end{aligned}
+```
 
+We discretize in time using an implicit Euler method
 
+```math
+\begin{aligned}
+\int_B \frac{c^{n+1}-c^{n}}{\Delta t} \mathrm{d}V - \int_{\partial B} D (\nabla c^{n+1} \cdot \boldsymbol{n}) \mathrm{d}A &= 0.
+\end{aligned}
+```
+The initial data is given by $c^0 := c(x, 0)$. In our example, $c^0$ is a random field, where the control volume
+averages $c^0_{h,B}$ are sampled from a uniform distribution, $c^0_{h,B} \sim U(0,1)$.
+
+For the Box method, the control volumes are centered at grid vertices. We partition the control volume boundary
+into sub control volume faces $\sigma$ such that each $\sigma$ belongs to exactly one grid element. On each element $K$,
+the discrete solution is represented by linear basis functions as
+
+```math
+c_h(x) = \sum\limits_{B \in \mathcal{B}_K} c_{h,B} N_B(x)
+```
+
+where $\mathcal{B}_K$ is the set of control volumes intersecting element $K$ and $N_B$ are the basis functions associated
+with the vertex corresponding to the center of control volume $B$. Inserting this ansatz yields the discrete equation
+
+```math
+\begin{aligned}
+\vert B \vert \frac{c_B^{n+1}-c_B^{n}}{\Delta t} - \sum_{\sigma \in \Sigma_{BK}} \left[ D \sum_{B' \in \mathcal{B}_K} c^{n+1}_{h,B'} \nabla N_{B'} \cdot \boldsymbol{n}_{B,\sigma} \vert \sigma \vert \right] &= 0,
+\end{aligned}
+```
+
+where $\Sigma_{BK}$ is the set of sub control volume faces which belong to $B$ and $K$, and $\boldsymbol{n}_{B,\sigma}$ is the outward-oriented
+unit normal vector on $\partial B$.
+
+### Model parameters
+
+* $\Omega = [0,1]\times[0,1]$ (unit square)
+* $T = [0,5]$ with time step size $\Delta t = 0.1$
+* $100 \times 100$ cells (structured Cartesian grid)
+* $D = 0.0001$
+* $c^0_{h,B} \sim U(0,1)$
+
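+As a sketch, a `params.input` consistent with these values could look as follows
+(illustrative, not the verbatim file shipped with the example; the grid keys follow
+the usual conventions of the YaspGrid grid manager and the problem name is assumed):
+
+```ini
+[TimeLoop]
+TStart = 0    # start time (optional, defaults to 0)
+Dt = 0.1      # time step size
+TEnd = 5      # end time
+
+[Grid]
+UpperRight = 1 1    # unit square (the lower-left corner defaults to the origin)
+Cells = 100 100     # number of cells per direction
+
+[Problem]
+Name = diffusion               # assumed name, used for the output files
+DiffusionCoefficient = 1e-4    # D
+```
+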
+### Simulation result
+
+The simulation result will look something like this.
+
+<figure><img src="img/diffusion.gif" alt="diffusion"/></figure>
+
+### Running the example in parallel
+
+By default Dumux will try to speed up the assembly by using shared memory parallelism if a suitable
+backend has been found on your system (one of TBB, OpenMP, Kokkos, C++ parallel algorithms).
+You can limit the number of threads by prepending your executable with `DUMUX_NUM_THREADS=<number>`.
+If you also want to use distributed memory parallelism with MPI (which currently works better for the solvers),
+run the executable with your MPI environment. Each MPI process will use multi-threading if
+`DUMUX_NUM_THREADS` is larger than $1$.
+
+Running the example with four MPI processes (distributed memory parallelism)
+each with two threads (shared memory parallelism):
+
+```sh
+DUMUX_NUM_THREADS=2 mpirun -np 4 ./example_diffusion
+```
+
+You can set the parameter `Grid.Overlap` to some non-zero integer in `params.input`
+to turn the domain decomposition into an overlapping decomposition where
+`Grid.Overlap` specifies the number of grid cells in the overlap between processes.
+This can help to increase the convergence speed of the linear solver.
+
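+For example, a one-cell overlap could be configured in the `[Grid]` group of
+`params.input` like this (a sketch; the value is illustrative):
+
+```ini
+[Grid]
+Overlap = 1    # number of overlapping grid cells between neighboring processes
+```
+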
+# Implementation
+
+For the code implementation see Part 1 and Part 2.
diff --git a/examples/diffusion/doc/_main.md b/examples/diffusion/doc/_main.md
new file mode 100644
index 0000000000000000000000000000000000000000..a5e58e79849017eb71736e21b4d9d06e01254c7d
--- /dev/null
+++ b/examples/diffusion/doc/_main.md
@@ -0,0 +1 @@
+# Diffusion equation main program
diff --git a/examples/diffusion/doc/_model.md b/examples/diffusion/doc/_model.md
new file mode 100644
index 0000000000000000000000000000000000000000..6f465c4d9706034e266fdec12564aa52463410e5
--- /dev/null
+++ b/examples/diffusion/doc/_model.md
@@ -0,0 +1 @@
+# Diffusion equation model definition
diff --git a/examples/diffusion/doc/main.md b/examples/diffusion/doc/main.md
new file mode 100644
index 0000000000000000000000000000000000000000..58ce521691bc047fa86cbd6ed662655001bfd2e2
--- /dev/null
+++ b/examples/diffusion/doc/main.md
@@ -0,0 +1,443 @@
+<!-- Important: This file has been automatically generated by generate_example_docs.py. Do not edit this file directly! -->
+
+
+| [:arrow_left: Back to the main documentation](../README.md) | [:arrow_left: Go back to part 1](model.md) |
+|---|---:|
+
+# Diffusion equation main program
+
+
+In the file `main.cc`, we use the previously defined model to
+set up the simulation. The setup consists of four steps:
+1. Define the problem, setting boundary conditions and the diffusion coefficient
+2. Configure the property system, reusing the model defined in Part I
+3. Define a function for setting the random initial condition
+4. Write the main program defining all steps of the simulation
+
+[TOC]
+
+We start `main.cc` with the necessary header includes:
+<details><summary> Click to show includes</summary>
+
+```cpp
+
+#include <config.h>
+```
+
+Include the header containing std::true_type and std::false_type
+
+```cpp
+#include <type_traits>
+```
+
+Include the headers useful for the random initial solution
+
+```cpp
+#include <random>
+#include <dumux/common/random.hh>
+```
+
+As Dune grid interface implementation we will use
+the structured parallel grid manager YaspGrid
+
+```cpp
+#include <dune/grid/yaspgrid.hh>
+```
+
+Common includes for problem and main
+
+```cpp
+#include <dumux/common/initialize.hh>
+#include <dumux/common/properties.hh>
+#include <dumux/common/parameters.hh>
+#include <dumux/common/numeqvector.hh>
+#include <dumux/common/boundarytypes.hh>
+#include <dumux/common/fvproblem.hh>
+
+// VTK output functionality
+#include <dumux/io/vtkoutputmodule.hh>
+#include <dumux/io/grid/gridmanager_yasp.hh>
+
+// the discretization method
+#include <dumux/discretization/box.hh>
+
+// assembly and solver
+#include <dumux/linear/linearsolvertraits.hh>
+#include <dumux/linear/linearalgebratraits.hh>
+#include <dumux/linear/istlsolvers.hh>
+#include <dumux/linear/pdesolver.hh>
+#include <dumux/assembly/fvassembler.hh>
+```
+
+
+Finally, we include the model defined in Part I
+
+```cpp
+#include "model.hh"
+```
+
+</details>
+
+## 1. The problem class
+
+The problem class implements the boundary conditions. It also provides
+an interface that is used by the local residual (see Part I) to obtain the diffusion
+coefficient. The value is read from the parameter configuration tree.
+
+<details open>
+<summary><b>Click to hide/show the file documentation</b> (or inspect the [source code](../main.cc))</summary>
+
+
+```cpp
+namespace Dumux {
+
+template<class TypeTag>
+class DiffusionTestProblem : public FVProblem<TypeTag>
+{
+```
+
+<details><summary> Click to show boilerplate code</summary>
+
+```cpp
+
+    using ParentType = FVProblem<TypeTag>;
+    using GridGeometry = GetPropType<TypeTag, Properties::GridGeometry>;
+    using GridView = typename GetPropType<TypeTag, Properties::GridGeometry>::GridView;
+    using Element = typename GridView::template Codim<0>::Entity;
+    using GlobalPosition = typename Element::Geometry::GlobalCoordinate;
+
+    using Scalar = GetPropType<TypeTag, Properties::Scalar>;
+    using PrimaryVariables = GetPropType<TypeTag, Properties::PrimaryVariables>;
+    using NumEqVector = Dumux::NumEqVector<PrimaryVariables>;
+    using BoundaryTypes = Dumux::BoundaryTypes<GetPropType<TypeTag, Properties::ModelTraits>::numEq()>;
+    using Indices = typename GetPropType<TypeTag, Properties::ModelTraits>::Indices;
+```
+
+</details>
+
+```cpp
+
+public:
+```
+
+In the constructor, we read the diffusion coefficient constant from the
+parameter tree (which is initialized with the content of `params.input`).
+
+```cpp
+    DiffusionTestProblem(std::shared_ptr<const GridGeometry> gridGeometry)
+    : ParentType(gridGeometry)
+    {
+        diffusionCoefficient_ = getParam<Scalar>("Problem.DiffusionCoefficient");
+    }
+```
+
+As boundary conditions, we specify Neumann everywhere. This means we
+have to prescribe a flux at each boundary sub control volume face.
+
+```cpp
+    BoundaryTypes boundaryTypesAtPos(const GlobalPosition& globalPos) const
+    {
+        BoundaryTypes values;
+        values.setAllNeumann();
+        return values;
+    }
+```
+
+We prescribe zero flux over all of the boundary
+
+```cpp
+    NumEqVector neumannAtPos(const GlobalPosition& globalPos) const
+    { return { 0.0 }; }
+```
+
+The diffusion coefficient interface is used in the local residual (see Part I).
+We can name this interface however we want as long as we adapt the calling site
+in the `LocalResidual` class in `model.hh`.
+
+```cpp
+    Scalar diffusionCoefficient() const
+    { return diffusionCoefficient_; }
+
+private:
+    Scalar diffusionCoefficient_;
+};
+} // end namespace Dumux
+```
+
+
+</details>
+
+## 2. Property tag and specializations
+
+We create a new tag `DiffusionTest` that inherits all properties
+specialized for the tag `DiffusionModel` (we created this in Part I)
+and the tag `BoxModel` which provides types relevant for the spatial
+discretization scheme (see [dumux/discretization/box.hh](https://git.iws.uni-stuttgart.de/dumux-repositories/dumux/-/blob/master/dumux/discretization/box.hh)).
+
+Here we choose a short form of specializing properties. The property
+system also recognizes an alias (`using`) with the property name being
+a member of the specified type tag. Note that we could also use the same mechanism
+as in (Part I), for example:
+```cpp
+template<class TypeTag>
+struct Scalar<TypeTag, TTag::DiffusionTest>
+{ using type = double; };
+```
+which has the same effect as having an alias `Scalar = double;`
+as member of the type tag `DiffusionTest`.
+This mechanism allows for a terser code expression.
+In case both definitions are present for the same type tag, the explicit
+specialization (long form) takes precedence.
+
+
+<details open>
+<summary><b>Click to hide/show the file documentation</b> (or inspect the [source code](../main.cc))</summary>
+
+
+```cpp
+namespace Dumux::Properties::TTag {
+
+struct DiffusionTest
+{
+    using InheritsFrom = std::tuple<DiffusionModel, BoxModel>;
+
+    using Scalar = double;
+    using Grid = Dune::YaspGrid<2>;
+
+    template<class TypeTag>
+    using Problem = DiffusionTestProblem<TypeTag>;
+
+    using EnableGridVolumeVariablesCache = std::true_type;
+    using EnableGridFluxVariablesCache = std::true_type;
+    using EnableGridGeometryCache = std::true_type;
+};
+
+} // end namespace Dumux::Properties::TTag
+```
+
+
+</details>
+
+## 3. Creating the initial solution
+
+We want to initialize the entries of the solution vector $c_{h,B}$
+with a random field such that $c_{h,B} \sim U(0,1)$. When running
+with MPI in parallel this requires communication. With just one
+processor (`gg.gridView().comm().size() == 1`), we can just iterate
+over all entries of the solution vector and assign a uniformly
+distributed random number. However, with multiple processes, the
+solution vector only contains a subset of the degrees of freedom
+living on the processor. Moreover, some degrees of freedom exist
+on multiple processes. Each process generates its own random
+numbers, which means we might generate different numbers for the
+same degree of freedom. Therefore, we need to communicate.
+The idea is that each process adds its rank (processes are
+numbered starting from 0) to its values. Then we take the minimum
+over all processes, which returns the value of the process
+with the lowest rank. Afterwards, we subtract the added rank offset.
+
+
+<details open>
+<summary><b>Click to hide/show the file documentation</b> (or inspect the [source code](../main.cc))</summary>
+
+
+```cpp
+template<class SolutionVector, class GridGeometry>
+SolutionVector createInitialSolution(const GridGeometry& gg)
+{
+    SolutionVector sol(gg.numDofs());
+```
+
+Generate random numbers and add the processor rank as an offset.
+For a sequential run, `rank` is always `0`.
+
+```cpp
+    std::mt19937 gen(0); // seed is 0 for deterministic results
+    Dumux::SimpleUniformDistribution<> dis(0.0, 1.0);
+
+    const auto rank = gg.gridView().comm().rank();
+    for (int n = 0; n < sol.size(); ++n)
+        sol[n] = dis(gen) + rank;
+```
+
+We take the value of the process with the minimum rank
+and subtract the rank offset.
+
+```cpp
+    if (gg.gridView().comm().size() > 1)
+    {
+        Dumux::VectorCommDataHandleMin<
+            typename GridGeometry::VertexMapper,
+            SolutionVector,
+            GridGeometry::GridView::dimension
+        > minHandle(gg.vertexMapper(), sol);
+        gg.gridView().communicate(minHandle, Dune::All_All_Interface, Dune::ForwardCommunication);
+```
+
+remove processor offset
+
+```cpp
+        for (int n = 0; n < sol.size(); ++n)
+            sol[n][0] -= std::floor(sol[n][0]);
+    }
+
+    return sol;
+}
+```
+
+
+</details>
+
+## 4. The main program
+
+
+<details open>
+<summary><b>Click to hide/show the file documentation</b> (or inspect the [source code](../main.cc))</summary>
+
+
+```cpp
+int main(int argc, char** argv)
+{
+    using namespace Dumux;
+```
+
+First, we take care to initialize MPI and the multithreading backend.
+This convenience function ensures that everything is set up in the right order and
+has to be called for every Dumux simulation. `Dumux::initialize` also respects
+the environment variable `DUMUX_NUM_THREADS` to restrict the number of threads
+available for multi-threaded code parts (for example the assembly).
+
+```cpp
+    Dumux::initialize(argc, argv);
+```
+
+We initialize the parameter tree, including command line arguments.
+By default, this reads all parameters from the configuration file `params.input`
+if such a file exists. Then it looks for command line arguments. For example,
+`./example_diffusion -TimeLoop.TEnd 10` sets the end time to 10 seconds.
+Command line arguments overwrite settings in the parameter file.
+
+```cpp
+    Parameters::init(argc, argv);
+```
+
+We specify an alias for the model type tag.
+We will configure the assembler with this type tag that
+we specialized all these properties for above and in the model definition (Part I).
+We can extract type information through properties specialized for the type tag
+using `GetPropType`.
+
+```cpp
+    using TypeTag = Properties::TTag::DiffusionTest;
+
+    using Scalar = GetPropType<TypeTag, Properties::Scalar>;
+    using Grid = GetPropType<TypeTag, Properties::Grid>;
+    using GridGeometry = GetPropType<TypeTag, Properties::GridGeometry>;
+    using Problem = GetPropType<TypeTag, Properties::Problem>;
+    using SolutionVector = GetPropType<TypeTag, Properties::SolutionVector>;
+    using GridVariables = GetPropType<TypeTag, Properties::GridVariables>;
+```
+
+Next, we initialize the grid. Grid parameters are taken from the input file
+from the group `[Grid]` if it exists. You can also pass any other parameter
+group (e.g. `"MyGroup"`) to `init` and then it will look in `[MyGroup.Grid]`.
+
+```cpp
+    GridManager<Grid> gridManager;
+    gridManager.init();
+```
+
+We create the finite volume grid geometry from the (leaf) grid view,
+the problem for the boundary conditions, a solution vector, and a grid variables instance.
+
+```cpp
+    auto gridGeometry = std::make_shared<GridGeometry>(gridManager.grid().leafGridView());
+    auto problem = std::make_shared<Problem>(gridGeometry);
+    auto sol = createInitialSolution<SolutionVector>(*gridGeometry);
+    auto gridVariables = std::make_shared<GridVariables>(problem, gridGeometry);
+    gridVariables->init(sol);
+```
+
+We initialize the VTK output module and write out the initial concentration field.
+
+```cpp
+    VtkOutputModule<GridVariables, SolutionVector> vtkWriter(*gridVariables, sol, problem->name());
+    vtkWriter.addVolumeVariable([](const auto& vv){ return vv.priVar(0); }, "c");
+    vtkWriter.write(0.0);
+```
+
+We instantiate the time loop using the start and end times as well as
+the time step size from the parameter tree (`params.input`).
+
+```cpp
+    auto timeLoop = std::make_shared<CheckPointTimeLoop<Scalar>>(
+        getParam<Scalar>("TimeLoop.TStart", 0.0),
+        getParam<Scalar>("TimeLoop.Dt"),
+        getParam<Scalar>("TimeLoop.TEnd")
+    );
+```
+
+Next, we choose the type of assembler, linear solver and PDE solver
+and construct instances of these classes.
+
+```cpp
+    using Assembler = FVAssembler<TypeTag, DiffMethod::numeric>;
+    using LinearSolver = SSORCGIstlSolver<LinearSolverTraits<GridGeometry>, LinearAlgebraTraitsFromAssembler<Assembler>>;
+    using Solver = Dumux::LinearPDESolver<Assembler, LinearSolver>;
+
+    auto oldSol = sol; // copy the vector to store state of previous time step
+    auto assembler = std::make_shared<Assembler>(problem, gridGeometry, gridVariables, timeLoop, oldSol);
+    auto linearSolver = std::make_shared<LinearSolver>(gridGeometry->gridView(), gridGeometry->dofMapper());
+    Solver solver(assembler, linearSolver);
+```
+
+We tell the assembler to create the system matrix and vector objects. Then we
+assemble the system matrix once and tell the solver to reuse it in every time step.
+Since we have a linear problem, the matrix is not going to change.
+
+```cpp
+    assembler->setLinearSystem();
+    assembler->assembleJacobian(sol);
+    solver.reuseMatrix();
+```
+
+The time loop is where most of the actual computations happen.
+We assemble and solve the linear system of equations, update the solution,
+write the solution to a VTK file, and continue until we reach the
+final simulation time.
+
+
+```cpp
+    timeLoop->start(); do
+    {
+        // assemble & solve
+        solver.solve(sol);
+
+        // make the new solution the old solution
+        oldSol = sol;
+        gridVariables->advanceTimeStep();
+
+        // advance the time loop to the next step
+        timeLoop->advanceTimeStep();
+
+        // write VTK output (writes out the concentration field)
+        vtkWriter.write(timeLoop->time());
+
+        // report statistics of this time step
+        timeLoop->reportTimeStep();
+
+    } while (!timeLoop->finished());
+
+    timeLoop->finalize(gridGeometry->gridView().comm());
+
+    return 0;
+}
+```
+
+
+</details>
+
+
+| [:arrow_left: Back to the main documentation](../README.md) | [:arrow_left: Go back to part 1](model.md) |
+|---|---:|
+
diff --git a/examples/diffusion/doc/model.md b/examples/diffusion/doc/model.md
new file mode 100644
index 0000000000000000000000000000000000000000..e4ee1e8600af74fad91f80d48ae59e068db45b41
--- /dev/null
+++ b/examples/diffusion/doc/model.md
@@ -0,0 +1,233 @@
+<!-- Important: This file has been automatically generated by generate_example_docs.py. Do not edit this file directly! -->
+
+
+| [:arrow_left: Back to the main documentation](../README.md) | [:arrow_right: Continue with part 2](main.md) |
+|---|---:|
+
+# Diffusion equation model definition
+
+
+In the file `model.hh`, we define the model equations and
+set all default model properties. The setup consists of three steps:
+1. Create a model type tag (used to specialize properties)
+2. Define the local residual class implementing the discrete equation
+3. Specialize important properties of the model such that Dumux knows how to assemble the system matrix
+
+[TOC]
+
+We start `model.hh` with the necessary header includes:
+<details><summary> Click to show includes</summary>
+
+```cpp
+
+#include <dumux/common/math.hh>
+#include <dumux/common/properties.hh>
+#include <dumux/common/numeqvector.hh>
+#include <dumux/common/volumevariables.hh>
+#include <dumux/discretization/method.hh>
+```
+
+</details>
+
+## 1. Property Tag
+
+The property tag is simply an empty struct with the name `DiffusionModel`
+
+
+<details open>
+<summary><b>Click to hide/show the file documentation</b> (or inspect the [source code](../model.hh))</summary>
+
+
+```cpp
+namespace Dumux::Properties::TTag {
+//! The diffusion model tag that we can specialize properties for
+struct DiffusionModel {};
+} // end namespace Dumux::Properties::TTag
+```
+
+
+</details>
+
+## 2. The local (element-wise) residual
+
+The local residual assembles the contribution to the residual for
+all degrees of freedom associated with an element. Here, we use the
+Box method which is based on $P_1$ basis functions (piece-wise linears)
+and the degrees of freedom are on the nodes. Each node is associated with
+exactly one sub control volume (`scv`) per element and several ($2$ in $\mathbb{R}^2$)
+sub control volume faces (`scvf`). In the local residual, we can implement the
+contribution for one `scv` (storage and source terms) or one `scvf` (flux terms).
+
+
+<details open>
+<summary><b>Click to hide/show the file documentation</b> (or inspect the [source code](../model.hh))</summary>
+
+
+```cpp
+namespace Dumux {
+template<class TypeTag>
+class DiffusionModelLocalResidual
+: public GetPropType<TypeTag, Properties::BaseLocalResidual>
+{
+```
+
+
+**Storage term:** Evaluate the rate of change of all conserved quantities
+
+```cpp
+    NumEqVector computeStorage(const Problem& problem,
+                               const SubControlVolume& scv,
+                               const VolumeVariables& volVars) const
+    {
+        NumEqVector storage;
+        storage[Indices::massBalanceEqIdx] = volVars.priVar(Indices::concentrationIdx);
+        return storage;
+    }
+```
+
+**Flux term:** Evaluate the fluxes over a face of a sub control volume
+Here we evaluate the (integrated) flux
+
+```math
+F = -D \sum_{B \in \mathcal{B}_K} \left( c_{h,B} \nabla N_B \right) \cdot\boldsymbol{n} \vert \sigma \vert
+```
+
+
+```cpp
+    NumEqVector computeFlux(const Problem& problem,
+                            const Element& element,
+                            const FVElementGeometry& fvGeometry,
+                            const ElementVolumeVariables& elemVolVars,
+                            const SubControlVolumeFace& scvf,
+                            const ElementFluxVariablesCache& elemFluxVarsCache) const
+    {
+        static_assert(DiscretizationMethods::isCVFE<typename GridGeometry::DiscretizationMethod>,
+            "This local residual is hard-coded to control-volume finite element schemes");
+
+        // Compute ∇c at the integration point of this sub control volume face.
+        const auto& fluxVarCache = elemFluxVarsCache[scvf];
+        Dune::FieldVector<Scalar, dimWorld> gradConcentration(0.0);
+        for (const auto& scv : scvs(fvGeometry))
+        {
+            const auto& volVars = elemVolVars[scv];
+            // v.axpy(a, w) means v <- v + a*w
+            gradConcentration.axpy(
+                volVars.priVar(Indices::concentrationIdx),
+                fluxVarCache.gradN(scv.indexInElement())
+            );
+        }
+
+        NumEqVector flux;
+
+        // Compute the flux with `vtmv` (vector transposed times matrix times vector) or -n^T D ∇c A.
+        // The diffusion coefficient comes from the `problem` (see Part II of the example).
+        flux[Indices::massBalanceEqIdx] = -1.0*vtmv(
+            scvf.unitOuterNormal(), problem.diffusionCoefficient(), gradConcentration
+        )*scvf.area();
+
+        return flux;
+    }
+};
+} // end namespace Dumux
+```
+
+
+</details>
+
+## 3. The model properties
+
+By specializing properties for our type tag `DiffusionModel`,
+every other class that knows about the type tag (for example,
+the assembler or the problem) can extract the
+type information that we specify here.
+
+Note that these types can be overwritten for specific problem
+definitions if this is needed (we will show this on the next page).
+
+
+<details open>
+<summary><b>Click to hide/show the file documentation</b> (or inspect the [source code](../model.hh))</summary>
+
+
+```cpp
+namespace Dumux::Properties {
+```
+
+The type of the local residual is the class defined above
+
+```cpp
+template<class TypeTag>
+struct LocalResidual<TypeTag, TTag::DiffusionModel>
+{ using type = DiffusionModelLocalResidual<TypeTag>; };
+```
+
+The default scalar type is double:
+we compute with double-precision floating point numbers.
+
+```cpp
+template<class TypeTag>
+struct Scalar<TypeTag, TTag::DiffusionModel>
+{ using type = double; };
+```
+
+The model traits specify some information about our equation system.
+Here we have just one equation. We still specify indices so that, in the
+places where we access primary variables, we can do so with a named variable.
+
+```cpp
+template<class TypeTag>
+struct ModelTraits<TypeTag, TTag::DiffusionModel>
+{
+    struct type
+    {
+        struct Indices
+        {
+            static constexpr int concentrationIdx = 0;
+            static constexpr int massBalanceEqIdx = 0;
+        };
+
+        static constexpr int numEq() { return 1; }
+    };
+};
+```
+
+The primary variable vector has entries of type `Scalar` and is
+as large as the number of equations (here 1), but we keep the definition general.
+
+```cpp
+template<class TypeTag>
+struct PrimaryVariables<TypeTag, TTag::DiffusionModel>
+{
+    using type = Dune::FieldVector<
+        GetPropType<TypeTag, Properties::Scalar>,
+        GetPropType<TypeTag, Properties::ModelTraits>::numEq()
+    >;
+};
+```
+
+The `BasicVolumeVariables` are the simplest volume variables.
+They only store one instance of `PrimaryVariables` for the
+degree of freedom (here: vertex dof) that they are attached to.
+
+```cpp
+template<class TypeTag>
+struct VolumeVariables<TypeTag, TTag::DiffusionModel>
+{
+    struct Traits
+    {
+        using PrimaryVariables
+            = GetPropType<TypeTag, Properties::PrimaryVariables>;
+    };
+    using type = BasicVolumeVariables<Traits>;
+};
+
+} // end namespace Dumux::Properties
+```
+
+
+</details>
+
+
+| [:arrow_left: Back to the main documentation](../README.md) | [:arrow_right: Continue with part 2](main.md) |
+|---|---:|
+
diff --git a/examples/diffusion/img/diffusion.gif b/examples/diffusion/img/diffusion.gif
index 3dcc88d3426d4c2cf60b7ef3620d33d39f720dea..e0aecd597a2e3c7c84b85fae961915ccafb1f6e6 100644
Binary files a/examples/diffusion/img/diffusion.gif and b/examples/diffusion/img/diffusion.gif differ
diff --git a/examples/diffusion/main.cc b/examples/diffusion/main.cc
index ee6cb3cbdfbe393a38259e6bac03cbac909ce874..8e24d2a54bafa1ecc196f095a75022b7f885f6b0 100644
--- a/examples/diffusion/main.cc
+++ b/examples/diffusion/main.cc
@@ -17,14 +17,32 @@
  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.   *
  *****************************************************************************/
 
+// In the file `main.cc`, we use the previously defined model to
+// set up the simulation. The setup consists of four steps:
+// 1. Define the problem, setting boundary conditions and the diffusion coefficient
+// 2. Configure the property system, reusing the model defined in Part I
+// 3. Define a function for setting the random initial condition
+// 4. Write the main program defining all steps of the simulation
+//
+// [TOC]
+//
+// We start `main.cc` with the necessary header includes:
+// [[details]] includes
 #include <config.h>
 
+// Include the header containing std::true_type and std::false_type
 #include <type_traits>
+
+// Include the headers useful for the random initial solution
 #include <random>
+#include <dumux/common/random.hh>
 
+// As Dune grid interface implementation we will use
+// the structured parallel grid manager YaspGrid
 #include <dune/grid/yaspgrid.hh>
 
-#include <dumux/common/random.hh>
+// Common includes for problem and main
+// [[codeblock]]
 #include <dumux/common/initialize.hh>
 #include <dumux/common/properties.hh>
 #include <dumux/common/parameters.hh>
@@ -32,24 +50,38 @@
 #include <dumux/common/boundarytypes.hh>
 #include <dumux/common/fvproblem.hh>
 
+// VTK output functionality
 #include <dumux/io/vtkoutputmodule.hh>
 #include <dumux/io/grid/gridmanager_yasp.hh>
 
+// the discretization method
 #include <dumux/discretization/box.hh>
 
+// assembly and solver
 #include <dumux/linear/linearsolvertraits.hh>
 #include <dumux/linear/linearalgebratraits.hh>
 #include <dumux/linear/istlsolvers.hh>
 #include <dumux/linear/pdesolver.hh>
 #include <dumux/assembly/fvassembler.hh>
-
+// [[/codeblock]]
+//
+// Finally, we include the model defined in Part I
 #include "model.hh"
-
+// [[/details]]
+
+//
+// ## 1. The problem class
+//
+// The problem class implements the boundary conditions. It also provides
+// an interface that is used by the local residual (see Part I) to obtain the diffusion
+// coefficient. The value is read from the parameter configuration tree.
+// [[content]]
 namespace Dumux {
 
 template<class TypeTag>
 class DiffusionTestProblem : public FVProblem<TypeTag>
 {
+    // [[details]] boilerplate code
     using ParentType = FVProblem<TypeTag>;
     using GridGeometry = GetPropType<TypeTag, Properties::GridGeometry>;
     using GridView = typename GetPropType<TypeTag, Properties::GridGeometry>::GridView;
@@ -61,13 +93,18 @@ class DiffusionTestProblem : public FVProblem<TypeTag>
     using NumEqVector = Dumux::NumEqVector<PrimaryVariables>;
     using BoundaryTypes = Dumux::BoundaryTypes<GetPropType<TypeTag, Properties::ModelTraits>::numEq()>;
     using Indices = typename GetPropType<TypeTag, Properties::ModelTraits>::Indices;
+    // [[/details]]
 public:
+    // In the constructor, we read the diffusion coefficient constant from the
+    // parameter tree (which is initialized with the content of `params.input`).
     DiffusionTestProblem(std::shared_ptr<const GridGeometry> gridGeometry)
     : ParentType(gridGeometry)
     {
         diffusionCoefficient_ = getParam<Scalar>("Problem.DiffusionCoefficient");
     }
 
+    // As boundary conditions, we specify Neumann everywhere. This means we
+    // have to prescribe a flux at each boundary sub control volume face.
     BoundaryTypes boundaryTypesAtPos(const GlobalPosition& globalPos) const
     {
         BoundaryTypes values;
@@ -75,18 +112,46 @@ public:
         return values;
     }
 
+    // We prescribe zero flux over all of the boundary
     NumEqVector neumannAtPos(const GlobalPosition& globalPos) const
     { return { 0.0 }; }
 
+    // The diffusion coefficient interface is used in the local residual (see Part I).
+    // We can name this interface however we want as long as we adapt the calling site
+    // in the `LocalResidual` class in `model.hh`.
     Scalar diffusionCoefficient() const
     { return diffusionCoefficient_; }
 
 private:
     Scalar diffusionCoefficient_;
 };
-
 } // end namespace Dumux
-
+// [[/content]]
+
+//
+// ## 2. Property tag and specializations
+//
+// We create a new tag `DiffusionTest` that inherits all properties
+// specialized for the tag `DiffusionModel` (we created this in Part I)
+// and the tag `BoxModel` which provides types relevant for the spatial
+// discretization scheme (see [dumux/discretization/box.hh](https://git.iws.uni-stuttgart.de/dumux-repositories/dumux/-/blob/master/dumux/discretization/box.hh)).
+//
+// Here we choose a short form of specializing properties. The property
+// system also recognizes an alias (`using`) with the property name being
+// a member of the specified type tag. Note that we could also use the same mechanism
+// as in (Part I), for example:
+// ```cpp
+// template<class TypeTag>
+// struct Scalar<TypeTag, TTag::DiffusionTest>
+// { using type = double; };
+// ```
+// which has the same effect as having an alias `Scalar = double;`
+// as member of the type tag `DiffusionTest`.
+// This mechanism allows for a terser code expression.
+// In case both definitions are present for the same type tag, the explicit
+// specialization (long form) takes precedence.
+//
+// [[content]]
 namespace Dumux::Properties::TTag {
 
 struct DiffusionTest
@@ -104,24 +169,45 @@ struct DiffusionTest
     using EnableGridGeometryCache = std::true_type;
 };
 
-} // end namespace Dumux::Properties
-
-// initialize the solution with a random field
-// and make sure this works in parallel as well
+} // end namespace Dumux::Properties::TTag
+// [[/content]]
+
+//
+// ## 3. Creating the initial solution
+//
+// We want to initialize the entries of the solution vector $c_{h,B}$
+// with a random field such that $c_{h,B} \sim U(0,1)$. When running
+// with MPI in parallel this requires communication. With just one
+// processor (`gg.gridView().comm().size() == 1`), we can just iterate
+// over all entries of the solution vector and assign a uniformly
+// distributed random number. However, with multiple processes, the
+// solution vector only contains a subset of the degrees of freedom
+// living on the processor. Moreover, some degrees of freedom exist
+// on multiple processes. Each process generates its own random
+// numbers, which means we might generate different numbers for the
+// same degree of freedom. Therefore, we need to communicate.
+// The idea is that each process adds its rank (processes are
+// numbered starting from 0) to its values. Then we take the minimum
+// over all processes, which returns the value of the process
+// with the lowest rank. Afterwards, we subtract the added rank offset.
+//
+// [[content]]
 template<class SolutionVector, class GridGeometry>
 SolutionVector createInitialSolution(const GridGeometry& gg)
 {
     SolutionVector sol(gg.numDofs());
 
-    // generate random number and add processor offset
+    // Generate random numbers and add the processor rank as an offset.
+    // For a sequential run, `rank` is always `0`.
     std::mt19937 gen(0); // seed is 0 for deterministic results
     Dumux::SimpleUniformDistribution<> dis(0.0, 1.0);
-    // the rank is zero for sequential runs
+
     const auto rank = gg.gridView().comm().rank();
     for (int n = 0; n < sol.size(); ++n)
         sol[n] = dis(gen) + rank;
 
-    // take the value of the processor with the minimum rank
+    // We take the value of the process with the minimum rank
+    // and subtract the rank offset.
     if (gg.gridView().comm().size() > 1)
     {
         Dumux::VectorCommDataHandleMin<
@@ -138,23 +224,36 @@ SolutionVector createInitialSolution(const GridGeometry& gg)
 
     return sol;
 }
-
+// [[/content]]
+//
+// ## 4. The main program
+//
+// [[content]]
 int main(int argc, char** argv)
 {
     using namespace Dumux;
 
-    // maybe initialize MPI and/or multithreading backend
+    // First, we take care to initialize MPI and the multithreading backend.
+    // This convenience function ensures that everything is set up in the right order and
+    // has to be called for every Dumux simulation. `Dumux::initialize` also respects
+    // the environment variable `DUMUX_NUM_THREADS` to restrict the number of threads
+    // available for multi-threaded code parts (for example the assembly).
     Dumux::initialize(argc, argv);
 
-    // initialize parameter tree including command line arguments
+    // We initialize the parameter tree, including command line arguments.
+    // By default, this reads all parameters from the configuration file `params.input`
+    // if such a file exists. Then it looks for command line arguments. For example,
+    // `./example_diffusion -TimeLoop.TEnd 10` sets the end time to 10 seconds.
+    // Command line arguments overwrite settings in the parameter file.
     Parameters::init(argc, argv);
 
     // We specify an alias for the model type tag
     // We will configure the assembler with this type tag that
-    // we specialized all these properties for above and in the model definition
+    // we specialized all these properties for above and in the model definition (Part I).
+    // We can extract type information through properties specialized for the type tag
+    // using `GetPropType`.
     using TypeTag = Properties::TTag::DiffusionTest;
 
-    // Moreover, these properties can be extracted to yield type information
     using Scalar = GetPropType<TypeTag, Properties::Scalar>;
     using Grid = GetPropType<TypeTag, Properties::Grid>;
     using GridGeometry = GetPropType<TypeTag, Properties::GridGeometry>;
@@ -162,48 +261,57 @@ int main(int argc, char** argv)
     using SolutionVector = GetPropType<TypeTag, Properties::SolutionVector>;
     using GridVariables = GetPropType<TypeTag, Properties::GridVariables>;
 
-    // initialize the grid
+    // Next, we initialize the grid. Grid parameters are taken from the input file
+    // from the group `[Grid]` if it exists. You can also pass any other parameter
+    // group (e.g. `"MyGroup"`) to `init` and then it will look in `[MyGroup.Grid]`.
     GridManager<Grid> gridManager;
     gridManager.init();
 
-    // create the finite volume grid geometry from the (leaf) grid view
-    // the problem for the boundary conditions, a solution vector and gridvariables
+    // We create the finite volume grid geometry from the (leaf) grid view,
+    // the problem for the boundary conditions, a solution vector, and a grid variables instance.
     auto gridGeometry = std::make_shared<GridGeometry>(gridManager.grid().leafGridView());
     auto problem = std::make_shared<Problem>(gridGeometry);
     auto sol = createInitialSolution<SolutionVector>(*gridGeometry);
     auto gridVariables = std::make_shared<GridVariables>(problem, gridGeometry);
     gridVariables->init(sol);
 
-    // initialize the vtk output and write out the initial concentration field
+    // We initialize the VTK output module and write out the initial concentration field.
     VtkOutputModule<GridVariables, SolutionVector> vtkWriter(*gridVariables, sol, problem->name());
     vtkWriter.addVolumeVariable([](const auto& vv){ return vv.priVar(0); }, "c");
     vtkWriter.write(0.0);
 
-    // instantiate time loop
+    // We instantiate the time loop using the start and end times as well as
+    // the time step size from the parameter tree (`params.input`).
     auto timeLoop = std::make_shared<CheckPointTimeLoop<Scalar>>(
         getParam<Scalar>("TimeLoop.TStart", 0.0),
         getParam<Scalar>("TimeLoop.Dt"),
         getParam<Scalar>("TimeLoop.TEnd")
     );
 
-    // Choose the type of assembler, linear solver and PDE solver
+    // Next, we choose the type of assembler, linear solver and PDE solver
+    // and construct instances of these classes.
     using Assembler = FVAssembler<TypeTag, DiffMethod::numeric>;
     using LinearSolver = SSORCGIstlSolver<LinearSolverTraits<GridGeometry>, LinearAlgebraTraitsFromAssembler<Assembler>>;
     using Solver = Dumux::LinearPDESolver<Assembler, LinearSolver>;
 
-    // Construct assembler, linear solver and PDE solver
     auto oldSol = sol; // copy the vector to store state of previous time step
     auto assembler = std::make_shared<Assembler>(problem, gridGeometry, gridVariables, timeLoop, oldSol);
     auto linearSolver = std::make_shared<LinearSolver>(gridGeometry->gridView(), gridGeometry->dofMapper());
     Solver solver(assembler, linearSolver);
 
-    // Assemble the system matrix once and then tell the solver to reuse in every time step
-    // Since we have a linear problem, the matrix is not going to change
+    // We tell the assembler to create the system matrix and vector objects. Then we
+    // assemble the system matrix once and tell the solver to reuse it in every time step.
+    // Since we have a linear problem, the matrix is not going to change.
     assembler->setLinearSystem();
     assembler->assembleJacobian(sol);
     solver.reuseMatrix();
 
-    // time loop
+    // The time loop is where most of the actual computations happen.
+    // We assemble and solve the linear system of equations, update the solution,
+    // write the solution to a VTK file, and continue until we reach the
+    // final simulation time.
+    //
+    // [[codeblock]]
     timeLoop->start(); do
     {
         // assemble & solve
@@ -228,3 +336,5 @@ int main(int argc, char** argv)
 
     return 0;
 }
+// [[/codeblock]]
+// [[/content]]
diff --git a/examples/diffusion/model.hh b/examples/diffusion/model.hh
index 7b1d67e75815ed684a12ee9b11061880ce577197..faa06b36542c9e4b9435ccd9742236a24fc7519e 100644
--- a/examples/diffusion/model.hh
+++ b/examples/diffusion/model.hh
@@ -20,27 +20,53 @@
 #ifndef DUMUX_EXAMPLES_DIFFUSION_MODEL_HH
 #define DUMUX_EXAMPLES_DIFFUSION_MODEL_HH
 
+// In the file `model.hh`, we define the model equations and
+// set all default model properties. The setup consists of three steps:
+// 1. Create a model type tag (used to specialize properties)
+// 2. Define the local residual class implementing the discrete equation
+// 3. Specialize important properties of the model such that Dumux knows how to assemble the system matrix
+//
+// [TOC]
+//
+// We start `model.hh` with the necessary header includes:
+// [[details]] includes
 #include <dumux/common/math.hh>
 #include <dumux/common/properties.hh>
 #include <dumux/common/numeqvector.hh>
 #include <dumux/common/volumevariables.hh>
-
 #include <dumux/discretization/method.hh>
-
-namespace Dumux {
-
-namespace Properties::TTag {
+// [[/details]]
+//
+// ## 1. Property Tag
+//
+// The property tag is simply an empty struct with the name `DiffusionModel`
+//
+// [[content]]
+// [[codeblock]]
+namespace Dumux::Properties::TTag {
 //! The diffusion model tag that we can specialize properties for
 struct DiffusionModel {};
-} // end namespace Properties::TTag
-
-/*!
- * \brief The local residual of the diffusion model
- */
+} // end namespace Dumux::Properties::TTag
+// [[/codeblock]]
+// [[/content]]
+//
+// ## 2. The local (element-wise) residual
+//
+// The local residual assembles the contribution to the residual for
+// all degrees of freedom associated with an element. Here, we use the
+// Box method which is based on $P_1$ basis functions (piece-wise linears)
+// and the degrees of freedom are on the nodes. Each node is associated with
+// exactly one sub control volume (`scv`) per element and several ($2$ in $\mathbb{R}^2$)
+// sub control volume faces (`scvf`). In the local residual, we can implement the
+// contribution for one `scv` (storage and source terms) or one `scvf` (flux terms).
+//
+// [[content]]
+namespace Dumux {
 template<class TypeTag>
 class DiffusionModelLocalResidual
 : public GetPropType<TypeTag, Properties::BaseLocalResidual>
 {
+    // [[exclude]]
     // the base local residual is selected depending on the chosen discretization scheme
     using ParentType = GetPropType<TypeTag, Properties::BaseLocalResidual>;
     using Scalar = GetPropType<TypeTag, Properties::Scalar>;
@@ -65,10 +91,10 @@ class DiffusionModelLocalResidual
 
 public:
     using ParentType::ParentType;
-
-    /*!
-     * \brief Evaluate the rate of change of all conserved quantities
-     */
+    // [[/exclude]]
+    //
+    // **Storage term:** Evaluate the rate of change of all conserved quantities
+    // [[codeblock]]
     NumEqVector computeStorage(const Problem& problem,
                                const SubControlVolume& scv,
                                const VolumeVariables& volVars) const
@@ -77,11 +103,16 @@ public:
         storage[Indices::massBalanceEqIdx] = volVars.priVar(Indices::concentrationIdx);
         return storage;
     }
-
-    /*!
-     * \brief Evaluate the fluxes over a face of a sub control volume
-     * Here we evaluate the flow rate, F = -D∇c·n A
-     */
+    // [[/codeblock]]
+
+    // **Flux term:** Evaluate the fluxes over a face of a sub control volume
+    // Here we evaluate the (integrated) flux
+    //
+    // ```math
+    // F = -D \sum_{B \in \mathcal{B}_K} \left( c_{h,B} \nabla N_B \right) \cdot\boldsymbol{n} \vert \sigma \vert
+    // ```
+    //
+    // [[codeblock]]
     NumEqVector computeFlux(const Problem& problem,
                             const Element& element,
                             const FVElementGeometry& fvGeometry,
@@ -89,15 +120,16 @@ public:
                             const SubControlVolumeFace& scvf,
                             const ElementFluxVariablesCache& elemFluxVarsCache) const
     {
-        static_assert(DiscretizationMethods::isCVFE<GridGeometry::DiscretizationMethod>
+        static_assert(DiscretizationMethods::isCVFE<typename GridGeometry::DiscretizationMethod>,
             "This local residual is hard-coded to control-volume finite element schemes");
 
-        // compute ∇c at the intergration point of this sub control volume face
+        // Compute ∇c at the integration point of this sub control volume face.
         const auto& fluxVarCache = elemFluxVarsCache[scvf];
         Dune::FieldVector<Scalar, dimWorld> gradConcentration(0.0);
         for (const auto& scv : scvs(fvGeometry))
         {
             const auto& volVars = elemVolVars[scv];
+            // v.axpy(a, w) means v <- v + a*w
             gradConcentration.axpy(
                 volVars.priVar(Indices::concentrationIdx),
                 fluxVarCache.gradN(scv.indexInElement())
@@ -106,7 +138,8 @@ public:
 
         NumEqVector flux;
 
-        // compute the flux as (v^T M v) or -n^T D ∇c A
+        // Compute the flux with `vtmv` (vector transposed times matrix times vector) or -n^T D ∇c A.
+        // The diffusion coefficient comes from the `problem` (see Part II of the example).
         flux[Indices::massBalanceEqIdx] = -1.0*vtmv(
             scvf.unitOuterNormal(), problem.diffusionCoefficient(), gradConcentration
         )*scvf.area();
@@ -114,32 +147,37 @@ public:
         return flux;
     }
 };
-
 } // end namespace Dumux
-
+// [[/codeblock]]
+// [[/content]]
+//
+// ## 3. The model properties
+//
+// By specializing properties for our type tag `DiffusionModel`,
+// every other class that knows about the type tag (for example,
+// the assembler or the problem) can extract the
+// type information that we specify here.
+//
+// Note that these types can be overwritten for specific problem
+// definitions if this is needed (we will show this on the next page).
+//
+// [[content]]
 namespace Dumux::Properties {
 
-//! set the local residual to the one defined above
+// The type of the local residual is the class defined above
 template<class TypeTag>
 struct LocalResidual<TypeTag, TTag::DiffusionModel>
 { using type = DiffusionModelLocalResidual<TypeTag>; };
 
-//! Set the default type of scalar values to double
+// The default scalar type is double:
+// we compute with double-precision floating point numbers.
 template<class TypeTag>
-struct Scalar<TypeTag, TTag:: DiffusionModel >
+struct Scalar<TypeTag, TTag::DiffusionModel>
 { using type = double; };
 
-//! Set the default primary variable vector to a vector of size of number of equations
-template<class TypeTag>
-struct PrimaryVariables<TypeTag, TTag:: DiffusionModel >
-{
-    using type = Dune::FieldVector<
-        GetPropType<TypeTag, Properties::Scalar>,
-        GetPropType<TypeTag, Properties::ModelTraits>::numEq()
-    >;
-};
-
-//! Set the model traits property
+// The model traits specify some information about our equation system.
+// Here we have just one equation. We still specify indices so that, in the
+// places where we access primary variables, we can do so with a named variable.
 template<class TypeTag>
 struct ModelTraits<TypeTag, TTag::DiffusionModel>
 {
@@ -155,7 +193,20 @@ struct ModelTraits<TypeTag, TTag::DiffusionModel>
     };
 };
 
-//! Set the volume variables property
+// The primary variable vector has entries of type `Scalar` and is
+// as large as the number of equations (here 1), but we keep the definition general.
+template<class TypeTag>
+struct PrimaryVariables<TypeTag, TTag::DiffusionModel>
+{
+    using type = Dune::FieldVector<
+        GetPropType<TypeTag, Properties::Scalar>,
+        GetPropType<TypeTag, Properties::ModelTraits>::numEq()
+    >;
+};
+
+// The `BasicVolumeVariables` are the simplest volume variables.
+// They only store one instance of `PrimaryVariables` for the
+// degree of freedom (here: vertex dof) that they are attached to.
 template<class TypeTag>
 struct VolumeVariables<TypeTag, TTag::DiffusionModel>
 {
@@ -168,5 +219,7 @@ struct VolumeVariables<TypeTag, TTag::DiffusionModel>
 };
 
 } // end namespace Dumux::Properties
-
+// [[/content]]
+// [[exclude]]
 #endif
+// [[/exclude]]