diff --git a/changelog-entries/849.md b/changelog-entries/849.md
new file mode 100644
index 000000000..67f9fbbc8
--- /dev/null
+++ b/changelog-entries/849.md
@@ -0,0 +1 @@
+- Added optional `run-before` and `run-after` hooks in `tests.yaml` to run shell commands before and after the simulation of a system test ([#849](https://github.com/precice/tutorials/pull/849)).
diff --git a/tools/tests/README.md b/tools/tests/README.md
index 5349e800a..d94d9529c 100644
--- a/tools/tests/README.md
+++ b/tools/tests/README.md
@@ -125,6 +125,12 @@ The available cases are listed in the `metadata.yaml` of each tutorial. To add a
 
 Use the `max_time` or `max_time_windows` parameters to restrict the runtime of the test to the first few coupling time windows, to save time. Aim for a runtime of less than a minute (assuming cached components), if possible.
 
+Some tutorials require setup before the simulation (e.g., switching configuration files). Use the optional `run-before` and `run-after` fields of a test case in `tests.yaml` to run a shell command in the copied tutorial directory: `run-before` runs right after copying the tutorial and before the Docker build, while `run-after` runs after the simulation and before the field comparison. If a hook exits with a non-zero code, the test is marked as failed. Example:
+
+```yaml
+run-before: ./set-case.sh 1d3d
+```
+
 You will need to define a reference results file. The reference results can and should be generated on GitHub using the [Generate reference results (manual)](https://github.com/precice/tutorials/actions/workflows/generate-reference-results-manual.yml) workflow for the respective test suite. You might want to temporarily set the `selected` test suite for requesting results only for a subset of test cases.
 
 Note that you will need to define the `TUTORIALS_REF` in the file [`reference_versions.yaml`](https://github.com/precice/tutorials/actions/workflows/generate-reference-results-manual.yml) to match the respective branch. Restore that to `develop` after that. See a [related issue](https://github.com/precice/tutorials/issues/844).
diff --git a/tools/tests/generate_reference_results.py b/tools/tests/generate_reference_results.py
index 108d47726..b52486b75 100644
--- a/tools/tests/generate_reference_results.py
+++ b/tools/tests/generate_reference_results.py
@@ -172,13 +172,20 @@ def main():
             max_times = test_suite.max_times.get(tutorial, [])
             mtw_list = test_suite.max_time_windows.get(tutorial, [])
             timeouts = test_suite.timeouts.get(tutorial, [])
+            run_befores = test_suite.run_befores.get(tutorial, [])
+            run_afters = test_suite.run_afters.get(tutorial, [])
             for i, (case, reference_result) in enumerate(zip(
                     test_suite.cases_of_tutorial[tutorial], test_suite.reference_results[tutorial])):
                 max_time = max_times[i] if i < len(max_times) else None
                 max_time_windows = mtw_list[i] if i < len(mtw_list) else None
                 timeout = timeouts[i] if i < len(timeouts) and timeouts[i] is not None else GLOBAL_TIMEOUT
+                run_before = run_befores[i] if i < len(run_befores) else None
+                run_after = run_afters[i] if i < len(run_afters) else None
                 systemtests_to_run.add(
-                    Systemtest(tutorial, build_args, case, reference_result, max_time=max_time, max_time_windows=max_time_windows, timeout=timeout))
+                    Systemtest(
+                        tutorial, build_args, case, reference_result,
+                        max_time=max_time, max_time_windows=max_time_windows, timeout=timeout,
+                        run_before=run_before, run_after=run_after))
 
     reference_result_per_tutorial = {}
     current_time_string = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
diff --git a/tools/tests/systemtests.py b/tools/tests/systemtests.py
index 61c8273bc..296b552d0 100644
--- a/tools/tests/systemtests.py
+++ b/tools/tests/systemtests.py
@@ -91,6 +91,8 @@ def _group_end() -> None:
                 timeouts = test_suite.timeouts.get(tutorial, [])
                 tolerances = test_suite.tolerances.get(tutorial, [])
                 skip_compares = test_suite.skip_compares.get(tutorial, [])
+                run_befores = test_suite.run_befores.get(tutorial, [])
+                run_afters = test_suite.run_afters.get(tutorial, [])
                 for i, (case, reference_result) in enumerate(zip(
                         test_suite.cases_of_tutorial[tutorial], test_suite.reference_results[tutorial])):
                     max_time = max_times[i] if i < len(max_times) else None
@@ -100,8 +102,14 @@ def _group_end() -> None:
                         tolerances) and tolerances[i] is not None else DEFAULT_FIELDCOMPARE_RTOL
                     skip_compare = skip_compares[i] if i < len(
                         skip_compares) and skip_compares[i] is not None else False
+                    run_before = run_befores[i] if i < len(run_befores) else None
+                    run_after = run_afters[i] if i < len(run_afters) else None
                     systemtests_to_run.append(
-                        Systemtest(tutorial, build_args, case, reference_result, max_time=max_time, max_time_windows=max_time_windows, timeout=timeout, tolerance=tolerance, skip_compare=skip_compare))
+                        Systemtest(
+                            tutorial, build_args, case, reference_result,
+                            max_time=max_time, max_time_windows=max_time_windows, timeout=timeout,
+                            tolerance=tolerance, skip_compare=skip_compare,
+                            run_before=run_before, run_after=run_after))
 
     if not systemtests_to_run:
         raise RuntimeError("Did not find any Systemtests to execute.")
diff --git a/tools/tests/systemtests/Systemtest.py b/tools/tests/systemtests/Systemtest.py
index d65260553..a0dc0da18 100644
--- a/tools/tests/systemtests/Systemtest.py
+++ b/tools/tests/systemtests/Systemtest.py
@@ -226,6 +226,8 @@ class Systemtest:
     timeout: int = GLOBAL_TIMEOUT
     tolerance: float = DEFAULT_FIELDCOMPARE_RTOL
     skip_compare: bool = False
+    run_before: str | None = None
+    run_after: str | None = None
     params_to_use: Dict[str, str] = field(init=False)
     env: Dict[str, str] = field(init=False)
 
@@ -945,11 +947,41 @@ def __apply_max_time_override(self):
                 logging.info(f"Overwrote <max-time-windows> to {self.max_time_windows} in {config_path}")
         config_path.write_text(new_text)
 
+    def _run_hook(self, stage: str, command: str | None) -> bool:
+        """
+        Run a hook shell command (e.g. run-before / run-after) in the copied tutorial directory.
+
+        `stage` is only used to label the log messages. The command is executed through the shell
+        with stdout and stderr captured. Returns True if `command` is empty/None or exits with
+        code 0; otherwise the failure is logged (including the captured output) and False is returned.
+        """
+        if not command:
+            return True
+        logging.info(f"Running {stage} for {self}: {command}")
+        try:
+            result = subprocess.run(command, shell=True, cwd=self.system_test_dir,
+                                    capture_output=True, text=True, start_new_session=True)
+        except Exception as e:
+            logging.critical(f"Failed to start {stage} for {self}: {e}")
+            return False
+        hook_output = ((result.stdout or '') + (result.stderr or '')).rstrip()
+        if result.returncode != 0:
+            # Surface the captured output as well: it is usually the only hint why the hook failed.
+            logging.critical(
+                f"{stage} for {self} failed with exit code {result.returncode}: {command}"
+                + (f"\n{hook_output}" if hook_output else ""))
+            return False
+        if hook_output:
+            logging.debug(f"{stage} output for {self}:\n{hook_output}")
+        return True
+
     def __prepare_for_run(self, run_directory: Path):
         """
         Prepares the run_directory with folders and datastructures needed for every systemtest execution
         """
         self.__copy_tutorial_into_directory(run_directory)
+        if not self._run_hook('run-before', self.run_before):
+            raise RuntimeError(f"run-before hook failed for {self}")
         self.__apply_max_time_override()
         self.__copy_tools(run_directory)
         self.__put_gitignore(run_directory)
@@ -961,7 +993,12 @@ def run(self, run_directory: Path):
         """
         Runs the system test by generating the Docker Compose file, copying everything into a run folder, and executing docker-compose up.
         """
-        self.__prepare_for_run(run_directory)
+        try:
+            self.__prepare_for_run(run_directory)
+        except RuntimeError as e:
+            logging.critical(str(e))
+            return SystemtestResult(False, [], [str(e)], self, build_time=0, solver_time=0, fieldcompare_time=0)
+
         self.__init_run_logs()
         std_out: List[str] = []
         std_err: List[str] = []
@@ -995,6 +1032,17 @@ def run(self, run_directory: Path):
                 solver_time=docker_run_result.runtime,
                 fieldcompare_time=0)
 
+        if not self._run_hook('run-after', self.run_after):
+            logging.critical(f"run-after hook failed for {self}")
+            return SystemtestResult(
+                False,
+                std_out,
+                std_err,
+                self,
+                build_time=docker_build_result.runtime,
+                solver_time=docker_run_result.runtime,
+                fieldcompare_time=0)
+
         if self.skip_compare:
             logging.info(f"Skipping fieldcompare for {self} (skip_compare=true)")
             fieldcompare_time = 0.0
@@ -1027,7 +1075,7 @@ def run(self, run_directory: Path):
                 self,
                 build_time=docker_build_result.runtime,
                 solver_time=docker_run_result.runtime,
-                fieldcompare_time=fieldcompare_result.runtime)
+                fieldcompare_time=fieldcompare_time)
 
         # self.__cleanup()
         self._cleanup_docker_networks()
@@ -1044,7 +1092,12 @@ def run_for_reference_results(self, run_directory: Path):
         """
         Runs the system test by generating the Docker Compose files to generate the reference results
         """
-        self.__prepare_for_run(run_directory)
+        try:
+            self.__prepare_for_run(run_directory)
+        except RuntimeError as e:
+            logging.critical(str(e))
+            return SystemtestResult(False, [], [str(e)], self, build_time=0, solver_time=0, fieldcompare_time=0)
+
         self.__init_run_logs()
         std_out: List[str] = []
         std_err: List[str] = []
@@ -1077,6 +1130,17 @@ def run_for_reference_results(self, run_directory: Path):
                 solver_time=docker_run_result.runtime,
                 fieldcompare_time=0)
 
+        if not self._run_hook('run-after', self.run_after):
+            logging.critical(f"run-after hook failed for {self}")
+            return SystemtestResult(
+                False,
+                std_out,
+                std_err,
+                self,
+                build_time=docker_build_result.runtime,
+                solver_time=docker_run_result.runtime,
+                fieldcompare_time=0)
+
         self._cleanup_docker_networks()
         return SystemtestResult(
             True,
@@ -1091,7 +1155,12 @@ def run_only_build(self, run_directory: Path):
         """
         Runs only the build commmand, for example to preheat the caches of the docker builder.
         """
-        self.__prepare_for_run(run_directory)
+        try:
+            self.__prepare_for_run(run_directory)
+        except RuntimeError as e:
+            logging.critical(str(e))
+            return SystemtestResult(False, [], [str(e)], self, build_time=0, solver_time=0, fieldcompare_time=0)
+
         self.__init_run_logs()
         std_out: List[str] = []
         std_err: List[str] = []
diff --git a/tools/tests/systemtests/TestSuite.py b/tools/tests/systemtests/TestSuite.py
index a561e9ddf..e963a2f52 100644
--- a/tools/tests/systemtests/TestSuite.py
+++ b/tools/tests/systemtests/TestSuite.py
@@ -15,6 +15,8 @@ class TestSuite:
     timeouts: Dict[Tutorial, List] = field(default_factory=dict)
     tolerances: Dict[Tutorial, list] = field(default_factory=dict)
     skip_compares: Dict[Tutorial, list] = field(default_factory=dict)
+    run_befores: Dict[Tutorial, List] = field(default_factory=dict)
+    run_afters: Dict[Tutorial, List] = field(default_factory=dict)
 
     def __repr__(self) -> str:
         return_string = f"Test suite: {self.name} contains:"
@@ -58,6 +60,8 @@ def from_yaml(cls, path, parsed_tutorials: Tutorials):
                 timeouts_of_tutorial = {}
                 tolerances_of_tutorial = {}
                 skip_compares_of_tutorial = {}
+                run_befores_of_tutorial = {}
+                run_afters_of_tutorial = {}
                 # iterate over tutorials:
                 for tutorial_case in test_suites_raw[test_suite_name]['tutorials']:
                     tutorial = parsed_tutorials.get_by_path(tutorial_case['path'])
@@ -72,6 +76,8 @@ def from_yaml(cls, path, parsed_tutorials: Tutorials):
                         timeouts_of_tutorial[tutorial] = []
                         tolerances_of_tutorial[tutorial] = []
                         skip_compares_of_tutorial[tutorial] = []
+                        run_befores_of_tutorial[tutorial] = []
+                        run_afters_of_tutorial[tutorial] = []
 
                     all_case_combinations = tutorial.case_combinations
                     case_combination_requested = CaseCombination.from_string_list(
@@ -119,12 +125,31 @@ def from_yaml(cls, path, parsed_tutorials: Tutorials):
                                 f"in tutorial '{tutorial}'."
                             )
                         skip_compares_of_tutorial[tutorial].append(skip_compare_value)
+
+                        run_before_raw = tutorial_case.get('run-before', None)
+                        run_after_raw = tutorial_case.get('run-after', None)
+                        run_befores_of_tutorial[tutorial].append(
+                            run_before_raw.strip()
+                            if isinstance(run_before_raw, str) and run_before_raw.strip() else None)
+                        run_afters_of_tutorial[tutorial].append(
+                            run_after_raw.strip()
+                            if isinstance(run_after_raw, str) and run_after_raw.strip() else None)
                     else:
                         raise Exception(
                             f"Could not find the case combination {tutorial_case['case_combination']} in the current metadata of tutorial {tutorial.name}, or it does not define all necessary participants.")
 
-                testsuites.append(TestSuite(test_suite_name, case_combinations_of_tutorial,
-                                            reference_results_of_tutorial, max_times_of_tutorial, max_time_windows_of_tutorial, timeouts_of_tutorial, tolerances_of_tutorial, skip_compares_of_tutorial))
+                testsuites.append(TestSuite(
+                    test_suite_name,
+                    case_combinations_of_tutorial,
+                    reference_results_of_tutorial,
+                    max_times_of_tutorial,
+                    max_time_windows_of_tutorial,
+                    timeouts_of_tutorial,
+                    tolerances_of_tutorial,
+                    skip_compares_of_tutorial,
+                    run_befores_of_tutorial,
+                    run_afters_of_tutorial,
+                ))
 
         return cls(testsuites)
 
diff --git a/tools/tests/tests.yaml b/tools/tests/tests.yaml
index 4901ecfa0..d7a69a98f 100644
--- a/tools/tests/tests.yaml
+++ b/tools/tests/tests.yaml
@@ -396,6 +396,7 @@ test_suites:
         case_combination:
           - fluid1d-left-nutils
           - fluid3d-right-openfoam
+        run-before: ./set-case.sh 1d3d
         max_time: 0.05
         reference_result: ./partitioned-pipe-multiscale/reference-results/fluid1d-left-nutils_fluid3d-right-openfoam.tar.gz
       # More case combinations are possible, but they requite calling set-case.sh
@@ -549,6 +550,7 @@ test_suites:
         case_combination:
           - fluid1d-left-nutils
           - fluid3d-right-openfoam
+        run-before: ./set-case.sh 1d3d
         max_time: 0.05
         reference_result: ./water-hammer/reference-results/fluid1d-left-nutils_fluid3d-right-openfoam.tar.gz
       # More case combinations are possible, but they requite calling set-case.sh