From 2bb26cfc486ce6aab620d4ca0b46bf8f41c56c25 Mon Sep 17 00:00:00 2001
From: Ashwin Krishna Kumar <nebulousmagneticwind@outlook.com>
Date: Thu, 25 Jun 2026 16:39:16 +0530
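Subject: [PATCH 1/2] Allow bench workflow to run on PRs from forks

- checklist_comment_on_new_pr.yml: trigger on pull_request_target instead
  of pull_request.
- run-bench.yml: add the workflow file itself to the trigger path list,
  benchmark github.ref instead of github.head_ref for pull requests, and
  switch the dataset argument of one AutoBenchYAML invocation from
  dpr-gemma-1m to openai-1536-1m.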
---
 .github/workflows/checklist_comment_on_new_pr.yml | 3 ++-
 .github/workflows/run-bench.yml                   | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/checklist_comment_on_new_pr.yml b/.github/workflows/checklist_comment_on_new_pr.yml
index fda81de21..cb287ef05 100644
--- a/.github/workflows/checklist_comment_on_new_pr.yml
+++ b/.github/workflows/checklist_comment_on_new_pr.yml
@@ -1,6 +1,7 @@
 name: Comment on new Pull Request with checklist
 on:
-  pull_request:
+  # Runs in the base-repo context; safe only while this workflow never checks out or runs PR-branch code
+  pull_request_target:
     types: opened
 
 jobs:
diff --git a/.github/workflows/run-bench.yml b/.github/workflows/run-bench.yml
index 1b9533cad..26a0eb8d3 100644
--- a/.github/workflows/run-bench.yml
+++ b/.github/workflows/run-bench.yml
@@ -24,6 +24,7 @@ on:
       - '**/src/main/java/**'
       - 'pom.xml'
       - '**/pom.xml'
+      - '.github/workflows/run-bench.yml'
 
 jobs:
   # Job to generate the matrix configuration
@@ -41,8 +42,8 @@ jobs:
 
           # Default branches based on event type
           if [[ "${{ github.event_name }}" == "pull_request" ]]; then
-            echo "Pull request detected. Using main and PR branch: ${{ github.head_ref }}"
-            BRANCHES='["main", "${{ github.head_ref }}"]'
+            echo "Pull request detected. Using main and PR ref: ${{ github.ref }}"
+            BRANCHES='["main", "${{ github.ref }}"]'
           elif [[ "${{ github.event_name }}" == "workflow_dispatch" && -n "${{ github.event.inputs.branches }}" ]]; then
             # Parse space-separated branches input into JSON array
             echo "Workflow dispatch with branches input detected"
@@ -213,7 +214,7 @@ jobs:
             java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
               ${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
               -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \
-              -cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG} dpr-gemma-1m
+              -cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG} openai-1536-1m
           else
             java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
               ${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \

From f71704e2d8d4394e4efd73c42ce2a7c49a087952 Mon Sep 17 00:00:00 2001
From: Ashwin Krishna Kumar <nebulousmagneticwind@outlook.com>
Date: Thu, 25 Jun 2026 17:35:13 +0530
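Subject: [PATCH 2/2] Allow AutoBenchYAML to run non-regression datasets on
 demand

- AutoBenchYAML: add a --match-all-datasets flag. When it is given, the
  dataset patterns are matched against datasetCollection.getAll() instead
  of only the regression-test section.
- AutoBenchYAML: scan every argument (previously all but the last) so a
  trailing flag is recognized, and throw if no dataset matches.
- run-bench.yml: pass --match-all-datasets to the openai-1536-1m run.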
---
 .github/workflows/run-bench.yml               |  3 ++-
 .../jvector/example/AutoBenchYAML.java        | 20 +++++++++++++------
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/run-bench.yml b/.github/workflows/run-bench.yml
index 26a0eb8d3..8317f43c6 100644
--- a/.github/workflows/run-bench.yml
+++ b/.github/workflows/run-bench.yml
@@ -214,7 +214,8 @@ jobs:
             java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
               ${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
               -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \
-              -cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG} openai-1536-1m
+              -cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML \
+              --match-all-datasets --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG} openai-1536-1m
           else
             java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \
               ${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \
diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java
index 3f5be3d7d..903121fda 100644
--- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java
+++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java
@@ -76,17 +76,20 @@ public static void main(String[] args) throws IOException {
         String finalOutputPath = outputPath;
         String configPath = null;
         int diagnostic_level = 0;
-        for (int i = 0; i < args.length - 1; i++) {
-            if (args[i].equals("--config")) configPath = args[i+1];
-            if (args[i].equals("--diag")) diagnostic_level = Integer.parseInt(args[i+1]);
+        boolean matchAllDatasets = false;
+        for (int i = 0; i < args.length; i++) {
+            if (i < args.length - 1 && args[i].equals("--config")) configPath = args[i+1];
+            if (i < args.length - 1 && args[i].equals("--diag")) diagnostic_level = Integer.parseInt(args[i+1]);
+            if (args[i].equals("--match-all-datasets")) matchAllDatasets = true;
         }
         if (diagnostic_level > 0) {
             Grid.setDiagnosticLevel(diagnostic_level);
         }
         String finalConfigPath = configPath;
         String[] filteredArgs = Arrays.stream(args)
-                .filter(arg -> !arg.equals("--output") && !arg.equals(finalOutputPath) && 
-                               !arg.equals("--config") && !arg.equals(finalConfigPath))
+                .filter(arg -> !arg.equals("--output") && !arg.equals(finalOutputPath) &&
+                               !arg.equals("--config") && !arg.equals(finalConfigPath) &&
+                               !arg.equals("--match-all-datasets"))
                 .toArray(String[]::new);
 
         // Log the filtered arguments for debugging
@@ -100,7 +103,12 @@ public static void main(String[] args) throws IOException {
         var pattern = Pattern.compile(regex);
 
         var datasetCollection = DatasetCollection.load();
-        var datasetNames = datasetCollection.getSection(REGRESSION_TEST_KEY).stream().filter(dn -> pattern.matcher(dn).find()).collect(Collectors.toList());
+        var candidateDatasets = matchAllDatasets ? datasetCollection.getAll() : datasetCollection.getSection(REGRESSION_TEST_KEY);
+        var datasetNames = candidateDatasets.stream().filter(dn -> pattern.matcher(dn).find()).collect(Collectors.toList());
+
+        if (datasetNames.size() == 0) {
+            throw new RuntimeException("No datasets matched the given patterns, nothing to do");
+        }
 
         logger.info("Executing the following datasets: {}", datasetNames);
         List<BenchResult> results = new ArrayList<>();