From a8ec287fa55d4ebf8c489275ae1ef1389c10d10a Mon Sep 17 00:00:00 2001 From: Ken VanDine Date: Thu, 11 Jun 2026 15:29:38 -0400 Subject: [PATCH 1/4] ci: add sm_121 (GB10) CUDA arch for x86 and arm64 - Add sm_121 to ubuntu-latest-cuda and windows-latest-cuda matrices - Add new ubuntu-arm64-cuda job targeting ubuntu-24.04-arm runner with sm_121 - Use CUDA sbsa (ARM64) package repo for arm64 CUDA Toolkit install - Add ubuntu-arm64-cuda to release job needs Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/build.yml | 97 ++++++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 025b07b75..31bb95eb2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -109,7 +109,7 @@ jobs: matrix: # On PRs only build one representative arch (packaging logic is identical # across all sm_*); build the full matrix on schedule/dispatch. - sm: ${{ github.event_name == 'pull_request' && fromJSON('["sm_89"]') || fromJSON('["sm_75", "sm_80", "sm_86", "sm_89", "sm_90", "sm_100", "sm_120"]') }} + sm: ${{ github.event_name == 'pull_request' && fromJSON('["sm_89"]') || fromJSON('["sm_75", "sm_80", "sm_86", "sm_89", "sm_90", "sm_100", "sm_120", "sm_121"]') }} steps: - name: Clone @@ -210,7 +210,7 @@ jobs: matrix: # On PRs only build one representative arch (packaging logic is identical # across all sm_*); build the full matrix on schedule/dispatch. 
- sm: ${{ github.event_name == 'pull_request' && fromJSON('["sm_89"]') || fromJSON('["sm_75", "sm_80", "sm_86", "sm_89", "sm_90", "sm_100", "sm_120"]') }} + sm: ${{ github.event_name == 'pull_request' && fromJSON('["sm_89"]') || fromJSON('["sm_75", "sm_80", "sm_86", "sm_89", "sm_90", "sm_100", "sm_120", "sm_121"]') }} steps: - name: Clone @@ -291,6 +291,98 @@ jobs: name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip + ubuntu-arm64-cuda: + # GB10 (sm_121) ARM64 Linux build + runs-on: ubuntu-24.04-arm + + strategy: + fail-fast: false + matrix: + # sm_121 targets the GB10 (Blackwell) ARM64 device + sm: ${{ github.event_name == 'pull_request' && fromJSON('["sm_121"]') || fromJSON('["sm_121"]') }} + + steps: + - name: Clone + uses: actions/checkout@v6 + with: + submodules: recursive + fetch-depth: 0 + repository: 'leejet/stable-diffusion.cpp' + ref: master + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-arm64-cuda-${{ matrix.sm }} + evict-old-files: 1d + + - name: Install CUDA Toolkit + run: | + wget -q https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/sbsa/cuda-keyring_1.1-1_all.deb + sudo dpkg -i cuda-keyring_1.1-1_all.deb + sudo apt-get update + sudo apt-get install -y cuda-toolkit-12-8 cmake ninja-build patchelf + + - name: Set CUDA environment + run: | + echo "CUDA_PATH=/usr/local/cuda" >> "$GITHUB_ENV" + echo "/usr/local/cuda/bin" >> "$GITHUB_PATH" + echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH:-}" >> "$GITHUB_ENV" + + - name: Build + id: cmake_build + run: | + cmake_arch="${{ matrix.sm }}" + cmake_arch="${cmake_arch#sm_}" + cmake -B build -S . 
\ + -DSD_CUBLAS=ON \ + -DGGML_CUDA=ON \ + -DCMAKE_CUDA_ARCHITECTURES="${cmake_arch}" \ + -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \ + -DGGML_NATIVE=OFF \ + -DSD_BUILD_SHARED_LIBS=ON \ + -DCMAKE_BUILD_TYPE=Release + cmake --build build --config Release -j $(nproc) + + - name: Bundle CUDA runtime libraries + run: | + cuda_lib=/usr/local/cuda/lib64 + cp -av ${cuda_lib}/libcudart.so* build/bin/ + cp -av ${cuda_lib}/libcublas.so* build/bin/ + cp -av ${cuda_lib}/libcublasLt.so* build/bin/ + cp -av ${cuda_lib}/libcurand.so* build/bin/ + cp -av ${cuda_lib}/libnvJitLink.so* build/bin/ + find build -name 'libggml*.so*' ! -path 'build/bin/*' -exec cp -av {} build/bin/ \; + + - name: Set RPATH for portable distribution + run: | + for f in build/bin/*; do + [ -f "$f" ] && ! [ -L "$f" ] || continue + if file "$f" | grep -q 'ELF'; then + patchelf --set-rpath '$ORIGIN' "$f" + fi + done + + - name: Get commit hash + id: commit + if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} + uses: prompt/actions-commit-hash@v2 + + - name: Pack artifacts + id: pack_artifacts + if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} + run: | + cp ggml/LICENSE ./build/bin/ggml.txt + cp LICENSE ./build/bin/stable-diffusion.cpp.txt + tar -cJf sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-ubuntu-cuda-${{ matrix.sm }}-arm64.tar.xz -C ./build/bin . 
+ + - name: Upload artifacts + if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-ubuntu-cuda-${{ matrix.sm }}-arm64.tar.xz + path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-ubuntu-cuda-${{ matrix.sm }}-arm64.tar.xz + windows-latest-cmake: runs-on: windows-2022 @@ -840,6 +932,7 @@ jobs: - ubuntu-latest-rocm - ubuntu-latest-cmake - ubuntu-latest-cuda + - ubuntu-arm64-cuda - windows-latest-cmake-hip - windows-latest-rocm - windows-latest-cmake From f19563e9e3f1cde9b27e9cfa0908fbda52904657 Mon Sep 17 00:00:00 2001 From: Ken VanDine Date: Thu, 11 Jun 2026 16:15:03 -0400 Subject: [PATCH 2/4] ci: upgrade CUDA to 12.9 for sm_121 support sm_121 (GB10/Blackwell) requires CUDA 12.9. Update ubuntu-latest-cuda, windows-latest-cuda, and ubuntu-arm64-cuda jobs accordingly. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 31bb95eb2..30cf9ef6d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -138,7 +138,7 @@ jobs: wget -q https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb sudo dpkg -i cuda-keyring_1.1-1_all.deb sudo apt-get update - sudo apt-get install -y cuda-toolkit-12-8 cmake ninja-build patchelf + sudo apt-get install -y cuda-toolkit-12-9 cmake ninja-build patchelf - name: Set CUDA environment run: | @@ -225,7 +225,7 @@ jobs: id: cuda-toolkit uses: Jimver/cuda-toolkit@v0.2.22 with: - cuda: '12.8.0' + cuda: '12.9.0' method: 'network' sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "curand", "nvjitlink", "thrust", "visual_studio_integration"]' @@ -321,7 +321,7 @@ jobs: wget -q 
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/sbsa/cuda-keyring_1.1-1_all.deb sudo dpkg -i cuda-keyring_1.1-1_all.deb sudo apt-get update - sudo apt-get install -y cuda-toolkit-12-8 cmake ninja-build patchelf + sudo apt-get install -y cuda-toolkit-12-9 cmake ninja-build patchelf - name: Set CUDA environment run: | From 9e101905009ccdf005baae01cdef4bc2e0113941 Mon Sep 17 00:00:00 2001 From: Ken VanDine Date: Thu, 11 Jun 2026 16:45:10 -0400 Subject: [PATCH 3/4] ci: bump Jimver/cuda-toolkit to v0.2.35 with CUDA 12.9.1 v0.2.22 does not include CUDA 12.9.x; v0.2.24 was the first release to add it. Bump to latest v0.2.35 and use 12.9.1 (latest 12.9 patch). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 30cf9ef6d..7c27701ec 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -223,9 +223,9 @@ jobs: - name: Install CUDA Toolkit id: cuda-toolkit - uses: Jimver/cuda-toolkit@v0.2.22 + uses: Jimver/cuda-toolkit@v0.2.35 with: - cuda: '12.9.0' + cuda: '12.9.1' method: 'network' sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "curand", "nvjitlink", "thrust", "visual_studio_integration"]' From 8cd9abef0f988756d3a9c0d58644a160dac1a462 Mon Sep 17 00:00:00 2001 From: Ken VanDine Date: Thu, 11 Jun 2026 17:16:58 -0400 Subject: [PATCH 4/4] ci: use static sm_121 matrix for ubuntu-arm64-cuda (fix for pull request finding) Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7c27701ec..c159f8a8a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -299,7 +299,7 @@ jobs: fail-fast: false matrix: # sm_121 targets the GB10 (Blackwell) ARM64 device - sm: ${{ 
github.event_name == 'pull_request' && fromJSON('["sm_121"]') || fromJSON('["sm_121"]') }} + sm: ["sm_121"] steps: - name: Clone