feat: Binary wheels for CPU, CUDA (12.1 - 12.3), Metal (#1247)

* Generate binary wheel index on release

* Add total release downloads badge

* Update download label

* Use official cibuildwheel action

* Add workflows to build CUDA and Metal wheels

* Update generate index workflow

* Update workflow name
This commit is contained in:
Andrei 2024-04-03 15:32:13 -04:00 committed by GitHub
parent 8649d7671b
commit 5a930ee9a1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 330 additions and 5 deletions

View file

@ -11,7 +11,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macOS-latest]
os: [ubuntu-20.04, windows-2019, macos-11]
steps:
- uses: actions/checkout@v3
@ -23,19 +23,19 @@ jobs:
with:
python-version: "3.8"
- name: Install cibuildwheel
run: python -m pip install cibuildwheel==2.12.1
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install -e .[all]
- name: Build wheels
run: python -m cibuildwheel --output-dir wheelhouse
uses: pypa/cibuildwheel@v2.16.5
env:
# disable repair
CIBW_REPAIR_WHEEL_COMMAND: ""
with:
package-dir: .
output-dir: wheelhouse
- uses: actions/upload-artifact@v3
with:

131
.github/workflows/build-wheels-cuda.yaml vendored Normal file
View file

@ -0,0 +1,131 @@
# Builds CUDA-enabled wheels for every OS / Python / CUDA combination in the
# matrix and uploads them to a GitHub release tagged <ref>-cu<cuda_version>.
name: Build Wheels (CUDA)
# Manual trigger only
on: workflow_dispatch
# Write access to repository contents; the release step at the end of
# build_wheels uploads the wheels using GITHUB_TOKEN.
permissions:
contents: write
jobs:
# Serialises the build matrix to compact JSON and exposes it as the job output
# "matrix", which build_wheels reads back through fromJSON().
define_matrix:
name: Define Build Matrix
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
defaults:
run:
shell: pwsh
steps:
- name: Define Job Output
id: set-matrix
run: |
$matrix = @{
'os' = @('ubuntu-20.04', 'windows-latest')
'pyver' = @("3.10", "3.11", "3.12")
'cuda' = @("12.1.1", "12.2.2", "12.3.2")
'releasetag' = @("basic")
}
$matrixOut = ConvertTo-Json $matrix -Compress
Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT
# One job per matrix combination. The job name shows 'AVX2' when releasetag is
# 'wheels' and the raw releasetag otherwise.
build_wheels:
name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
needs: define_matrix
runs-on: ${{ matrix.os }}
strategy:
matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
defaults:
run:
shell: pwsh
# CUDAVER and AVXVER are read by the PowerShell steps below
env:
CUDAVER: ${{ matrix.cuda }}
AVXVER: ${{ matrix.releasetag }}
steps:
- uses: actions/checkout@v4
with:
submodules: "recursive"
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.pyver }}
# Creates and activates a conda environment named "build" with the matrix
# Python version, using the Mambaforge installer with mamba enabled.
- name: Setup Mamba
uses: conda-incubator/setup-miniconda@v2.2.0
with:
activate-environment: "build"
python-version: ${{ matrix.pyver }}
miniforge-variant: Mambaforge
miniforge-version: latest
use-mamba: true
add-pip-as-python-dependency: true
auto-activate-base: false
# Windows only: cache ./MSBuildExtensions, keyed per CUDA version
- name: VS Integration Cache
id: vs-integration-cache
if: runner.os == 'Windows'
uses: actions/cache@v3.3.2
with:
path: ./MSBuildExtensions
key: cuda-${{ matrix.cuda }}-vs-integration
# Windows only, on cache miss: fetches windows-links.ts from a pinned commit
# of Jimver/cuda-toolkit, splits it into non-empty whitespace-separated
# tokens and scans for the token '<version>', (quoted, with trailing comma).
# The loop stops once that token has been seen twice; $i then indexes the
# token right after the second match, which is used (quotes trimmed) as the
# installer download URL. 12.1.1 is looked up as 12.1.0 -- presumably the
# table has no 12.1.1 entry; TODO confirm. The MSBuildExtensions files are
# then extracted from the download into ./MSBuildExtensions with 7-Zip and
# the download is deleted.
- name: Get Visual Studio Integration
if: runner.os == 'Windows' && steps.vs-integration-cache.outputs.cache-hit != 'true'
run: |
if ($env:CUDAVER -eq '12.1.1') {$x = '12.1.0'} else {$x = $env:CUDAVER}
$links = (Invoke-RestMethod 'https://github.com/Jimver/cuda-toolkit/raw/dc0ca7bb29c5a92f7a963d3d5c93f8d59765136a/src/links/windows-links.ts').Trim().split().where({$_ -ne ''})
for ($i=$q=0;$i -lt $links.count -and $q -lt 2;$i++) {if ($links[$i] -eq "'$x',") {$q++}}
Invoke-RestMethod $links[$i].Trim("'") -OutFile 'cudainstaller.zip'
& 'C:\Program Files\7-Zip\7z.exe' e cudainstaller.zip -oMSBuildExtensions -r *\MSBuildExtensions\* > $null
Remove-Item 'cudainstaller.zip'
# Windows only: copies the extension files into every matching
# VC\*\BuildCustomizations directory of the Visual Studio 2022 Enterprise
# install, then appends CUDA_PATH_V<major>_<minor> (CUDAVER with its last
# component removed and dots replaced by underscores, e.g. CUDA_PATH_V12_1)
# to GITHUB_ENV, pointing at the conda prefix.
- name: Install Visual Studio Integration
if: runner.os == 'Windows'
run: |
$y = (gi '.\MSBuildExtensions').fullname + '\*'
(gi 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\*\BuildCustomizations').fullname.foreach({cp $y $_})
$cupath = 'CUDA_PATH_V' + $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','_')
echo "$cupath=$env:CONDA_PREFIX" >> $env:GITHUB_ENV
# Installs the 'cuda' package from the nvidia/label/cuda-<CUDAVER> conda
# channel, plus the Python build tooling (build, wheel).
- name: Install Dependencies
env:
MAMBA_DOWNLOAD_FAILFAST: "0"
MAMBA_NO_LOW_SPEED_LIMIT: "1"
run: |
$cudaVersion = $env:CUDAVER
mamba install -y 'cuda' -c nvidia/label/cuda-$cudaVersion
python -m pip install build wheel
# Points CUDA_PATH / CUDA_HOME / CUDA_TOOLKIT_ROOT_DIR at the conda prefix
# (and prepends its lib directory to LD_LIBRARY_PATH on Linux), assembles
# CMAKE_ARGS, builds the wheel, and appends CUDA_VERSION to GITHUB_ENV for
# the release step. CUDA_VERSION is CUDAVER without its last component and
# without dots (12.1.1 -> 121). AVXVER selects extra CPU-instruction flags;
# the matrix above only ever supplies 'basic'.
- name: Build Wheel
run: |
$cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','')
$env:CUDA_PATH = $env:CONDA_PREFIX
$env:CUDA_HOME = $env:CONDA_PREFIX
$env:CUDA_TOOLKIT_ROOT_DIR = $env:CONDA_PREFIX
if ($IsLinux) {
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
}
$env:VERBOSE = '1'
$env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=all'
$env:CMAKE_ARGS = "-DLLAMA_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS"
if ($env:AVXVER -eq 'AVX') {
$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'
}
if ($env:AVXVER -eq 'AVX512') {
$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX512=on'
}
if ($env:AVXVER -eq 'basic') {
$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'
}
python -m build --wheel
# write the build tag to the output
Write-Output "CUDA_VERSION=$cudaVersion" >> $env:GITHUB_ENV
# Uploads everything in dist/ to the release for the tag computed below
- uses: softprops/action-gh-release@v1
with:
files: dist/*
# Set tag_name to <tag>-cu<cuda_version>
tag_name: ${{ github.ref_name }}-cu${{ env.CUDA_VERSION }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View file

@ -0,0 +1,87 @@
# Builds Metal-enabled macOS wheels (arm64 and x86_64) for every runner image /
# Python version in the matrix and uploads them to a GitHub release tagged
# <ref>-metal.
name: Build Wheels (Metal)

# Manual trigger only
on: workflow_dispatch

# Write access to repository contents; the release step uploads the wheels
# using GITHUB_TOKEN.
permissions:
  contents: write

jobs:
  # Serialises the build matrix to compact JSON and exposes it as the job
  # output "matrix", which build_wheels reads back through fromJSON().
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    defaults:
      run:
        shell: pwsh

    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          $matrix = @{
              'os' = @('macos-11', 'macos-12', 'macos-13')
              'pyver' = @('3.10', '3.11', '3.12')
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  build_wheels:
    name: ${{ matrix.os }} Python ${{ matrix.pyver }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    env:
      # Read by the build script to pick the deployment target
      OSVER: ${{ matrix.os }}

    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.pyver }}

      - name: Install Dependencies
        run: |
          python -m pip install build wheel cmake

      - name: Build Wheel
        run: |
          XCODE15PATH="/Applications/Xcode_15.0.app/Contents/Developer"
          XCODE15BINPATH="${XCODE15PATH}/Toolchains/XcodeDefault.xctoolchain/usr/bin"
          MACOS14_SDK="${XCODE15PATH}/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.0.sdk"

          # Baseline deployment target follows the runner image; macos-13
          # additionally switches to the Xcode 15 compilers.
          [[ "$OSVER" == "macos-13" ]] && export CC="${XCODE15BINPATH}/cc" && export CXX="${XCODE15BINPATH}/c++" && export MACOSX_DEPLOYMENT_TARGET="13.0"
          [[ "$OSVER" == "macos-12" ]] && export MACOSX_DEPLOYMENT_TARGET="12.0"
          [[ "$OSVER" == "macos-11" ]] && export MACOSX_DEPLOYMENT_TARGET="11.0"

          # --- arm64 ---
          export CMAKE_ARGS="-DLLAMA_NATIVE=off -DLLAMA_METAL=on"
          export CMAKE_OSX_ARCHITECTURES="arm64"
          export ARCHFLAGS="-arch arm64"
          VERBOSE=1 python -m build --wheel

          if [[ "$OSVER" == "macos-13" ]]; then
            # Extra wheel targeting macOS 14. The overrides live in a subshell
            # so they do not leak into the builds that follow.
            (
              export SDKROOT="${MACOS14_SDK}"
              export MACOSX_DEPLOYMENT_TARGET="14.0"
              VERBOSE=1 python -m build --wheel
            )
          fi

          # Publish every arm64 wheel under an aarch64 file name as well
          for file in ./dist/*.whl; do cp "$file" "${file/arm64.whl/aarch64.whl}"; done

          # --- x86_64 ---
          export CMAKE_ARGS="-DLLAMA_NATIVE=off -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_METAL=on"
          export CMAKE_OSX_ARCHITECTURES="x86_64"
          export ARCHFLAGS="-arch x86_64"
          # Runs with the baseline deployment target again. Previously the
          # macOS 14 exports above were still in effect here, so on macos-13
          # the 13.0 wheel was never built and the 14.0 wheel was built twice.
          VERBOSE=1 python -m build --wheel

          if [[ "$OSVER" == "macos-13" ]]; then
            (
              export SDKROOT="${MACOS14_SDK}"
              export MACOSX_DEPLOYMENT_TARGET="14.0"
              VERBOSE=1 python -m build --wheel
            )
          fi

      - uses: softprops/action-gh-release@v1
        with:
          files: dist/*
          # Set tag_name to <tag>-metal
          tag_name: ${{ github.ref_name }}-metal
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View file

@ -0,0 +1,48 @@
# Regenerates the PEP 503 wheel indexes (one per build flavour) from the
# project's GitHub releases and deploys them to GitHub Pages.
name: Wheels Index

on:
  # Trigger on any new release
  release:
    types: [published]

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
permissions:
  contents: read
  pages: write
  id-token: write

# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
concurrency:
  group: "pages"
  cancel-in-progress: false

jobs:
  # Single deploy job since we're just deploying
  deploy:
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup Pages
        uses: actions/configure-pages@v4
      - name: Build
        # One index per flavour: <output dir> <regex selecting release tags>.
        # The script path must match the file checked in under scripts/, and
        # there must be one cuXYZ line per CUDA version built by the CUDA
        # wheels workflow (12.1, 12.2, 12.3).
        run: |
          ./scripts/release-to-pep-503.sh index/whl/cpu '^[v]?[0-9]+\.[0-9]+\.[0-9]+$'
          ./scripts/release-to-pep-503.sh index/whl/cu121 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu121$'
          ./scripts/release-to-pep-503.sh index/whl/cu122 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu122$'
          ./scripts/release-to-pep-503.sh index/whl/cu123 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu123$'
          ./scripts/release-to-pep-503.sh index/whl/metal '^[v]?[0-9]+\.[0-9]+\.[0-9]+-metal$'
      - name: Upload artifact
        uses: actions/upload-pages-artifact@v3
        with:
          # Upload only the generated index directory
          path: 'index'
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4

View file

@ -6,6 +6,7 @@
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/llama-cpp-python)](https://pypi.org/project/llama-cpp-python/)
[![PyPI - License](https://img.shields.io/pypi/l/llama-cpp-python)](https://pypi.org/project/llama-cpp-python/)
[![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-cpp-python)](https://pypi.org/project/llama-cpp-python/)
[![GitHub All Releases](https://img.shields.io/github/downloads/abetlen/llama-cpp-python/total.svg?label=GitHub%20Downloads)](https://github.com/abetlen/llama-cpp-python/releases)
Simple Python bindings for **@ggerganov's** [`llama.cpp`](https://github.com/ggerganov/llama.cpp) library.
This package provides:

58
scripts/release-to-pep-503.sh Executable file
View file

@ -0,0 +1,58 @@
#!/bin/bash
#
# Generate a PEP 503 style package index for llama-cpp-python from the wheel
# assets attached to the project's GitHub releases.
#
# Usage: release-to-pep-503.sh [output_dir] [tag_pattern]
#   output_dir   directory the index is written to (default: index/whl/cpu)
#   tag_pattern  extended regex (grep -E) a release tag must match to be
#                listed (default: plain version tags, optional leading "v")
#
# Writes <output_dir>/index.html (project list) and
# <output_dir>/llama-cpp-python/index.html (links to every .whl asset).
# Requires curl and jq.

# Get output directory or default to index/whl/cpu
output_dir=${1:-"index/whl/cpu"}

# Get pattern from second arg or default to valid python package version pattern
pattern=${2:-"^[v]?[0-9]+\.[0-9]+\.[0-9]+$"}

releases_api="https://api.github.com/repos/abetlen/llama-cpp-python/releases"

# Create output directory and change to it; stop if either fails so the index
# is never written into whatever directory the script happened to start in
mkdir -p "$output_dir" || exit 1
pushd "$output_dir" || exit 1

# Create an index html file
{
    echo "<!DOCTYPE html>"
    echo "<html>"
    echo " <head></head>"
    echo " <body>"
    echo " <a href=\"llama-cpp-python/\">llama-cpp-python</a>"
    echo " <br>"
    echo " </body>"
    echo "</html>"
    echo ""
} > index.html

# Create llama-cpp-python directory and change to it
mkdir -p llama-cpp-python || exit 1
pushd llama-cpp-python || exit 1

# Create an index html file
{
    echo "<!DOCTYPE html>"
    echo "<html>"
    echo " <body>"
    echo " <h1>Links for llama-cpp-python</h1>"
} > index.html

# Get all release tags. The releases endpoint is paginated, so keep requesting
# pages until one yields no tags (an empty page, or an error response that jq
# cannot turn into tag names).
releases=""
page=1
while true; do
    page_tags=$(curl -s "${releases_api}?per_page=100&page=${page}" | jq -r '.[].tag_name')
    if [[ -z "$page_tags" ]]; then
        break
    fi
    releases+="${page_tags}"$'\n'
    page=$((page + 1))
done

# Filter releases by pattern (quoted so the shell cannot glob-expand the regex)
releases=$(printf '%s' "$releases" | grep -E -- "$pattern")

# For each release, link every wheel asset
for release in $releases; do
    asset_urls=$(curl -s "${releases_api}/tags/${release}" | jq -r '.assets[].browser_download_url')
    echo " <h2>$release</h2>" >> index.html
    for asset in $asset_urls; do
        if [[ $asset == *".whl" ]]; then
            echo " <a href=\"$asset\">$asset</a>" >> index.html
            echo " <br>" >> index.html
        fi
    done
done

{
    echo " </body>"
    echo "</html>"
    echo ""
} >> index.html