Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions .github/packaging/pre_build_nightly.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/bin/bash
#
# Pre-build script for the forge nightly wheel.
#
# Installs the nightly dependency stack on top of an already-installed
# PyTorch nightly (torchtitan, torchmonarch-nightly, torchstore from main,
# vLLM built from source) and then writes the nightly version string into
# src/forge/__init__.py.
#
# Environment:
#   BUILD_VERSION  optional; version written to __init__.py. Defaults to
#                  today's date formatted as YYYY.MM.DD.
#   VLLM_VERSION   vLLM branch/tag to build; this script does not define it
#                  and expects it to be in scope after sourcing
#                  assets/versions.sh.
#
# Strict mode: abort on the first failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail). Without
# -e a failed clone or install would be ignored and the "succeeded" messages
# below would still be printed.
set -euo pipefail

# Script runs relative to forge root
CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
FORGE_ROOT="$(cd "$CURRENT_DIR/../.." && pwd)"
VERSIONS_FILE="$FORGE_ROOT/assets/versions.sh"
# shellcheck source=/dev/null
source "$VERSIONS_FILE"

# Fail early, with a readable message, if the vLLM version is missing.
: "${VLLM_VERSION:?VLLM_VERSION is not set (expected after sourcing $VERSIONS_FILE)}"

echo "Installing nightly dependencies for forge build"
echo "PyTorch nightly already installed by test-infra via channel: nightly"

# 1. Verify PyTorch nightly is installed
python -c "import torch; print(f'PyTorch version: {torch.__version__}')"

# 2. Install torchtitan nightly
echo "Installing torchtitan nightly..."
pip install --pre torchtitan \
  --extra-index-url https://download.pytorch.org/whl/nightly/cu128

# 3. Install torchmonarch-nightly
echo "Installing torchmonarch-nightly..."
pip install torchmonarch-nightly

# 4. Install torchstore from main branch WITHOUT dependencies
# Following monarch_forge.sh:580-588 pattern
echo "Installing torchstore dependencies..."
pip install pygtrie

echo "Installing torchstore from main branch..."
TORCHSTORE_DIR="/tmp/torchstore-build"
mkdir -p "$TORCHSTORE_DIR"
# git clone refuses to write into an existing non-empty directory, so drop
# any checkout left behind by a previous run. ':?' guards against an empty
# variable turning this into 'rm -rf /torchstore'.
rm -rf -- "${TORCHSTORE_DIR:?}/torchstore"
# Run in a subshell so the caller's working directory is untouched, even if
# a step fails part-way through.
(
  cd "$TORCHSTORE_DIR"
  git clone https://github.com/meta-pytorch/torchstore.git
  echo "DEBUG: git clone succeeded"

  cd torchstore
  echo "DEBUG: Changed into torchstore subdirectory"
  git checkout main
  pip install --no-deps .
  echo "DEBUG: torchstore installation succeeded"
)

# 5. Build vLLM from source (following internal pt2.sh:561-578 pattern)
# Note: Cannot use pip install vllm==0.10.0 because PyPI version requires torch==2.7.0
# vLLM has C++/CUDA extensions that must compile against our PyTorch nightly
echo "Building vLLM ${VLLM_VERSION} from source against PyTorch nightly..."
BUILD_DIR="/tmp/vllm-build"
mkdir -p "$BUILD_DIR"
# Same re-run protection as for torchstore above.
rm -rf -- "${BUILD_DIR:?}/vllm"
(
  cd "$BUILD_DIR"
  git clone https://github.com/vllm-project/vllm.git --branch "$VLLM_VERSION"
  cd vllm

  # Use existing torch (PyTorch nightly already installed)
  # This script patches vLLM's setup.py to use the installed PyTorch instead of downloading
  python use_existing_torch.py
  pip install -r requirements/build.txt

  # Clean up existing builds if needed
  rm -rf build/ ./*.egg-info/

  # Build and install vLLM (compiles C++/CUDA extensions against installed PyTorch)
  pip install --no-build-isolation .
)

# 6. Set nightly version in __init__.py
echo "Setting nightly version..."
NIGHTLY_VERSION="${BUILD_VERSION:-$(date +%Y.%m.%d)}"
INIT_FILE="$FORGE_ROOT/src/forge/__init__.py"
sed -i "s/__version__ = \".*\"/__version__ = \"${NIGHTLY_VERSION}\"/" "$INIT_FILE"
# sed exits 0 even when nothing matched; surface that case instead of
# silently shipping a wheel with the old version string.
if ! grep -qF "__version__ = \"${NIGHTLY_VERSION}\"" "$INIT_FILE"; then
  echo "WARNING: no __version__ assignment was updated in $INIT_FILE" >&2
fi

echo "Nightly dependency installation complete!"
echo "Installed package versions:"
# Informational only: under 'set -e' with pipefail an empty grep result would
# otherwise fail the whole build at the very last step.
pip list | grep -E "torch|torchtitan|vllm|monarch" || true
53 changes: 53 additions & 0 deletions .github/workflows/nightly.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Nightly wheel build for forge.
#
# Delegates the build to the reusable pytorch/test-infra Linux wheel workflow,
# running .github/packaging/pre_build_nightly.sh before the build and
# .github/packaging/post_build_script.sh after it.
name: Build and upload forge nightly

on:
  schedule:
    - cron: '0 0 * * *'  # Daily at midnight UTC
  push:
    branches:
      - nightly
      - add-nightly-build  # TODO: remove this line after PR is accepted. This is for testing.
  workflow_dispatch:

permissions:
  id-token: write
  contents: read

jobs:
  build:
    name: forge-cu128-nightly
    uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
    strategy:
      fail-fast: false
    with:
      repository: meta-pytorch/forge
      # TODO: reset to "" after PR is accepted. This is to make sure during testing, the workflow
      # checkout the current branch.
      ref: ${{ github.sha }}
      test-infra-repository: pytorch/test-infra
      test-infra-ref: main
      run-smoke-test: false
      wheel-nightly-policy: gha_workflow_nightly_build_wheels
      wheel-upload-path: whl/nightly/forge/
      package-name: forge
      # Single-entry build matrix, passed to the reusable workflow as JSON.
      # build_name follows the "<package_type>-py<major>_<minor>-cuda<ver>"
      # naming seen here, so it is kept in sync with python_version (3.12).
      # NOTE(review): gpu_arch_type is "cpu" although container_image and
      # desired_cuda target CUDA 12.8 -- confirm this is intentional.
      build-matrix: |
        {
          "include": [
            {
              "python_version": "3.12",
              "gpu_arch_type": "cpu",
              "gpu_arch_version": "12.8",
              "desired_cuda": "cu128",
              "container_image": "pytorch/manylinux2_28-builder:cuda12.8",
              "package_type": "manywheel",
              "build_name": "manywheel-py3_12-cuda12_8",
              "validation_runner": "linux.12xlarge.memory",
              "upload_to_base_bucket": "no",
              "use_split_build": false
            }
          ]
        }
      pre-script: .github/packaging/pre_build_nightly.sh
      post-script: .github/packaging/post_build_script.sh
      trigger-event: ${{ github.event_name }}
      build-platform: 'python-build-package'
20 changes: 17 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,25 @@ Optional: By default, the packages installation uses conda. If you want to insta

> **Note:** We are actively working on enabling pure `uv` installation. Currently, Conda is the recommended approach. `uv` support is not fully working at the moment but is being tracked in [issue #494](https://github.com/meta-pytorch/torchforge/issues/494).

After install, you can run the following command and should see output confirming GRPO training is running (you need a minimum 3 GPU devices):
### Nightly Installation

To install the latest nightly build of torchforge with the newest features and fixes:

```bash
# Install PyTorch nightly
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu128

# Install torchforge nightly
pip install forge --index-url https://download.pytorch.org/whl/nightly/forge/
```
python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml
```

Nightly builds are published daily and include the latest features and fixes from the main branch.
The nightly build obtains its key dependencies as follows:
1. torchmonarch: installed from the `torchmonarch-nightly` package on PyPI.
2. vllm: v0.10.0, built from source against PyTorch nightly. TODO: additional refactoring is needed before upgrading to vLLM nightly.
3. torchtitan: installed from its nightly build.
4. torchstore: installed from the GitHub main branch, since torchstore does not have a nightly build yet.


## Quick Start

Expand Down
Loading