# Captured from the GitHub Actions run page for:
#   docs: add wayfinder readme.md files for each docs directory (#1617) #3874
#
# Workflow file for this run:

# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This workflow verifies that the basic install works across all supported platforms.
# For basic install, all imports need to either be successful or appropriately guarded.
name: Installation Test

# Triggers: pushes to the main, pull-request mirror and deploy-release branches,
# plus a manual/remote dispatch used for MCore compatibility testing.
on:
  push:
    branches:
      - "main"
      - "pull-request/[0-9]+"
      - "deploy-release/*"

  # Allow MCore to trigger this workflow remotely for compatibility testing
  workflow_dispatch:
    inputs:
      # When non-empty, the uv-test-pytorch job checks out this commit in the
      # 3rdparty/Megatron-LM submodule before installing.
      mcore_commit:
        description: "MCore commit SHA to test against"
        required: false
        type: string

      mcore_branch:
        description: "MCore branch name (for reference)"
        required: false
        type: string

      mcore_repo:
        description: "MCore repository URL (for fetching from forks)"
        required: false
        type: string
        default: "https://github.com/NVIDIA/Megatron-LM.git"

      test_suite:
        description: "Test suite to run"
        required: false
        type: string
        default: "all"

      triggered_by:
        description: "Trigger source (for tracking)"
        required: false
        type: string
        default: "manual"
# One run per workflow / PR-or-ref / label / event / MCore commit; a newer run in
# the same group cancels the one in progress.
# The trailing mcore_commit segment keeps remotely dispatched compatibility runs
# for different MCore commits from cancelling each other: they share the same ref
# and event name. It falls back to 'default' when no commit input is provided
# (e.g. push events).
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || 'main' }}-${{ github.event_name }}-${{ github.event.inputs.mcore_commit || 'default' }}
  cancel-in-progress: true
# Job definitions for this workflow.
jobs:
# Reusable pre-flight workflow from NVIDIA-NeMo/FW-CI-templates. The jobs below read
# its `docs_only` and `is_deployment_workflow` outputs to decide whether to run.
pre-flight:
# NOTE(review): the text after "workflows/" reads "[email protected]", which looks like
# an e-mail-obfuscation artifact that replaced the original "<workflow-file>@<ref>".
# Restore the real workflow file name and pinned ref - TODO confirm against the repo.
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/[email protected]
# pip-test-pytorch:
# needs: [pre-flight]
# if: |
# !(needs.pre-flight.outputs.docs_only == 'true'
# || needs.pre-flight.outputs.is_deployment_workflow == 'true')
# runs-on: self-hosted-nemo
# name: Pip - Python${{ matrix.python-version }}${{ matrix.extra-groups != '' && format('[{0}]', matrix.extra-groups) || '' }} - AMD64/Linux - NGC PyTorch
# container:
# image: nvcr.io/nvidia/pytorch:25.05-py3
# environment: nemo-ci
# strategy:
# fail-fast: false
# matrix:
# python-version: ["3.12"]
# extra-groups: ["", "recipes"]
# env:
# EXTRA: ${{ matrix.extra-groups != '' && format('[{0}]', matrix.extra-groups) || '' }}
# steps:
# - name: Install git
# shell: bash -x -e -u -o pipefail {0}
# run: |
# apt-get update
# apt-get install -y git
# - name: Checkout repository
# uses: actions/checkout@v4
# with:
# submodules: recursive
# - name: Set PATH
# run: |
# echo "UV_PROJECT_ENVIRONMENT=/opt/venv" | tee -a "$GITHUB_ENV"
# echo "UV_LINK_MODE=copy" | tee -a "$GITHUB_ENV"
# echo "CUDA_HOME=/usr/local/cuda" | tee -a "$GITHUB_ENV"
# echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH" | tee -a "$GITHUB_ENV"
# echo "PATH=$HOME/.local/bin:$PATH:$CUDA_HOME/bin" | tee -a "$GITHUB_ENV"
# echo "TORCH_CUDA_ARCH_LIST=6.0;6.1;7.0;7.5;8.0;8.6;9.0" | tee -a "$GITHUB_ENV"
# - name: Install megatron-bridge${{ matrix.extra-groups != '' && format('[{0}]', matrix.extra-groups) || '' }}
# shell: bash -x -e -u -o pipefail {0}
# run: bash docker/common/install.sh --base-image pytorch --python-version ${{ matrix.python-version }}
# - name: Checkout check-imports
# uses: actions/checkout@v4
# with:
# repository: NVIDIA-NeMo/FW-CI-templates
# ref: v0.39.0
# path: FW-CI-templates
# - name: Check imports for megatron-bridge
# uses: ./FW-CI-templates/.github/actions/check-imports
# with:
# package-name: megatron.bridge
# python-binary: ${{ env.UV_PROJECT_ENVIRONMENT }}/bin/python
# Installs the project with uv inside the NGC PyTorch container to verify that the
# basic install works. When the workflow is dispatched with `mcore_commit`, the
# 3rdparty/Megatron-LM submodule is first moved to that commit so the install is
# exercised against it.
uv-test-pytorch:
  needs: [pre-flight]
  # Skipped for docs-only changes and deployment workflows (both reported by pre-flight).
  if: |
    !(needs.pre-flight.outputs.docs_only == 'true'
    || needs.pre-flight.outputs.is_deployment_workflow == 'true')
  runs-on: self-hosted-nemo
  name: UV - Python${{ matrix.python-version }} - AMD64/Linux - NGC PyTorch
  container:
    image: nvcr.io/nvidia/pytorch:25.05-py3
  environment: nemo-ci
  strategy:
    fail-fast: false
    matrix:
      python-version: ["3.12"]
  steps:
    - name: Install git
      shell: bash -x -e -u -o pipefail {0}
      run: |
        apt-get update
        apt-get install -y git

    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        submodules: recursive

    - name: Update MCore submodule (if triggered from MCore)
      if: ${{ github.event.inputs.mcore_commit != '' }}
      # Dispatch inputs are handed to the script through environment variables
      # instead of being expanded into the script text with ${{ }}: expression
      # expansion happens before the shell parses the script, so a crafted input
      # (e.g. a branch name) could otherwise inject commands on the runner.
      env:
        MCORE_COMMIT: ${{ github.event.inputs.mcore_commit }}
        MCORE_BRANCH: ${{ github.event.inputs.mcore_branch }}
        MCORE_REPO: ${{ github.event.inputs.mcore_repo }}
        TRIGGERED_BY: ${{ github.event.inputs.triggered_by }}
      run: |
        echo "🔄 Updating MCore submodule to commit: ${MCORE_COMMIT}"
        echo "📌 MCore branch: ${MCORE_BRANCH:-unknown}"
        echo "📍 MCore repo: ${MCORE_REPO:-https://github.com/NVIDIA/Megatron-LM.git}"
        echo "🎯 Triggered by: ${TRIGGERED_BY}"
        cd 3rdparty/Megatron-LM
        # Fetch from the given repository (a fork URL is allowed); fall back to the
        # submodule's own remote when no repository was supplied.
        git fetch "${MCORE_REPO:-origin}" "${MCORE_COMMIT}"
        git checkout "${MCORE_COMMIT}"
        # Verify the checkout was successful
        ACTUAL_COMMIT=$(git rev-parse HEAD)
        EXPECTED_COMMIT="${MCORE_COMMIT}"
        echo "Expected: ${EXPECTED_COMMIT}"
        echo "Actual: ${ACTUAL_COMMIT}"
        if [ "${ACTUAL_COMMIT}" != "${EXPECTED_COMMIT}" ]; then
          echo "❌ ERROR: MCore commit mismatch!"
          exit 1
        fi
        # Report success only after the commit has actually been verified.
        echo "✅ MCore submodule updated successfully"
        git log -1 --oneline

    - name: Set environment for MCore testing
      if: ${{ github.event.inputs.mcore_commit != '' }}
      run: |
        echo "MCORE_TRIGGERED_TESTING=true" | tee -a "$GITHUB_ENV"
        echo "⚙️ MCore testing mode: skipping --locked flag because lockfile was generated with different MCore version"

    - name: Set PATH
      run: |
        # Values appended to GITHUB_ENV only become visible in later steps, so the
        # CUDA root is kept in a local variable for the lines that need it here
        # instead of reading $CUDA_HOME back from the environment.
        cuda_home=/usr/local/cuda
        echo "UV_PROJECT_ENVIRONMENT=/opt/venv" | tee -a "$GITHUB_ENV"
        echo "VIRTUAL_ENV=/opt/venv" | tee -a "$GITHUB_ENV"
        echo "UV_LINK_MODE=copy" | tee -a "$GITHUB_ENV"
        echo "CUDA_HOME=${cuda_home}" | tee -a "$GITHUB_ENV"
        # Append the existing value only when it is set: a trailing ':' would add
        # the current directory to the library search path.
        echo "LD_LIBRARY_PATH=${cuda_home}/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" | tee -a "$GITHUB_ENV"
        echo "PATH=$HOME/.local/bin:$PATH:${cuda_home}/bin" | tee -a "$GITHUB_ENV"
        echo "CUDACXX=${cuda_home}/bin/nvcc" | tee -a "$GITHUB_ENV"
        echo "TORCH_CUDA_ARCH_LIST=6.0;6.1;7.0;7.5;8.0;8.6;9.0" | tee -a "$GITHUB_ENV"

    - name: Verify MCore version before install
      if: ${{ github.event.inputs.mcore_commit != '' }}
      # Same env-based input passing as the submodule update step (no ${{ }} inside
      # the script body).
      env:
        MCORE_COMMIT: ${{ github.event.inputs.mcore_commit }}
      run: |
        cd 3rdparty/Megatron-LM
        ACTUAL_COMMIT=$(git rev-parse HEAD)
        EXPECTED_COMMIT="${MCORE_COMMIT}"
        echo "🧪 INSTALL TEST - MCore verification:"
        echo "Expected: ${EXPECTED_COMMIT}"
        echo "Actual: ${ACTUAL_COMMIT}"
        if [ "${ACTUAL_COMMIT}" != "${EXPECTED_COMMIT}" ]; then
          echo "❌ ERROR: MCore commit mismatch!"
          exit 1
        fi
        echo "✅ MCore commit verified"
        git log -1 --oneline

    - name: Install project
      shell: bash
      run: bash docker/common/install.sh --base-image pytorch --use-uv
# Single summary job for the workflow: succeeds when every completed job in this
# run concluded with "success", or when pre-flight reports that skipping is allowed
# (docs-only change or deployment workflow). Fails otherwise.
install-test-summary:
  needs: [pre-flight, uv-test-pytorch]
  runs-on: ubuntu-latest
  name: Install test summary
  # Run even when upstream jobs failed or were skipped, but not when the run was
  # cancelled. (The previous condition OR-ed its operands with always(), which made
  # them dead; it was equivalent to this.)
  if: ${{ !cancelled() }}
  steps:
    - name: Get workflow result
      id: result
      shell: bash -x -e -u -o pipefail {0}
      env:
        GH_TOKEN: ${{ github.token }}
        # This job does not check out the repository, so tell gh which repository
        # the run belongs to instead of relying on a local git remote.
        GH_REPO: ${{ github.repository }}
        RUN_ID: ${{ github.run_id }}
        SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' }}
      run: |
        # jq selector for jobs that finished with anything other than "success"
        # (this includes skipped and cancelled jobs).
        NOT_SUCCESSFUL='.jobs[] | select(.status == "completed" and .conclusion != "success")'

        if [ "$SKIPPING_IS_ALLOWED" == "true" ]; then
          echo "✅ All previous jobs completed successfully"
          exit 0
        fi

        # Fail closed: if the job list cannot be retrieved, do not report success.
        # (Previously a failed query only printed "0" and the count defaulted to 0.)
        if ! FAILED_JOBS=$(gh run view "$RUN_ID" --json jobs --jq "[${NOT_SUCCESSFUL}] | length"); then
          echo "❌ Could not query job results for run $RUN_ID"
          exit 1
        fi

        if [ "$FAILED_JOBS" -eq 0 ]; then
          echo "✅ All previous jobs completed successfully"
          exit 0
        else
          echo "❌ Found $FAILED_JOBS failed job(s)"
          # Show which jobs failed
          gh run view "$RUN_ID" --json jobs --jq "${NOT_SUCCESSFUL} | .name"
          exit 1
        fi