microsoft/TypeAgent
Publicmirrored from https://github.com/microsoft/TypeAgentAvailable
.github/workflows/docs-generate.yml
377lines · modecode
| 1 | # Copyright (c) Microsoft Corporation. |
| 2 | # Licensed under the MIT License. |
| 3 | |
| 4 | # Daily regeneration of package-level README.AUTOGEN.md companion files |
| 5 | # under ts/packages/**. |
| 6 | # |
| 7 | # Detects packages whose source has changed since the last successful |
| 8 | # scheduled run (tracked by the `docs-bot/last-run` lightweight tag), |
| 9 | # rebuilds each affected package's README.AUTOGEN.md (a parallel file |
| 10 | # alongside the hand-written README.md, never touching the latter), |
| 11 | # validates every link on disk, and opens a single batched PR. Prior |
| 12 | # open bot PRs are closed with --delete-branch so only one is ever |
| 13 | # live at a time. |
| 14 | # |
| 15 | # AI authoring of the documentation body uses Azure OpenAI via |
| 16 | # packages/aiclient. The deterministic Reference appendix (entry |
| 17 | # points, dependencies, files of interest, agent surface, action list) |
| 18 | # is computed entirely from package.json + src/ + the workspace graph. |
| 19 | # |
| 20 | # Required repository configuration: |
| 21 | # variables: |
| 22 | # DOCS_BOT_APP_ID GitHub App that opens the PR |
| 23 | # AZURE_CLIENT_ID Entra App registration (or User-Assigned MI) trusted by the |
| 24 | # federated credential. The OIDC subject must match |
| 25 | # `repo:<org>/<repo>:ref:refs/heads/main` (or whatever ref/env |
| 26 | # you registered when creating the FIC). |
| 27 | # AZURE_TENANT_ID Directory (tenant) ID for the same App registration. |
| 28 | # AZURE_SUBSCRIPTION_ID Subscription containing the Azure OpenAI resource. |
| 29 | # secrets: |
| 30 | # DOCS_BOT_APP_PRIVATE_KEY Private key for the GitHub App |
| 31 | # AZURE_OPENAI_ENDPOINT Azure OpenAI endpoint URL |
| 32 | # |
| 33 | # Auth model: GitHub Actions exchanges its OIDC token for an Azure AD bearer |
| 34 | # token via `azure/login@v2`. `aiclient` honors the literal value |
| 35 | # `AZURE_OPENAI_API_KEY=identity` as a sentinel that switches it to |
| 36 | # DefaultAzureCredential, which then picks up the AZURE_* env vars |
| 37 | # `azure/login` exports. No long-lived API key is stored in this repo. |
| 38 | # |
| 39 | # The workflow is read-only and emits no PR until those are provisioned; |
| 40 | # it simply prints what it would have done. |
| 41 | |
name: docs-generate

on:
  # Scheduled run: once a day at 08:00 UTC (00:00 PST / 01:00 PDT). This
  # is a low-traffic window, so churn from this PR does not collide with
  # other automated PRs.
  schedule:
    - cron: "0 8 * * *"

  # Manual run. All inputs are optional. On scheduled runs there are no
  # dispatch inputs, so every `inputs.*` expression below evaluates to an
  # empty value there — the declared defaults apply to manual runs only.
  workflow_dispatch:
    inputs:
      dry-run:
        type: boolean
        default: false
        description: "Dry run — analyse and render only, don't write or open PR"
      packages:
        type: string
        default: ""
        description: "Comma-separated package names to regenerate (overrides change detection)"
      since:
        type: string
        default: ""
        description: "Override the watermark — git ref to diff against (e.g. main, HEAD~10)"
      llm:
        type: boolean
        default: true
        description: "Use Azure OpenAI to author the documentation sections (placeholder-only when off)"
      max-packages:
        # Declared as a string; validated as numeric before it is used.
        type: string
        default: "25"
        description: "Per-run cap on packages touched (defaults to 25)"
| 71 | |
# All runs of this workflow share one concurrency group, and an in-flight
# docs-autogen run is never cancelled. It is allowed to finish; the next
# scheduled run supersedes the resulting PR if necessary.
concurrency:
  cancel-in-progress: false
  group: "${{ github.workflow }}"
| 77 | |
permissions:
  # Lets `azure/login@v2` request a short-lived OIDC JWT for the
  # federated credential exchange. The token never leaves the runner;
  # Entra exchanges it for an Azure AD bearer token bound to the App
  # registration named by AZURE_CLIENT_ID.
  id-token: write
  # Branch push and watermark tag push.
  contents: write
  pull-requests: write
| 86 | |
jobs:
  regenerate:
    runs-on: ubuntu-latest

    steps:
      # Full history (fetch-depth 0) rather than a shallow clone.
      # NOTE(review): presumably required so the generator can diff
      # against the `docs-bot/last-run` watermark tag — confirm.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # pnpm version is taken from ts/package.json.
      - name: Install pnpm
        uses: pnpm/action-setup@v4
        with:
          package_json_file: ts/package.json

      # Node 22 with the pnpm store cached, keyed on the ts lockfile.
      - uses: actions/setup-node@v4
        with:
          node-version: 22
          cache: pnpm
          cache-dependency-path: ts/pnpm-lock.yaml
| 106 | |
# Mints an installation token for the docs-bot GitHub App. Later steps
# read it as `steps.app-token.outputs.token` and hand it to `gh` via
# GH_TOKEN.
# NOTE(review): the token is minted before install/build/regeneration;
# confirm its lifetime comfortably exceeds the longest expected run.
- id: app-token
  name: Generate GitHub App token
  uses: actions/create-github-app-token@v1
  with:
    private-key: ${{ secrets.DOCS_BOT_APP_PRIVATE_KEY }}
    app-id: ${{ vars.DOCS_BOT_APP_ID }}
| 113 | |
# Install the ts workspace exactly as pinned by the lockfile.
- name: Install ts dependencies
  run: |
    corepack enable
    pnpm install --frozen-lockfile
  working-directory: ts

# Build only what the generator needs: `aiclient` first, then the
# docs-autogen tool itself.
- name: Build docs-autogen tool
  run: |
    pnpm --filter aiclient build
    pnpm --filter @typeagent/docs-autogen build
  working-directory: ts
| 125 | |
# Federated-credential (OIDC) login; no client secret is configured.
# Per the workflow's auth model this exports AZURE_CLIENT_ID,
# AZURE_TENANT_ID, AZURE_SUBSCRIPTION_ID, and AZURE_FEDERATED_TOKEN_FILE
# to subsequent steps, where `aiclient`'s DefaultAzureCredential picks
# them up automatically once AZURE_OPENAI_API_KEY is set to the literal
# string "identity".
- name: Azure login (federated)
  uses: azure/login@v2
  with:
    tenant-id: ${{ vars.AZURE_TENANT_ID }}
    client-id: ${{ vars.AZURE_CLIENT_ID }}
    subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}
| 137 | |
# Validate dispatch inputs before they reach a shell. Strict allowlists
# prevent shell-metachar injection and limit blast radius if a
# workflow_dispatch actor is compromised. Any validation failure aborts
# the run before the CLI is invoked.
#
# Each value is matched as a WHOLE string with bash `[[ =~ ]]`. Piping
# the value through `grep -E '^…$'` is not equivalent: grep anchors per
# line, so a multi-line value passes as soon as any single line is
# clean, letting arbitrary text ride along on the other lines.
- name: Validate dispatch inputs
  id: validate
  env:
    INPUT_PACKAGES: ${{ inputs.packages }}
    INPUT_SINCE: ${{ inputs.since }}
    INPUT_MAX_PACKAGES: ${{ inputs.max-packages }}
  run: |
    set -eu

    # Emit a workflow error annotation and abort the step.
    reject() {
      echo "::error::$1" >&2
      exit 1
    }

    # Scoped/unscoped npm package names: letters, digits, `-`, `_`,
    # `.`, `/`, `@`. Comma-separated; spaces/tabs around names are
    # tolerated, newlines are not.
    packages_re='^[A-Za-z0-9@/_.,[:blank:]-]+$'
    # Git revisions: letters, digits, `-`, `_`, `.`, `/`, plus the `~`
    # and `^` ancestry suffixes so the documented `HEAD~10` form is
    # accepted. A leading `-` is refused so the value can never be
    # parsed as a command-line option downstream.
    since_re='^[A-Za-z0-9._/][A-Za-z0-9._/~^-]*$'
    # Digits only, with at least one non-zero digit (strictly positive).
    max_packages_re='^0*[1-9][0-9]*$'

    if [ -n "$INPUT_PACKAGES" ] && ! [[ "$INPUT_PACKAGES" =~ $packages_re ]]; then
      reject "Invalid characters in 'packages' input."
    fi

    if [ -n "$INPUT_SINCE" ] && ! [[ "$INPUT_SINCE" =~ $since_re ]]; then
      reject "Invalid characters in 'since' input."
    fi

    if [ -n "$INPUT_MAX_PACKAGES" ] && ! [[ "$INPUT_MAX_PACKAGES" =~ $max_packages_re ]]; then
      reject "'max-packages' must be a positive integer."
    fi
| 172 | |
# Runs the docs-autogen CLI over the affected packages.
#
# Outputs:
#   llm        "true"/"false" — whether `--llm` was passed to the CLI.
#   exit_code  exit status of the CLI (written on failure as well).
- name: Regenerate package README.AUTOGEN.md files
  id: regen
  working-directory: ts
  env:
    AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
    # Sentinel value: switches @typeagent/aiclient from
    # API-key auth to DefaultAzureCredential (federated OIDC).
    AZURE_OPENAI_API_KEY: identity
    DEBUG: "docs-autogen:*"
    INPUT_PACKAGES: ${{ inputs.packages }}
    INPUT_SINCE: ${{ inputs.since }}
    INPUT_LLM: ${{ inputs.llm }}
    INPUT_MAX_PACKAGES: ${{ inputs.max-packages }}
    INPUT_DRY_RUN: ${{ inputs.dry-run }}
  run: |
    # Build the argv as a bash array so each value is passed as a
    # single token. Never expand user input through GitHub's
    # `${{ }}` template into the shell command line — that would
    # allow workflow_dispatch actors to inject arbitrary shell
    # metacharacters (`;`, `$()`, backticks, etc.).
    set -o pipefail

    # Dispatch inputs are empty on scheduled runs, so the declared
    # default (`llm: true`) never reaches this script on the daily
    # cron. Treat "unset" as enabled; only an explicit `false` from a
    # manual dispatch turns LLM authoring off.
    LLM_ENABLED="${INPUT_LLM:-true}"
    # Published before the CLI runs so it is available even if the CLI
    # fails.
    echo "llm=$LLM_ENABLED" >> "$GITHUB_OUTPUT"

    ARGS=("--render")
    if [ "$INPUT_DRY_RUN" = "true" ]; then
      ARGS+=("--dry-run")
    else
      ARGS+=("--write")
    fi

    if [ -n "$INPUT_PACKAGES" ]; then
      IFS=',' read -ra PKGS <<< "$INPUT_PACKAGES"
      for RAW in "${PKGS[@]}"; do
        # Strip leading, then trailing, whitespace without spawning a
        # subprocess (and without echo/xargs reinterpreting the value).
        PKG="${RAW#"${RAW%%[![:space:]]*}"}"
        PKG="${PKG%"${PKG##*[![:space:]]}"}"
        if [ -n "$PKG" ]; then
          ARGS+=("--package" "$PKG")
        fi
      done
    fi

    if [ -n "$INPUT_SINCE" ]; then
      ARGS+=("--since" "$INPUT_SINCE")
    fi
    if [ "$LLM_ENABLED" = "true" ]; then
      ARGS+=("--llm")
    fi
    if [ -n "$INPUT_MAX_PACKAGES" ]; then
      ARGS+=("--max-packages" "$INPUT_MAX_PACKAGES")
    fi

    echo "Invoking docs-autogen with ${#ARGS[@]} args"

    # The runner's default bash invocation uses `-e`; together with
    # pipefail a failing CLI would abort the script before its status
    # could be recorded. Suspend errexit around the pipeline, record
    # the CLI's own status (not tee's), then fail the step explicitly.
    set +e
    node tools/docsAutogen/bin/docs-autogen.cjs "${ARGS[@]}" \
      | tee /tmp/docs-autogen.log
    CLI_STATUS=${PIPESTATUS[0]}
    set -e

    echo "exit_code=$CLI_STATUS" >> "$GITHUB_OUTPUT"
    exit "$CLI_STATUS"
| 222 | |
# Detect whether anything actually changed under
# ts/packages/**/README.AUTOGEN.md. README.md is never touched by the
# generator. When the CLI ran with --write, all edits are already on
# disk; we rely on git status alone — if it's clean there's no PR to
# open, even if the CLI processed packages (footer-only or unchanged
# verdicts skip writes).
#
# `git status --porcelain` is used rather than `git diff` because
# `git diff` only reports tracked files: a README.AUTOGEN.md generated
# for the first time is untracked and would otherwise go unnoticed.
#
# Outputs:
#   changes        "true" when at least one file is modified or new.
#   changed_files  number of such files (only set when changes=true).
- name: Detect changes
  id: detect
  working-directory: ts
  run: |
    # --untracked-files=all lists new files individually instead of
    # collapsing them into their containing directory.
    CHANGED=$(git status --porcelain --untracked-files=all \
      -- 'packages/**/README.AUTOGEN.md' | wc -l)

    if [ "$CHANGED" -eq 0 ]; then
      echo "changes=false" >> "$GITHUB_OUTPUT"
      echo "No README.AUTOGEN.md changes after regeneration."
    else
      echo "changes=true" >> "$GITHUB_OUTPUT"
      echo "changed_files=$CHANGED" >> "$GITHUB_OUTPUT"
      echo "$CHANGED README.AUTOGEN.md file(s) modified."
    fi
| 242 | |
# ── Create PR (and supersede prior bot PRs) ─────────────────────
# Commits the regenerated README.AUTOGEN.md files to a fresh branch,
# opens one PR for them, then closes any older open bot PRs.
#
# Every workflow-context value is handed to the script through `env:`.
# Nothing is spliced into the script text with `${{ }}`, so a hostile
# input can never be parsed as shell — including inside the unquoted
# heredocs below, where shell parameter expansion inserts values
# verbatim without re-evaluating them.
- name: Create pull request
  if: ${{ steps.detect.outputs.changes == 'true' && inputs.dry-run != true }}
  env:
    GH_TOKEN: ${{ steps.app-token.outputs.token }}
    RUN_NUMBER: ${{ github.run_number }}
    CHANGED_FILES: ${{ steps.detect.outputs.changed_files }}
    # LLM mode: prefer the value the regen step reports, if it exposes
    # an `llm` output; otherwise fall back to the raw dispatch input.
    REGEN_LLM: ${{ steps.regen.outputs.llm }}
    INPUT_LLM: ${{ inputs.llm }}
    PER_RUN_CAP: "${{ inputs.max-packages || '25' }}"
    TRIGGER: "${{ github.event_name == 'schedule' && 'scheduled (daily)' || format('manual ({0})', github.actor) }}"
  run: |
    set -euo pipefail

    # Read the clock once so the branch name, commit subject and PR
    # title cannot disagree if the run straddles midnight.
    TODAY=$(date +%Y-%m-%d)
    BRANCH="automated/docs-readmes-${TODAY//-/}-${RUN_NUMBER}"

    case "${REGEN_LLM:-$INPUT_LLM}" in
      true) LLM_MODE='aiclient (full documentation)' ;;
      *) LLM_MODE='placeholder-only' ;;
    esac

    git config user.name "github-actions[bot]"
    git config user.email "41898282+github-actions[bot]@users.noreply.github.com"

    git checkout -b "$BRANCH"
    # Scope strictly to ts/packages/**/README.AUTOGEN.md to avoid
    # sweeping in any incidental working-tree noise from CI.
    # README.md is never modified by docs-autogen.
    git add 'ts/packages/**/README.AUTOGEN.md'
    CHANGED_LIST=$(git diff --cached --name-only)

    git commit -F - <<MSGEOF
    docs: regenerate package README.AUTOGEN.md files ($TODAY)

    Automated by docs-generate workflow.

    $CHANGED_FILES file(s) updated.

    Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
    Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
    MSGEOF

    git push origin "$BRANCH"

    BODY=$(cat <<PREOF
    ## Automated package README.AUTOGEN.md regeneration

    This PR was automatically generated by the \`docs-generate\` workflow.

    ### Summary
    - **Files updated:** $CHANGED_FILES \`README.AUTOGEN.md\` file(s)
    - **LLM mode:** $LLM_MODE
    - **Per-run cap:** $PER_RUN_CAP package(s)
    - **Trigger:** $TRIGGER

    ### Changed files
    \`\`\`
    $CHANGED_LIST
    \`\`\`

    ### How this works
    1. Diffs \`ts/packages/**\` source files against the last successful run
       (tracked by the \`docs-bot/last-run\` git tag).
    2. For each affected package, regenerates a parallel \`README.AUTOGEN.md\`
       alongside the hand-written \`README.md\` — multi-section AI-authored
       documentation (Overview / What it does / Actions / Architecture /
       How to extend) when LLM mode is on, plus a deterministic Reference
       appendix (entry points, dependencies, files of interest, agent
       surface, actions list).
    3. \`README.md\` is never modified — its content is only read as
       authoritative source material the LLM mirrors and extends.
    4. Validates every generated link resolves on disk; refuses to write
       when broken.
    5. Skips packages whose new file differs only in the staleness footer
       (so daily PRs don't churn unchanged docs).

    ### Review checklist
    - [ ] Sample one or two \`README.AUTOGEN.md\` files and confirm the
          Reference section accurately describes the package.
    - [ ] Sample the AI-authored sections and confirm they read as
          contributor-grade documentation (not marketing prose, no
          hallucinated APIs).
    - [ ] Confirm no hand-written \`README.md\` has been modified.

    See [\`ts/docs/architecture/doc-autogen.md\`](../blob/main/ts/docs/architecture/doc-autogen.md) for design details.
    PREOF
    )

    # Open the new PR first. Closing older PRs beforehand would leave
    # the repository with no docs PR at all if creation failed.
    gh pr create \
      --base main \
      --head "$BRANCH" \
      --title "docs: regenerate package README.AUTOGEN.md files ($TODAY)" \
      --body "$BODY" \
      --label "documentation"

    # Close any previously-open bot PRs now that the replacement
    # exists. Each daily run produces a unique branch, so without
    # dedup the repo accumulates stacking duplicate PRs whenever
    # yesterday's hasn't been merged. Always keep the freshest.
    # Cleanup is best-effort: a failure here only warns.
    PREV_PRS=$(gh pr list \
      --state open \
      --search 'head:automated/docs-readmes- in:branch' \
      --json number,headRefName \
      --jq '.[] | select(.headRefName != "'"$BRANCH"'") | .number') \
      || { echo "::warning::Could not list previous docs-autogen PRs"; PREV_PRS=""; }

    if [ -n "$PREV_PRS" ]; then
      echo "Closing superseded docs-autogen PRs: $PREV_PRS"
      for PR in $PREV_PRS; do
        gh pr close "$PR" \
          --delete-branch \
          --comment "Superseded by a newer automated docs PR." \
          || echo "::warning::Failed to close PR #$PR"
      done
    fi
| 341 | |
# Move the `docs-bot/last-run` watermark, but only for a scheduled run
# that detected changes and was not a dry run. Manual dispatches and
# dry-runs intentionally leave the tag alone, so they remain idempotent
# against the daily cron.
- name: Advance watermark tag
  if: github.event_name == 'schedule' && steps.detect.outputs.changes == 'true' && inputs.dry-run != true
  env:
    GH_TOKEN: ${{ steps.app-token.outputs.token }}
  run: |
    # The tag points at the commit this run generated against (the
    # SHA the workflow started from), not at the tip of the PR branch
    # created afterwards. Both the local tag and the remote tag are
    # overwritten.
    git tag --force docs-bot/last-run ${{ github.sha }}
    git push --force origin docs-bot/last-run
| 354 | |
# ── Job summary ─────────────────────────────────────────────────
# Always runs (even after a failed step) and appends a metrics table
# plus the tail of the CLI log to the run's step summary.
#
# Values come in through `env:` rather than being template-expanded
# into the script, so input text is never parsed as shell.
- name: Job summary
  if: always()
  env:
    EVENT_NAME: ${{ github.event_name }}
    DRY_RUN: "${{ inputs.dry-run || 'false' }}"
    # LLM flag: prefer the value the regen step reports, if it exposes
    # an `llm` output; otherwise fall back to the raw dispatch input.
    REGEN_LLM: ${{ steps.regen.outputs.llm }}
    INPUT_LLM: ${{ inputs.llm }}
    PER_RUN_CAP: "${{ inputs.max-packages || '25' }}"
    CHANGES: "${{ steps.detect.outputs.changes || 'false' }}"
    CHANGED_FILES: "${{ steps.detect.outputs.changed_files || '0' }}"
  run: |
    # `inputs.llm || 'true'` cannot be used for this row: a boolean
    # `false` is falsy, so the expression would print "true" for a run
    # that explicitly disabled the LLM.
    if [ "${REGEN_LLM:-$INPUT_LLM}" = "true" ]; then
      LLM_ENABLED=true
    else
      LLM_ENABLED=false
    fi

    {
      echo "## Package README.AUTOGEN.md regeneration"
      echo ""
      echo "| Metric | Value |"
      echo "|--------|-------|"
      echo "| Trigger | $EVENT_NAME |"
      echo "| Dry run | $DRY_RUN |"
      echo "| LLM enabled | $LLM_ENABLED |"
      echo "| Per-run cap | $PER_RUN_CAP |"
      echo "| Changes detected | $CHANGES |"
      echo "| Files modified | $CHANGED_FILES |"
      echo ""
      # The log only exists if the regenerate step got as far as
      # invoking the CLI.
      if [ -f /tmp/docs-autogen.log ]; then
        echo "### CLI output (truncated)"
        echo '```'
        tail -n 200 /tmp/docs-autogen.log || true
        echo '```'
      fi
    } >> "$GITHUB_STEP_SUMMARY"
| 378 | |