microsoft/TypeAgent
Public · mirrored from https://github.com/microsoft/TypeAgent · Available
.github/workflows/docs-generate.yml
379lines · modecode
| 1 | # Copyright (c) Microsoft Corporation. |
| 2 | # Licensed under the MIT License. |
| 3 | |
| 4 | # Daily regeneration of package-level README.AUTOGEN.md companion files |
| 5 | # under ts/packages/**. |
| 6 | # |
| 7 | # Detects packages whose source has changed since the last successful |
| 8 | # scheduled run (tracked by the `docs-bot/last-run` lightweight tag), |
| 9 | # rebuilds each affected package's README.AUTOGEN.md (a parallel file |
| 10 | # alongside the hand-written README.md, never touching the latter), |
| 11 | # validates every link on disk, and opens a single batched PR. Prior |
| 12 | # open bot PRs are closed with --delete-branch so only one is ever |
| 13 | # live at a time. |
| 14 | # |
| 15 | # AI authoring of the documentation body uses Azure OpenAI via |
| 16 | # packages/aiclient. The deterministic Reference appendix (entry |
| 17 | # points, dependencies, files of interest, agent surface, action list) |
| 18 | # is computed entirely from package.json + src/ + the workspace graph. |
| 19 | # |
| 20 | # Required repository configuration: |
| 21 | # variables: |
| 22 | # DOCS_BOT_APP_ID GitHub App that opens the PR |
| 23 | # secrets: |
| 24 | # DOCS_BOT_APP_PRIVATE_KEY Private key for the GitHub App |
| 25 | # AZUREAPPSERVICE_CLIENTID_5B0D2D6BA40F4710B45721D2112356DD \ |
| 26 | # AZUREAPPSERVICE_TENANTID_39BB903136F14B6EAD8F53A8AB78E3AA | Existing repo secrets shared with |
| 27 | # AZUREAPPSERVICE_SUBSCRIPTIONID_F36C1F2C4B2C49CA8DD5C52FAB98FA30 / smoke-tests / build-docker workflows. |
| 28 | # |
| 29 | # Auth model: GitHub Actions exchanges its OIDC token for an Azure AD bearer |
| 30 | # token via `azure/login@v2`, then `tools/scripts/getKeys.mjs` uses |
| 31 | # DefaultAzureCredential to pull the consolidated `typeagent-config` secret |
| 32 | # from the `build-pipeline-kv` Key Vault and writes it to |
| 33 | # `ts/config.local.yaml`. `aiclient` reads endpoint + key from that file via |
| 34 | # `@typeagent/config`. No Azure OpenAI key is stored in this repo. The |
| 35 | # Entra App registration referenced by the secrets above already has |
| 36 | # Key Vault read access on `build-pipeline-kv`, so no new RBAC is required. |
| 37 | # |
| 38 | # The workflow is read-only and emits no PR until those are provisioned; |
| 39 | # it simply prints what it would have done. |
| 40 | |
name: docs-generate

on:
  # Runs once a day at 08:00 UTC (01:00 PST). The slot is deliberately a
  # quiet one so the PR this workflow opens does not pile up against
  # other automated PRs.
  schedule:
    - cron: '0 8 * * *'
  # Manual trigger. Every input is optional; the defaults below apply
  # only to manual dispatches.
  workflow_dispatch:
    inputs:
      dry-run:
        type: boolean
        default: false
        description: "Dry run — analyse and render only, don't write or open PR"
      packages:
        type: string
        default: ''
        description: 'Comma-separated package names to regenerate (overrides change detection)'
      since:
        type: string
        default: ''
        description: 'Override the watermark — git ref to diff against (e.g. main, HEAD~10)'
      llm:
        type: boolean
        default: true
        description: 'Use Azure OpenAI to author the documentation sections (placeholder-only when off)'
      max-packages:
        type: string
        default: '25'
        description: 'Per-run cap on packages touched (defaults to 25)'
| 70 | |
# One run at a time per workflow. A run that is already in flight is
# allowed to finish (never cancelled); a later scheduled run can
# supersede the PR it produced if that turns out to be necessary.
concurrency:
  cancel-in-progress: false
  group: ${{ github.workflow }}

permissions:
  # `id-token: write` lets `azure/login@v2` request a short-lived OIDC
  # JWT for the federated credential exchange. The token stays on the
  # runner; Entra swaps it for an Azure AD bearer token bound to the
  # existing build-pipeline App registration.
  id-token: write
  contents: write
  pull-requests: write
| 85 | |
jobs:
  regenerate:
    runs-on: ubuntu-latest

    steps:
      # fetch-depth 0 requests the complete history instead of the
      # default shallow clone.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # pnpm version is resolved from the workspace manifest.
      - name: Install pnpm
        uses: pnpm/action-setup@v4
        with:
          package_json_file: ts/package.json

      # Node 22 with the pnpm store cached against the ts lockfile.
      - uses: actions/setup-node@v4
        with:
          cache: pnpm
          cache-dependency-path: ts/pnpm-lock.yaml
          node-version: 22

      # Mints the GitHub App installation token that later steps read
      # as `steps.app-token.outputs.token`.
      - name: Generate GitHub App token
        id: app-token
        uses: actions/create-github-app-token@v1
        with:
          private-key: ${{ secrets.DOCS_BOT_APP_PRIVATE_KEY }}
          app-id: ${{ vars.DOCS_BOT_APP_ID }}

      # Lockfile-exact install of the ts workspace.
      - name: Install ts dependencies
        working-directory: ts
        run: |
          corepack enable
          pnpm install --frozen-lockfile

      # aiclient is built first, then the docs-autogen tool itself.
      - name: Build docs-autogen tool
        working-directory: ts
        run: |
          pnpm --filter aiclient build
          pnpm --filter @typeagent/docs-autogen build
| 124 | |
# Log in to Azure with a federated credential. The Entra App
# registration is the one smoke-tests.yml and
# build-docker-container.yml already use, so reading
# build-pipeline-kv needs no additional RBAC.
- name: Azure login (federated)
  uses: azure/login@v2.2.0
  with:
    tenant-id: ${{ secrets.AZUREAPPSERVICE_TENANTID_39BB903136F14B6EAD8F53A8AB78E3AA }}
    client-id: ${{ secrets.AZUREAPPSERVICE_CLIENTID_5B0D2D6BA40F4710B45721D2112356DD }}
    subscription-id: ${{ secrets.AZUREAPPSERVICE_SUBSCRIPTIONID_F36C1F2C4B2C49CA8DD5C52FAB98FA30 }}

# Fetch the shared service config (including the AzOpenAI endpoint
# and key) from Key Vault and write it to ts/config.local.yaml.
# @typeagent/config discovers that file on its own at runtime, so
# the regeneration step needs no extra environment wiring.
- name: Pull config from Key Vault
  working-directory: ts
  run: node tools/scripts/getKeys.mjs --vault build-pipeline-kv --commit
| 143 | |
# Validate dispatch inputs before they reach a shell. Strict
# allowlists prevent shell-metachar injection and limit blast
# radius if a workflow_dispatch actor is compromised. Any
# validation failure aborts the run before the CLI is invoked.
#
# Each value is matched as a WHOLE string with bash `[[ =~ ]]`.
# A line-oriented `grep` accepts a multi-line value as soon as any
# single line is clean, which would let a payload ride along on a
# second line.
- name: Validate dispatch inputs
  id: validate
  env:
    INPUT_PACKAGES: ${{ inputs.packages }}
    INPUT_SINCE: ${{ inputs.since }}
    INPUT_MAX_PACKAGES: ${{ inputs.max-packages }}
  run: |
    set -e
    # Patterns live in variables so bash treats them as regexes
    # without any quoting surprises inside `[[ ... =~ ... ]]`.
    # Scoped/unscoped npm package names: letters, digits,
    # `-`, `_`, `.`, `/`, `@`. Comma-separated, whitespace tolerated.
    PACKAGES_RE='^[A-Za-z0-9@/_.,[:space:]-]+$'
    # Git refs: letters, digits, `-`, `_`, `.`, `/`, plus the
    # revision suffixes `~` and `^` so the documented example
    # `HEAD~10` is accepted.
    SINCE_RE='^[A-Za-z0-9._/~^-]+$'
    # Digits only.
    MAX_PACKAGES_RE='^[0-9]+$'

    if [ -n "$INPUT_PACKAGES" ]; then
      if ! [[ "$INPUT_PACKAGES" =~ $PACKAGES_RE ]]; then
        echo "::error::Invalid characters in 'packages' input." >&2
        exit 1
      fi
    fi

    if [ -n "$INPUT_SINCE" ]; then
      if ! [[ "$INPUT_SINCE" =~ $SINCE_RE ]]; then
        echo "::error::Invalid characters in 'since' input." >&2
        exit 1
      fi
      # A leading dash would be read as an option by whatever
      # receives the value after `--since`.
      if [[ "$INPUT_SINCE" == -* ]]; then
        echo "::error::'since' must not start with '-'." >&2
        exit 1
      fi
    fi

    if [ -n "$INPUT_MAX_PACKAGES" ]; then
      if ! [[ "$INPUT_MAX_PACKAGES" =~ $MAX_PACKAGES_RE ]]; then
        echo "::error::'max-packages' must be a positive integer." >&2
        exit 1
      fi
    fi
| 178 | |
# Run the docs-autogen CLI. Dispatch inputs arrive as environment
# variables only, never through `${{ }}` inside the script.
- name: Regenerate package README.AUTOGEN.md files
  id: regen
  working-directory: ts
  env:
    DEBUG: "docs-autogen:*"
    INPUT_PACKAGES: ${{ inputs.packages }}
    INPUT_SINCE: ${{ inputs.since }}
    # `inputs` is empty on `schedule` events, so the dispatch default
    # (`true`) never applies to the daily cron. Resolve the effective
    # value here: LLM authoring is on unless a manual dispatch
    # explicitly set `llm: false`.
    INPUT_LLM: ${{ github.event_name != 'workflow_dispatch' || inputs.llm }}
    INPUT_MAX_PACKAGES: ${{ inputs.max-packages }}
    INPUT_DRY_RUN: ${{ inputs.dry-run }}
  run: |
    # Build the argv as a bash array so each value is passed as a
    # single token. Never expand user input through GitHub's
    # `${{ }}` template into the shell command line — that would
    # allow workflow_dispatch actors to inject arbitrary shell
    # metacharacters (`;`, `$()`, backticks, etc.).
    set -o pipefail
    ARGS=("--render")
    if [ "$INPUT_DRY_RUN" = "true" ]; then
      ARGS+=("--dry-run")
    else
      ARGS+=("--write")
    fi
    if [ -n "$INPUT_PACKAGES" ]; then
      IFS=',' read -ra PKGS <<< "$INPUT_PACKAGES"
      for PKG in "${PKGS[@]}"; do
        # xargs with no command trims surrounding whitespace.
        PKG_TRIMMED=$(echo "$PKG" | xargs)
        if [ -n "$PKG_TRIMMED" ]; then
          ARGS+=("--package" "$PKG_TRIMMED")
        fi
      done
    fi
    if [ -n "$INPUT_SINCE" ]; then
      ARGS+=("--since" "$INPUT_SINCE")
    fi
    if [ "$INPUT_LLM" = "true" ]; then
      ARGS+=("--llm")
    fi
    if [ -n "$INPUT_MAX_PACKAGES" ]; then
      ARGS+=("--max-packages" "$INPUT_MAX_PACKAGES")
    fi
    echo "Invoking docs-autogen with ${#ARGS[@]} args"
    # The step shell runs with `-e`, so a failing pipeline would
    # abort the script before the exit code is recorded. Suspend
    # `-e` around the pipeline, publish the CLI's status, then fail
    # the step with that same status.
    set +e
    node tools/docsAutogen/bin/docs-autogen.cjs "${ARGS[@]}" \
      | tee /tmp/docs-autogen.log
    CLI_STATUS=${PIPESTATUS[0]}
    set -e
    echo "exit_code=$CLI_STATUS" >> "$GITHUB_OUTPUT"
    exit "$CLI_STATUS"

# Detect whether anything actually changed under
# ts/packages/**/README.AUTOGEN.md. README.md is never touched by
# the generator. When the CLI ran with --write, all edits are
# already on disk; we rely on the working-tree state alone — if it
# is clean there's no PR to open, even if the CLI processed packages
# (footer-only or unchanged verdicts skip writes).
- name: Detect changes
  id: detect
  working-directory: ts
  run: |
    # `git status --porcelain` lists modified AND untracked paths.
    # `git diff` alone would miss a README.AUTOGEN.md generated for
    # the first time, because that file is not tracked yet.
    # `--untracked-files=all` lists each new file individually
    # instead of collapsing a new directory into one entry.
    PENDING=$(git status --porcelain --untracked-files=all -- 'packages/**/README.AUTOGEN.md')
    if [ -z "$PENDING" ]; then
      echo "changes=false" >> "$GITHUB_OUTPUT"
      echo "No README.AUTOGEN.md changes after regeneration."
    else
      echo "changes=true" >> "$GITHUB_OUTPUT"
      CHANGED=$(printf '%s\n' "$PENDING" | wc -l)
      echo "changed_files=$CHANGED" >> "$GITHUB_OUTPUT"
      echo "$CHANGED README.AUTOGEN.md file(s) modified."
    fi

# ── Create PR (and supersede prior bot PRs) ─────────────────────
# Every templated value is handed to the script through `env` so
# nothing is expanded by `${{ }}` inside the shell text.
- name: Create pull request
  if: ${{ steps.detect.outputs.changes == 'true' && inputs.dry-run != true }}
  env:
    GH_TOKEN: ${{ steps.app-token.outputs.token }}
    RUN_NUMBER: ${{ github.run_number }}
    CHANGED_FILES: ${{ steps.detect.outputs.changed_files }}
    # Mirrors the effective LLM setting used by the regen step:
    # placeholder-only solely when a manual dispatch set llm=false.
    LLM_MODE: ${{ github.event_name == 'workflow_dispatch' && inputs.llm == false && 'placeholder-only' || 'aiclient (full documentation)' }}
    MAX_PACKAGES: ${{ inputs.max-packages || '25' }}
    TRIGGER: ${{ github.event_name == 'schedule' && 'scheduled (daily)' || format('manual ({0})', github.actor) }}
  run: |
    BRANCH="automated/docs-readmes-$(date +%Y%m%d)-${RUN_NUMBER}"

    git config user.name "github-actions[bot]"
    git config user.email "41898282+github-actions[bot]@users.noreply.github.com"

    git checkout -b "$BRANCH"
    # Scope strictly to ts/packages/**/README.AUTOGEN.md to avoid
    # sweeping in any incidental working-tree noise from CI.
    # README.md is never modified by docs-autogen.
    git add 'ts/packages/**/README.AUTOGEN.md'
    CHANGED_LIST=$(git diff --cached --name-only)

    git commit -m "docs: regenerate package README.AUTOGEN.md files ($(date +%Y-%m-%d))

    Automated by docs-generate workflow.

    ${CHANGED_FILES} file(s) updated.

    Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
    Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>"

    git push origin "$BRANCH"

    # Unquoted heredoc: shell variables are expanded, backticks are
    # escaped so they stay literal Markdown.
    BODY=$(cat <<PREOF
    ## Automated package README.AUTOGEN.md regeneration

    This PR was automatically generated by the \`docs-generate\` workflow.

    ### Summary
    - **Files updated:** ${CHANGED_FILES} \`README.AUTOGEN.md\` file(s)
    - **LLM mode:** ${LLM_MODE}
    - **Per-run cap:** ${MAX_PACKAGES} package(s)
    - **Trigger:** ${TRIGGER}

    ### Changed files
    \`\`\`
    $CHANGED_LIST
    \`\`\`

    ### How this works
    1. Diffs \`ts/packages/**\` source files against the last successful run
       (tracked by the \`docs-bot/last-run\` git tag).
    2. For each affected package, regenerates a parallel \`README.AUTOGEN.md\`
       alongside the hand-written \`README.md\` — multi-section AI-authored
       documentation (Overview / What it does / Actions / Architecture /
       How to extend) when LLM mode is on, plus a deterministic Reference
       appendix (entry points, dependencies, files of interest, agent
       surface, actions list).
    3. \`README.md\` is never modified — its content is only read as
       authoritative source material the LLM mirrors and extends.
    4. Validates every generated link resolves on disk; refuses to write
       when broken.
    5. Skips packages whose new file differs only in the staleness footer
       (so daily PRs don't churn unchanged docs).

    ### Review checklist
    - [ ] Sample one or two \`README.AUTOGEN.md\` files and confirm the
          Reference section accurately describes the package.
    - [ ] Sample the AI-authored sections and confirm they read as
          contributor-grade documentation (not marketing prose, no
          hallucinated APIs).
    - [ ] Confirm no hand-written \`README.md\` has been modified.

    See [\`ts/docs/architecture/doc-autogen.md\`](../blob/main/ts/docs/architecture/doc-autogen.md) for design details.
    PREOF
    )

    # Open the new PR first. If creation fails the step aborts here
    # and yesterday's PR stays open, so there is never a window with
    # no live docs PR.
    gh pr create \
      --base main \
      --head "$BRANCH" \
      --title "docs: regenerate package README.AUTOGEN.md files ($(date +%Y-%m-%d))" \
      --body "$BODY" \
      --label "documentation"

    # Now close any previously-open bot PRs. Each daily run produces
    # a unique branch, so without dedup the repo accumulates
    # stacking duplicate PRs whenever yesterday's hasn't been
    # merged. The jq filter excludes the branch just pushed, so the
    # freshest PR is always kept.
    PREV_PRS=$(gh pr list \
      --state open \
      --search 'head:automated/docs-readmes- in:branch' \
      --json number,headRefName \
      --jq '.[] | select(.headRefName != "'"$BRANCH"'") | .number')
    if [ -n "$PREV_PRS" ]; then
      echo "Closing superseded docs-autogen PRs: $PREV_PRS"
      for PR in $PREV_PRS; do
        # Best effort: a PR that cannot be closed is reported as a
        # warning and does not fail the run.
        gh pr close "$PR" \
          --delete-branch \
          --comment "Superseded by a newer automated docs PR." \
          || echo "::warning::Failed to close PR #$PR"
      done
    fi

# Advance the watermark only on a successful scheduled run with a
# PR. Manual dispatches and dry-runs intentionally do not move
# the tag, so they remain idempotent against the daily cron.
- name: Advance watermark tag
  if: ${{ github.event_name == 'schedule' && steps.detect.outputs.changes == 'true' && inputs.dry-run != true }}
  env:
    GH_TOKEN: ${{ steps.app-token.outputs.token }}
    TARGET_SHA: ${{ github.sha }}
  run: |
    # Tag the commit we generated against (the original SHA before
    # the PR branch was created), not the branch tip.
    git tag -f docs-bot/last-run "$TARGET_SHA"
    git push origin docs-bot/last-run --force

# ── Job summary ─────────────────────────────────────────────────
# Runs with `if: always()`, i.e. also when input validation failed.
# Inputs therefore reach the script through `env` only: a raw
# `${{ inputs.* }}` inside the shell text would execute unvalidated
# input.
- name: Job summary
  if: always()
  env:
    SUMMARY_TRIGGER: ${{ github.event_name }}
    SUMMARY_DRY_RUN: ${{ inputs.dry-run || 'false' }}
    # Same resolution as the regen step, so an explicit llm=false is
    # reported as false instead of falling through to 'true'.
    SUMMARY_LLM: ${{ github.event_name != 'workflow_dispatch' || inputs.llm }}
    SUMMARY_MAX_PACKAGES: ${{ inputs.max-packages || '25' }}
    SUMMARY_CHANGES: ${{ steps.detect.outputs.changes || 'false' }}
    SUMMARY_FILES: ${{ steps.detect.outputs.changed_files || '0' }}
  run: |
    {
      echo "## Package README.AUTOGEN.md regeneration"
      echo ""
      echo "| Metric | Value |"
      echo "|--------|-------|"
      echo "| Trigger | ${SUMMARY_TRIGGER} |"
      echo "| Dry run | ${SUMMARY_DRY_RUN} |"
      echo "| LLM enabled | ${SUMMARY_LLM} |"
      echo "| Per-run cap | ${SUMMARY_MAX_PACKAGES} |"
      echo "| Changes detected | ${SUMMARY_CHANGES} |"
      echo "| Files modified | ${SUMMARY_FILES} |"
      echo ""
      if [ -f /tmp/docs-autogen.log ]; then
        echo "### CLI output (truncated)"
        echo '```'
        tail -n 200 /tmp/docs-autogen.log || true
        echo '```'
      fi
    } >> "$GITHUB_STEP_SUMMARY"
| 380 | |