microsoft/TypeAgent
Publicmirrored fromhttps://github.com/microsoft/TypeAgentAvailable
.github/workflows/docs-generate.yml
406lines · modecode
| 1 | # Copyright (c) Microsoft Corporation. |
| 2 | # Licensed under the MIT License. |
| 3 | |
| 4 | # Optional, manually-dispatched regeneration of package-level |
| 5 | # README.AUTOGEN.md companion files under ts/packages/**. |
| 6 | # |
| 7 | # This workflow is NOT scheduled. It runs only when an operator |
| 8 | # explicitly clicks "Run workflow" from the Actions UI (or otherwise |
| 9 | # triggers `workflow_dispatch`). To leverage the existing CI Azure |
| 10 | # infrastructure + Key Vault secrets the job is deployed to the |
| 11 | # `development-fork` GitHub environment — the same environment used by |
| 12 | # the smoke-tests and build-docker-container workflows. That binding |
| 13 | # is what scopes the Entra federated-credential subject claim to |
| 14 | # `repo:<org>/<repo>:environment:development-fork`, matching the |
| 15 | # subject the build-pipeline App registration is already configured |
| 16 | # to trust. |
| 17 | # |
| 18 | # When dispatched, it detects packages whose source has changed since |
| 19 | # the supplied diff baseline (`since` input, or the `docs-bot/last-run` |
| 20 | # lightweight tag if present), rebuilds each affected package's |
| 21 | # README.AUTOGEN.md (a parallel file alongside the hand-written |
| 22 | # README.md, never touching the latter), validates every link on disk, |
| 23 | # and opens a single batched PR. Prior open bot PRs are closed with |
| 24 | # --delete-branch so only one is ever live at a time. |
| 25 | # |
| 26 | # AI authoring of the documentation body uses Azure OpenAI via |
| 27 | # packages/aiclient. The deterministic Reference appendix (entry |
| 28 | # points, dependencies, files of interest, agent surface, action list) |
| 29 | # is computed entirely from package.json + src/ + the workspace graph. |
| 30 | # |
| 31 | # Required repository configuration: |
| 32 | # secrets (scoped to the `development-fork` environment): |
| 33 | # AZUREAPPSERVICE_CLIENTID_5B0D2D6BA40F4710B45721D2112356DD \ |
| 34 | # AZUREAPPSERVICE_TENANTID_39BB903136F14B6EAD8F53A8AB78E3AA | Shared with |
| 35 | # AZUREAPPSERVICE_SUBSCRIPTIONID_F36C1F2C4B2C49CA8DD5C52FAB98FA30 / smoke-tests / build-docker workflows. |
| 36 | # |
| 37 | # GitHub write identity: the workflow uses the auto-generated |
| 38 | # `GITHUB_TOKEN` to push the branch and open the PR. Trade-off: |
| 39 | # GitHub deliberately suppresses downstream workflow runs on PRs |
| 40 | # opened by `GITHUB_TOKEN`, so the generated docs PR will NOT have |
| 41 | # build / lint / smoke-test status checks attached. Reviewers should |
| 42 | # rely on the diff and on a separate manual run of those checks if |
| 43 | # needed. A future iteration can swap this for an installable |
| 44 | # GitHub App identity (e.g. once `microsoft-typeagent-docs-bot` is |
| 45 | # approved on the `microsoft` org) to re-enable downstream CI; see |
| 46 | # `ts/docs/architecture/doc-autogen-setup.md` for that path. |
| 47 | # |
| 48 | # Auth model (Azure side): GitHub Actions exchanges its OIDC token |
| 49 | # for an Azure AD bearer token via `azure/login@v2`, then |
| 50 | # `tools/scripts/getKeys.mjs` uses DefaultAzureCredential to pull the |
| 51 | # consolidated `typeagent-config` secret from the `build-pipeline-kv` |
| 52 | # Key Vault and writes it to `ts/config.local.yaml`. `aiclient` reads |
| 53 | # endpoint + key from that file via `@typeagent/config`. No Azure |
| 54 | # OpenAI key is stored in this repo. The Entra App registration |
| 55 | # referenced by the secrets above already has Key Vault read access on |
| 56 | # `build-pipeline-kv`, so no new RBAC is required. |
| 57 | |
name: docs-generate

# Manual trigger only. Every input is optional and carries a default, so
# dispatching with no overrides writes changes (dry-run off) with the LLM on.
on:
  workflow_dispatch:
    inputs:
      dry-run:
        type: boolean
        default: false
        description: "Dry run — analyse and render only, don't write or open PR"
      packages:
        type: string
        default: ""
        description: "Comma-separated package names to regenerate (overrides change detection)"
      since:
        type: string
        default: ""
        description: "Override the watermark — git ref to diff against (e.g. main, HEAD~10)"
      llm:
        type: boolean
        default: true
        description: "Use Azure OpenAI to author the documentation sections (placeholder-only when off)"
      # Declared as a string (not a number); a later step checks it is
      # numeric before it is handed to the CLI.
      max-packages:
        type: string
        default: "25"
        description: "Per-run cap on packages touched (defaults to 25)"
| 83 | |
# All runs of this workflow share one concurrency group. An in-flight
# docs-autogen run is never cancelled by a newer dispatch, so a PR it has
# already opened is not left half-written.
concurrency:
  cancel-in-progress: false
  group: "${{ github.workflow }}"
| 89 | |
permissions:
  # Lets `azure/login@v2` request a short-lived OIDC JWT for the
  # federated-credential exchange. The token stays on the runner; Entra
  # swaps it for an Azure AD bearer token bound to the existing
  # build-pipeline App registration.
  id-token: write
  # Push the generated branch.
  contents: write
  # Open the docs PR and close superseded ones.
  pull-requests: write
| 98 | |
| 99 | jobs: |
| 100 | regenerate: |
| 101 | # Bind to the same environment used by smoke-tests and |
| 102 | # build-docker-container so federated-credential exchange against |
| 103 | # the existing build-pipeline Entra App registration succeeds, and |
| 104 | # so the workflow can read the `development-fork`-scoped Key Vault |
| 105 | # secrets. |
| 106 | environment: development-fork |
| 107 | runs-on: ubuntu-latest |
| 108 | |
| 109 | steps: |
# Depth 0 instead of the action's default clone depth — presumably so the
# change-detection diff against the `since` ref or the `docs-bot/last-run`
# tag can be resolved locally (confirm against the docs-autogen CLI).
- uses: actions/checkout@v4
  with:
    fetch-depth: 0
| 113 | |
# The TypeScript workspace lives under ts/, so point the action at that
# manifest rather than a repository-root package.json.
- name: Install pnpm
  uses: pnpm/action-setup@v4
  with:
    package_json_file: ts/package.json
| 118 | |
# Node 22 with pnpm caching keyed on the ts/ workspace lockfile.
- uses: actions/setup-node@v4
  with:
    cache: pnpm
    cache-dependency-path: ts/pnpm-lock.yaml
    node-version: 22
| 124 | |
# `--frozen-lockfile` makes the install fail instead of rewriting
# pnpm-lock.yaml when the lockfile and manifests disagree.
- name: Install ts dependencies
  working-directory: ts
  run: corepack enable && pnpm install --frozen-lockfile
| 130 | |
# The `...` suffix on the filter selects @typeagent/docs-autogen plus every
# workspace package it depends on transitively (e.g. aiclient →
# @typeagent/config), built in topological order. A bare
# `--filter @typeagent/docs-autogen` would build only that one package and
# tsc would then fail on the missing type declarations of its workspace
# dependencies. build-dotnet.yml uses the same pattern.
- name: Build docs-autogen tool
  working-directory: ts
  run: pnpm --filter '@typeagent/docs-autogen...' build
| 142 | |
# OIDC / federated-credential sign-in. The three identifiers come from the
# same environment-scoped secrets that smoke-tests.yml and
# build-docker-container.yml use, i.e. the same Entra App registration, so
# reading build-pipeline-kv needs no additional RBAC.
- name: Azure login (federated)
  uses: azure/login@v2.2.0
  with:
    tenant-id: ${{ secrets.AZUREAPPSERVICE_TENANTID_39BB903136F14B6EAD8F53A8AB78E3AA }}
    subscription-id: ${{ secrets.AZUREAPPSERVICE_SUBSCRIPTIONID_F36C1F2C4B2C49CA8DD5C52FAB98FA30 }}
    client-id: ${{ secrets.AZUREAPPSERVICE_CLIENTID_5B0D2D6BA40F4710B45721D2112356DD }}
| 152 | |
# Materialise the shared service configuration (including the Azure OpenAI
# endpoint + key) from Key Vault into ts/config.local.yaml.
# @typeagent/config discovers that file on its own at runtime, so the
# regeneration step needs no extra environment wiring.
- name: Pull config from Key Vault
  working-directory: ts
  run: node tools/scripts/getKeys.mjs --vault build-pipeline-kv --commit
| 161 | |
# Validate dispatch inputs before any later step consumes them. Strict
# allowlists prevent shell-metachar injection and limit blast radius if a
# workflow_dispatch actor is compromised. Any validation failure aborts
# the run before the CLI is invoked.
#
# The whole value is matched with bash `[[ =~ ]]` rather than piped
# through `grep`: grep tests each line separately, so a multi-line value
# (possible when dispatching through the API) would be accepted as soon
# as any single line matched the allowlist.
- name: Validate dispatch inputs
  id: validate
  env:
    INPUT_PACKAGES: ${{ inputs.packages }}
    INPUT_SINCE: ${{ inputs.since }}
    INPUT_MAX_PACKAGES: ${{ inputs.max-packages }}
  run: |
    set -e

    # require_match VALUE ERE MESSAGE
    #   VALUE   - raw input; an empty value is accepted (inputs are optional)
    #   ERE     - anchored extended regex the entire value must satisfy
    #   MESSAGE - text for the ::error:: annotation on rejection
    # Exits the script with status 1 on the first violation.
    require_match() {
      local value="$1" pattern="$2" message="$3"
      if [ -z "$value" ]; then
        return 0
      fi
      # Line breaks are never legitimate in these inputs; reject them
      # explicitly so they cannot hide a second, unvalidated line.
      if [[ "$value" == *$'\n'* || "$value" == *$'\r'* ]] \
        || ! [[ "$value" =~ $pattern ]]; then
        echo "::error::${message}" >&2
        exit 1
      fi
    }

    # Scoped/unscoped npm package names: letters, digits,
    # `-`, `_`, `.`, `/`, `@`. Comma-separated, blanks tolerated.
    require_match "$INPUT_PACKAGES" \
      '^[A-Za-z0-9@/_.,[:blank:]-]+$' \
      "Invalid characters in 'packages' input."

    # Git revisions: letters, digits, `-`, `_`, `.`, `/`, plus the
    # ancestry operators `~` and `^` so the documented example
    # `HEAD~10` is accepted.
    require_match "$INPUT_SINCE" \
      '^[A-Za-z0-9._/~^-]+$' \
      "Invalid characters in 'since' input."
    # A leading dash could be read as a command-line option by whatever
    # consumes the ref downstream.
    if [[ "$INPUT_SINCE" == -* ]]; then
      echo "::error::'since' must not start with '-'." >&2
      exit 1
    fi

    # Numeric only.
    require_match "$INPUT_MAX_PACKAGES" \
      '^[0-9]+$' \
      "'max-packages' must be a positive integer."
| 196 | |
# Run the docs-autogen CLI. Dispatch inputs arrive only through `env:` and
# are forwarded as discrete argv tokens. The CLI's stdout is mirrored to
# /tmp/docs-autogen.log for the job summary, and its exit status is
# published as the `exit_code` step output.
- name: Regenerate package README.AUTOGEN.md files
  id: regen
  working-directory: ts
  env:
    DEBUG: "docs-autogen:*"
    INPUT_PACKAGES: ${{ inputs.packages }}
    INPUT_SINCE: ${{ inputs.since }}
    INPUT_LLM: ${{ inputs.llm }}
    INPUT_MAX_PACKAGES: ${{ inputs.max-packages }}
    INPUT_DRY_RUN: ${{ inputs.dry-run }}
  run: |
    # Build the argv as a bash array so each value is passed as a
    # single token. Never expand user input through GitHub's
    # template-expression syntax into the shell command line —
    # that would allow workflow_dispatch actors to inject
    # arbitrary shell metacharacters (`;`, `$()`, backticks,
    # etc.).
    set -o pipefail
    ARGS=("--render")
    if [ "$INPUT_DRY_RUN" = "true" ]; then
      ARGS+=("--dry-run")
    else
      ARGS+=("--write")
    fi
    if [ -n "$INPUT_PACKAGES" ]; then
      # Split on commas; each entry is whitespace-trimmed and empty
      # entries (e.g. a trailing comma) are dropped.
      IFS=',' read -ra PKGS <<< "$INPUT_PACKAGES"
      for PKG in "${PKGS[@]}"; do
        PKG_TRIMMED=$(echo "$PKG" | xargs)
        if [ -n "$PKG_TRIMMED" ]; then
          ARGS+=("--package" "$PKG_TRIMMED")
        fi
      done
    fi
    if [ -n "$INPUT_SINCE" ]; then
      ARGS+=("--since" "$INPUT_SINCE")
    fi
    if [ "$INPUT_LLM" = "true" ]; then
      ARGS+=("--llm")
    fi
    if [ -n "$INPUT_MAX_PACKAGES" ]; then
      ARGS+=("--max-packages" "$INPUT_MAX_PACKAGES")
    fi
    echo "Invoking docs-autogen with ${#ARGS[@]} args"

    # The step shell runs with errexit, and pipefail is enabled above, so
    # a failing CLI would abort the script on the pipeline itself and the
    # exit code would never be recorded. Suspend errexit around the
    # pipeline, capture the CLI's own status, then fail the step with it.
    set +e
    node tools/docsAutogen/bin/docs-autogen.cjs "${ARGS[@]}" \
      | tee /tmp/docs-autogen.log
    CLI_STATUS=${PIPESTATUS[0]}
    set -e
    echo "exit_code=${CLI_STATUS}" >> "$GITHUB_OUTPUT"
    exit "$CLI_STATUS"
| 243 | |
# Re-format any freshly-written README.AUTOGEN.md files with the
# workspace's prettier so the resulting PR passes the repo's
# `prettier --check .` gate. docs-autogen writes minimally spaced markdown
# tables, but prettier prefers column-aligned ones; without this step,
# every PR opened by this workflow would fail prettier CI. Skipped on dry
# runs because nothing has been written to disk.
#
# `--no-error-on-unmatched-pattern` keeps the step green when the glob
# matches nothing (no README.AUTOGEN.md exists yet and this run wrote
# none); prettier otherwise exits non-zero on an unmatched pattern.
- name: Format generated docs with prettier
  if: ${{ inputs.dry-run != true }}
  working-directory: ts
  run: |
    npx prettier --write --no-error-on-unmatched-pattern \
      'packages/**/README.AUTOGEN.md'
| 256 | |
# Detect whether anything actually changed under
# ts/packages/**/README.AUTOGEN.md. README.md is never touched by the
# generator. When the CLI ran with --write, all edits are already on disk,
# so the working-tree status is the single source of truth: if it is
# clean there is no PR to open, even if the CLI processed packages
# (footer-only or unchanged verdicts skip writes).
#
# `git status --porcelain --untracked-files=all` is used instead of
# `git diff` because `git diff` only reports modifications to tracked
# files; a README.AUTOGEN.md created for the first time is untracked and
# would otherwise go unnoticed, so no PR would be opened for it.
#
# Outputs:
#   changes       - "true" / "false"
#   changed_files - number of new or modified files (set only when true)
- name: Detect changes
  id: detect
  working-directory: ts
  run: |
    # One porcelain line per new or modified file matching the pathspec.
    CHANGED=$(git status --porcelain --untracked-files=all \
      -- 'packages/**/README.AUTOGEN.md' | wc -l | tr -d '[:space:]')
    if [ "$CHANGED" -eq 0 ]; then
      echo "changes=false" >> "$GITHUB_OUTPUT"
      echo "No README.AUTOGEN.md changes after regeneration."
    else
      echo "changes=true" >> "$GITHUB_OUTPUT"
      echo "changed_files=$CHANGED" >> "$GITHUB_OUTPUT"
      echo "$CHANGED README.AUTOGEN.md file(s) modified."
    fi
| 276 | |
# ── Create PR (and supersede prior bot PRs) ─────────────────────
# Commits the regenerated README.AUTOGEN.md files on a fresh branch, opens
# one PR against main, then closes any older open bot PRs.
- name: Create pull request
  if: ${{ steps.detect.outputs.changes == 'true' && inputs.dry-run != true }}
  env:
    # Use the auto-generated GITHUB_TOKEN as the write identity.
    # The workflow-level `permissions:` block grants this token
    # `contents: write` (to push the branch) and
    # `pull-requests: write` (to create and close PRs). NOTE:
    # GitHub suppresses downstream workflow runs on PRs opened
    # by GITHUB_TOKEN, so the generated docs PR will not have
    # build / lint / smoke-test status checks attached.
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Every expression the script needs is handed over as an environment
    # variable. Expanding `${{ … }}` directly inside the script (and in
    # particular inside the unquoted heredoc below) would let the
    # expanded text be interpreted by the shell.
    RUN_NUMBER: ${{ github.run_number }}
    CHANGED_FILES: ${{ steps.detect.outputs.changed_files }}
    LLM_MODE: "${{ inputs.llm == false && 'placeholder-only' || 'aiclient (full documentation)' }}"
    MAX_PACKAGES: ${{ inputs.max-packages || '25' }}
    ACTOR: ${{ github.actor }}
  run: |
    # Sample the clock once so the branch name, commit subject and PR
    # title cannot disagree when a run straddles midnight.
    TODAY=$(date +%Y-%m-%d)
    BRANCH="automated/docs-readmes-${TODAY//-/}-${RUN_NUMBER}"

    git config user.name "github-actions[bot]"
    git config user.email "41898282+github-actions[bot]@users.noreply.github.com"

    git checkout -b "$BRANCH"
    # Scope strictly to ts/packages/**/README.AUTOGEN.md to avoid
    # sweeping in any incidental working-tree noise from CI.
    # README.md is never modified by docs-autogen.
    git add 'ts/packages/**/README.AUTOGEN.md'
    CHANGED_LIST=$(git diff --cached --name-only)

    git commit -m "docs: regenerate package README.AUTOGEN.md files (${TODAY})

    Automated by docs-generate workflow.

    ${CHANGED_FILES} file(s) updated.

    Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
    Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>"

    git push origin "$BRANCH"

    # Unquoted heredoc: shell variables are expanded exactly once and the
    # escaped backticks become literal backticks in the PR body.
    BODY=$(cat <<PREOF
    ## Automated package README.AUTOGEN.md regeneration

    This PR was automatically generated by the \`docs-generate\` workflow.

    ### Summary
    - **Files updated:** ${CHANGED_FILES} \`README.AUTOGEN.md\` file(s)
    - **LLM mode:** ${LLM_MODE}
    - **Per-run cap:** ${MAX_PACKAGES} package(s)
    - **Trigger:** manual (${ACTOR})

    ### Changed files
    \`\`\`
    $CHANGED_LIST
    \`\`\`

    ### How this works
    1. Diffs \`ts/packages/**\` source files against the last successful run
       (tracked by the \`docs-bot/last-run\` git tag).
    2. For each affected package, regenerates a parallel \`README.AUTOGEN.md\`
       alongside the hand-written \`README.md\` — multi-section AI-authored
       documentation (Overview / What it does / Actions / Architecture /
       How to extend) when LLM mode is on, plus a deterministic Reference
       appendix (entry points, dependencies, files of interest, agent
       surface, actions list).
    3. \`README.md\` is never modified — its content is only read as
       authoritative source material the LLM mirrors and extends.
    4. Validates every generated link resolves on disk; refuses to write
       when broken.
    5. Skips packages whose new file differs only in the staleness footer
       (so repeated dispatches don't churn unchanged docs).

    ### Review checklist
    - [ ] Sample one or two \`README.AUTOGEN.md\` files and confirm the
          Reference section accurately describes the package.
    - [ ] Sample the AI-authored sections and confirm they read as
          contributor-grade documentation (not marketing prose, no
          hallucinated APIs).
    - [ ] Confirm no hand-written \`README.md\` has been modified.

    See [\`ts/docs/architecture/doc-autogen.md\`](../blob/main/ts/docs/architecture/doc-autogen.md) for design details.
    PREOF
    )

    # Open the new PR first. If this fails the script stops here and the
    # previous bot PR (if any) is still open, instead of having been
    # closed with its branch deleted and nothing to replace it.
    gh pr create \
      --base main \
      --head "$BRANCH" \
      --title "docs: regenerate package README.AUTOGEN.md files (${TODAY})" \
      --body "$BODY" \
      --label "documentation"

    # Close any previously-open bot PRs now that the replacement exists.
    # Each dispatched run produces a unique branch, so without dedup the
    # repo accumulates stacking duplicate PRs whenever a prior run's PR
    # hasn't been merged. Always keep the freshest; the jq filter
    # excludes the branch pushed by this run.
    PREV_PRS=$(gh pr list \
      --state open \
      --search 'head:automated/docs-readmes- in:branch' \
      --json number,headRefName \
      --jq '.[] | select(.headRefName != "'"$BRANCH"'") | .number')
    if [ -n "$PREV_PRS" ]; then
      echo "Closing superseded docs-autogen PRs: $PREV_PRS"
      for PR in $PREV_PRS; do
        # Best effort: a PR that cannot be closed only raises a warning.
        gh pr close "$PR" \
          --delete-branch \
          --comment "Superseded by a newer automated docs PR." \
          || echo "::warning::Failed to close PR #$PR"
      done
    fi
| 383 | |
# ── Job summary ─────────────────────────────────────────────────
# Appends a metrics table and the tail of the CLI log to the run summary.
#
# This step runs under `always()`, i.e. also after input validation has
# failed, so it can see unvalidated dispatch inputs. Every expression is
# therefore passed through `env:` and printed as data; nothing is
# template-expanded into the script text, where it could be interpreted
# by the shell.
- name: Job summary
  if: always()
  env:
    SUMMARY_TRIGGER: ${{ github.event_name }}
    SUMMARY_DRY_RUN: ${{ inputs.dry-run || 'false' }}
    # `inputs.llm || 'true'` would report "true" even when the operator
    # switched the LLM off (false || 'true' evaluates to 'true'); test
    # for an explicit false instead.
    SUMMARY_LLM: ${{ inputs.llm == false && 'false' || 'true' }}
    SUMMARY_MAX_PACKAGES: ${{ inputs.max-packages || '25' }}
    SUMMARY_CHANGES: ${{ steps.detect.outputs.changes || 'false' }}
    SUMMARY_CHANGED_FILES: ${{ steps.detect.outputs.changed_files || '0' }}
  run: |
    {
      echo "## Package README.AUTOGEN.md regeneration"
      echo ""
      echo "| Metric | Value |"
      echo "|--------|-------|"
      echo "| Trigger | ${SUMMARY_TRIGGER} |"
      echo "| Dry run | ${SUMMARY_DRY_RUN} |"
      echo "| LLM enabled | ${SUMMARY_LLM} |"
      echo "| Per-run cap | ${SUMMARY_MAX_PACKAGES} |"
      echo "| Changes detected | ${SUMMARY_CHANGES} |"
      echo "| Files modified | ${SUMMARY_CHANGED_FILES} |"
      echo ""
      # The log exists only if the regeneration step got as far as
      # invoking the CLI.
      if [ -f /tmp/docs-autogen.log ]; then
        echo "### CLI output (truncated)"
        echo '```'
        tail -n 200 /tmp/docs-autogen.log || true
        echo '```'
      fi
    } >> "$GITHUB_STEP_SUMMARY"
| 407 | |