microsoft/TypeAgent

Public

mirrored from https://github.com/microsoft/TypeAgentAvailable

Code · Commits · Issues · Pull requests · Actions · Insights · Security
f4e95a0bd3dd1e56cecd70ef7dd38a13f7a897be

Branches

Tags

  • No tags available.
0 Branches · 0 Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

.github/workflows/docs-generate.yml

377lines · modecode

1# Copyright (c) Microsoft Corporation.
2# Licensed under the MIT License.
3
4# Daily regeneration of package-level README.AUTOGEN.md companion files
5# under ts/packages/**.
6#
7# Detects packages whose source has changed since the last successful
8# scheduled run (tracked by the `docs-bot/last-run` lightweight tag),
9# rebuilds each affected package's README.AUTOGEN.md (a parallel file
10# alongside the hand-written README.md, never touching the latter),
11# validates every link on disk, and opens a single batched PR. Prior
12# open bot PRs are closed with --delete-branch so only one is ever
13# live at a time.
14#
15# AI authoring of the documentation body uses Azure OpenAI via
16# packages/aiclient. The deterministic Reference appendix (entry
17# points, dependencies, files of interest, agent surface, action list)
18# is computed entirely from package.json + src/ + the workspace graph.
19#
20# Required repository configuration:
21# variables:
22# DOCS_BOT_APP_ID GitHub App that opens the PR
23# AZURE_CLIENT_ID Entra App registration (or User-Assigned MI) trusted by the
24# federated credential. The OIDC subject must match
25# `repo:<org>/<repo>:ref:refs/heads/main` (or whatever ref/env
26# you registered when creating the FIC).
27# AZURE_TENANT_ID Directory (tenant) ID for the same App registration.
28# AZURE_SUBSCRIPTION_ID Subscription containing the Azure OpenAI resource.
29# secrets:
30# DOCS_BOT_APP_PRIVATE_KEY Private key for the GitHub App
31# AZURE_OPENAI_ENDPOINT Azure OpenAI endpoint URL
32#
33# Auth model: GitHub Actions exchanges its OIDC token for an Azure AD bearer
34# token via `azure/login@v2`. `aiclient` honors the literal value
35# `AZURE_OPENAI_API_KEY=identity` as a sentinel that switches it to
36# DefaultAzureCredential, which then picks up the AZURE_* env vars
37# `azure/login` exports. No long-lived API key is stored in this repo.
38#
39# The workflow is read-only and emits no PR until those are provisioned;
40# it simply prints what it would have done.
41
name: docs-generate

on:
  # Runs every day at 08:00 UTC (01:00 PST), a low-traffic window, so the
  # PR this workflow opens does not collide with other automated PRs.
  schedule:
    - cron: "0 8 * * *"
  # Manual trigger. The defaults declared below are the values used when
  # the dispatcher leaves a field untouched.
  workflow_dispatch:
    inputs:
      dry-run:
        type: boolean
        default: false
        description: "Dry run — analyse and render only, don't write or open PR"
      packages:
        type: string
        default: ""
        description: "Comma-separated package names to regenerate (overrides change detection)"
      since:
        type: string
        default: ""
        description: "Override the watermark — git ref to diff against (e.g. main, HEAD~10)"
      llm:
        type: boolean
        default: true
        description: "Use Azure OpenAI to author the documentation sections (placeholder-only when off)"
      max-packages:
        type: string
        default: "25"
        description: "Per-run cap on packages touched (defaults to 25)"
71
# All runs of this workflow share one concurrency group, and a run that
# is already in flight is never cancelled: it is allowed to finish, and
# the next scheduled run supersedes the resulting PR if necessary.
concurrency:
  cancel-in-progress: false
  group: ${{ github.workflow }}
77
permissions:
  # `id-token: write` lets `azure/login@v2` request a short-lived OIDC
  # JWT for the federated credential exchange. The token never leaves
  # the runner; Entra exchanges it for an Azure AD bearer token bound to
  # the App registration named by AZURE_CLIENT_ID.
  id-token: write
  contents: write
  pull-requests: write
86
87jobs:
88 regenerate:
89 runs-on: ubuntu-latest
90
91 steps:
# Check out with complete history (fetch-depth 0) instead of the default
# shallow clone.
- uses: actions/checkout@v4
  with:
    fetch-depth: 0

# pnpm is set up from the workspace manifest at ts/package.json.
- name: Install pnpm
  uses: pnpm/action-setup@v4
  with:
    package_json_file: ts/package.json

# Node 22, with the pnpm store cache keyed on the workspace lockfile.
- uses: actions/setup-node@v4
  with:
    cache: "pnpm"
    cache-dependency-path: ts/pnpm-lock.yaml
    node-version: 22
106
# Mint an installation token for the docs-bot GitHub App. Later steps
# read it as `steps.app-token.outputs.token`.
- id: app-token
  name: Generate GitHub App token
  uses: actions/create-github-app-token@v1
  with:
    private-key: ${{ secrets.DOCS_BOT_APP_PRIVATE_KEY }}
    app-id: ${{ vars.DOCS_BOT_APP_ID }}
113
# Install the ts/ workspace exactly as pinned by the lockfile.
- name: Install ts dependencies
  run: |
    corepack enable
    pnpm install --frozen-lockfile
  working-directory: ts

# Build only what the generator needs: aiclient first, then the
# docs-autogen tool itself.
- name: Build docs-autogen tool
  run: |
    pnpm --filter aiclient build
    pnpm --filter @typeagent/docs-autogen build
  working-directory: ts
125
# Log in to Azure with a federated credential (no stored secret).
# Subsequent steps receive AZURE_CLIENT_ID, AZURE_TENANT_ID,
# AZURE_SUBSCRIPTION_ID and AZURE_FEDERATED_TOKEN_FILE in their
# environment; `aiclient`'s DefaultAzureCredential reads them
# automatically once AZURE_OPENAI_API_KEY is the literal string
# "identity".
- name: Azure login (federated)
  uses: azure/login@v2
  with:
    subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}
    tenant-id: ${{ vars.AZURE_TENANT_ID }}
    client-id: ${{ vars.AZURE_CLIENT_ID }}
137
# Validate dispatch inputs before they reach a shell. Strict
# allowlists prevent shell-metachar injection and limit blast
# radius if a workflow_dispatch actor is compromised. Any
# validation failure aborts the run before the CLI is invoked.
# On scheduled runs every input is empty and all checks are skipped.
- name: Validate dispatch inputs
  id: validate
  env:
    INPUT_PACKAGES: ${{ inputs.packages }}
    INPUT_SINCE: ${{ inputs.since }}
    INPUT_MAX_PACKAGES: ${{ inputs.max-packages }}
  run: |
    set -e
    # Allow scoped/unscoped npm package names: letters, digits,
    # `-`, `_`, `.`, `/`, `@`. Comma-separated.
    if [ -n "$INPUT_PACKAGES" ]; then
      if ! printf '%s' "$INPUT_PACKAGES" | grep -Eq '^[A-Za-z0-9@/_.,[:space:]-]+$'; then
        echo "::error::Invalid characters in 'packages' input." >&2
        exit 1
      fi
    fi
    # Allow git refs and simple revision expressions: letters,
    # digits, `-`, `_`, `.`, `/`, plus `~` and `^` so the documented
    # example `HEAD~10` is accepted. The value is only ever passed
    # as a single argv token, so these characters cannot reach a
    # shell parser.
    if [ -n "$INPUT_SINCE" ]; then
      # A leading `-` would be read as an option by whatever
      # receives the value after `--since`; refuse it outright.
      case "$INPUT_SINCE" in
        -*)
          echo "::error::'since' must not start with '-'." >&2
          exit 1
          ;;
      esac
      if ! printf '%s' "$INPUT_SINCE" | grep -Eq '^[A-Za-z0-9._/~^-]+$'; then
        echo "::error::Invalid characters in 'since' input." >&2
        exit 1
      fi
    fi
    # Numeric only.
    if [ -n "$INPUT_MAX_PACKAGES" ]; then
      if ! printf '%s' "$INPUT_MAX_PACKAGES" | grep -Eq '^[0-9]+$'; then
        echo "::error::'max-packages' must be a positive integer." >&2
        exit 1
      fi
    fi
172
# Run the docs-autogen CLI. Dispatch inputs are handed over as
# environment variables and assembled into an argv array inside the
# script, never interpolated into the command line.
- name: Regenerate package README.AUTOGEN.md files
  id: regen
  working-directory: ts
  env:
    AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
    # Sentinel value: switches @typeagent/aiclient from
    # API-key auth to DefaultAzureCredential (federated OIDC).
    AZURE_OPENAI_API_KEY: identity
    DEBUG: "docs-autogen:*"
    INPUT_PACKAGES: ${{ inputs.packages }}
    INPUT_SINCE: ${{ inputs.since }}
    # The `inputs` context is only populated for workflow_dispatch.
    # On the daily schedule it is empty, so the declared defaults
    # (llm: true, max-packages: 25) are re-applied here; otherwise
    # scheduled runs would never pass `--llm`.
    INPUT_LLM: ${{ github.event_name != 'workflow_dispatch' || inputs.llm }}
    INPUT_MAX_PACKAGES: ${{ inputs.max-packages || '25' }}
    INPUT_DRY_RUN: ${{ inputs.dry-run }}
  run: |
    # Build the argv as a bash array so each value is passed as a
    # single token. Never expand user input through GitHub's
    # `${{ }}` template into the shell command line — that would
    # allow workflow_dispatch actors to inject arbitrary shell
    # metacharacters (`;`, `$()`, backticks, etc.).
    set -o pipefail
    ARGS=("--render")
    if [ "$INPUT_DRY_RUN" = "true" ]; then
      ARGS+=("--dry-run")
    else
      ARGS+=("--write")
    fi
    if [ -n "$INPUT_PACKAGES" ]; then
      IFS=',' read -ra PKGS <<< "$INPUT_PACKAGES"
      for PKG in "${PKGS[@]}"; do
        # xargs with no command trims surrounding whitespace.
        PKG_TRIMMED=$(echo "$PKG" | xargs)
        if [ -n "$PKG_TRIMMED" ]; then
          ARGS+=("--package" "$PKG_TRIMMED")
        fi
      done
    fi
    if [ -n "$INPUT_SINCE" ]; then
      ARGS+=("--since" "$INPUT_SINCE")
    fi
    if [ "$INPUT_LLM" = "true" ]; then
      ARGS+=("--llm")
    fi
    if [ -n "$INPUT_MAX_PACKAGES" ]; then
      ARGS+=("--max-packages" "$INPUT_MAX_PACKAGES")
    fi
    echo "Invoking docs-autogen with ${#ARGS[@]} args"
    # The default `bash -e` shell plus pipefail would abort on a CLI
    # failure before the exit code could be recorded. Suspend
    # errexit around the pipeline, snapshot PIPESTATUS in a single
    # statement (any later command resets it), then fail the step
    # explicitly.
    set +e
    node tools/docsAutogen/bin/docs-autogen.cjs "${ARGS[@]}" \
      | tee /tmp/docs-autogen.log
    STATUSES=("${PIPESTATUS[@]}")
    set -e
    echo "exit_code=${STATUSES[0]}" >> "$GITHUB_OUTPUT"
    if [ "${STATUSES[0]}" -ne 0 ]; then
      exit "${STATUSES[0]}"
    fi
    exit "${STATUSES[1]}"

# Detect whether anything actually changed under
# ts/packages/**/README.AUTOGEN.md. README.md is never touched by
# the generator. When the CLI ran with --write, all edits are
# already on disk; we rely on git status alone — if it's clean
# there's no PR to open, even if the CLI processed packages
# (footer-only or unchanged verdicts skip writes).
#
# `git status` is used rather than `git diff` because `git diff`
# ignores untracked files, so a README.AUTOGEN.md created for the
# first time would otherwise never be detected.
# `--untracked-files=all` lists new files individually instead of
# collapsing them into their untracked parent directory.
- name: Detect changes
  id: detect
  working-directory: ts
  run: |
    CHANGED_PATHS=$(git status --porcelain --untracked-files=all -- 'packages/**/README.AUTOGEN.md')
    if [ -z "$CHANGED_PATHS" ]; then
      echo "changes=false" >> "$GITHUB_OUTPUT"
      echo "No README.AUTOGEN.md changes after regeneration."
    else
      echo "changes=true" >> "$GITHUB_OUTPUT"
      CHANGED=$(printf '%s\n' "$CHANGED_PATHS" | wc -l)
      echo "changed_files=$CHANGED" >> "$GITHUB_OUTPUT"
      echo "$CHANGED README.AUTOGEN.md file(s) added or modified."
    fi

# ── Create PR (and supersede prior bot PRs) ─────────────────────
# Every workflow expression the script needs is resolved into an
# environment variable here, so no `${{ }}` value is ever spliced
# into the shell text (same policy as the regenerate step).
- name: Create pull request
  if: ${{ steps.detect.outputs.changes == 'true' && inputs.dry-run != true }}
  env:
    GH_TOKEN: ${{ steps.app-token.outputs.token }}
    RUN_NUMBER: ${{ github.run_number }}
    CHANGED_FILES: ${{ steps.detect.outputs.changed_files }}
    # Mirrors the flag logic of the regenerate step: non-dispatch
    # events use the declared default (LLM on). A plain
    # `inputs.llm == false` test is true for an empty `inputs`
    # context under loose equality, which mislabels scheduled runs.
    LLM_MODE: ${{ (github.event_name != 'workflow_dispatch' || inputs.llm) && 'aiclient (full documentation)' || 'placeholder-only' }}
    MAX_PACKAGES: ${{ inputs.max-packages || '25' }}
    TRIGGER: ${{ github.event_name == 'schedule' && 'scheduled (daily)' || format('manual ({0})', github.actor) }}
  run: |
    BRANCH="automated/docs-readmes-$(date +%Y%m%d)-${RUN_NUMBER}"

    git config user.name "github-actions[bot]"
    git config user.email "41898282+github-actions[bot]@users.noreply.github.com"

    git checkout -b "$BRANCH"
    # Scope strictly to ts/packages/**/README.AUTOGEN.md to avoid
    # sweeping in any incidental working-tree noise from CI.
    # README.md is never modified by docs-autogen.
    git add 'ts/packages/**/README.AUTOGEN.md'
    CHANGED_LIST=$(git diff --cached --name-only)

    git commit -m "docs: regenerate package README.AUTOGEN.md files ($(date +%Y-%m-%d))

    Automated by docs-generate workflow.

    ${CHANGED_FILES} file(s) updated.

    Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
    Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>"

    git push origin "$BRANCH"

    # Close any previously-open bot PRs before opening the new
    # one. Each daily run produces a unique branch, so without
    # dedup the repo accumulates stacking duplicate PRs whenever
    # yesterday's hasn't been merged. Always keep the freshest.
    PREV_PRS=$(gh pr list \
      --state open \
      --search 'head:automated/docs-readmes- in:branch' \
      --json number,headRefName \
      --jq '.[] | select(.headRefName != "'"$BRANCH"'") | .number')
    if [ -n "$PREV_PRS" ]; then
      echo "Closing superseded docs-autogen PRs: $PREV_PRS"
      for PR in $PREV_PRS; do
        # Best effort: a PR that cannot be closed is reported as a
        # warning and does not block opening the new one.
        gh pr close "$PR" \
          --delete-branch \
          --comment "Superseded by a newer automated docs PR." \
          || echo "::warning::Failed to close PR #$PR"
      done
    fi

    # Unquoted heredoc: shell variables expand, literal backticks
    # are escaped so they are not treated as command substitution.
    BODY=$(cat <<PREOF
    ## Automated package README.AUTOGEN.md regeneration

    This PR was automatically generated by the \`docs-generate\` workflow.

    ### Summary
    - **Files updated:** ${CHANGED_FILES} \`README.AUTOGEN.md\` file(s)
    - **LLM mode:** ${LLM_MODE}
    - **Per-run cap:** ${MAX_PACKAGES} package(s)
    - **Trigger:** ${TRIGGER}

    ### Changed files
    \`\`\`
    $CHANGED_LIST
    \`\`\`

    ### How this works
    1. Diffs \`ts/packages/**\` source files against the last successful run
       (tracked by the \`docs-bot/last-run\` git tag).
    2. For each affected package, regenerates a parallel \`README.AUTOGEN.md\`
       alongside the hand-written \`README.md\` — multi-section AI-authored
       documentation (Overview / What it does / Actions / Architecture /
       How to extend) when LLM mode is on, plus a deterministic Reference
       appendix (entry points, dependencies, files of interest, agent
       surface, actions list).
    3. \`README.md\` is never modified — its content is only read as
       authoritative source material the LLM mirrors and extends.
    4. Validates every generated link resolves on disk; refuses to write
       when broken.
    5. Skips packages whose new file differs only in the staleness footer
       (so daily PRs don't churn unchanged docs).

    ### Review checklist
    - [ ] Sample one or two \`README.AUTOGEN.md\` files and confirm the
          Reference section accurately describes the package.
    - [ ] Sample the AI-authored sections and confirm they read as
          contributor-grade documentation (not marketing prose, no
          hallucinated APIs).
    - [ ] Confirm no hand-written \`README.md\` has been modified.

    See [\`ts/docs/architecture/doc-autogen.md\`](../blob/main/ts/docs/architecture/doc-autogen.md) for design details.
    PREOF
    )

    gh pr create \
      --base main \
      --head "$BRANCH" \
      --title "docs: regenerate package README.AUTOGEN.md files ($(date +%Y-%m-%d))" \
      --body "$BODY" \
      --label "documentation"
341
# The watermark moves only when a scheduled run succeeded and
# produced a PR. Manual dispatches and dry-runs deliberately leave the
# tag where it is, so they stay idempotent against the daily cron.
- name: Advance watermark tag
  env:
    GH_TOKEN: ${{ steps.app-token.outputs.token }}
  if: ${{ github.event_name == 'schedule' && steps.detect.outputs.changes == 'true' && inputs.dry-run != true }}
  run: |
    # Point the tag at the commit the docs were generated from (the
    # SHA that triggered this run), not at the tip of the PR branch
    # created afterwards. Both the local tag and the remote ref are
    # overwritten.
    git tag -f docs-bot/last-run ${{ github.sha }}
    git push origin docs-bot/last-run --force
354
# ── Job summary ─────────────────────────────────────────────────
# Runs even when an earlier step failed (`always()`), so the summary
# and the tail of the CLI log are available for debugging.
- name: Job summary
  if: always()
  run: |
    {
      echo "## Package README.AUTOGEN.md regeneration"
      echo ""
      echo "| Metric | Value |"
      echo "|--------|-------|"
      echo "| Trigger | ${{ github.event_name }} |"
      echo "| Dry run | ${{ inputs.dry-run || 'false' }} |"
      # `inputs.llm || 'true'` cannot be used here: it evaluates to
      # 'true' even when a dispatcher explicitly set llm to false.
      # Manual runs report the chosen value; other events have no
      # inputs and report the declared default (true).
      echo "| LLM enabled | ${{ github.event_name != 'workflow_dispatch' || inputs.llm }} |"
      echo "| Per-run cap | ${{ inputs.max-packages || '25' }} |"
      echo "| Changes detected | ${{ steps.detect.outputs.changes || 'false' }} |"
      echo "| Files modified | ${{ steps.detect.outputs.changed_files || '0' }} |"
      echo ""
      # The log only exists if the regenerate step got as far as
      # invoking the CLI.
      if [ -f /tmp/docs-autogen.log ]; then
        echo "### CLI output (truncated)"
        echo '```'
        tail -n 200 /tmp/docs-autogen.log || true
        echo '```'
      fi
    } >> "$GITHUB_STEP_SUMMARY"
378