microsoft/hve-core

Public

mirrored fromhttps://github.com/microsoft/hve-coreAvailable

CodeCommitsIssuesPull requestsActionsInsightsSecurity
feat/1637-d-skill-paths

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

scripts/evals/Build-AgentBehaviorSpec.ps1

359lines · modecode

1#!/usr/bin/env pwsh
2# Copyright (c) Microsoft Corporation.
3# SPDX-License-Identifier: MIT
4#Requires -Version 7.0
5
6<#
7.SYNOPSIS
8 Regenerate evals/agent-behavior/eval.yaml from per-agent stimulus partials.
9
10.DESCRIPTION
11 Concatenates committed per-agent partials (one stimulus list per agent slug)
12 into the agent-behavior suite spec. Partials are discovered under
13 `<RepoRoot>/evals/agent-behavior/stimuli/*.yml` and rendered in alphabetical
14 order by file name. The agent slug is taken from the partial's base name
15 and injected as `tags.agent: <slug>` on every emitted stimulus, so partial
16 authors never duplicate the tag.
17
    Content that precedes the top-level `stimuli:` key in the existing output
    file is preserved verbatim; everything from that key onward is regenerated.
    The single-line banner
20 `# Generated by Build-AgentBehaviorSpec.ps1 - do not edit by hand.` is
21 re-prepended on every run and de-duplicated, so re-running on the script's
22 own output is idempotent.
23
24 With -WhatIf, the script renders in-memory and exits 0 when the on-disk
25 output already matches; otherwise it writes a line-based diff to
26 `<RepoRoot>/logs/agent-behavior-spec-drift.diff` and exits 1.
27
28.PARAMETER RepoRoot
29 Repository root. Defaults to `git rev-parse --show-toplevel`.
30
31.PARAMETER PartialsDir
32 Directory containing `<slug>.yml` partials. Defaults to
33 `<RepoRoot>/evals/agent-behavior/stimuli`.
34
35.PARAMETER OutputPath
36 Output spec path. Defaults to `<RepoRoot>/evals/agent-behavior/eval.yaml`.
37
38.PARAMETER DriftDiffPath
39 Path to write the line-based diff under -WhatIf. Defaults to
40 `<RepoRoot>/logs/agent-behavior-spec-drift.diff`.
41
42.PARAMETER Force
43 Overwrite the output regardless of existing content. Without -Force an
44 unchanged file is left untouched (no-op), and a changed file triggers an
45 error so accidental clobbering of unrelated edits is surfaced.
46
47.EXAMPLE
48 pwsh scripts/evals/Build-AgentBehaviorSpec.ps1
49
50.EXAMPLE
51 pwsh scripts/evals/Build-AgentBehaviorSpec.ps1 -WhatIf
52
53.NOTES
54 Mirrors the generate-and-commit drift-check pattern used by
55 `scripts/evals/New-AgentSurfaceSignatures.ps1`.
56#>
[CmdletBinding(SupportsShouldProcess = $true)]
[OutputType([string])]
param(
    # Repository root; resolved via git (or the current location) when omitted.
    [string]$RepoRoot,

    # Directory holding the per-agent `<slug>.yml` partials.
    [string]$PartialsDir,

    # Destination of the generated spec.
    [string]$OutputPath,

    # Where the drift diff is written when running with -WhatIf.
    [string]$DriftDiffPath,

    # Overwrite an existing output file even when its content differs.
    [switch]$Force
)

# Fail fast: strict variable/member checks and terminating errors everywhere.
Set-StrictMode -Version Latest
$ErrorActionPreference = 'Stop'

#region Constants

# First line of every generated spec; re-prepended on each run.
$script:GeneratorBanner = '# Generated by Build-AgentBehaviorSpec.ps1 - do not edit by hand.'

#endregion Constants
79
80#region Functions
81
function Resolve-RepoRoot {
    <#
    .SYNOPSIS
        Determine the repository root directory.
    .DESCRIPTION
        Uses the explicit override when one is given. Otherwise asks
        `git rev-parse --show-toplevel`, and falls back to the current
        location when git fails or prints nothing.
    .PARAMETER Override
        Optional path that short-circuits discovery; it must exist.
    .OUTPUTS
        System.String. The root path.
    #>
    [CmdletBinding()]
    [OutputType([string])]
    param([string]$Override)

    if (-not $Override) {
        try {
            $gitOutput = & git rev-parse --show-toplevel 2>$null
            # .Trim() throws on empty output under strict mode; the catch
            # below turns that into the current-location fallback.
            $toplevel = $gitOutput.Trim()
            if ($LASTEXITCODE -eq 0 -and $toplevel) {
                return $toplevel
            }
        } catch {
            Write-Verbose "git rev-parse failed: $($_.Exception.Message)"
        }

        return (Get-Location).Path
    }

    $resolvedOverride = Resolve-Path -LiteralPath $Override
    return $resolvedOverride.Path
}
100
function Import-YamlModule {
    <#
    .SYNOPSIS
        Ensure the 'powershell-yaml' module is loaded in the session.
    .DESCRIPTION
        Returns immediately when the module is already imported. Throws
        with install guidance when it is not installed; otherwise imports
        it and discards the import output.
    #>
    [CmdletBinding()]
    param()

    $loaded = Get-Module -Name 'powershell-yaml'
    if ($loaded) { return }

    $installed = Get-Module -ListAvailable -Name 'powershell-yaml'
    if (-not $installed) {
        throw "Required module 'powershell-yaml' is not installed. Run 'Install-Module powershell-yaml -Scope CurrentUser' before invoking this script."
    }

    $null = Import-Module powershell-yaml -ErrorAction Stop
}
111
function Get-PartialFiles {
    <#
    .SYNOPSIS
        List the per-agent stimulus partials in a directory.
    .DESCRIPTION
        Returns every `*.yml` file directly inside the directory, sorted by
        file name. The directory is addressed with -LiteralPath for both the
        existence check and the listing, so wildcard characters such as
        `[` and `]` in the path are taken literally instead of being treated
        as a pattern that matches nothing.
    .PARAMETER PartialsDir
        Directory to scan.
    .OUTPUTS
        System.IO.FileInfo[]. Empty when the directory does not exist or
        contains no partials.
    #>
    [CmdletBinding()]
    [OutputType([System.IO.FileInfo[]])]
    param([Parameter(Mandatory)] [string]$PartialsDir)

    if (-not (Test-Path -LiteralPath $PartialsDir)) {
        return @()
    }
    return @(Get-ChildItem -LiteralPath $PartialsDir -Filter '*.yml' -File | Sort-Object -Property Name)
}
122
function Read-PartialStimuli {
    <#
    .SYNOPSIS
        Load one partial and return its stimuli with `tags.agent` injected.
    .DESCRIPTION
        Parses the partial as YAML, validates its shape, and makes sure every
        stimulus carries `tags.agent` equal to the file slug. A stimulus that
        already declares a different agent is rejected.
    .PARAMETER Path
        Full path of the partial file.
    .PARAMETER Slug
        Agent slug to inject into each stimulus.
    .OUTPUTS
        System.Collections.IList. The validated stimuli; empty when the
        partial is empty or has no `stimuli` entries.
    .NOTES
        Throws when the YAML cannot be parsed, when the document or a
        stimulus is not a mapping, when `stimuli` is not a list, when `name`
        or `prompt` is missing or blank, when `tags` is not a mapping, or
        when `tags.agent` conflicts with the slug.
    #>
    [CmdletBinding()]
    [OutputType([System.Collections.IList])]
    param(
        [Parameter(Mandatory)] [string]$Path,
        [Parameter(Mandatory)] [string]$Slug
    )

    $raw = [System.IO.File]::ReadAllText($Path)
    try {
        $parsed = ConvertFrom-Yaml -Yaml $raw -Ordered
    } catch {
        throw "Failed to parse partial '$Path' as YAML: $($_.Exception.Message)"
    }

    if ($null -eq $parsed) {
        return @()
    }

    if ($parsed -isnot [System.Collections.IDictionary]) {
        throw "Partial '$Path' must be a YAML mapping with a top-level 'stimuli' key."
    }

    if (-not $parsed.Contains('stimuli')) {
        return @()
    }

    $stimuli = $parsed['stimuli']
    if ($null -eq $stimuli) {
        return @()
    }
    if ($stimuli -isnot [System.Collections.IList]) {
        throw "Partial '$Path' has a 'stimuli' key that is not a list."
    }

    $injected = [System.Collections.Generic.List[object]]::new()
    foreach ($item in $stimuli) {
        if ($item -isnot [System.Collections.IDictionary]) {
            throw "Partial '$Path' contains a stimulus entry that is not a mapping."
        }
        if (-not $item.Contains('name') -or [string]::IsNullOrWhiteSpace([string]$item['name'])) {
            throw "Partial '$Path' contains a stimulus missing a non-empty 'name' field."
        }
        if (-not $item.Contains('prompt') -or [string]::IsNullOrWhiteSpace([string]$item['prompt'])) {
            throw "Partial '$Path' stimulus '$($item['name'])' is missing a non-empty 'prompt' field."
        }

        # Read the value with a plain assignment. Assigning from an
        # `if` expression would send it through the pipeline, which unrolls
        # lists: an empty list would look like "no tags" and a one-element
        # list would be mistaken for its element, bypassing the check below.
        $tags = $null
        if ($item.Contains('tags')) {
            $tags = $item['tags']
        }

        if ($null -eq $tags) {
            $tags = [ordered]@{}
            $item['tags'] = $tags
        } elseif ($tags -isnot [System.Collections.IDictionary]) {
            throw "Partial '$Path' stimulus '$($item['name'])' has a non-mapping 'tags' value."
        }

        if ($tags.Contains('agent')) {
            $existing = [string]$tags['agent']
            if ($existing -ne $Slug) {
                throw "Partial '$Path' stimulus '$($item['name'])' declares tags.agent='$existing' but file slug is '$Slug'. Remove the agent tag from the partial; the generator injects it from the file name."
            }
        } else {
            $tags['agent'] = $Slug
        }

        $injected.Add($item)
    }
    # Leading comma keeps the list intact instead of unrolling it on output.
    return , $injected
}
191
function Split-ExistingPrelude {
    <#
    .SYNOPSIS
        Extract the text that precedes the top-level `stimuli:` key.
    .DESCRIPTION
        Scans the existing spec line by line for the first line that starts
        with `stimuli` followed by optional whitespace and a colon. Text
        before that line is the prelude. When no such line exists, the whole
        text (newline-terminated) is the prelude.
    .PARAMETER ExistingText
        Current content of the output file; may be empty.
    .OUTPUTS
        Hashtable with `Prelude` (string) and `HadStimuli` (bool).
    #>
    [CmdletBinding()]
    [OutputType([hashtable])]
    param([string]$ExistingText)

    if (-not $ExistingText) {
        return @{ Prelude = ''; HadStimuli = $false }
    }

    # Lookbehind split keeps each line's trailing newline attached.
    $segments = $ExistingText -split "(?<=`n)"
    $position = 0
    foreach ($segment in $segments) {
        if ($segment -match '^stimuli\s*:') {
            $head = @($segments | Select-Object -First $position)
            return @{ Prelude = ($head -join ''); HadStimuli = $true }
        }
        $position++
    }

    if ($ExistingText.EndsWith("`n")) {
        $completed = $ExistingText
    } else {
        $completed = $ExistingText + "`n"
    }
    return @{ Prelude = $completed; HadStimuli = $false }
}
212
function Remove-LeadingBanner {
    <#
    .SYNOPSIS
        Strip generator banner lines from the start of a prelude.
    .DESCRIPTION
        Drops every consecutive leading line that begins with
        `# Generated by Build-AgentBehaviorSpec.ps1` and returns the rest
        unchanged.
    .PARAMETER Prelude
        Prelude text; may be empty.
    .OUTPUTS
        System.String. The prelude without leading banner lines.
    #>
    [CmdletBinding()]
    [OutputType([string])]
    param([string]$Prelude)

    if (-not $Prelude) { return '' }

    $segments = $Prelude -split "(?<=`n)"
    $firstKept = 0
    foreach ($segment in $segments) {
        $bare = $segment.TrimEnd("`r", "`n")
        if (-not $bare.StartsWith('# Generated by Build-AgentBehaviorSpec.ps1')) { break }
        $firstKept++
    }

    if ($firstKept -eq 0) { return $Prelude }
    if ($firstKept -ge $segments.Count) { return '' }

    $remaining = $segments[$firstKept..($segments.Count - 1)]
    return ($remaining -join '')
}
228
function Format-StimuliBlock {
    <#
    .SYNOPSIS
        Render the `stimuli:` section of the spec as YAML text.
    .DESCRIPTION
        Emits `stimuli: []` for a null or empty list. Otherwise serializes
        the list under a `stimuli` key with ConvertTo-Yaml and guarantees a
        trailing newline.
    .PARAMETER Stimuli
        Stimulus entries to render; may be null or empty.
    .OUTPUTS
        System.String. Newline-terminated YAML.
    #>
    [CmdletBinding()]
    [OutputType([string])]
    param([Parameter()] [System.Collections.IList]$Stimuli)

    # Test for null/empty explicitly. `-not $Stimuli` would coerce a
    # one-element collection to the truthiness of that element and drop a
    # list whose only entry happens to be falsy.
    if ($null -eq $Stimuli -or $Stimuli.Count -eq 0) {
        return "stimuli: []`n"
    }

    $wrapper = [ordered]@{ stimuli = $Stimuli }
    $rendered = ConvertTo-Yaml -Data $wrapper
    if (-not $rendered.EndsWith("`n")) { $rendered += "`n" }
    return $rendered
}
243
function Get-RenderedSpec {
    <#
    .SYNOPSIS
        Assemble the full spec text: banner, carried-over prelude, stimuli.
    .DESCRIPTION
        Takes the prelude of the existing file, removes any banner lines
        from it, and concatenates the generator banner, that prelude
        (newline-terminated), and the rendered stimuli block.
    .PARAMETER ExistingText
        Current content of the output file; may be empty.
    .PARAMETER Stimuli
        Stimulus entries to render.
    .OUTPUTS
        System.String. The complete spec text.
    #>
    [CmdletBinding()]
    [OutputType([string])]
    param(
        [Parameter()] [string]$ExistingText,
        [Parameter()] [System.Collections.IList]$Stimuli
    )

    $existing = Split-ExistingPrelude -ExistingText $ExistingText
    $carriedPrelude = Remove-LeadingBanner -Prelude $existing.Prelude

    $pieces = [System.Collections.Generic.List[string]]::new()
    $pieces.Add($script:GeneratorBanner)
    $pieces.Add("`n")
    if ($carriedPrelude) {
        $pieces.Add($carriedPrelude)
        if (-not $carriedPrelude.EndsWith("`n")) {
            $pieces.Add("`n")
        }
    }
    $pieces.Add((Format-StimuliBlock -Stimuli $Stimuli))

    return ($pieces -join '')
}
265
function Get-LineDiff {
    <#
    .SYNOPSIS
        Produce a line-based listing of differences between two texts.
    .DESCRIPTION
        Splits both texts on LF/CRLF and lists the lines that Compare-Object
        reports on only one side: `-` for lines only in the expected text,
        `+` for lines only in the actual text. The comparison is case
        sensitive, so case-only drift is reported. The result is a listing
        of differing lines, not a positional unified diff.
    .PARAMETER Expected
        Freshly rendered text. May be empty.
    .PARAMETER Actual
        Text currently on disk. May be empty, e.g. when the output file
        does not exist yet.
    .PARAMETER Path
        Path shown in the two header lines.
    .OUTPUTS
        System.String. Header lines followed by one line per difference.
    #>
    [CmdletBinding()]
    [OutputType([string])]
    param(
        # AllowEmptyString: a Mandatory [string] otherwise rejects '' at
        # binding time, which broke drift checks against a missing file.
        [Parameter(Mandatory)] [AllowEmptyString()] [string]$Expected,
        [Parameter(Mandatory)] [AllowEmptyString()] [string]$Actual,
        [Parameter(Mandatory)] [string]$Path
    )

    $expectedLines = $Expected -split "`r?`n"
    $actualLines = $Actual -split "`r?`n"
    $sb = [System.Text.StringBuilder]::new()
    [void]$sb.AppendLine("--- expected $Path")
    [void]$sb.AppendLine("+++ actual $Path")

    # Compare-Object ignores case unless told otherwise.
    $diff = Compare-Object -ReferenceObject $expectedLines -DifferenceObject $actualLines -CaseSensitive
    foreach ($entry in $diff) {
        $prefix = if ($entry.SideIndicator -eq '<=') { '-' } else { '+' }
        [void]$sb.AppendLine("$prefix$($entry.InputObject)")
    }
    return $sb.ToString()
}
288
289#endregion Functions
290
291#region Main Execution
292
# Resolve the repository root and default every path the caller omitted.
$resolvedRoot = Resolve-RepoRoot -Override $RepoRoot
if (-not $PartialsDir) {
    $PartialsDir = Join-Path $resolvedRoot 'evals/agent-behavior/stimuli'
}
if (-not $OutputPath) {
    $OutputPath = Join-Path $resolvedRoot 'evals/agent-behavior/eval.yaml'
}
if (-not $DriftDiffPath) {
    $DriftDiffPath = Join-Path $resolvedRoot 'logs/agent-behavior-spec-drift.diff'
}

# [System.IO.File] resolves relative paths against the process working
# directory, which can differ from the PowerShell location used by Test-Path.
# Resolve caller-supplied paths once so both APIs address the same file.
$outputFullPath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($OutputPath)
$driftDiffFullPath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($DriftDiffPath)

Import-YamlModule

# Collect stimuli from every partial; the slug is the partial's base name.
$partials = Get-PartialFiles -PartialsDir $PartialsDir
$allStimuli = [System.Collections.Generic.List[object]]::new()
foreach ($partial in $partials) {
    $slug = $partial.BaseName
    foreach ($stimulus in (Read-PartialStimuli -Path $partial.FullName -Slug $slug)) {
        $allStimuli.Add($stimulus)
    }
}

$existingText = if (Test-Path -LiteralPath $OutputPath) {
    [System.IO.File]::ReadAllText($outputFullPath) -replace "`r`n", "`n"
} else {
    ''
}

$rendered = Get-RenderedSpec -ExistingText $existingText -Stimuli $allStimuli
# ConvertTo-Yaml emits CRLF on Windows; normalize to LF so on-disk content
# stays platform-stable and drift comparisons are byte-accurate.
$rendered = $rendered -replace "`r`n", "`n"

# -ceq: the default -eq ignores case and would hide case-only drift.
$isUnchanged = $existingText -ceq $rendered

if ($WhatIfPreference) {
    # Drift-check mode: never touch the output file, only report.
    if ($isUnchanged) {
        Write-Host "no drift: $OutputPath" -ForegroundColor Green
        exit 0
    }
    $diffDir = Split-Path -Parent $driftDiffFullPath
    if ($diffDir -and -not (Test-Path -LiteralPath $diffDir)) {
        # -WhatIf:$false bypasses inherited WhatIfPreference so the diff dir is
        # always materialized during drift detection runs.
        New-Item -ItemType Directory -Path $diffDir -Force -WhatIf:$false | Out-Null
    }
    $diffText = Get-LineDiff -Expected $rendered -Actual $existingText -Path $OutputPath
    [System.IO.File]::WriteAllText($driftDiffFullPath, $diffText)
    Write-Host "drift detected; diff written to $DriftDiffPath" -ForegroundColor Yellow
    exit 1
}

# Without -Force an existing file is only accepted when already up to date.
if ((Test-Path -LiteralPath $OutputPath) -and -not $Force) {
    if ($isUnchanged) {
        Write-Host "skipped (no changes): $OutputPath" -ForegroundColor Gray
        return $OutputPath
    }
    throw "Output file already exists and differs from rendered content. Re-run with -Force to overwrite: $OutputPath"
}

$outputDir = Split-Path -Parent $outputFullPath
if ($outputDir -and -not (Test-Path -LiteralPath $outputDir)) {
    New-Item -ItemType Directory -Path $outputDir -Force | Out-Null
}
[System.IO.File]::WriteAllText($outputFullPath, $rendered)
Write-Host "wrote: $OutputPath" -ForegroundColor Green
return $OutputPath
358
359#endregion Main Execution
360