microsoft/hve-core

Public

Mirrored from https://github.com/microsoft/hve-core · Available

Code · Commits · Issues · Pull requests · Actions · Insights · Security
feat/1637-b-tracking-paths

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

scripts/tests/evals/Build-AgentBehaviorSpec.Tests.ps1

323lines · modecode

1#Requires -Modules Pester
2# Copyright (c) Microsoft Corporation.
3# SPDX-License-Identifier: MIT
4
5BeforeAll {
6 $script:ScriptPath = Join-Path $PSScriptRoot '../../evals/Build-AgentBehaviorSpec.ps1'
7
8 Import-Module powershell-yaml -ErrorAction Stop
9
10 function script:Invoke-Generator {
11 param(
12 [Parameter(Mandatory)] [string]$Root,
13 [switch]$DryRun,
14 [switch]$Force
15 )
16 $argList = @('-NoProfile', '-NoLogo', '-File', $script:ScriptPath, '-RepoRoot', $Root)
17 if ($DryRun) { $argList += '-WhatIf' }
18 if ($Force) { $argList += '-Force' }
19 $stdout = & pwsh @argList 2>&1
20 return [pscustomobject]@{
21 ExitCode = $LASTEXITCODE
22 Output = ($stdout | Out-String)
23 }
24 }
25
26 function script:Initialize-FixtureRoot {
27 param([Parameter(Mandatory)] [string]$Root)
28 New-Item -ItemType Directory -Path $Root -Force | Out-Null
29 New-Item -ItemType Directory -Path (Join-Path $Root 'evals/agent-behavior/stimuli') -Force | Out-Null
30 }
31
32 function script:Write-Partial {
33 param(
34 [Parameter(Mandatory)] [string]$Root,
35 [Parameter(Mandatory)] [string]$Slug,
36 [Parameter(Mandatory)] [string]$Content
37 )
38 $path = Join-Path $Root "evals/agent-behavior/stimuli/$Slug.yml"
39 [System.IO.File]::WriteAllText($path, $Content)
40 return $path
41 }
42
43 function script:Write-SeedEvalYaml {
44 param(
45 [Parameter(Mandatory)] [string]$Root,
46 [Parameter(Mandatory)] [string]$Content
47 )
48 $path = Join-Path $Root 'evals/agent-behavior/eval.yaml'
49 [System.IO.File]::WriteAllText($path, $Content)
50 return $path
51 }
52
53 function script:Read-OutputYaml {
54 param([Parameter(Mandatory)] [string]$Root)
55 $path = Join-Path $Root 'evals/agent-behavior/eval.yaml'
56 return [System.IO.File]::ReadAllText($path)
57 }
58
59 function script:Read-OutputObject {
60 param([Parameter(Mandatory)] [string]$Root)
61 return ConvertFrom-Yaml -Yaml (script:Read-OutputYaml -Root $Root)
62 }
63}
64
65Describe 'Build-AgentBehaviorSpec.ps1' -Tag 'Unit' {
66 BeforeEach {
67 $script:TestRoot = Join-Path $TestDrive ([Guid]::NewGuid().ToString())
68 Initialize-FixtureRoot -Root $script:TestRoot
69 }
70
71 Context 'Rendering with multiple partials' {
72 It 'Concatenates partials in alphabetical order and injects agent tag from slug' {
73 Write-Partial -Root $script:TestRoot -Slug 'beta' -Content @"
74stimuli:
75 - name: beta-case
76 prompt: Beta agent prompt.
77 graders:
78 - type: output-matches
79 name: beta-grader
80 config:
81 pattern: "(?i)beta"
82"@
83 Write-Partial -Root $script:TestRoot -Slug 'alpha' -Content @"
84stimuli:
85 - name: alpha-case
86 prompt: Alpha agent prompt.
87 graders:
88 - type: output-matches
89 name: alpha-grader
90 config:
91 pattern: "(?i)alpha"
92"@
93
94 $result = Invoke-Generator -Root $script:TestRoot
95 $result.ExitCode | Should -Be 0
96
97 $spec = Read-OutputObject -Root $script:TestRoot
98 $spec.stimuli | Should -HaveCount 2
99 $spec.stimuli[0].name | Should -Be 'alpha-case'
100 $spec.stimuli[1].name | Should -Be 'beta-case'
101 $spec.stimuli[0].tags.agent | Should -Be 'alpha'
102 $spec.stimuli[1].tags.agent | Should -Be 'beta'
103 }
104
105 It 'Writes the generator banner as the first line' {
106 Write-Partial -Root $script:TestRoot -Slug 'solo' -Content @"
107stimuli:
108 - name: solo-case
109 prompt: Solo agent prompt.
110"@
111 (Invoke-Generator -Root $script:TestRoot).ExitCode | Should -Be 0
112 $text = Read-OutputYaml -Root $script:TestRoot
113 $firstLine = ($text -split "`n")[0]
114 $firstLine | Should -Be '# Generated by Build-AgentBehaviorSpec.ps1 - do not edit by hand.'
115 }
116 }
117
118 Context 'No partials' {
119 It 'Emits an empty stimuli list and exits 0' {
120 $result = Invoke-Generator -Root $script:TestRoot
121 $result.ExitCode | Should -Be 0
122 $text = Read-OutputYaml -Root $script:TestRoot
123 $text | Should -Match '(?m)^stimuli:\s*\[\]\s*$'
124 }
125 }
126
127 Context 'Top-level key preservation' {
128 It 'Preserves byte-identical top-level keys from the existing eval.yaml prelude' {
129 $seed = @"
130# Generated by Build-AgentBehaviorSpec.ps1 - do not edit by hand.
131suite: agent-behavior-test
132version: 1
133description: >
134 Multi-line
135 description block.
136config:
137 executor: copilot-sdk
138 runs: 3
139stimuli: []
140"@
141 Write-SeedEvalYaml -Root $script:TestRoot -Content $seed
142 Write-Partial -Root $script:TestRoot -Slug 'gamma' -Content @"
143stimuli:
144 - name: gamma-case
145 prompt: Gamma agent prompt.
146"@
147
148 $result = Invoke-Generator -Root $script:TestRoot -Force
149 $result.ExitCode | Should -Be 0
150
151 $regenerated = Read-OutputYaml -Root $script:TestRoot
152 $seedLines = ($seed -replace "`r`n", "`n") -split "`n"
153 $newLines = ($regenerated -replace "`r`n", "`n") -split "`n"
154 for ($i = 0; $i -lt 8; $i++) {
155 $newLines[$i] | Should -Be $seedLines[$i]
156 }
157 }
158 }
159
160 Context 'Tag injection conflict' {
161 It 'Halts when a partial declares tags.agent that disagrees with the file slug' {
162 Write-Partial -Root $script:TestRoot -Slug 'expected-slug' -Content @"
163stimuli:
164 - name: mismatched
165 prompt: A prompt.
166 tags:
167 agent: other-slug
168"@
169 $result = Invoke-Generator -Root $script:TestRoot
170 $result.ExitCode | Should -Not -Be 0
171 $result.Output | Should -Match "expected-slug"
172 $result.Output | Should -Match "other-slug"
173 }
174
175 It 'Accepts a partial that explicitly tags the matching agent slug' {
176 Write-Partial -Root $script:TestRoot -Slug 'matched-slug' -Content @"
177stimuli:
178 - name: matched
179 prompt: A prompt.
180 tags:
181 agent: matched-slug
182 category: agent-behavior
183"@
184 $result = Invoke-Generator -Root $script:TestRoot
185 $result.ExitCode | Should -Be 0
186 $spec = Read-OutputObject -Root $script:TestRoot
187 $spec.stimuli[0].tags.agent | Should -Be 'matched-slug'
188 $spec.stimuli[0].tags.category | Should -Be 'agent-behavior'
189 }
190 }
191
192 Context 'Drift detection (-WhatIf)' {
193 It 'Exits 0 when on-disk output already matches the rendered spec' {
194 Write-Partial -Root $script:TestRoot -Slug 'driftless' -Content @"
195stimuli:
196 - name: driftless-case
197 prompt: Driftless prompt.
198"@
199 (Invoke-Generator -Root $script:TestRoot).ExitCode | Should -Be 0
200
201 $result = Invoke-Generator -Root $script:TestRoot -DryRun
202 $result.ExitCode | Should -Be 0
203 $diffPath = Join-Path $script:TestRoot 'logs/agent-behavior-spec-drift.diff'
204 Test-Path -LiteralPath $diffPath | Should -BeFalse
205 }
206
207 It 'Exits 1 and writes a drift diff when on-disk content differs' {
208 Write-Partial -Root $script:TestRoot -Slug 'drift' -Content @"
209stimuli:
210 - name: drift-case
211 prompt: Drift prompt.
212"@
213 (Invoke-Generator -Root $script:TestRoot).ExitCode | Should -Be 0
214
215 Write-Partial -Root $script:TestRoot -Slug 'drift' -Content @"
216stimuli:
217 - name: drift-case
218 prompt: Drift prompt UPDATED.
219"@
220 $result = Invoke-Generator -Root $script:TestRoot -DryRun
221 $result.ExitCode | Should -Be 1
222
223 $diffPath = Join-Path $script:TestRoot 'logs/agent-behavior-spec-drift.diff'
224 Test-Path -LiteralPath $diffPath | Should -BeTrue
225 $diff = [System.IO.File]::ReadAllText($diffPath)
226 $diff | Should -Match 'expected'
227 $diff | Should -Match 'actual'
228 }
229 }
230
231 Context 'Overwrite semantics' {
232 It 'Refuses to overwrite an existing file that differs without -Force' {
233 Write-SeedEvalYaml -Root $script:TestRoot -Content "stimuli: []`n"
234 Write-Partial -Root $script:TestRoot -Slug 'agent-a' -Content @"
235stimuli:
236 - name: agent-a-case
237 prompt: Prompt.
238"@
239 $result = Invoke-Generator -Root $script:TestRoot
240 $result.ExitCode | Should -Not -Be 0
241 $result.Output | Should -Match 'Force'
242 }
243
244 It 'Overwrites the existing file with -Force' {
245 Write-SeedEvalYaml -Root $script:TestRoot -Content "stimuli: []`n"
246 Write-Partial -Root $script:TestRoot -Slug 'agent-b' -Content @"
247stimuli:
248 - name: agent-b-case
249 prompt: Prompt.
250"@
251 $result = Invoke-Generator -Root $script:TestRoot -Force
252 $result.ExitCode | Should -Be 0
253 $spec = Read-OutputObject -Root $script:TestRoot
254 $spec.stimuli[0].name | Should -Be 'agent-b-case'
255 }
256
257 It 'Skips writing when -Force is set but content is identical' {
258 Write-Partial -Root $script:TestRoot -Slug 'idem' -Content @"
259stimuli:
260 - name: idem-case
261 prompt: Prompt.
262"@
263 (Invoke-Generator -Root $script:TestRoot).ExitCode | Should -Be 0
264 $first = Read-OutputYaml -Root $script:TestRoot
265 (Invoke-Generator -Root $script:TestRoot -Force).ExitCode | Should -Be 0
266 $second = Read-OutputYaml -Root $script:TestRoot
267 $second | Should -Be $first
268 }
269 }
270
271 Context 'Idempotency' {
272 It 'Produces the same output when run twice in a row' {
273 Write-Partial -Root $script:TestRoot -Slug 'idem' -Content @"
274stimuli:
275 - name: idem-case
276 prompt: Prompt.
277"@
278 (Invoke-Generator -Root $script:TestRoot).ExitCode | Should -Be 0
279 $first = Read-OutputYaml -Root $script:TestRoot
280 $drift = Invoke-Generator -Root $script:TestRoot -DryRun
281 $drift.ExitCode | Should -Be 0
282 $second = Read-OutputYaml -Root $script:TestRoot
283 $second | Should -Be $first
284 }
285 }
286
287 Context 'Partial validation errors' {
288 It 'Names the offending file when a partial is invalid YAML' {
289 Write-Partial -Root $script:TestRoot -Slug 'broken' -Content "stimuli:`n - name: x`n bad-indent:"
290 $result = Invoke-Generator -Root $script:TestRoot
291 $result.ExitCode | Should -Not -Be 0
292 $result.Output | Should -Match 'broken\.yml'
293 }
294
295 It 'Fails when a stimulus is missing the name field' {
296 Write-Partial -Root $script:TestRoot -Slug 'no-name' -Content @"
297stimuli:
298 - prompt: A prompt with no name.
299"@
300 $result = Invoke-Generator -Root $script:TestRoot
301 $result.ExitCode | Should -Not -Be 0
302 $result.Output | Should -Match "name"
303 }
304
305 It 'Fails when a stimulus is missing the prompt field' {
306 Write-Partial -Root $script:TestRoot -Slug 'no-prompt' -Content @"
307stimuli:
308 - name: prompt-less
309"@
310 $result = Invoke-Generator -Root $script:TestRoot
311 $result.ExitCode | Should -Not -Be 0
312 $result.Output | Should -Match "prompt"
313 }
314
315 It 'Silently skips a partial whose stimuli list is empty' {
316 Write-Partial -Root $script:TestRoot -Slug 'silent' -Content "stimuli: []`n"
317 $result = Invoke-Generator -Root $script:TestRoot
318 $result.ExitCode | Should -Be 0
319 $spec = Read-OutputObject -Root $script:TestRoot
320 ($null -eq $spec.stimuli -or $spec.stimuli.Count -eq 0) | Should -BeTrue
321 }
322 }
323}
324