microsoft/hve-core
Public · mirrored from https://github.com/microsoft/hve-core · Available
scripts/evals/Modules/StimulusIndex.psm1
200lines · modecode
| 1 | # Copyright (c) Microsoft Corporation. |
| 2 | # SPDX-License-Identifier: MIT |
| 3 | |
| 4 | # StimulusIndex.psm1 |
| 5 | # |
| 6 | # Purpose: Build an in-memory index of eval-spec stimulus backlinks keyed by (kind, slug) |
| 7 | # so AI-artifact coverage checks can resolve which evals exercise a given artifact. |
| 8 | # Author: HVE Core Team |
| 9 | |
| 10 | #Requires -Version 7.0 |
| 11 | |
# Apply the strictest strict-mode rule set available to everything in this module.
Set-StrictMode -Version Latest

# Artifact kinds that may appear as `tags.<kind>` backlinks on a stimulus, in lookup order.
$script:BacklinkKinds = 'skill', 'agent', 'prompt', 'instruction'
| 15 | |
function Get-StimulusBacklink {
    <#
    .SYNOPSIS
    Extracts artifact backlinks declared on a single stimulus entry.

    .DESCRIPTION
    Reads the `tags` mapping of the stimulus and, for every kind listed in
    `$script:BacklinkKinds` (skill/agent/prompt/instruction), emits one record per
    non-blank slug. A tag value may be a single scalar or a sequence of scalars; each
    slug is converted to a string and trimmed. Nested mappings and nested sequences are
    ignored because they cannot name a single artifact.

    An empty array is returned when the stimulus is null, is not a mapping, has no
    `tags` key, or its `tags` value is not a mapping.

    .PARAMETER Stimulus
    Parsed stimulus mapping from a spec's `stimuli[]` array. Null and non-mapping values
    are tolerated and produce an empty result.

    .OUTPUTS
    [hashtable[]] Each entry is `@{ kind; slug }`.
    #>
    [CmdletBinding()]
    [OutputType([hashtable[]])]
    param(
        [Parameter(Mandatory = $true)]
        [AllowNull()]
        $Stimulus
    )

    # `-isnot` is true for $null, so this single test covers both null and wrong-type input.
    if ($Stimulus -isnot [System.Collections.IDictionary] -or -not $Stimulus.Contains('tags')) {
        return ,@()
    }

    $tags = $Stimulus['tags']
    if ($tags -isnot [System.Collections.IDictionary]) {
        return ,@()
    }

    $results = [System.Collections.Generic.List[hashtable]]::new()
    foreach ($kind in $script:BacklinkKinds) {
        if (-not $tags.Contains($kind)) { continue }

        # @() turns a scalar into a one-element array and unrolls a sequence, so both
        # `skill: foo` and `skill: [foo, bar]` are walked the same way. A mapping is not
        # unrolled and arrives here as a single (rejected) item.
        foreach ($item in @($tags[$kind])) {
            if ($null -eq $item) { continue }

            # Casting a mapping or nested sequence to [string] would produce a type name or
            # a space-joined blob rather than a slug, so only scalars are accepted.
            $isScalar = ($item -is [string]) -or ($item -isnot [System.Collections.IEnumerable])
            if (-not $isScalar) { continue }

            $slug = [string]$item
            if ([string]::IsNullOrWhiteSpace($slug)) { continue }
            $results.Add(@{ kind = $kind; slug = $slug.Trim() })
        }
    }

    # Leading comma keeps the array intact (even when empty or single-element) on output.
    return ,$results.ToArray()
}
| 62 | |
function New-StimulusIndex {
    <#
    .SYNOPSIS
    Builds a 'kind:slug' → spec-paths lookup from every YAML spec found beneath an eval root.

    .DESCRIPTION
    Recursively enumerates `*.yaml` and `*.yml` files under `EvalRoot`, parses each with
    `ConvertFrom-Yaml`, and passes every entry of the spec's `stimuli` value through
    `Get-StimulusBacklink`. Each backlink is recorded under the key `kind:slug` together
    with the spec's path relative to the root (forward slashes); a spec appears at most
    once per key.

    Per-spec problems (empty file, read or parse failure, root that is not a mapping) are
    collected under `errors` instead of being thrown, so callers decide how strict to be.
    Specs lacking an enumerable, non-string `stimuli` value are counted but add no coverage.
    When `EvalRoot` is not an existing directory an empty index is returned whose `root`
    is the path exactly as supplied.

    Requires the `powershell-yaml` module to be importable.

    .PARAMETER EvalRoot
    Filesystem path to the `evals/` root (absolute or relative to the current location).

    .OUTPUTS
    [hashtable] `@{ root; specsScanned; coverage = @{ 'kind:slug' = @(specPath, ...) }; errors = @(@{ path; message }) }`.
    #>
    [CmdletBinding()]
    [OutputType([hashtable])]
    param(
        [Parameter(Mandatory = $true)]
        [string]$EvalRoot
    )

    $rootExists = Test-Path -LiteralPath $EvalRoot -PathType Container
    if (-not $rootExists) {
        return @{
            root         = $EvalRoot
            specsScanned = 0
            coverage     = @{}
            errors       = @()
        }
    }

    $rootPath = (Resolve-Path -LiteralPath $EvalRoot).ProviderPath
    $pathsByKey = @{}
    $problems = [System.Collections.Generic.List[hashtable]]::new()
    $scanned = 0

    $yamlFiles = Get-ChildItem -LiteralPath $rootPath -Recurse -File -Include '*.yaml', '*.yml' -ErrorAction SilentlyContinue
    foreach ($specFile in $yamlFiles) {
        $scanned += 1

        # Normalise to forward slashes so recorded paths look the same on every platform.
        $specPath = [System.IO.Path]::GetRelativePath($rootPath, $specFile.FullName).Replace('\', '/')

        $document = $null
        try {
            $text = Get-Content -LiteralPath $specFile.FullName -Raw -ErrorAction Stop
            if ([string]::IsNullOrWhiteSpace($text)) {
                $problems.Add(@{ path = $specPath; message = 'Spec file is empty' })
                continue
            }
            $document = ConvertFrom-Yaml -Yaml $text
        }
        catch {
            # Read failures land here too and are reported with the same prefix.
            $problems.Add(@{ path = $specPath; message = "YAML parse error: $($_.Exception.Message)" })
            continue
        }

        # `-isnot` is true for $null, so a null parse result is reported here as well.
        if ($document -isnot [System.Collections.IDictionary]) {
            $problems.Add(@{ path = $specPath; message = 'Spec root is not a mapping' })
            continue
        }

        if (-not $document.Contains('stimuli')) { continue }

        $stimuli = $document['stimuli']
        # Strings are IEnumerable but are never a stimulus list; null fails the -isnot test.
        if ($stimuli -is [string] -or $stimuli -isnot [System.Collections.IEnumerable]) { continue }

        foreach ($entry in $stimuli) {
            $backlinks = Get-StimulusBacklink -Stimulus $entry
            if ($null -eq $backlinks) { continue }

            foreach ($backlink in $backlinks) {
                if ($backlink -isnot [System.Collections.IDictionary]) { continue }

                $linkKind = $backlink['kind']
                $linkSlug = $backlink['slug']
                $coverageKey = "${linkKind}:${linkSlug}"

                $paths = $pathsByKey[$coverageKey]
                if ($null -eq $paths) {
                    $paths = [System.Collections.Generic.List[string]]::new()
                    $pathsByKey[$coverageKey] = $paths
                }

                # A spec may reference the same artifact from several stimuli; list it once.
                if (-not $paths.Contains($specPath)) {
                    $paths.Add($specPath)
                }
            }
        }
    }

    # Hand callers plain arrays rather than the mutable lists used while accumulating.
    $coverage = @{}
    foreach ($pair in $pathsByKey.GetEnumerator()) {
        $coverage[$pair.Key] = $pair.Value.ToArray()
    }

    return @{
        root         = $rootPath
        specsScanned = $scanned
        coverage     = $coverage
        errors       = $problems.ToArray()
    }
}
| 158 | |
function Test-StimulusCoverage {
    <#
    .SYNOPSIS
    Returns the list of spec paths that backlink a given artifact, or an empty array.

    .DESCRIPTION
    Looks up the key `<Kind>:<ArtifactId>` in the index's `coverage` mapping. The mapping
    may be any dictionary type (plain or ordered hashtable). An empty array is returned
    when the index has no `coverage` entry, when `coverage` is null or not a dictionary,
    or when the key is absent.

    .PARAMETER Index
    An index produced by `New-StimulusIndex`.

    .PARAMETER Kind
    Artifact kind: skill / agent / prompt / instruction.

    .PARAMETER ArtifactId
    Artifact slug.

    .OUTPUTS
    [string[]] Spec paths that cover the artifact (empty when no coverage).
    #>
    [CmdletBinding()]
    [OutputType([string[]])]
    param(
        [Parameter(Mandatory = $true)]
        [hashtable]$Index,

        [Parameter(Mandatory = $true)]
        [string]$Kind,

        [Parameter(Mandatory = $true)]
        [string]$ArtifactId
    )

    [string[]]$noCoverage = @()

    if (-not $Index.ContainsKey('coverage')) { return ,$noCoverage }

    # `-isnot` is true for $null. IDictionary.Contains is used instead of ContainsKey
    # because ordered dictionaries do not expose ContainsKey.
    $coverage = $Index['coverage']
    if ($coverage -isnot [System.Collections.IDictionary]) { return ,$noCoverage }

    $key = "${Kind}:${ArtifactId}"
    if (-not $coverage.Contains($key)) { return ,$noCoverage }

    # Materialise as string[] so the result matches the declared output type.
    [string[]]$specPaths = @($coverage[$key])
    return ,$specPaths
}
| 195 | |
# Public surface of the module: the three stimulus-index functions defined above.
Export-ModuleMember -Function 'Get-StimulusBacklink', 'New-StimulusIndex', 'Test-StimulusCoverage'
| 201 | |