microsoft/hve-core

Public

mirrored fromhttps://github.com/microsoft/hve-coreAvailable

CodeCommitsIssuesPull requestsActionsInsightsSecurity
feat/1637-d-skill-paths

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

scripts/evals/Modules/CorpusReader.psm1

90lines · modecode

1# Copyright (c) Microsoft Corporation.
2# SPDX-License-Identifier: MIT
3# CorpusReader.psm1
4# Purpose: Read AI corpus markdown files with YAML frontmatter stripping for moderation input.
5#Requires -Version 7.0
6
<#
.SYNOPSIS
    Returns the markdown body of a file with the YAML frontmatter block removed.

.DESCRIPTION
    Reads a UTF-8 markdown file and strips a leading YAML frontmatter block: a `---`
    line at the very start of the file, followed by any number of lines, closed by the
    first subsequent line that is exactly `---` (terminated by a newline or by the end
    of the file). When no complete frontmatter block is present the original content
    is returned unchanged. An empty file yields an empty string.

.PARAMETER Path
    Absolute or relative path to the markdown file. Must refer to an existing file;
    a missing path or a directory results in a terminating error.

.OUTPUTS
    System.String - File body without frontmatter.
#>
function Get-CorpusArtifactBody {
    [CmdletBinding()]
    [OutputType([string])]
    param(
        [Parameter(Mandatory = $true)]
        [string]$Path
    )

    # -PathType Leaf rejects directories up front instead of letting Get-Content
    # fail on them later.
    if (-not (Test-Path -LiteralPath $Path -PathType Leaf)) {
        throw "Corpus file not found: $Path"
    }

    $content = Get-Content -LiteralPath $Path -Raw -Encoding utf8
    if ([string]::IsNullOrEmpty($content)) {
        return ''
    }

    # Frontmatter grammar, anchored at the very start of the content (\A):
    #   ---<newline>            opening delimiter
    #   (?:[^\n]*\n)*?          zero or more whole lines, as few as possible
    #   ---(<newline> | EOF)    first line that is exactly '---'
    # Each line is consumed by [^\n]*\n, which can match a given line in only one
    # way. This keeps matching cheap when the closing delimiter is missing; a
    # '.'-based loop that may span newlines backtracks exponentially in that case.
    $pattern = '\A---\r?\n(?:[^\n]*\n)*?---(?:\r?\n|\z)'
    return [regex]::Replace($content, $pattern, '')
}
43
<#
.SYNOPSIS
    Selects the AI corpus markdown paths listed in a changed-artifacts manifest.

.DESCRIPTION
    Parses a JSON manifest such as `logs/changed-ai-artifacts.json` (or any file with a
    compatible structure) and walks its `artifacts` entries. Backslashes in each entry's
    `path` are converted to forward slashes, and the path is kept when it lies under
    `.github/agents`, `.github/prompts`, `.github/instructions`, or `.github/skills`
    and ends in `.md`. Entries whose `status` is `removed` are skipped.

.PARAMETER ManifestPath
    Path to the changed-artifacts JSON manifest.

.OUTPUTS
    System.String[] - Repository-relative paths of corpus markdown files to moderate.
#>
function Get-CorpusArtifactPaths {
    [CmdletBinding()]
    [OutputType([string[]])]
    param(
        [Parameter(Mandatory = $true)]
        [string]$ManifestPath
    )

    if (Test-Path -LiteralPath $ManifestPath) {
        $manifest = Get-Content -LiteralPath $ManifestPath -Raw -Encoding utf8 | ConvertFrom-Json
    }
    else {
        throw "Manifest not found: $ManifestPath"
    }

    # A manifest without any artifact entries has nothing to moderate.
    if (-not $manifest.artifacts) {
        return @()
    }

    $corpusPattern = '^\.github/(agents|prompts|instructions|skills)/.+\.md$'
    $selected = foreach ($entry in $manifest.artifacts) {
        # Skip entries flagged as removed.
        if (-not ($entry.status -ne 'removed')) {
            continue
        }

        # Normalise Windows-style separators before matching the corpus folders.
        $normalized = $entry.path -replace '\\', '/'
        if ($normalized -match $corpusPattern) {
            $normalized
        }
    }

    return @($selected)
}
86
# Public surface of the module: only the two corpus reader functions are exported.
Export-ModuleMember -Function 'Get-CorpusArtifactBody', 'Get-CorpusArtifactPaths'
91