microsoft/hve-core

Public

mirrored fromhttps://github.com/microsoft/hve-coreAvailable

CodeCommitsIssuesPull requestsActionsInsightsSecurity
feat/1873-devcontainer

Branches

Tags

  • No tags available.
0Branches0Tags
Go to file
Add file
Code

Clone

HTTPS

Download ZIP

scripts/evals/Modules/retext-runner.mjs

234lines · modecode

1// Copyright (c) Microsoft Corporation.
2// SPDX-License-Identifier: MIT
3//
4// retext-runner.mjs
5//
6// Runs alex.js (inclusive-language linter) and retext-profanities against
7// stimulus prompt text supplied via a JSON manifest on stdin. Emits a JSON
8// report on stdout and exits with code 1 when any message is flagged.
9//
10// Manifest schema:
11// [{ "spec": "<rel-path>", "stimulus": "<name>", "text": "<prompt>" }, ...]
12//
// Report schema:
// { "results": [ { spec, stimulus, messages: [{source, rule, message, line, column}] } ] }
// Only manifest entries with at least one flagged message appear in "results".
15
16import { stdin as input, stdout as output, exit, stderr } from 'node:process';
17import { text as alexText } from 'alex';
18import { unified } from 'unified';
19import retextEnglish from 'retext-english';
20import retextProfanities from 'retext-profanities';
21import retextStringify from 'retext-stringify';
22
// Phrase-aware allowlist keyed by rule ID (looked up via `message.ruleId`).
// When a rule fires, the ±60-char window around the match is tested against
// each regex listed for that rule. A match suppresses the message, so bare
// uses ("abuse") still flag while established technical bigrams ("token
// abuse", "penetration test") pass through.
//
// NOTE(review): each regex is tested against the whole window, not only the
// flagged span, so an allowed phrase elsewhere in the window also suppresses
// a nearby bare use of the same word.
//
// Patterns carry the `i` flag unless the casing itself is part of the match
// (the all-caps `WHITE` entry and the capitalised `Black ...` entry).
const PHRASE_ALLOWLIST = {
  execution: [
    /\b(code|command|remote|arbitrary|script|query|task|job|pipeline|workflow|test|order|parallel|sequential|tool|function|program|process|step)[\s-]+execution\b/i,
    /\bexecution\s+(context|environment|order|mode|model|engine|plan|policy|time|path|trace|step|flow|phase)\b/i,
  ],
  execute: [
    /\b(can|may|will|to|cannot|must|shall|should|able\s+to|allowed\s+to|attempts?\s+to|tries\s+to)\s+execute\b/i,
    /\bexecute\s+(the|a|an|this|that|code|commands?|scripts?|queries|query|tests?|workflows?|steps?|actions?|tools?)\b/i,
  ],
  executes: [/\bexecutes?\s+(the|a|an|in|on|when|once|after|before|with|via|inside|against)\b/i],
  executed: [
    /\b(is|was|been|gets?|being|are|were)\s+executed\b/i,
    /\bexecuted\s+(by|in|on|when|with|via|against|successfully|inside)\b/i,
  ],
  attack: [/\battack\s+(surface|vector|tree|chain|path|pattern|scenario|model|simulation|graph)\b/i],
  attacks: [/\b(injection|replay|phishing|brute[- ]?force|dos|ddos|mitm|csrf|xss|sql|prompt|side[- ]?channel|timing|downgrade|impersonation)\s+attacks?\b/i],
  failure: [
    /\bfailure\s+(modes?|points?|rate|domain|recovery|handling|scenarios?|injection)\b/i,
    /\b(single\s+point\s+of|point\s+of|build|test|deployment|pipeline|silent|graceful|hardware|network|system|cascading|partial|validation)\s+failure\b/i,
  ],
  failures: [/\b(test|build|pipeline|deployment|validation|cascading|silent|partial|transient)\s+failures\b/i],
  failed: [
    /\b(test|build|step|job|request|attempt|validation|check|deployment|login|authentication)\s+failed\b/i,
    /\bfailed\s+(to|with|because|due|tests?|requests?|attempts?|jobs?|builds?|logins?)\b/i,
  ],
  abuse: [
    // "<noun> abuse" compounds.
    /\b(token|privilege|api|rate[- ]?limit|resource|trust|process|permission|credential|service|account|session|workflow|pipeline|cache|memory|tool|prompt|model|context|chain|insider|lateral|optimization|reservation|scalper|automated)[\s-]+abuse\b/i,
    /\bbusiness\s+logic\s+abuse\b/i,
    // "abuse (of) <noun>" forms.
    /\babuse\s+(of\s+)?(tokens?|privileges?|apis?|rate[- ]?limits?|resources?|trust|processes?|permissions?|credentials?|services?|accounts?|sessions?|tools?)\b/i,
    /\babuse\s+(prevention|scenarios?|the\s+\w+)\b/i,
    // "<preposition or verb> abuse" forms.
    /\b(to|of|for|against|from|by|contain|prevent|reduce|stop|mitigate|deter|resist|combat|enable|enables|enabling|allow|allows|allowing|cause|causes|causing|make|makes|making|trigger|triggers|triggering|detect|detects|detecting|report|reports|reporting|monitor|monitors|monitoring|investigate|investigates|investigating|susceptible\s+to|vulnerable\s+to|prone\s+to|subject\s+to|protect\s+against|guard\s+against|safeguard\s+against|defend\s+against)\s+abuse\b/i,
  ],
  abuses: [/\babuses\s+(permissions?|trust|tokens?|credentials?|rate[- ]?limits?|access)\b/i],
  penetration: [
    /\bpen(etration)?[- ]?test(ing|er|ers|s)?\b/i,
    /\b(renewable|market|grid|water|gas|oil|broadband|internet|solar|wind)\s+penetration\b/i,
  ],
  invalid: [
    // NOTE(review): `argument|arguments?` is redundant; `arguments?` alone covers both forms.
    /\binvalid\s+(input|token|argument|arguments?|request|signature|state|format|payload|configuration|key|certificate|hash|json|yaml|xml|url|uri|path|response|reference|operation|character|syntax|schema|type|value|parameter|option|credential|claim|header|message|field|entry|record|file|user|session|cursor)\b/i,
  ],
  'host-hostess': [
    // NOTE(review): `build` appears twice in this alternation; the duplicate has no effect.
    /\b(http|https|host|virtual|bastion|jump|docker|container|kubernetes|kube|vm|web|database|build|target|source|remote|local|origin|destination|build|runner|agent)\s+host\b/i,
    /\bhost\s+(header|name|names|file|key|machine|machines|os|process|system|environment|configuration|address|port|computer)\b/i,
  ],
  'hostesses-hosts': [
    /\bhosts?\s+(file|header|key|name|configuration|environment)\b/i,
    /\b(virtual|build|target|remote|local|allowed|trusted|known)\s+hosts\b/i,
  ],
  white: [
    /\bwhite[- ]?list(ed|ing|s)?\b/i,
    /\bwhite[- ]?paper\b/i,
    /\bwhite[- ]?box\b/i,
    /\bwhite[- ]?hat\b/i,
    /\bwhite[- ]?space\b/i,
    /\bwhite\s+(background|text|fill|colou?r)\b/i,
    /\bblack[- ]?and[- ]?white\b/i,
    /\bplain\s+white\b/i,
    // Case-sensitive on purpose: only the all-caps spelling matches.
    /\bWHITE\b/,
    /\btext\s+(is\s+)?white\b/i,
  ],
  premature: [/\bpremature\s+(optimization|optimisation|return|exit|termination|closure|abort|completion)\b/i],
  remains: [/\bremains?\s+(valid|stable|consistent|the\s+same|unchanged|active|available|open|closed|empty|in|at|on)\b/i],
  color: [
    /\b(syntax|terminal|theme|background|foreground|text|font|highlight|border|accent|primary|secondary|css|hex|rgb|rgba|hsl|ansi)\s+colou?rs?\b/i,
    /\bcolou?rs?\s+(scheme|theme|palette|code|codes|map|space|picker|wheel|value|values)\b/i,
  ],
  colors: [/\b(syntax|terminal|theme|background|foreground|text|font|highlight|border|accent|primary|secondary|css|hex|rgb|rgba|hsl|ansi)\s+colou?rs\b/i],
  period: [/\b(time|grace|trial|retention|warm[- ]?up|cool[- ]?down|warranty|notice|review|incubation|sampling|polling|wait|sleep|sla)\s+period\b/i],
  periods: [/\b(time|grace|trial|retention|sampling|polling)\s+periods\b/i],
  uk: [/\b(uk|u\.k\.)\s+(government|gov|english|spelling|date|locale|user|users|region|usage)\b/i],
  australian: [/\baustralian\s+(english|spelling|locale|date|user|users|region)\b/i],
  cracks: [
    /\b(password|hash|encryption|crypto|code)\s+crack(s|ing|ed|er)?\b/i,
    /\bcrack(s|ing|ed)?\s+(the\s+)?(password|hash|code|encryption|cipher)\b/i,
    /\bfall(s|ing)?\s+through\s+the\s+cracks\b/i,
  ],
  crack: [
    /\b(password|hash|encryption|crypto|code)\s+crack(s|ing|ed|er)?\b/i,
    /\b(guess|brute[- ]?force)\s+or\s+crack\b/i,
  ],
  threeway: [/\bthree[- ]?way\s+(handshake|merge|join|sync|comparison|matching|reconciliation)\b/i],
  black: [
    /\bblack[- ]?box\b/i,
    /\bblack[- ]?list(ed|ing|s)?\b/i,
    /\bblack[- ]?hat\b/i,
    // Case-sensitive on purpose: only capitalised "Black" followed by one of these words matches.
    /\bBlack\s+(formatter|format|compatible)\b/,
  ],
  trap: [
    /\b(trap[- ]?door|trap\s+handler|trap\s+event|debug\s+trap|signal\s+trap|stack\s+trap|error\s+trap)\b/i,
    /\b(common|easy|classic|usual|interface|design|prompt|mockup|fidelity)\W+trap\b/i,
  ],
  // Accepts both the ASCII apostrophe and the typographic one (U+2019).
  devils: [/\bdevil['\u2019]s\s+advocate\b/i],
  god: [/\bgod[- ]?(object|class(es)?|mode|method(s)?)\b/i],
  drug: [/\bdrug\s+(data|dosage|administration|trial|interaction|safety|protocol|delivery)\b/i],
};
122
// Number of characters of surrounding text, on each side of a flagged span,
// that isAllowedByPhrase includes in the window it tests against the allowlist.
const CONTEXT_RADIUS = 60;
124
/**
 * Extract the character offsets of a lint message's location.
 *
 * Reads `message.place`, falling back to `message.position`. The location may
 * be a Position (`{ start, end }`, each point carrying an `offset`) or a bare
 * Point (`{ line, column, offset }`); a Point is reported as a zero-width span
 * so callers can still build a context window around it.
 *
 * @param {object} message - Message object exposing `place` or `position`.
 * @returns {{ start: number, end: number } | null} The offsets, or `null`
 *   when no start offset is available. `end` defaults to `start` when the
 *   location has no end offset.
 */
function messageOffsets(message) {
  const place = message.place ?? message.position;
  // A Point carries `offset` directly instead of under `start`.
  const start = place?.start?.offset ?? place?.offset;
  if (start == null) {
    return null;
  }
  const end = place?.end?.offset ?? start;
  return { start, end };
}
131
/**
 * Decide whether a flagged message is suppressed by the phrase allowlist.
 *
 * Looks up the regex list registered for `message.ruleId`, cuts a window of
 * `text` that extends CONTEXT_RADIUS characters before and after the flagged
 * span (clamped to the bounds of `text`), and reports whether any of those
 * regexes matches inside that window. The match may be anywhere in the
 * window; it is not required to overlap the flagged span.
 *
 * @param {object} message - Lint message providing `ruleId` and a location.
 * @param {string} text - Full text that was linted.
 * @returns {boolean} `true` when a pattern matches the window; `false` when
 *   the rule has no patterns, the message has no offsets, or nothing matches.
 */
function isAllowedByPhrase(message, text) {
  const patterns = PHRASE_ALLOWLIST[message.ruleId];
  const hasPatterns = Boolean(patterns) && patterns.length !== 0;
  // Offsets are only needed (and only computed) when the rule has patterns.
  const span = hasPatterns ? messageOffsets(message) : null;
  if (!span) {
    return false;
  }
  const from = Math.max(0, span.start - CONTEXT_RADIUS);
  const to = Math.min(text.length, span.end + CONTEXT_RADIUS);
  const context = text.slice(from, to);
  return patterns.some((pattern) => pattern.test(context));
}
146
/**
 * Read standard input to the end and return it as one UTF-8 decoded string.
 *
 * @returns {Promise<string>} Everything received on stdin, in arrival order.
 */
async function readStdin() {
  input.setEncoding('utf8');
  const pieces = [];
  for await (const piece of input) {
    pieces.push(piece);
  }
  return pieces.join('');
}
155
/**
 * Convert a linter message into the flat record used in the JSON report.
 *
 * @param {object} message - Linter message; `ruleId`, `source`, `reason`,
 *   `line` and `column` are read when present.
 * @param {string} source - Name of the tool that produced the message.
 * @returns {{ source: string, rule: string, message: string, line: *, column: * }}
 *   Record whose `rule` falls back from `ruleId` to the message's own `source`
 *   and then to the `source` argument, whose `message` falls back to the
 *   stringified message, and whose `line`/`column` are `null` when absent.
 */
function normalizeMessage(message, source) {
  const rule = message.ruleId ?? message.source ?? source;
  const description = message.reason ?? String(message);
  const line = message.line ?? null;
  const column = message.column ?? null;
  // Key order is kept stable because the record is serialised with JSON.stringify.
  return { source, rule, message: description, line, column };
}
165
/**
 * Lint `text` with alex and return the messages that survive the phrase
 * allowlist, each converted to a report record tagged with source 'alex'.
 *
 * @param {string} text - Prompt text to check.
 * @returns {Promise<object[]>} Normalized records for the messages that were not suppressed.
 */
async function runAlex(text) {
  const { messages } = alexText(text);
  const kept = [];
  for (const entry of messages ?? []) {
    if (isAllowedByPhrase(entry, text)) {
      continue;
    }
    kept.push(normalizeMessage(entry, 'alex'));
  }
  return kept;
}
172
// Shared unified pipeline for the profanity check, built once at module load
// and reused by runProfanities for every manifest entry: parse as English,
// run retext-profanities, then stringify.
// NOTE(review): `sureness: 1` is presumably the minimum-sureness threshold
// described in the retext-profanities docs; confirm against the installed version.
const profanityProcessor = unified()
  .use(retextEnglish)
  .use(retextProfanities, { sureness: 1 })
  .use(retextStringify);
177
/**
 * Run `text` through the shared profanity pipeline and return the messages
 * that survive the phrase allowlist, each converted to a report record tagged
 * with source 'retext-profanities'.
 *
 * @param {string} text - Prompt text to check.
 * @returns {Promise<object[]>} Normalized records for the messages that were not suppressed.
 */
async function runProfanities(text) {
  const { messages } = await profanityProcessor.process(text);
  const reported = (messages ?? []).filter((entry) => !isAllowedByPhrase(entry, text));
  return reported.map((entry) => normalizeMessage(entry, 'retext-profanities'));
}
184
/**
 * Write `payload` to `stream` and terminate the process with `code` from the
 * write callback. Calling `exit()` right after `write()` can drop output when
 * the stream is asynchronous, so the exit is deferred until the write has
 * been handled.
 *
 * @param {import('node:stream').Writable} stream - Destination stream.
 * @param {string} payload - Text to write.
 * @param {number} code - Process exit code.
 */
function writeAndExit(stream, payload, code) {
  stream.write(payload, () => exit(code));
}

/**
 * Entry point: read the JSON manifest from stdin, lint every entry that has
 * non-blank text with alex and retext-profanities, print the JSON report on
 * stdout, and exit.
 *
 * Exit codes: 0 when nothing is flagged (including empty input), 1 when at
 * least one message is flagged, 2 when the manifest is not valid JSON or is
 * not an array.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const raw = await readStdin();
  if (!raw.trim()) {
    writeAndExit(output, `${JSON.stringify({ results: [] })}\n`, 0);
    return;
  }

  let manifest;
  try {
    manifest = JSON.parse(raw);
  } catch (err) {
    writeAndExit(stderr, `retext-runner: failed to parse manifest JSON — ${err.message}\n`, 2);
    return;
  }

  if (!Array.isArray(manifest)) {
    writeAndExit(stderr, 'retext-runner: manifest must be a JSON array\n', 2);
    return;
  }

  const results = [];
  let flagged = 0;

  for (const item of manifest) {
    const spec = item?.spec ?? '<unknown>';
    const stimulus = item?.stimulus ?? '<unknown>';
    const text = typeof item?.text === 'string' ? item.text : '';
    // Entries without usable text are skipped rather than reported.
    if (!text.trim()) {
      continue;
    }

    const [alexMessages, profMessages] = await Promise.all([
      runAlex(text),
      runProfanities(text),
    ]);
    const messages = [...alexMessages, ...profMessages];
    // Only entries with at least one surviving message appear in the report.
    if (messages.length > 0) {
      flagged += messages.length;
      results.push({ spec, stimulus, messages });
    }
  }

  writeAndExit(output, `${JSON.stringify({ results })}\n`, flagged > 0 ? 1 : 0);
}
230
// Last-resort handler: report any rejection from main() on stderr and exit
// with code 2. The exit is issued from the write callback so the diagnostic
// is not lost when stderr is asynchronous, and the detail lookup tolerates
// rejection values that are not Error objects.
main().catch((err) => {
  const detail = err?.stack ?? err?.message ?? String(err);
  stderr.write(`retext-runner: unexpected error — ${detail}\n`, () => exit(2));
});
235