microsoft/hve-core
Publicmirrored fromhttps://github.com/microsoft/hve-coreAvailable
scripts/linting/Link-Lang-Check.ps1
370lines · modecode
| 1 | <# |
| 2 | .SYNOPSIS |
| 3 | Language Path Link Checker and Fixer |
| 4 | |
| 5 | .DESCRIPTION |
| 6 | This script finds and optionally fixes URLs in git-tracked text files that contain |
| 7 | the language path segment 'en-us'. It helps maintain links that work regardless |
| 8 | of user language settings by removing unnecessary language path segments. |
| 9 | |
| 10 | Functionality: |
| 11 | - Scans git-tracked text files for URLs containing 'en-us' |
| 12 | - Identifies link locations by file and line number |
| 13 | - Optionally removes 'en-us/' from URLs to make them language-neutral |
| 14 | - Reports changes in human-readable or JSON format |
| 15 | |
| 16 | .PARAMETER Fix |
| 17 | Fix URLs by removing "en-us/" instead of just reporting them |
| 18 | |
| 19 | .PARAMETER ExcludePaths |
| 20 | Wildcard patterns (matched with PowerShell's -like operator) for paths to exclude from checking (e.g., 'scripts/tests/**') |
| 21 | |
| 22 | .EXAMPLE |
| 23 | # Search for URLs containing 'en-us' and output as JSON |
| 24 | .\Link-Lang-Check.ps1 |
| 25 | |
| 26 | .EXAMPLE |
| 27 | # Fix URLs by removing 'en-us/' with verbose output |
| 28 | .\Link-Lang-Check.ps1 -Fix -Verbose |
| 29 | |
| 30 | .NOTES |
| 31 | The script is designed to help maintain documentation links that work regardless |
| 32 | of the user's language settings in their browser. |
| 33 | |
| 34 | Dependencies: |
| 35 | - git: Required for identifying text files under source control |
| 36 | - PowerShell 5.1 or PowerShell 7+ |
| 37 | |
| 38 | Returns: |
| 39 | - JSON array or console output: When not in fix mode, outputs a JSON array of found links |
| 40 | When in fix mode, outputs human-readable summary of changes |
| 41 | |
| 42 | See Also: |
| 43 | - Microsoft documentation guidance on language neutrality: https://learn.microsoft.com/style-guide/urls-web-addresses |
| 44 | #> |
| 45 | |
[CmdletBinding()]
param(
    # When set, rewrite matching URLs in place instead of only reporting them.
    [switch] $Fix,

    # Patterns compared against each tracked path with -like; matching paths are skipped.
    [string[]] $ExcludePaths = @()
)
| 51 | |
function Get-GitTextFile {
    <#
    .SYNOPSIS
        List the git-tracked files that git treats as text.

    .DESCRIPTION
        Runs 'git grep -I --name-only' with an empty pattern and returns the
        reported file names. The -I switch asks git to skip files it detects
        as binary.

    .OUTPUTS
        System.String[]
        The non-blank path strings printed by git. Nothing is returned when
        git fails or prints no paths.
    #>

    try {
        # stderr is merged into the captured output so it can be shown on failure.
        $gitOutput = & git grep -I --name-only -e '' 2>&1

        # Only exit codes above 1 are treated as failures.
        if ($LASTEXITCODE -gt 1) {
            Write-Error "Error executing git grep: $gitOutput"
            return @()
        }

        # Guard clause: nothing usable came back.
        if (-not $gitOutput -or $gitOutput.Count -le 0) {
            return @()
        }

        # Keep plain, non-blank strings only (drops any merged error records).
        return $gitOutput | Where-Object { $_ -is [string] -and $_.Trim() -ne '' }
    }
    catch {
        Write-Error "Error getting git text files: $_"
        return @()
    }
}
| 85 | |
function Find-LinksInFile {
    <#
    .SYNOPSIS
        Find URLs that contain an 'en-us' path segment and return their details.

    .DESCRIPTION
        Reads the file line by line and collects every http/https URL in which
        'en-us' appears as a complete path segment ('/en-us/') followed by at
        least one more character. URLs that merely contain the text 'en-us/'
        inside a longer segment (for example '/garden-us/') are not reported.
        Matching and replacement are both case-sensitive.

    .PARAMETER FilePath
        Path to the file to scan. The path is used literally, so characters
        such as '[' and ']' are not interpreted as wildcards.

    .OUTPUTS
        System.Object[]
        One object per URL occurrence with the properties:
        - File: The file path as passed in
        - LineNumber: The 1-based line number where the URL appears
        - OriginalUrl: The URL exactly as it appears in the file
        - FixedUrl: The URL with every 'en-us/' path segment removed
        Nothing is returned when the file cannot be read or has no matches.
    #>

    [CmdletBinding()]
    param(
        [string]$FilePath
    )

    try {
        $lines = @(Get-Content -LiteralPath $FilePath -Encoding UTF8 -ErrorAction Stop)
    }
    catch {
        Write-Verbose "Could not read $FilePath`: $_"
        return @()
    }

    # The leading '/' anchors 'en-us' to a path-segment boundary; the lazy
    # quantifier before it consumes the scheme-relative host (and any prefix path).
    $urlRegex = [regex]'https?://[^\s<>"'']+?/en-us/[^\s<>"'']+'

    # Removes the segment itself while keeping the slash that precedes it.
    $segmentRegex = [regex]'(?<=/)en-us/'

    # A list avoids re-allocating an array for every URL found.
    $found = [System.Collections.Generic.List[object]]::new()

    for ($i = 0; $i -lt $lines.Count; $i++) {
        foreach ($match in $urlRegex.Matches($lines[$i])) {
            $found.Add([PSCustomObject]@{
                    File        = $FilePath
                    LineNumber  = $i + 1
                    OriginalUrl = $match.Value
                    FixedUrl    = $segmentRegex.Replace($match.Value, '')
                })
        }
    }

    return $found.ToArray()
}
| 141 | |
function Repair-LinksInFile {
    <#
    .SYNOPSIS
        Fix links in a single file by swapping each original URL for its fixed form.

    .DESCRIPTION
        Reads the whole file, replaces every occurrence of each link's
        OriginalUrl with its FixedUrl, and writes the file back only when the
        text actually changed. Replacement is a literal, case-sensitive string
        substitution, so characters such as '$' in a URL are copied verbatim
        rather than being interpreted as regex substitution tokens.

    .PARAMETER FilePath
        Path to the file to modify. The path is used literally (no wildcard
        expansion).

    .PARAMETER Links
        Array of link objects for the file, each containing:
        - OriginalUrl: The original URL to replace
        - FixedUrl: The URL to replace it with
        Entries that are null or have an empty OriginalUrl are ignored.

    .OUTPUTS
        System.Boolean
        True if the file was modified, False otherwise (including when the
        file could not be read or written).
    #>

    [CmdletBinding()]
    param(
        [string]$FilePath,
        [PSCustomObject[]]$Links
    )

    try {
        $content = Get-Content -LiteralPath $FilePath -Raw -Encoding UTF8 -ErrorAction Stop
    }
    catch {
        Write-Verbose "Could not read $FilePath`: $_"
        return $false
    }

    # Get-Content -Raw yields $null for an empty file; there is nothing to fix.
    if ([string]::IsNullOrEmpty($content)) {
        return $false
    }

    $modifiedContent = $content
    foreach ($link in $Links) {
        # String.Replace rejects an empty search string, so skip unusable entries.
        if ($null -eq $link -or [string]::IsNullOrEmpty($link.OriginalUrl)) {
            continue
        }

        # Literal replacement: no regex escaping needed and no '$' substitutions.
        $modifiedContent = $modifiedContent.Replace([string]$link.OriginalUrl, [string]$link.FixedUrl)
    }

    # Ordinal comparison; only touch the file when something really changed.
    if ([string]::Equals($modifiedContent, $content, [System.StringComparison]::Ordinal)) {
        return $false
    }

    try {
        # NOTE(review): on Windows PowerShell 5.1 '-Encoding UTF8' writes a BOM;
        # confirm that is acceptable for files that did not have one.
        Set-Content -LiteralPath $FilePath -Value $modifiedContent -Encoding UTF8 -NoNewline -ErrorAction Stop
        return $true
    }
    catch {
        Write-Verbose "Could not write to $FilePath`: $_"
        return $false
    }
}
| 197 | |
function Repair-AllLink {
    <#
    .SYNOPSIS
        Apply link fixes to every file that has findings.

    .DESCRIPTION
        Buckets the supplied link objects by their File property and hands
        each bucket to Repair-LinksInFile, counting how many calls report
        that the file was changed.

    .PARAMETER AllLinks
        Link objects collected across all scanned files.

    .OUTPUTS
        System.Int32
        Count of files for which Repair-LinksInFile returned a true value.
    #>

    [CmdletBinding()]
    param(
        [PSCustomObject[]]$AllLinks
    )

    $modifiedCount = 0

    # One group per distinct file path.
    foreach ($bucket in ($AllLinks | Group-Object -Property File)) {
        $targetFile = $bucket.Name

        Write-Verbose "Fixing links in $targetFile..."

        $wasChanged = Repair-LinksInFile -FilePath $targetFile -Links $bucket.Group
        if ($wasChanged) {
            $modifiedCount++
        }
    }

    return $modifiedCount
}
| 237 | |
function ConvertTo-JsonOutput {
    <#
    .SYNOPSIS
        Project link objects into the shape used for JSON reporting.

    .DESCRIPTION
        Emits one new object per input link carrying only the reporting
        fields, renamed to snake_case. The FixedUrl property is not copied.

    .PARAMETER Links
        The complete array of link objects.

    .OUTPUTS
        System.Object[]
        Objects ready for JSON serialization, each containing:
        - file: The file path
        - line_number: The line number where the link appears
        - original_url: The original URL with 'en-us'
    #>

    [CmdletBinding()]
    param(
        [PSCustomObject[]]$Links
    )

    # Collect the loop's output directly instead of growing an array.
    $projected = foreach ($entry in $Links) {
        [PSCustomObject]@{
            file         = $entry.File
            line_number  = $entry.LineNumber
            original_url = $entry.OriginalUrl
        }
    }

    return $projected
}
| 273 | |
# Main script execution
#
# Flow: list tracked text files, drop excluded paths, scan each file for
# 'en-us' URLs, then either fix them (-Fix) or print the findings as JSON.
try {
    # [CmdletBinding()] does not create a $Verbose variable; the -Verbose switch
    # is surfaced through $VerbosePreference, so derive the flag from that.
    $isVerbose = $VerbosePreference -ne 'SilentlyContinue'

    if ($isVerbose) {
        Write-Information "Getting list of git-tracked text files..." -InformationAction Continue
    }

    # @() guarantees an array even when zero or one path comes back.
    $files = @(Get-GitTextFile)

    # Apply exclusion patterns
    if ($ExcludePaths.Count -gt 0) {
        $originalCount = $files.Count
        $files = @($files | Where-Object {
                $candidate = $_
                $excluded = $false
                foreach ($pattern in $ExcludePaths) {
                    if ($candidate -like $pattern) {
                        $excluded = $true
                        break
                    }
                }
                -not $excluded
            })
        if ($isVerbose) {
            $excludedCount = $originalCount - $files.Count
            Write-Information "Excluded $excludedCount files matching exclusion patterns" -InformationAction Continue
        }
    }

    if ($isVerbose) {
        Write-Information "Found $($files.Count) git-tracked text files" -InformationAction Continue
    }

    $allLinks = [System.Collections.Generic.List[object]]::new()

    foreach ($filePath in $files) {
        # -LiteralPath: tracked file names may contain wildcard characters such as '['.
        if (-not (Test-Path -LiteralPath $filePath -PathType Leaf)) {
            if ($isVerbose) {
                Write-Warning "Skipping $filePath`: not a regular file"
            }
            continue
        }

        Write-Verbose "Processing $filePath..."

        # @() turns "no output" into an empty array so that files without
        # findings contribute nothing (not a $null entry) to the list.
        foreach ($link in @(Find-LinksInFile -FilePath $filePath)) {
            if ($null -ne $link) {
                $allLinks.Add($link)
            }
        }
    }

    # Report findings
    if ($allLinks.Count -gt 0) {
        if ($Fix) {
            # Human-readable output when fixing links
            if ($isVerbose) {
                Write-Information "`nFound $($allLinks.Count) URLs containing 'en-us':`n" -InformationAction Continue
                foreach ($linkInfo in $allLinks) {
                    Write-Information "File: $($linkInfo.File), Line: $($linkInfo.LineNumber)" -InformationAction Continue
                    Write-Information " URL: $($linkInfo.OriginalUrl)" -InformationAction Continue
                    Write-Information "" -InformationAction Continue
                }
            }

            $filesModified = Repair-AllLink -AllLinks $allLinks.ToArray()
            Write-Output "Fixed $($allLinks.Count) URLs in $filesModified files."

            if ($isVerbose) {
                Write-Information "`nDetails of fixes:" -InformationAction Continue
                foreach ($linkInfo in $allLinks) {
                    Write-Information "File: $($linkInfo.File), Line: $($linkInfo.LineNumber)" -InformationAction Continue
                    Write-Information " Original: $($linkInfo.OriginalUrl)" -InformationAction Continue
                    Write-Information " Fixed: $($linkInfo.FixedUrl)" -InformationAction Continue
                    Write-Information "" -InformationAction Continue
                }
            }
        }
        else {
            # JSON output when not fixing links. Passing the array through
            # -InputObject keeps a single finding serialized as a one-element
            # JSON array instead of a bare object.
            $jsonOutput = @(ConvertTo-JsonOutput -Links $allLinks.ToArray())
            Write-Output (ConvertTo-Json -InputObject $jsonOutput -Depth 3)
        }
    }
    else {
        if (-not $Fix) {
            # Empty JSON array if no links found
            Write-Output "[]"
        }
        else {
            Write-Output "No URLs containing 'en-us' were found."
        }
    }
}
catch {
    Write-Error "An error occurred: $_"
    exit 1
}
| 370 | |
| 371 | |