microsoft/hve-core
Publicmirrored fromhttps://github.com/microsoft/hve-coreAvailable
scripts/linting/Link-Lang-Check.ps1
394lines · modecode
| 1 | #!/usr/bin/env pwsh |
| 2 | # Copyright (c) Microsoft Corporation. |
| 3 | # SPDX-License-Identifier: MIT |
| 4 | #Requires -Version 7.0 |
| 5 | |
| 6 | <# |
| 7 | .SYNOPSIS |
| 8 | Language Path Link Checker and Fixer |
| 9 | |
| 10 | .DESCRIPTION |
| 11 | This script finds and optionally fixes URLs in git-tracked text files that contain |
| 12 | the language path segment 'en-us'. It helps maintain links that work regardless |
| 13 | of user language settings by removing unnecessary language path segments. |
| 14 | |
| 15 | Functionality: |
| 16 | - Scans git-tracked text files for URLs containing 'en-us' |
| 17 | - Identifies link locations by file and line number |
| 18 | - Optionally removes 'en-us/' from URLs to make them language-neutral |
| 19 | - Reports changes in human-readable or JSON format |
| 20 | |
| 21 | .PARAMETER Fix |
| 22 | Fix URLs by removing "en-us/" instead of just reporting them |
| 23 | |
| 24 | .PARAMETER ExcludePaths |
| 25 | Glob patterns for paths to exclude from checking (e.g., 'scripts/tests/**') |
| 26 | |
| 27 | .EXAMPLE |
| 28 | # Search for URLs containing 'en-us' and output as JSON |
| 29 | .\Link-Lang-Check.ps1 |
| 30 | |
| 31 | .EXAMPLE |
| 32 | # Fix URLs by removing 'en-us/' with verbose output |
| 33 | .\Link-Lang-Check.ps1 -Fix -Verbose |
| 34 | |
| 35 | .NOTES |
| 36 | The script is designed to help maintain documentation links that work regardless |
| 37 | of the user's language settings in their browser. |
| 38 | |
| 39 | Dependencies: |
| 40 | - git: Required for identifying text files under source control |
| 41 | - PowerShell 7.0 or later |
| 42 | |
| 43 | Returns: |
| 44 | - JSON array or console output: When not in fix mode, outputs a JSON array of found links |
| 45 | When in fix mode, outputs human-readable summary of changes |
| 46 | |
| 47 | See Also: |
| 48 | - Microsoft documentation guidance on language neutrality: https://learn.microsoft.com/style-guide/urls-web-addresses |
| 49 | #> |
| 50 | |
[CmdletBinding()]
param(
    # Rewrite matching URLs in place instead of only reporting them.
    [switch]$Fix,

    # Wildcard patterns; paths matching any of them are left out of the scan.
    [string[]]$ExcludePaths = @()
)

# Treat non-terminating errors as terminating so failures reach the try/catch
# in the main execution region.
$ErrorActionPreference = 'Stop'

# Shared CI helpers (presumably the source of Write-CIAnnotation used below).
Import-Module -Name (Join-Path -Path $PSScriptRoot -ChildPath '../lib/Modules/CIHelpers.psm1') -Force
| 60 | |
function Get-GitTextFile {
    <#
    .SYNOPSIS
    List the text files tracked by git, leaving out binary files.

    .DESCRIPTION
    Runs 'git grep' with an empty pattern and the -I switch so that git's own
    binary detection decides which files are skipped, and returns the names of
    the files that were reported.

    .OUTPUTS
    System.String[]
    Paths of the git-tracked text files. Nothing is returned when git reports
    no files or when the command fails.
    #>

    try {
        # -I: do not match in binary files; --name-only: print just file names.
        # stderr is merged into the output so it can be included in error text.
        $gitOutput = & git grep -I --name-only -e '' 2>&1

        # Exit code 1 is accepted here; only higher codes are treated as failures.
        if ($LASTEXITCODE -gt 1) {
            Write-Error "Error executing git grep: $gitOutput"
            return @()
        }

        if (-not $gitOutput -or $gitOutput.Count -le 0) {
            return @()
        }

        # Keep plain, non-blank strings only; merged stderr records are not strings.
        $textFiles = foreach ($entry in $gitOutput) {
            if ($entry -is [string] -and $entry.Trim() -ne '') {
                $entry
            }
        }
        return $textFiles
    }
    catch {
        Write-Error "Error getting git text files: $_"
        return @()
    }
}
| 94 | |
function Find-LinksInFile {
    <#
    .SYNOPSIS
    Find URLs that contain an 'en-us' path segment and return details for each.

    .DESCRIPTION
    Reads the file line by line and collects every http/https URL in which
    'en-us' appears as a whole path segment (that is, as '/en-us/') followed by
    at least one more URL character. Text that merely ends in 'en-us', such as
    '/golden-us/', is not treated as a language segment.

    .PARAMETER FilePath
    Path to the file to scan

    .OUTPUTS
    System.Object[]
    One object per URL occurrence, each containing:
    - File: The file path
    - LineNumber: The 1-based line number where the link appears
    - OriginalUrl: The original URL with 'en-us'
    - FixedUrl: The URL with the 'en-us/' segment(s) removed
    Nothing is returned when the file cannot be read or has no matching URLs.
    #>

    [CmdletBinding()]
    param(
        [string]$FilePath
    )

    try {
        $lines = @(Get-Content -Path $FilePath -Encoding UTF8 -ErrorAction Stop)
    }
    catch {
        Write-Verbose "Could not read $FilePath`: $_"
        return @()
    }

    # URL characters stop at whitespace, angle brackets and quotes. The leading
    # '/' before 'en-us/' makes sure a complete path segment is matched.
    $urlPattern = 'https?://[^\s<>"'']+?/en-us/[^\s<>"'']+'

    # The lookbehind keeps the preceding '/' in place and lets consecutive
    # 'en-us/' segments be removed in a single pass.
    $segmentPattern = '(?<=/)en-us/'

    # A generic list avoids re-allocating an array for every match.
    $linksFound = [System.Collections.Generic.List[object]]::new()

    for ($i = 0; $i -lt $lines.Count; $i++) {
        foreach ($match in [regex]::Matches($lines[$i], $urlPattern)) {
            $linksFound.Add([PSCustomObject]@{
                    File        = $FilePath
                    LineNumber  = $i + 1
                    OriginalUrl = $match.Value
                    FixedUrl    = $match.Value -replace $segmentPattern, ''
                })
        }
    }

    return $linksFound.ToArray()
}
| 150 | |
function Repair-LinksInFile {
    <#
    .SYNOPSIS
    Fix links in a single file by removing 'en-us/' from URLs.

    .DESCRIPTION
    Reads the whole file, replaces every occurrence of each link's OriginalUrl
    with its FixedUrl, and writes the file back only when the text changed.
    Replacement is literal and case-sensitive, so a URL that differs from
    OriginalUrl only by letter case is left untouched, and '$' characters in
    a URL are never interpreted as substitution tokens.

    .PARAMETER FilePath
    Path to the file to modify

    .PARAMETER Links
    Array of link objects for the file, each containing:
    - OriginalUrl: The original URL to replace
    - FixedUrl: The URL to replace it with

    .OUTPUTS
    System.Boolean
    True if the file was modified, False otherwise (including when the file
    could not be read or written).
    #>

    [CmdletBinding()]
    param(
        [string]$FilePath,
        [PSCustomObject[]]$Links
    )

    try {
        $content = Get-Content -Path $FilePath -Raw -Encoding UTF8 -ErrorAction Stop
    }
    catch {
        Write-Verbose "Could not read $FilePath`: $_"
        return $false
    }

    # An empty file yields $null from Get-Content -Raw; there is nothing to fix.
    if ([string]::IsNullOrEmpty($content)) {
        return $false
    }

    $modifiedContent = $content
    foreach ($link in $Links) {
        $originalUrl = [string]$link.OriginalUrl

        # String.Replace rejects an empty search string, so skip such entries.
        if ([string]::IsNullOrEmpty($originalUrl)) {
            continue
        }

        # Ordinal replace: no regex semantics and no case folding.
        $modifiedContent = $modifiedContent.Replace($originalUrl, [string]$link.FixedUrl)
    }

    # Case-sensitive comparison so that every real change is detected.
    if ($modifiedContent -cne $content) {
        try {
            # -NoNewline keeps the original line endings and trailing newline intact.
            Set-Content -Path $FilePath -Value $modifiedContent -Encoding UTF8 -NoNewline -ErrorAction Stop
            return $true
        }
        catch {
            Write-Verbose "Could not write to $FilePath`: $_"
            return $false
        }
    }
    return $false
}
| 206 | |
function Repair-AllLink {
    <#
    .SYNOPSIS
    Fix all links in their respective files.

    .DESCRIPTION
    Groups the links by their File property and calls Repair-LinksInFile once
    per file. Grouping is case-sensitive so that paths differing only by
    letter case are handled as separate files.

    .PARAMETER AllLinks
    Array of all link objects found across files

    .OUTPUTS
    System.Int32
    Number of files for which Repair-LinksInFile reported a modification
    #>

    [CmdletBinding()]
    param(
        [PSCustomObject[]]$AllLinks
    )

    # Without -CaseSensitive, 'README.md' and 'readme.md' would share one group
    # and only one of the two files would be repaired.
    $linksByFile = $AllLinks | Group-Object -Property File -CaseSensitive
    $filesModified = 0

    foreach ($fileGroup in $linksByFile) {
        $filePath = $fileGroup.Name
        $links = $fileGroup.Group

        Write-Verbose "Fixing links in $filePath..."

        if (Repair-LinksInFile -FilePath $filePath -Links $links) {
            $filesModified++
        }
    }

    return $filesModified
}
| 246 | |
function ConvertTo-JsonOutput {
    <#
    .SYNOPSIS
    Project link objects onto the shape used for JSON reporting.

    .DESCRIPTION
    Builds a new object for every link that carries only the reporting fields,
    using snake_case property names. The FixedUrl field is not copied.

    .PARAMETER Links
    The complete array of link objects

    .OUTPUTS
    System.Object[]
    One object per input link, each containing:
    - file: The file path
    - line_number: The line number where the link appears
    - original_url: The original URL with 'en-us'
    #>

    [CmdletBinding()]
    param(
        [PSCustomObject[]]$Links
    )

    # Collect the projected objects straight from the loop's output.
    $reportRows = @(
        foreach ($entry in $Links) {
            [PSCustomObject]@{
                file         = $entry.File
                line_number  = $entry.LineNumber
                original_url = $entry.OriginalUrl
            }
        }
    )

    return $reportRows
}
| 282 | |
function Invoke-LinkLanguageCheck {
    <#
    .SYNOPSIS
    Scan git-tracked text files for 'en-us' URLs and report or fix them.

    .DESCRIPTION
    Takes the file list from Get-GitTextFile, removes paths matching any of the
    exclusion patterns, and runs Find-LinksInFile on every remaining file.
    Without -Fix the findings are written as a JSON array ('[]' when there are
    none). With -Fix the files are rewritten through Repair-AllLink and a
    summary line is written instead. Progress details are emitted only when
    verbose output is enabled.

    .PARAMETER Fix
    Fix URLs by removing "en-us/" instead of just reporting them

    .PARAMETER ExcludePaths
    Wildcard patterns, compared against each file path with -like, for paths
    to exclude from checking

    .OUTPUTS
    System.String
    A JSON array (report mode) or a human-readable summary line (fix mode).
    #>

    [CmdletBinding()]
    [OutputType([string])]
    param(
        [switch]$Fix,
        [string[]]$ExcludePaths = @()
    )

    # -Verbose does not create a $Verbose variable; it is surfaced through
    # $VerbosePreference, which is also inherited from the calling scope.
    $isVerbose = $VerbosePreference -ne 'SilentlyContinue'

    if ($isVerbose) {
        Write-Information "Getting list of git-tracked text files..." -InformationAction Continue
    }

    # Force an array so .Count is reliable for zero or one file.
    $files = @(Get-GitTextFile)

    # Apply exclusion patterns
    if ($ExcludePaths.Count -gt 0) {
        $originalCount = $files.Count
        $files = @($files | Where-Object {
                $filePath = $_
                $excluded = $false
                foreach ($pattern in $ExcludePaths) {
                    if ($filePath -like $pattern) {
                        $excluded = $true
                        break
                    }
                }
                -not $excluded
            })
        if ($isVerbose) {
            $excludedCount = $originalCount - $files.Count
            Write-Information "Excluded $excludedCount files matching exclusion patterns" -InformationAction Continue
        }
    }

    if ($isVerbose) {
        Write-Information "Found $($files.Count) git-tracked text files" -InformationAction Continue
    }

    # Collect the loop's output directly; files without links contribute nothing.
    $allLinks = @(
        foreach ($filePath in $files) {
            if (-not (Test-Path -Path $filePath -PathType Leaf)) {
                if ($isVerbose) {
                    Write-Warning "Skipping $filePath`: not a regular file"
                }
                continue
            }

            Write-Verbose "Processing $filePath..."
            Find-LinksInFile -FilePath $filePath
        }
    )

    # Report findings
    if ($allLinks.Count -gt 0) {
        if ($Fix) {
            # Human-readable output when fixing links
            if ($isVerbose) {
                Write-Information "`nFound $($allLinks.Count) URLs containing 'en-us':`n" -InformationAction Continue
                foreach ($linkInfo in $allLinks) {
                    Write-Information "File: $($linkInfo.File), Line: $($linkInfo.LineNumber)" -InformationAction Continue
                    Write-Information "  URL: $($linkInfo.OriginalUrl)" -InformationAction Continue
                    Write-Information "" -InformationAction Continue
                }
            }

            $filesModified = Repair-AllLink -AllLinks $allLinks
            Write-Output "Fixed $($allLinks.Count) URLs in $filesModified files."

            if ($isVerbose) {
                Write-Information "`nDetails of fixes:" -InformationAction Continue
                foreach ($linkInfo in $allLinks) {
                    Write-Information "File: $($linkInfo.File), Line: $($linkInfo.LineNumber)" -InformationAction Continue
                    Write-Information "  Original: $($linkInfo.OriginalUrl)" -InformationAction Continue
                    Write-Information "  Fixed: $($linkInfo.FixedUrl)" -InformationAction Continue
                    Write-Information "" -InformationAction Continue
                }
            }
        }
        else {
            # JSON output when not fixing links. Passing an array through
            # -InputObject keeps the result a JSON array even for one finding;
            # piping a single object would serialize it as a bare object.
            $jsonOutput = @(ConvertTo-JsonOutput -Links $allLinks)
            Write-Output (ConvertTo-Json -InputObject $jsonOutput -Depth 3)
        }
    }
    else {
        if (-not $Fix) {
            # Empty JSON array if no links found
            Write-Output "[]"
        }
        else {
            Write-Output "No URLs containing 'en-us' were found."
        }
    }
}
| 381 | |
#region Main Execution
# Run the check only when the script is invoked directly; when it is
# dot-sourced the functions are loaded without executing anything.
if (-not ($MyInvocation.InvocationName -eq '.')) {
    try {
        Invoke-LinkLanguageCheck -Fix:$Fix -ExcludePaths $ExcludePaths
        exit 0
    }
    catch {
        # Report the failure on the error stream and as a CI annotation, then
        # signal failure through the exit code.
        $failureMessage = $_.Exception.Message
        Write-Error -ErrorAction Continue "Link-Lang-Check failed: $failureMessage"
        Write-CIAnnotation -Message $failureMessage -Level Error
        exit 1
    }
}
#endregion Main Execution
| 395 | |