microsoft/hve-core
Public · mirrored from https://github.com/microsoft/hve-core · Available
scripts/linting/Link-Lang-Check.ps1
346 lines · mode: code
| 1 | <# |
| 2 | .SYNOPSIS |
| 3 | Language Path Link Checker and Fixer |
| 4 | |
| 5 | .DESCRIPTION |
| 6 | This script finds and optionally fixes URLs in git-tracked text files that contain |
| 7 | the language path segment 'en-us'. It helps maintain links that work regardless |
| 8 | of user language settings by removing unnecessary language path segments. |
| 9 | |
| 10 | Functionality: |
| 11 | - Scans git-tracked text files for URLs containing 'en-us' |
| 12 | - Identifies link locations by file and line number |
| 13 | - Optionally removes 'en-us/' from URLs to make them language-neutral |
| 14 | - Reports changes in human-readable or JSON format |
| 15 | |
| 16 | .PARAMETER Fix |
| 17 | Fix URLs by removing "en-us/" instead of just reporting them |
| 18 | |
| 19 | .EXAMPLE |
| 20 | # Search for URLs containing 'en-us' and output as JSON |
| 21 | .\Link-Lang-Check.ps1 |
| 22 | |
| 23 | .EXAMPLE |
| 24 | # Fix URLs by removing 'en-us/' with verbose output |
| 25 | .\Link-Lang-Check.ps1 -Fix -Verbose |
| 26 | |
| 27 | .NOTES |
| 28 | The script is designed to help maintain documentation links that work regardless |
| 29 | of the user's language settings in their browser. |
| 30 | |
| 31 | Dependencies: |
| 32 | - git: Required for identifying text files under source control |
| 33 | - PowerShell 5.1 or PowerShell 7+ |
| 34 | |
| 35 | Returns: |
| 36 | - JSON array or console output: When not in fix mode, outputs a JSON array of found links |
| 37 | When in fix mode, outputs human-readable summary of changes |
| 38 | |
| 39 | See Also: |
| 40 | - Microsoft documentation guidance on language neutrality: https://learn.microsoft.com/style-guide/urls-web-addresses |
| 41 | #> |
| 42 | |
[CmdletBinding()]
param(
    # Rewrite the matching URLs in place instead of only reporting them.
    [Parameter()]
    [switch]$Fix
)
| 47 | |
function Get-GitTextFile {
    <#
    .SYNOPSIS
    Get the list of text files under git source control, excluding binary files.

    .DESCRIPTION
    Runs 'git grep -I --name-only' with an empty pattern. The empty pattern
    matches every line and -I makes git skip files it detects as binary, so the
    command prints one path per text file.

    core.quotePath is turned off for this single invocation so that paths
    containing non-ASCII characters are printed verbatim rather than in git's
    quoted, octal-escaped form, which cannot be used as a file system path.

    .OUTPUTS
    System.String[]
    The paths printed by git. Nothing is returned when git fails or when no
    file matches.
    #>

    try {
        # stderr is merged into the captured output so it can be included in the error message.
        $gitOutput = & git -c core.quotepath=off grep -I --name-only -e '' 2>&1

        # Exit code 1 is tolerated as "nothing matched"; anything above 1 is treated as a failure.
        if ($LASTEXITCODE -gt 1) {
            Write-Error "Error executing git grep: $gitOutput"
            return @()
        }

        if ($gitOutput -and $gitOutput.Count -gt 0) {
            # Redirected stderr lines are not [string] instances, so this filter
            # drops them together with blank lines.
            return $gitOutput | Where-Object { $_ -is [string] -and $_.Trim() -ne '' }
        }

        return @()
    }
    catch {
        Write-Error "Error getting git text files: $_"
        return @()
    }
}
| 81 | |
function Find-LinksInFile {
    <#
    .SYNOPSIS
    Find links with an 'en-us' path segment in a file and return details.

    .DESCRIPTION
    Reads the file line by line as UTF-8 and collects every http/https URL
    that contains 'en-us/' as a whole path segment (that is, directly preceded
    by '/'). Text such as 'garden-us/' is not treated as a language segment.

    .PARAMETER FilePath
    Path to the file to scan. It is used literally, so wildcard characters
    such as '[' and ']' in file names are not expanded.

    .OUTPUTS
    System.Object[]
    One object per URL found, with these properties:
    - File: The file path as passed in
    - LineNumber: The 1-based line number where the link appears
    - OriginalUrl: The URL as it appears in the file
    - FixedUrl: The URL with the 'en-us/' segment(s) removed
    Nothing is returned when the file cannot be read or holds no such URL.
    #>

    [CmdletBinding()]
    param(
        [string]$FilePath
    )

    try {
        # @() keeps a one-line file as a one-element array. Without it Get-Content
        # yields a bare string and indexing below would return single characters.
        $lines = @(Get-Content -LiteralPath $FilePath -Encoding UTF8 -ErrorAction Stop)
    }
    catch {
        Write-Verbose "Could not read $FilePath`: $_"
        return @()
    }

    # 'en-us/' only counts when it directly follows a '/', i.e. it is a full path segment.
    $segmentPattern = '(?<=/)en-us/'

    # A URL runs until whitespace, an angle bracket, or a quote character.
    $urlPattern = 'https?://[^\s<>"'']+?' + $segmentPattern + '[^\s<>"'']+'

    $linksFound = New-Object 'System.Collections.Generic.List[object]'

    for ($i = 0; $i -lt $lines.Count; $i++) {
        foreach ($match in [regex]::Matches($lines[$i], $urlPattern)) {
            $linksFound.Add([PSCustomObject]@{
                    File        = $FilePath
                    LineNumber  = $i + 1
                    OriginalUrl = $match.Value
                    FixedUrl    = $match.Value -replace $segmentPattern, ''
                })
        }
    }

    return $linksFound.ToArray()
}
| 137 | |
function Repair-LinksInFile {
    <#
    .SYNOPSIS
    Fix links in a single file by replacing each original URL with its fixed form.

    .DESCRIPTION
    Reads the file as UTF-8, replaces every occurrence of each link's
    OriginalUrl with its FixedUrl, and writes the file back only when the text
    actually changed.

    The replacement is a literal string replacement, so characters such as '$'
    inside a URL are never interpreted as regex substitutions. The file is
    written back with the same byte-order-mark state it was read with: a file
    that had no BOM does not gain one, and a file that had one keeps it.

    .PARAMETER FilePath
    Path to the file to modify. It is used literally (no wildcard expansion).

    .PARAMETER Links
    Array of link objects for the file, each containing:
    - OriginalUrl: The original URL to replace
    - FixedUrl: The URL to replace it with

    .OUTPUTS
    System.Boolean
    True if the file was modified, False otherwise (including read or write failures).
    #>

    [CmdletBinding()]
    param(
        [string]$FilePath,
        [PSCustomObject[]]$Links
    )

    try {
        # .NET file APIs resolve relative paths against the process working
        # directory, not the PowerShell location, so resolve to a full path first.
        $resolvedPath = (Resolve-Path -LiteralPath $FilePath -ErrorAction Stop).ProviderPath
        $rawBytes = [System.IO.File]::ReadAllBytes($resolvedPath)
    }
    catch {
        Write-Verbose "Could not read $FilePath`: $_"
        return $false
    }

    # Detect the UTF-8 byte order mark (EF BB BF) so it can be preserved on write.
    $hasBom = $rawBytes.Length -ge 3 -and
        $rawBytes[0] -eq 0xEF -and $rawBytes[1] -eq 0xBB -and $rawBytes[2] -eq 0xBF
    $encoding = New-Object System.Text.UTF8Encoding($hasBom)
    $bomLength = if ($hasBom) { 3 } else { 0 }
    $content = $encoding.GetString($rawBytes, $bomLength, $rawBytes.Length - $bomLength)

    # Replace each link
    $modifiedContent = $content
    foreach ($link in $Links) {
        $originalUrl = [string]$link.OriginalUrl
        # String.Replace rejects an empty search string, so skip unusable entries.
        if ([string]::IsNullOrEmpty($originalUrl)) {
            continue
        }
        $modifiedContent = $modifiedContent.Replace($originalUrl, [string]$link.FixedUrl)
    }

    # Only write if changes were made
    if ($modifiedContent -ceq $content) {
        return $false
    }

    try {
        [System.IO.File]::WriteAllText($resolvedPath, $modifiedContent, $encoding)
        return $true
    }
    catch {
        Write-Verbose "Could not write to $FilePath`: $_"
        return $false
    }
}
| 193 | |
function Repair-AllLink {
    <#
    .SYNOPSIS
    Apply link fixes to every file that has findings.

    .DESCRIPTION
    Buckets the link objects by their File property and hands each bucket to
    Repair-LinksInFile, counting how many of those calls report a modification.

    .PARAMETER AllLinks
    Link objects collected across all scanned files

    .OUTPUTS
    System.Int32
    Count of files for which Repair-LinksInFile returned a true value
    #>

    [CmdletBinding()]
    param(
        [PSCustomObject[]]$AllLinks
    )

    $modifiedCount = 0

    # One group per distinct file; the group name is the file path.
    $AllLinks | Group-Object -Property File | ForEach-Object {
        $targetFile = $_.Name

        Write-Verbose "Fixing links in $targetFile..."

        $wasChanged = Repair-LinksInFile -FilePath $targetFile -Links $_.Group
        if ($wasChanged) {
            $modifiedCount++
        }
    }

    return $modifiedCount
}
| 233 | |
function ConvertTo-JsonOutput {
    <#
    .SYNOPSIS
    Shape link objects for JSON reporting.

    .DESCRIPTION
    Projects every link onto a new object with snake_case property names,
    leaving out the FixedUrl field.

    .PARAMETER Links
    The link objects to project; each exposes File, LineNumber and OriginalUrl

    .OUTPUTS
    System.Object[]
    One object per input link, each containing:
    - file: The file path
    - line_number: The line number where the link appears
    - original_url: The original URL
    #>

    [CmdletBinding()]
    param(
        [PSCustomObject[]]$Links
    )

    # Collect the projected objects straight from the loop instead of appending one by one.
    $projected = @(
        foreach ($entry in $Links) {
            [PSCustomObject]@{
                file         = $entry.File
                line_number  = $entry.LineNumber
                original_url = $entry.OriginalUrl
            }
        }
    )

    return $projected
}
| 269 | |
# Main script execution
#
# Flow: list git-tracked text files, scan each for 'en-us' URLs, then either
# rewrite the files (-Fix) or print the findings as a JSON array.
try {
    # [CmdletBinding()] does not create a $Verbose variable; passing -Verbose
    # changes $VerbosePreference instead, so derive the flag from that.
    $Verbose = $VerbosePreference -ne 'SilentlyContinue'

    if ($Verbose) {
        Write-Information "Getting list of git-tracked text files..." -InformationAction Continue
    }

    # @() guarantees an array, so .Count is meaningful for zero or one file.
    $files = @(Get-GitTextFile)

    if ($Verbose) {
        Write-Information "Found $($files.Count) git-tracked text files" -InformationAction Continue
    }

    # Collect findings straight from the loop so files without findings add nothing.
    $allLinks = @(
        foreach ($filePath in $files) {
            # -LiteralPath: file names may contain '[' or ']', which -Path treats as wildcards.
            if (-not (Test-Path -LiteralPath $filePath -PathType Leaf)) {
                if ($Verbose) {
                    Write-Warning "Skipping $filePath`: not a regular file"
                }
                continue
            }

            Write-Verbose "Processing $filePath..."

            Find-LinksInFile -FilePath $filePath
        }
    )

    # Report findings
    if ($allLinks.Count -gt 0) {
        if ($Fix) {
            # Human-readable output when fixing links
            if ($Verbose) {
                Write-Information "`nFound $($allLinks.Count) URLs containing 'en-us':`n" -InformationAction Continue
                foreach ($linkInfo in $allLinks) {
                    Write-Information "File: $($linkInfo.File), Line: $($linkInfo.LineNumber)" -InformationAction Continue
                    Write-Information " URL: $($linkInfo.OriginalUrl)" -InformationAction Continue
                    Write-Information "" -InformationAction Continue
                }
            }

            $filesModified = Repair-AllLink -AllLinks $allLinks
            Write-Output "Fixed $($allLinks.Count) URLs in $filesModified files."

            if ($Verbose) {
                Write-Information "`nDetails of fixes:" -InformationAction Continue
                foreach ($linkInfo in $allLinks) {
                    Write-Information "File: $($linkInfo.File), Line: $($linkInfo.LineNumber)" -InformationAction Continue
                    Write-Information " Original: $($linkInfo.OriginalUrl)" -InformationAction Continue
                    Write-Information " Fixed: $($linkInfo.FixedUrl)" -InformationAction Continue
                    Write-Information "" -InformationAction Continue
                }
            }
        }
        else {
            # JSON output when not fixing links. Passing an array through
            # -InputObject keeps the result a JSON array even for a single
            # finding; piping one object would serialize it as a bare object.
            $jsonOutput = @(ConvertTo-JsonOutput -Links $allLinks)
            Write-Output (ConvertTo-Json -InputObject $jsonOutput -Depth 3)
        }
    }
    else {
        if (-not $Fix) {
            # Empty JSON array if no links found
            Write-Output "[]"
        }
        else {
            Write-Output "No URLs containing 'en-us' were found."
        }
    }
}
catch {
    Write-Error "An error occurred: $_"
    exit 1
}
| 346 | |
| 347 | |