cloudflare/pint
Public · mirrored from https://github.com/cloudflare/pint · Available
internal/git/changes.go
390 lines · mode: code
| 1 | package git |
| 2 | |
| 3 | import ( |
| 4 | "bufio" |
| 5 | "bytes" |
| 6 | "fmt" |
| 7 | "log/slog" |
| 8 | "os" |
| 9 | "path" |
| 10 | "slices" |
| 11 | "strings" |
| 12 | ) |
| 13 | |
// FileStatus is the one-letter status code git reports for a changed path.
// Changes derives it from the first byte of the status column of
// `git log --name-status` output.
type FileStatus rune

// Status letters handled by Changes.
const (
	FileAdded       FileStatus = 'A'
	FileCopied      FileStatus = 'C'
	FileDeleted     FileStatus = 'D'
	FileRenamed     FileStatus = 'R'
	FileModified    FileStatus = 'M'
	FileTypeChanged FileStatus = 'T'
)
| 24 | |
// PathType classifies what a path points at in a given commit, as reported by
// getTypeForPath from `git ls-tree` output.
type PathType uint8

const (
	// Missing is the zero value: the path was not found or git returned an error.
	Missing PathType = iota
	// Dir is a tree object.
	Dir
	// File is a blob whose mode is not 120000.
	File
	// Symlink is a blob with mode 120000.
	Symlink
)
| 33 | |
// TypeDiff holds a pair of PathType values labelled before and after.
// NOTE(review): it is not referenced anywhere in the visible part of this
// file; confirm its users before changing it.
type TypeDiff struct {
	Before PathType
	After  PathType
}
| 38 | |
// BodyDiff carries the content of a path on both sides of a change.
type BodyDiff struct {
	// Before is the blob content at the parent of the first commit of the
	// change; nil when it could not be read.
	Before []byte
	// After is the blob content at the last commit of the change; nil when it
	// could not be read or the file was deleted.
	After []byte
	// ModifiedLines lists the 1-based line numbers considered modified.
	// Changes fills it from git blame data or, for wholesale changes, with
	// every line number of the relevant body.
	ModifiedLines []int
}
| 44 | |
| 45 | type Path struct { |
| 46 | Name string |
| 47 | SymlinkTarget string |
| 48 | Type PathType |
| 49 | } |
| 50 | |
| 51 | func (p Path) EffectivePath() string { |
| 52 | if p.SymlinkTarget != "" && p.Name != p.SymlinkTarget { |
| 53 | return p.SymlinkTarget |
| 54 | } |
| 55 | return p.Name |
| 56 | } |
| 57 | |
// PathDiff pairs the state of a path before and after a change.
type PathDiff struct {
	// Before is the path as it was at the parent of the first commit.
	Before Path
	// After is the path as it is at the last commit of the change.
	After Path
}
| 62 | |
// FileChange is the accumulated change to a single file across one or more
// commits, as produced by Changes.
type FileChange struct {
	// Path holds the path information on both sides of the change.
	Path PathDiff
	// Body holds the file content on both sides plus the modified line numbers.
	Body BodyDiff
	// Commits lists the commits that touched the file, in the order git log
	// emitted them (oldest first, since Changes passes --reverse).
	Commits []string
	// Status is the status letter from the most recent git log entry for the file.
	Status FileStatus
}
| 69 | |
| 70 | func Changes(cmd CommandRunner, baseBranch string, filter PathFilter) ([]*FileChange, error) { |
| 71 | out, err := cmd("log", "--reverse", "--no-merges", "--first-parent", "--format=%H", "--name-status", baseBranch+"..HEAD") |
| 72 | if err != nil { |
| 73 | return nil, fmt.Errorf("failed to get the list of modified files from git: %w", err) |
| 74 | } |
| 75 | |
| 76 | var changes []*FileChange |
| 77 | var commit string |
| 78 | s := bufio.NewScanner(bytes.NewReader(out)) |
| 79 | for s.Scan() { |
| 80 | line := s.Text() |
| 81 | |
| 82 | parts := strings.Split(line, "\t") |
| 83 | |
| 84 | if len(parts) == 0 { |
| 85 | continue |
| 86 | } |
| 87 | |
| 88 | if len(parts) == 1 { |
| 89 | if parts[0] != "" { |
| 90 | commit = parts[0] |
| 91 | } |
| 92 | continue |
| 93 | } |
| 94 | |
| 95 | status := FileStatus(parts[0][0]) |
| 96 | srcPath := parts[1] |
| 97 | dstPath := parts[len(parts)-1] |
| 98 | slog.Debug("Git file change", slog.String("change", parts[0]), slog.String("path", dstPath), slog.String("commit", commit)) |
| 99 | |
| 100 | if !filter.IsPathAllowed(dstPath) { |
| 101 | slog.Debug("Skipping file due to include/exclude rules", slog.String("path", dstPath)) |
| 102 | continue |
| 103 | } |
| 104 | |
| 105 | // This should never really happen since git doesn't track directories, only files. |
| 106 | if isDir, _ := isDirectoryPath(dstPath); isDir { |
| 107 | slog.Debug("Skipping directory entry change", slog.String("path", dstPath)) |
| 108 | continue |
| 109 | } |
| 110 | |
| 111 | // Rest is populated inside the next loop. |
| 112 | change := &FileChange{ // nolint: exhaustruct |
| 113 | Status: status, |
| 114 | Path: PathDiff{ // nolint: exhaustruct |
| 115 | After: Path{ // nolint: exhaustruct |
| 116 | Name: dstPath, |
| 117 | }, |
| 118 | }, |
| 119 | } |
| 120 | |
| 121 | prev := getChangeByPath(changes, srcPath) |
| 122 | slog.Debug("Looking for previous changes", |
| 123 | slog.String("src", srcPath), |
| 124 | slog.String("dst", dstPath), |
| 125 | slog.String("commit", commit), |
| 126 | ) |
| 127 | if prev != nil { |
| 128 | slog.Debug("Found a previous change", |
| 129 | slog.Any("commits", prev.Commits), |
| 130 | slog.String("status", string(prev.Status)), |
| 131 | slog.String("path", prev.Path.Before.Name), |
| 132 | slog.String("target", prev.Path.Before.SymlinkTarget), |
| 133 | slog.Any("type", prev.Path.Before.Type), |
| 134 | ) |
| 135 | change.Commits = append(change.Commits, prev.Commits...) |
| 136 | change.Path.Before = prev.Path.Before |
| 137 | // Remove any changes for "BEFORE" path we might already have |
| 138 | changes = changesWithout(changes, srcPath) |
| 139 | } else { |
| 140 | slog.Debug("No previous change found") |
| 141 | switch change.Status { |
| 142 | case FileAdded, FileCopied: |
| 143 | change.Path.Before.Name = "" |
| 144 | change.Path.Before.SymlinkTarget = "" |
| 145 | // If a path changed type we'll see A but we can still query for old type. |
| 146 | change.Path.Before.Type = getTypeForPath(cmd, commit+"^", srcPath) |
| 147 | if change.Path.Before.Type != Missing { |
| 148 | // If it was a type change then |
| 149 | change.Path.Before.Name = srcPath |
| 150 | change.Path.Before.Type = getTypeForPath(cmd, commit+"^", srcPath) |
| 151 | } |
| 152 | case FileDeleted, FileRenamed, FileModified, FileTypeChanged: |
| 153 | change.Path.Before.Name = srcPath |
| 154 | change.Path.Before.Type = getTypeForPath(cmd, commit+"^", srcPath) |
| 155 | change.Path.Before.SymlinkTarget = resolveSymlinkTarget(cmd, commit+"^", srcPath, change.Path.Before.Type) |
| 156 | } |
| 157 | } |
| 158 | |
| 159 | change.Commits = append(change.Commits, commit) |
| 160 | |
| 161 | changes = append(changes, change) |
| 162 | } |
| 163 | |
| 164 | slog.Debug("Parsed git log", slog.Int("changes", len(changes))) |
| 165 | |
| 166 | for _, change := range changes { |
| 167 | slog.Debug( |
| 168 | "File change", |
| 169 | slog.Any("commits", change.Commits), |
| 170 | slog.String("status", string(change.Status)), |
| 171 | slog.String("before", change.Path.Before.Name), |
| 172 | slog.String("after", change.Path.After.Name), |
| 173 | ) |
| 174 | |
| 175 | if change.Path.Before.Name != "" { |
| 176 | change.Path.Before.Type = getTypeForPath(cmd, change.Commits[0]+"^", change.Path.Before.Name) |
| 177 | change.Path.Before.SymlinkTarget = resolveSymlinkTarget(cmd, change.Commits[0]+"^", change.Path.Before.Name, change.Path.Before.Type) |
| 178 | change.Body.Before = getContentAtCommit(cmd, change.Commits[0]+"^", change.Path.Before.EffectivePath()) |
| 179 | } |
| 180 | |
| 181 | lastCommit := change.Commits[len(change.Commits)-1] |
| 182 | if change.Path.After.Name != "" && change.Status != FileDeleted { |
| 183 | change.Path.After.Type = getTypeForPath(cmd, lastCommit, change.Path.After.Name) |
| 184 | change.Path.After.SymlinkTarget = resolveSymlinkTarget(cmd, lastCommit, change.Path.After.Name, change.Path.After.Type) |
| 185 | change.Body.After = getContentAtCommit(cmd, lastCommit, change.Path.After.EffectivePath()) |
| 186 | } |
| 187 | |
| 188 | slog.Debug( |
| 189 | "Updated file change", |
| 190 | slog.Any("commits", change.Commits), |
| 191 | slog.String("before.path", change.Path.Before.Name), |
| 192 | slog.String("before.target", change.Path.Before.SymlinkTarget), |
| 193 | slog.Any("before.type", change.Path.Before.Type), |
| 194 | slog.String("before.body", string(change.Body.Before)), |
| 195 | slog.String("after.path", change.Path.After.Name), |
| 196 | slog.String("after.target", change.Path.After.SymlinkTarget), |
| 197 | slog.Any("after.type", change.Path.After.Type), |
| 198 | slog.String("after.body", string(change.Body.After)), |
| 199 | slog.Any("modifiedLines", change.Body.ModifiedLines), |
| 200 | ) |
| 201 | |
| 202 | switch { |
| 203 | case change.Path.Before.Type != Missing && change.Path.After.Type == Symlink: |
| 204 | slog.Debug("File was turned into a symlink", slog.String("path", change.Path.After.Name)) |
| 205 | change.Body.ModifiedLines = CountLines(change.Body.After) |
| 206 | case change.Path.Before.Type != Missing && change.Path.After.Type != Missing && change.Path.After.Type != Symlink: |
| 207 | change.Body.ModifiedLines, err = getModifiedLines(cmd, change.Commits, change.Path.After.EffectivePath(), lastCommit, change.Body.Before, change.Body.After) |
| 208 | if err != nil { |
| 209 | return nil, fmt.Errorf("failed to run git blame for %s: %w", change.Path.After.EffectivePath(), err) |
| 210 | } |
| 211 | if len(change.Body.ModifiedLines) == 0 && change.Path.Before.EffectivePath() != change.Path.After.EffectivePath() { |
| 212 | // File was moved or renamed. Mark it all as modified. |
| 213 | change.Body.ModifiedLines = CountLines(change.Body.After) |
| 214 | slog.Debug("File was moved or renamed", slog.String("path", change.Path.After.Name)) |
| 215 | } else { |
| 216 | slog.Debug("File was modified", slog.String("path", change.Path.After.Name), slog.Any("lines", change.Body.ModifiedLines)) |
| 217 | } |
| 218 | case change.Path.Before.Type == Symlink && change.Path.After.Type == Symlink: |
| 219 | slog.Debug("Symlink was modified", slog.String("path", change.Path.After.Name)) |
| 220 | // symlink was modified, every source line is modification |
| 221 | change.Body.ModifiedLines = CountLines(change.Body.After) |
| 222 | case change.Path.Before.Type == Missing && change.Path.After.Type != Missing: |
| 223 | slog.Debug("File was added", slog.String("path", change.Path.After.Name)) |
| 224 | // old file body is empty, meaning that every line was modified |
| 225 | change.Body.ModifiedLines = CountLines(change.Body.After) |
| 226 | case change.Path.Before.Type != Missing && change.Path.After.Type == Missing: |
| 227 | slog.Debug("File was removed", slog.String("path", change.Path.After.Name)) |
| 228 | // new file body is empty, meaning that every line was modified |
| 229 | change.Body.ModifiedLines = CountLines(change.Body.Before) |
| 230 | case change.Path.Before.Type == Missing && change.Path.After.Type == Missing: |
| 231 | slog.Debug("File was added and removed", slog.String("path", change.Path.After.Name)) |
| 232 | // file was added and then removed |
| 233 | change.Body.ModifiedLines = []int{} |
| 234 | default: |
| 235 | slog.Warn("Unhandled change", slog.String("change", fmt.Sprintf("+%v", change))) |
| 236 | } |
| 237 | |
| 238 | if change.Path.Before.Name == change.Path.Before.SymlinkTarget { |
| 239 | change.Path.Before.SymlinkTarget = "" |
| 240 | } |
| 241 | if change.Path.After.Name == change.Path.After.SymlinkTarget { |
| 242 | change.Path.After.SymlinkTarget = "" |
| 243 | } |
| 244 | } |
| 245 | |
| 246 | return changes, nil |
| 247 | } |
| 248 | |
| 249 | func changesWithout(changes []*FileChange, fpath string) []*FileChange { |
| 250 | return slices.DeleteFunc(changes, func(e *FileChange) bool { |
| 251 | return e.Path.After.Name == fpath |
| 252 | }) |
| 253 | } |
| 254 | |
| 255 | func getChangeByPath(changes []*FileChange, fpath string) *FileChange { |
| 256 | for _, c := range changes { |
| 257 | if c.Path.After.Name == fpath { |
| 258 | return c |
| 259 | } |
| 260 | } |
| 261 | return nil |
| 262 | } |
| 263 | |
| 264 | func getModifiedLines(cmd CommandRunner, commits []string, fpath, atCommit string, bodyBefore, bodyAfter []byte) ([]int, error) { |
| 265 | slog.Debug("Getting list of modified lines", |
| 266 | slog.Any("commits", commits), |
| 267 | slog.String("path", fpath), |
| 268 | ) |
| 269 | lines, err := Blame(cmd, fpath, atCommit) |
| 270 | if err != nil { |
| 271 | return nil, err |
| 272 | } |
| 273 | |
| 274 | linesBefore := bytes.Split(bodyBefore, []byte("\n")) |
| 275 | linesAfter := bytes.Split(bodyAfter, []byte("\n")) |
| 276 | slog.Debug("Number of lines", slog.Int("before", len(linesBefore)), slog.Int("after", len(linesAfter))) |
| 277 | |
| 278 | modLines := make([]int, 0, len(lines)) |
| 279 | for _, line := range lines { |
| 280 | slog.Debug("Checking line", slog.String("commit", line.Commit), slog.Int("prev", line.PrevLine), slog.Int("line", line.Line)) |
| 281 | if !slices.Contains(commits, line.Commit) { |
| 282 | continue |
| 283 | } |
| 284 | |
| 285 | if line.PrevLine <= len(linesBefore) && line.Line <= len(linesAfter) { |
| 286 | slog.Debug("Checking line content", slog.String("before", string(linesBefore[line.PrevLine-1])), slog.String("after", string(linesAfter[line.Line-1]))) |
| 287 | if bytes.Equal(linesBefore[line.PrevLine-1], linesAfter[line.Line-1]) { |
| 288 | continue |
| 289 | } |
| 290 | } |
| 291 | |
| 292 | modLines = append(modLines, line.Line) |
| 293 | } |
| 294 | slog.Debug("List of modified lines", |
| 295 | slog.Any("commits", commits), |
| 296 | slog.String("path", fpath), |
| 297 | slog.Any("lines", modLines), |
| 298 | ) |
| 299 | return modLines, nil |
| 300 | } |
| 301 | |
| 302 | func getTypeForPath(cmd CommandRunner, commit, fpath string) PathType { |
| 303 | args := []string{"ls-tree", commit, fpath} |
| 304 | out, err := cmd(args...) |
| 305 | if err != nil { |
| 306 | slog.Debug("git command returned an error", slog.Any("err", err), slog.String("args", fmt.Sprint(args))) |
| 307 | return Missing |
| 308 | } |
| 309 | |
| 310 | s := bufio.NewScanner(bytes.NewReader(out)) |
| 311 | for s.Scan() { |
| 312 | parts := strings.SplitN(s.Text(), " ", 3) |
| 313 | if len(parts) != 3 { |
| 314 | continue |
| 315 | } |
| 316 | objmode := parts[0] |
| 317 | objtype := parts[1] |
| 318 | |
| 319 | parts = strings.SplitN(parts[2], "\t", 2) |
| 320 | if len(parts) != 2 { |
| 321 | continue |
| 322 | } |
| 323 | objpath := parts[1] |
| 324 | slog.Debug("ls-tree line", |
| 325 | slog.String("mode", objmode), |
| 326 | slog.String("type", objtype), |
| 327 | slog.String("path", objpath), |
| 328 | ) |
| 329 | |
| 330 | // not our file |
| 331 | if objpath != fpath { |
| 332 | continue |
| 333 | } |
| 334 | if objtype == "tree" { |
| 335 | return Dir |
| 336 | } |
| 337 | // not a blob - could be a tree or a tag |
| 338 | if objtype != "blob" { |
| 339 | continue |
| 340 | } |
| 341 | |
| 342 | if objmode == "120000" { |
| 343 | return Symlink |
| 344 | } |
| 345 | |
| 346 | return File |
| 347 | } |
| 348 | |
| 349 | return Missing |
| 350 | } |
| 351 | |
| 352 | // recursively find the final target of a symlink. |
| 353 | func resolveSymlinkTarget(cmd CommandRunner, commit, fpath string, typ PathType) string { |
| 354 | if typ != Symlink { |
| 355 | return fpath |
| 356 | } |
| 357 | raw := string(getContentAtCommit(cmd, commit, fpath)) |
| 358 | spath := path.Clean(path.Join(path.Dir(fpath), raw)) |
| 359 | stype := getTypeForPath(cmd, commit, spath) |
| 360 | return resolveSymlinkTarget(cmd, commit, spath, stype) |
| 361 | } |
| 362 | |
| 363 | func getContentAtCommit(cmd CommandRunner, commit, fpath string) []byte { |
| 364 | args := []string{"cat-file", "blob", fmt.Sprintf("%s:%s", commit, fpath)} |
| 365 | body, err := cmd(args...) |
| 366 | if err != nil { |
| 367 | slog.Debug("git command returned an error", slog.Any("err", err), slog.String("args", fmt.Sprint(args))) |
| 368 | return nil |
| 369 | } |
| 370 | return body |
| 371 | } |
| 372 | |
// CountLines returns the 1-based line numbers of every line in body, i.e.
// [1, 2, ..., n] for a body of n lines. A final line without a trailing
// newline counts as a line; an empty body yields nil.
//
// Lines are counted from the newline bytes directly rather than with
// bufio.Scanner, which stops silently at the first line longer than
// bufio.MaxScanTokenSize and would drop that line and everything after it.
func CountLines(body []byte) (lines []int) {
	total := bytes.Count(body, []byte{'\n'})
	// Unterminated last line.
	if len(body) > 0 && body[len(body)-1] != '\n' {
		total++
	}
	if total == 0 {
		return nil
	}

	lines = make([]int, total)
	for i := range lines {
		lines[i] = i + 1
	}
	return lines
}
| 382 | |
// isDirectoryPath reports whether path exists on the local filesystem and is a
// directory. When the path cannot be stat'ed it returns false together with
// the error from os.Stat.
func isDirectoryPath(path string) (bool, error) {
	info, statErr := os.Stat(path)
	if statErr == nil {
		return info.IsDir(), nil
	}
	return false, statErr
}
| 391 | |