arche / internal/cli/cmd_gc.go

commit 154431fd
  1package cli
  2
  3import (
  4	"fmt"
  5	"os"
  6	"strings"
  7
  8	"arche/internal/store"
  9
 10	"github.com/spf13/cobra"
 11)
 12
 13var (
 14	gcRetentionDays int
 15	gcTrainDict     bool
 16)
 17
 18var gcCmd = &cobra.Command{
 19	Use:   "gc",
 20	Short: "Run garbage collection on the repository",
 21	Long: `Mark-and-sweep garbage collection that removes objects no longer reachable
 22from any bookmark, HEAD, or operation log entry.
 23
 24GC proceeds in four steps:
 25  1. Collect roots: all bookmark commits, current HEAD commit, and every commit
 26     hash ever recorded in the operation log (to preserve full undo history).
 27  2. Mark: traverse the commit/tree/blob DAG from every root and mark all
 28     reachable objects as live.
 29  3. Sweep: delete any SQLite-stored object not in the live set.
 30  4. Repack: copy only live pack-file blobs into new pack files, update
 31     pack_index atomically, and delete the old pack files.
 32
 33ObsoleteMarkers are retained if their predecessor commit is still live, or if
 34they were created within --retention days (default 90). Older markers whose
 35predecessor has been swept are also removed.`,
 36	RunE: func(cmd *cobra.Command, args []string) error {
 37		r := openRepo()
 38		defer r.Close()
 39
 40		gcer, ok := r.Store.(store.GCer)
 41		if !ok {
 42			return fmt.Errorf("store does not support GC")
 43		}
 44
 45		isatty := isTerminal(os.Stdout)
 46		var lastPhase string
 47
 48		progress := func(phase string, done, total int) {
 49			if !isatty {
 50				if phase != lastPhase {
 51					lastPhase = phase
 52					switch phase {
 53					case "roots":
 54						fmt.Println("Collecting roots...")
 55					case "mark":
 56						fmt.Println("Marking reachable objects...")
 57					case "sweep":
 58						fmt.Println("Sweeping unreachable objects...")
 59					case "repack":
 60						fmt.Printf("Repacking: %d/%d blobs\n", done, total)
 61					}
 62				}
 63				return
 64			}
 65
 66			var line string
 67			switch phase {
 68			case "roots":
 69				line = "  Collecting roots …"
 70			case "mark":
 71				if total > 0 {
 72					line = fmt.Sprintf("  Marking objects   %s  %d/%d roots",
 73						progressBar(done, total, 24), done, total)
 74				} else {
 75					line = "  Marking objects …"
 76				}
 77			case "sweep":
 78				line = "  Sweeping objects …"
 79			case "repack":
 80				if total > 0 {
 81					line = fmt.Sprintf("  Repacking blobs   %s  %d/%d",
 82						progressBar(done, total, 24), done, total)
 83				} else {
 84					line = "  Repacking blobs …"
 85				}
 86			}
 87			fmt.Printf("\r\033[K%s", line)
 88		}
 89
 90		if isatty {
 91			fmt.Printf("GC > retention %d days\n", gcRetentionDays)
 92		} else {
 93			fmt.Printf("Running garbage collection (retention: %d days)...\n", gcRetentionDays)
 94		}
 95
 96		stats, err := gcer.GC(gcRetentionDays, progress)
 97		if err != nil {
 98			if isatty {
 99				fmt.Println()
100			}
101			return err
102		}
103
104		if isatty {
105			fmt.Println()
106		}
107
108		if stats.ObjectsDeleted == 0 && stats.PackEntriesDeleted == 0 {
109			fmt.Println("Nothing to collect — repository is already clean.")
110			return nil
111		}
112
113		fmt.Printf("Objects removed:       %d\n", stats.ObjectsDeleted)
114		fmt.Printf("Pack entries removed:  %d\n", stats.PackEntriesDeleted)
115		if stats.PackFilesRebuilt > 0 {
116			fmt.Printf("Pack files rebuilt:    %d\n", stats.PackFilesRebuilt)
117		}
118		if stats.BytesFreed > 0 {
119			fmt.Printf("Disk space freed:      %s\n", formatBytes(stats.BytesFreed))
120		}
121
122		if gcTrainDict {
123			dt, ok := r.Store.(store.DictTrainer)
124			if !ok {
125				return fmt.Errorf("store does not support dictionary training")
126			}
127			if isatty {
128				fmt.Print("Training zstd dictionary from sampled blobs…")
129			} else {
130				fmt.Println("Training zstd dictionary...")
131			}
132			if err := dt.TrainAndSaveDict(); err != nil {
133				if isatty {
134					fmt.Println()
135				}
136				return fmt.Errorf("dict training: %w", err)
137			}
138			if isatty {
139				fmt.Println(" done")
140			} else {
141				fmt.Println("Dictionary saved. Future compressions will use it.")
142			}
143		}
144		return nil
145	},
146}
147
148func init() {
149	gcCmd.Flags().IntVar(&gcRetentionDays, "retention", 90,
150		"Keep ObsoleteMarkers created within this many days even if predecessor is unreachable")
151	gcCmd.Flags().BoolVar(&gcTrainDict, "train-dict", false,
152		"Train a zstd compression dictionary from sampled blobs after GC (zstd only)")
153}
154
// progressBar renders a textual progress bar of the form "[████░░░░]".
//
// width is the number of cells between the brackets; a negative width is
// treated as 0. The number of filled cells is done*width/total (integer
// division), clamped to the range [0, width] so that out-of-range progress
// values (done < 0 or done > total) never produce a negative repeat count,
// which would make strings.Repeat panic. When total is not positive the bar
// is rendered completely empty.
func progressBar(done, total, width int) string {
	if width < 0 {
		width = 0
	}

	filled := 0
	if total > 0 {
		filled = done * width / total
	}
	if filled < 0 {
		filled = 0
	}
	if filled > width {
		filled = width
	}

	return "[" + strings.Repeat("█", filled) + strings.Repeat("░", width-filled) + "]"
}
165
// formatBytes renders a byte count using binary units. Values of at least
// 1 KiB are shown with one decimal place in the largest unit (KiB, MiB or
// GiB) that does not exceed them; anything smaller, including negative
// values, is printed as a whole number of bytes.
func formatBytes(n int64) string {
	// Ordered from largest to smallest so the first match is the best fit.
	units := [...]struct {
		size   int64
		suffix string
	}{
		{1 << 30, "GiB"},
		{1 << 20, "MiB"},
		{1 << 10, "KiB"},
	}

	for _, u := range units {
		if n < u.size {
			continue
		}
		return fmt.Sprintf("%.1f %s", float64(n)/float64(u.size), u.suffix)
	}
	return fmt.Sprintf("%d B", n)
}