arche / internal/cli/cmd_grep.go

commit 154431fd
  1package cli
  2
  3import (
  4	"fmt"
  5	"regexp"
  6	"sort"
  7	"strings"
  8
  9	"arche/internal/diff"
 10	"arche/internal/object"
 11	"arche/internal/repo"
 12
 13	"github.com/spf13/cobra"
 14)
 15
// Command-line flag values for the grep command; they are bound to their
// flags in init.
var (
	// grepAt is the --at value: the commit/change reference to search.
	// When empty the command falls back to "@".
	grepAt      string
	// grepHistory is the --history switch: search the content of commits
	// reachable from HEAD instead of a single tree.
	grepHistory bool
	// grepPickaxe is the --pickaxe switch: list commits whose diff adds or
	// removes a matching line.
	grepPickaxe bool
	// grepFixed is the --fixed/-F switch: the pattern is escaped with
	// regexp.QuoteMeta so it matches literally.
	grepFixed   bool
)
 22
 23var grepCmd = &cobra.Command{
 24	Use:   "grep <pattern>",
 25	Short: "Search file content at a commit or across history",
 26	Args:  cobra.ExactArgs(1),
 27	RunE: func(cmd *cobra.Command, args []string) error {
 28		r := openRepo()
 29		defer r.Close()
 30
 31		pat := args[0]
 32		if grepFixed {
 33			pat = regexp.QuoteMeta(pat)
 34		}
 35		re, err := regexp.Compile(pat)
 36		if err != nil {
 37			return fmt.Errorf("invalid pattern: %w", err)
 38		}
 39
 40		switch {
 41		case grepHistory:
 42			return grepHistory_(r, re)
 43		case grepPickaxe:
 44			return grepPickaxe_(r, re)
 45		default:
 46			ref := grepAt
 47			if ref == "" {
 48				ref = "@"
 49			}
 50			commitID, err := resolveRef(r, ref)
 51			if err != nil {
 52				return err
 53			}
 54			c, err := r.ReadCommit(commitID)
 55			if err != nil {
 56				return err
 57			}
 58			return grepTree(r, c.TreeID, re)
 59		}
 60	},
 61}
 62
 63func grepTree(r *repo.Repo, treeID [32]byte, re *regexp.Regexp) error {
 64	blobs := make(map[string][32]byte)
 65	if err := grepFlattenTree(r, treeID, "", blobs); err != nil {
 66		return err
 67	}
 68
 69	paths := make([]string, 0, len(blobs))
 70	for p := range blobs {
 71		paths = append(paths, p)
 72	}
 73	sort.Strings(paths)
 74
 75	found := false
 76	for _, path := range paths {
 77		blobID := blobs[path]
 78		_, raw, err := r.Store.ReadObject(blobID)
 79		if err != nil {
 80			continue
 81		}
 82		b, err := object.DecodeBlob(raw)
 83		if err != nil {
 84			continue
 85		}
 86		content := string(b.Content)
 87		if strings.ContainsRune(content, 0) {
 88			continue
 89		}
 90		for i, line := range strings.Split(content, "\n") {
 91			if re.MatchString(line) {
 92				fmt.Printf("%s:%d:%s\n", path, i+1, line)
 93				found = true
 94			}
 95		}
 96	}
 97	if !found {
 98		return fmt.Errorf("no matches found")
 99	}
100	return nil
101}
102
103func grepFlattenTree(r *repo.Repo, treeID [32]byte, prefix string, out map[string][32]byte) error {
104	if treeID == object.ZeroID {
105		return nil
106	}
107	t, err := r.ReadTree(treeID)
108	if err != nil {
109		return err
110	}
111	for _, e := range t.Entries {
112		rel := e.Name
113		if prefix != "" {
114			rel = prefix + "/" + e.Name
115		}
116		switch e.Mode {
117		case object.ModeDir:
118			if err := grepFlattenTree(r, e.ObjectID, rel, out); err != nil {
119				return err
120			}
121		default:
122			out[rel] = e.ObjectID
123		}
124	}
125	return nil
126}
127
128func grepHistory_(r *repo.Repo, re *regexp.Regexp) error {
129	_, headID, err := r.HeadCommit()
130	if err != nil {
131		return err
132	}
133
134	type blobSeen struct {
135		path     string
136		changeID string
137		msg      string
138	}
139
140	seen := make(map[[32]byte]bool)
141	blobIndex := make(map[[32]byte]blobSeen)
142
143	queue := [][32]byte{headID}
144	seenCommit := make(map[[32]byte]bool)
145
146	for len(queue) > 0 {
147		id := queue[0]
148		queue = queue[1:]
149		if seenCommit[id] {
150			continue
151		}
152		seenCommit[id] = true
153
154		c, err := r.ReadCommit(id)
155		if err != nil {
156			continue
157		}
158
159		blobs := make(map[string][32]byte)
160		if err := grepFlattenTree(r, c.TreeID, "", blobs); err != nil {
161			continue
162		}
163		for path, blobID := range blobs {
164			if !seen[blobID] {
165				seen[blobID] = true
166				blobIndex[blobID] = blobSeen{path: path, changeID: c.ChangeID, msg: bisectFirstLine(c.Message)}
167			}
168		}
169
170		for _, p := range c.Parents {
171			if !seenCommit[p] {
172				queue = append(queue, p)
173			}
174		}
175	}
176
177	found := false
178	for blobID, info := range blobIndex {
179		content, err := r.ReadBlob(blobID)
180		if err != nil {
181			continue
182		}
183		if strings.ContainsRune(string(content), 0) {
184			continue
185		}
186		for i, line := range strings.Split(string(content), "\n") {
187			if re.MatchString(line) {
188				fmt.Printf("ch:%-8s  %s:%d:%s\n", info.changeID[:8], info.path, i+1, line)
189				found = true
190			}
191		}
192	}
193	if !found {
194		return fmt.Errorf("no matches found in history")
195	}
196	return nil
197}
198
199func grepPickaxe_(r *repo.Repo, re *regexp.Regexp) error {
200	_, headID, err := r.HeadCommit()
201	if err != nil {
202		return err
203	}
204
205	var commits [][32]byte
206	seenCommit := make(map[[32]byte]bool)
207	queue := [][32]byte{headID}
208	for len(queue) > 0 {
209		id := queue[0]
210		queue = queue[1:]
211		if seenCommit[id] {
212			continue
213		}
214		seenCommit[id] = true
215		commits = append(commits, id)
216		c, err := r.ReadCommit(id)
217		if err != nil {
218			continue
219		}
220		for _, p := range c.Parents {
221			if !seenCommit[p] {
222				queue = append(queue, p)
223			}
224		}
225	}
226
227	found := false
228	for _, id := range commits {
229		c, err := r.ReadCommit(id)
230		if err != nil {
231			continue
232		}
233		if len(c.Parents) == 0 {
234			continue
235		}
236
237		parentC, err := r.ReadCommit(c.Parents[0])
238		if err != nil {
239			continue
240		}
241
242		fileDiffs, err := diff.TreeDiff(r, parentC.TreeID, c.TreeID)
243		if err != nil {
244			continue
245		}
246
247		var matchPaths []string
248		for _, fd := range fileDiffs {
249			for _, line := range strings.Split(fd.Patch, "\n") {
250				if len(line) < 1 {
251					continue
252				}
253				ch := line[0]
254				if (ch == '+' || ch == '-') && !strings.HasPrefix(line, "---") && !strings.HasPrefix(line, "+++") {
255					if re.MatchString(line[1:]) {
256						matchPaths = append(matchPaths, fd.Path)
257						break
258					}
259				}
260			}
261		}
262		sort.Strings(matchPaths)
263
264		if len(matchPaths) > 0 {
265			fmt.Printf("ch:%-8s  %s\n", c.ChangeID[:8], bisectFirstLine(c.Message))
266			for _, p := range matchPaths {
267				fmt.Printf("    %s\n", p)
268			}
269			found = true
270		}
271	}
272	if !found {
273		return fmt.Errorf("no commits added or removed a matching line")
274	}
275	return nil
276}
277
278func init() {
279	grepCmd.Flags().StringVar(&grepAt, "at", "", "search at a specific commit/change ID (default: current HEAD)")
280	grepCmd.Flags().BoolVar(&grepHistory, "history", false, "search across all commits reachable from HEAD")
281	grepCmd.Flags().BoolVar(&grepPickaxe, "pickaxe", false, "find commits that added or removed a matching line")
282	grepCmd.Flags().BoolVarP(&grepFixed, "fixed", "F", false, "treat pattern as a literal fixed string (not regex)")
283}