mirror of
https://gitlab.com/gitlab-org/gitlab-foss.git
synced 2025-07-25 16:03:48 +00:00
284 lines
6.8 KiB
Go
284 lines
6.8 KiB
Go
package git
|
|
|
|
import (
|
|
"io/fs"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"gitlab.com/gitlab-org/gitlab/workhorse/internal/log"
|
|
)
|
|
|
|
const (
|
|
// lastModifiedTimeMins is the oldest duration a file archive can be
|
|
lastModifiedTimeMins = 120
|
|
// scanPeriod is the period when the archive cleaner will run
|
|
scanPeriod = time.Hour
|
|
|
|
// For `/path/project-N/sha/@v2/archive.zip`, `find /path -maxdepth 4` will find this file
|
|
maxArchiveDepth = 4
|
|
)
|
|
|
|
// archiveCleaner removes cached repository archives
|
|
// that are generated on-the-fly by Gitaly. These files are stored in the
|
|
// following form (as defined in lib/gitlab/git/repository.rb) and served
|
|
// by GitLab Workhorse:
|
|
//
|
|
// /path/to/repository/downloads/project-N/sha/@v2/archive.format
|
|
//
|
|
// Legacy paths omit the @v2 prefix.
|
|
//
|
|
// For example:
|
|
//
|
|
// /var/opt/gitlab/gitlab-rails/shared/cache/archive/project-1/master/@v2/archive.zip
|
|
type archiveCleaner struct {
|
|
path string
|
|
enabled bool
|
|
shutdown chan struct{}
|
|
mutex sync.Mutex
|
|
}
|
|
|
|
func newArchiveCleaner() *archiveCleaner {
|
|
return &archiveCleaner{shutdown: make(chan struct{})}
|
|
}
|
|
|
|
// RegisterPath registers the root storage path for the archive cache.
|
|
func (s *archiveCleaner) RegisterPath(path string) {
|
|
s.mutex.Lock()
|
|
defer s.mutex.Unlock()
|
|
|
|
if path != "" {
|
|
absPath, err := filepath.Abs(path)
|
|
if err != nil {
|
|
log.WithFields(log.Fields{"storage_path": path}).WithError(err).Error("archive cleaner unable to resolve path")
|
|
return
|
|
}
|
|
path = absPath
|
|
}
|
|
|
|
switch {
|
|
case s.path == "":
|
|
log.WithFields(log.Fields{"storage_path": path}).Info("archive cleaner path registered")
|
|
case s.path == path:
|
|
return
|
|
default:
|
|
log.WithFields(log.Fields{"old_path": s.path, "new_path": path}).Error("archive cleaner path already set")
|
|
// Abort because this should have been set correctly the first time, and this avoids
|
|
// having to worry about race conditions with the already-running Goroutine.
|
|
if s.enabled {
|
|
return
|
|
}
|
|
}
|
|
|
|
s.path = path
|
|
|
|
if !s.enabled && s.isValidPath() {
|
|
log.Info("archive cleaner starting")
|
|
s.enabled = true
|
|
s.start()
|
|
}
|
|
}
|
|
|
|
// Shutdown gracefully stops the archive cleaner.
|
|
func (s *archiveCleaner) Shutdown() {
|
|
log.Info("archive cleaner: shutting down")
|
|
|
|
s.mutex.Lock()
|
|
defer s.mutex.Unlock()
|
|
|
|
select {
|
|
case <-s.shutdown:
|
|
// already closed
|
|
default:
|
|
close(s.shutdown)
|
|
}
|
|
}
|
|
|
|
func (s *archiveCleaner) isValidPath() bool {
|
|
return s.path != "" && s.path != "/" && isDirectory(s.path)
|
|
}
|
|
|
|
func (s *archiveCleaner) start() {
|
|
// Run Goroutine to clean up old archives
|
|
go func() {
|
|
ticker := time.NewTicker(scanPeriod)
|
|
defer ticker.Stop()
|
|
|
|
// Run once immediately
|
|
s.execute()
|
|
|
|
for {
|
|
select {
|
|
// Then run periodically
|
|
case <-ticker.C:
|
|
s.execute()
|
|
case <-s.shutdown:
|
|
// Received shutdown signal, exit the Goroutine
|
|
return
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
|
|
func (s *archiveCleaner) execute() {
|
|
s.cleanUpOldArchives()
|
|
s.cleanUpEmptyDirectories()
|
|
}
|
|
|
|
// cleanUpOldArchives removes archive files older than the specified time.
|
|
func (s *archiveCleaner) cleanUpOldArchives() {
|
|
cutoffTime := time.Now().Add(-time.Duration(lastModifiedTimeMins) * time.Minute)
|
|
|
|
err := filepath.WalkDir(s.path, func(path string, d fs.DirEntry, err error) error {
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
|
|
return s.cleanEntry(path, d, cutoffTime)
|
|
})
|
|
|
|
if err != nil {
|
|
log.WithError(err).Error("error walking archive cleaner path")
|
|
}
|
|
}
|
|
|
|
func (s *archiveCleaner) cleanEntry(path string, d fs.DirEntry, cutoffTime time.Time) error {
|
|
if d.IsDir() {
|
|
// Skip directories beyond our max depth
|
|
relPath, err := filepath.Rel(s.path, path)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
|
|
// Count depth (empty string splits to 1 for root path)
|
|
depth := len(strings.Split(relPath, string(os.PathSeparator)))
|
|
if depth > maxArchiveDepth {
|
|
return fs.SkipDir
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Process only archive files
|
|
if isArchiveFile(path) {
|
|
info, err := d.Info()
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
|
|
// Delete if older than cutoff
|
|
if info.ModTime().Before(cutoffTime) {
|
|
log.WithFields(log.Fields{"path": path, "modified_time": info.ModTime()}).Info("Removing old archive")
|
|
err := os.Remove(path)
|
|
|
|
if err != nil {
|
|
log.WithFields(log.Fields{"path": path, "error": err}).Error("failed to remove old archive")
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// cleanUpEmptyDirectories removes empty directories starting from the deepest level.
|
|
func (s *archiveCleaner) cleanUpEmptyDirectories() {
|
|
// We'll need to make multiple passes to handle nested empty directories
|
|
for depth := maxArchiveDepth - 1; depth >= 1; depth-- {
|
|
s.cleanUpEmptyDirectoriesWithDepth(depth)
|
|
}
|
|
}
|
|
|
|
// cleanUpEmptyDirectoriesWithDepth removes empty directories at a specific depth.
|
|
func (s *archiveCleaner) cleanUpEmptyDirectoriesWithDepth(targetDepth int) {
|
|
var dirsToCheck []string
|
|
|
|
// First collect directories at the right depth
|
|
err := filepath.WalkDir(s.path, func(path string, d fs.DirEntry, err error) error {
|
|
if err != nil || !d.IsDir() {
|
|
return nil
|
|
}
|
|
|
|
relPath, err := filepath.Rel(s.path, path)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
|
|
// Skip root directory
|
|
if relPath == "." {
|
|
return nil
|
|
}
|
|
|
|
// Count depth
|
|
depth := len(strings.Split(relPath, string(os.PathSeparator)))
|
|
if depth == targetDepth {
|
|
dirsToCheck = append(dirsToCheck, path)
|
|
return fs.SkipDir // Skip deeper directories
|
|
} else if depth > targetDepth {
|
|
return fs.SkipDir
|
|
}
|
|
|
|
return nil
|
|
})
|
|
|
|
if err != nil {
|
|
log.WithError(err).Error("error walking archiveCleaner path for empty directories")
|
|
}
|
|
|
|
// Now check each directory and delete if empty
|
|
for _, dir := range dirsToCheck {
|
|
isEmpty, err := isDirEmpty(dir)
|
|
if err == nil && isEmpty {
|
|
log.WithFields(log.Fields{"path": dir}).Info("Removing empty directory")
|
|
removeErr := os.Remove(dir)
|
|
|
|
if removeErr != nil {
|
|
log.WithFields(log.Fields{"path": dir}).WithError(err).Error("error removing directory")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// isArchiveFile checks if a file has an archive extension.
|
|
func isArchiveFile(path string) bool {
|
|
ext := strings.ToLower(filepath.Ext(path))
|
|
switch ext {
|
|
// Allowed formats: https://docs.gitlab.com/api/repositories/#get-file-archive
|
|
// These get mapped to `Gitaly::GetArchiveRequest::Format` types (see lib/gitlab/workhorse.rb).
|
|
case ".tar", ".bz2", ".zip":
|
|
return true
|
|
case ".gz":
|
|
// Only accept .gz if it's .tar.gz
|
|
return strings.HasSuffix(strings.ToLower(path), ".tar.gz")
|
|
default:
|
|
return false
|
|
}
|
|
}
|
|
|
|
// isDirEmpty checks if a directory is empty.
|
|
func isDirEmpty(dir string) (bool, error) {
|
|
f, err := os.Open(filepath.Clean(dir))
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
defer func() { _ = f.Close() }()
|
|
|
|
_, err = f.Readdirnames(1)
|
|
if err == nil {
|
|
// Directory is not empty
|
|
return false, nil
|
|
}
|
|
|
|
// Directory is empty if we got EOF
|
|
return true, nil
|
|
}
|
|
|
|
func isDirectory(path string) bool {
|
|
info, err := os.Stat(path)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
return info.IsDir()
|
|
}
|