about | summary | refs | log | tree | commit | diff
path: root/weed/filer/empty_folder_cleanup/cleanup_queue.go
diff options
context:
space:
mode:
author: Chris Lu <chrislusf@users.noreply.github.com> 2025-12-03 21:12:19 -0800
committer: GitHub <noreply@github.com> 2025-12-03 21:12:19 -0800
commit: 39ba19eea6d47a5d35c67064d560fb569c6c5baf (patch)
tree: 6213a3d8acd5698964eb200555cd276c3c3285fe /weed/filer/empty_folder_cleanup/cleanup_queue.go
parent: 268cc84e8c8629c4824d4cc30c79cc8dac0a5142 (diff)
download: seaweedfs-39ba19eea6d47a5d35c67064d560fb569c6c5baf.tar.xz
seaweedfs-39ba19eea6d47a5d35c67064d560fb569c6c5baf.zip
filer: async empty folder cleanup via metadata events (#7614)
* filer: async empty folder cleanup via metadata events

  Implements asynchronous empty folder cleanup when files are deleted in S3.

  Key changes:

  1. EmptyFolderCleaner - New component that handles folder cleanup:
     - Uses consistent hashing (LockRing) to determine folder ownership
     - Each filer owns specific folders, avoiding duplicate cleanup work
     - Debounces delete events (10s delay) to batch multiple deletes
     - Caches rough folder counts to skip unnecessary checks
     - Cancels pending cleanup when new files are created
     - Handles both file and subdirectory deletions

  2. Integration with metadata events:
     - Listens to both local and remote filer metadata events
     - Processes create/delete/rename events to track folder state
     - Only processes folders under /buckets/<bucket>/...

  3. Removed synchronous empty folder cleanup from S3 handlers:
     - DeleteObjectHandler no longer calls DoDeleteEmptyParentDirectories
     - DeleteMultipleObjectsHandler no longer tracks/cleans directories
     - Cleanup now happens asynchronously via metadata events

  Benefits:
  - Non-blocking: S3 delete requests return immediately
  - Coordinated: Only one filer (the owner) cleans each folder
  - Efficient: Batching and caching reduce unnecessary checks
  - Event-driven: Folder deletion triggers parent folder check automatically

* filer: add CleanupQueue data structure for deduplicated folder cleanup

  CleanupQueue uses a linked list for FIFO ordering and a hashmap for O(1) deduplication.
  Processing is triggered when:
  - Queue size reaches maxSize (default 1000), OR
  - Oldest item exceeds maxAge (default 10 minutes)

  Key features:
  - O(1) Add, Remove, Pop, Contains operations
  - Duplicate folders are ignored (keeps original position/time)
  - Testable with injectable time function
  - Thread-safe with mutex protection

* filer: use CleanupQueue for empty folder cleanup

  Replace timer-per-folder approach with queue-based processing:
  - Use CleanupQueue for deduplication and ordered processing
  - Process queue when full (1000 items) or oldest item exceeds 10 minutes
  - Background processor checks queue every 10 seconds
  - Remove from queue on create events to cancel pending cleanup

  Benefits:
  - Bounded memory: queue has max size, not unlimited timers
  - Efficient: O(1) add/remove/contains operations
  - Batch processing: handle many folders efficiently
  - Better for high-volume delete scenarios

* filer: CleanupQueue.Add moves duplicate to back with updated time

  When adding a folder that already exists in the queue:
  - Remove it from its current position
  - Add it to the back of the queue
  - Update the queue time to current time

  This ensures that folders with recent delete activity are processed later, giving more time for additional deletes to occur.

* filer: CleanupQueue uses event time and inserts in sorted order

  Changes:
  - Add() now takes eventTime parameter instead of using current time
  - Insert items in time-sorted order (oldest at front) to handle out-of-order events
  - When updating duplicate with newer time, reposition to maintain sort order
  - Ignore updates with older time (keep existing later time)

  This ensures proper ordering when processing events from distributed filers where event arrival order may not match event occurrence order.
* filer: remove unused CleanupQueue functions (SetNowFunc, GetAll)

  Removed test-only functions:
  - SetNowFunc: tests now use real time with past event times
  - GetAll: tests now use Pop() to verify order

  Kept functions used in production:
  - Peek: used in filer_notify_read.go
  - OldestAge: used in empty_folder_cleaner.go logging

* filer: initialize cache entry on first delete/create event

  Previously, roughCount was only updated if the cache entry already existed, but entries were only created during executeCleanup. This meant delete/create events before the first cleanup didn't track the count.

  Now create the cache entry on first event, so roughCount properly tracks all changes from the start.

* filer: skip adding to cleanup queue if roughCount > 0

  If the cached roughCount indicates there are still items in the folder, don't bother adding it to the cleanup queue. This avoids unnecessary queue entries and reduces wasted cleanup checks.

* filer: don't create cache entry on create event

  Only update roughCount if the folder is already being tracked. New folders don't need tracking until we see a delete event.

* filer: move empty folder cleanup to its own package

  - Created weed/filer/empty_folder_cleanup package
  - Defined FilerOperations interface to break circular dependency
  - Added CountDirectoryEntries method to Filer
  - Exported IsUnderPath and IsUnderBucketPath helper functions

* filer: make isUnderPath and isUnderBucketPath private

  These helpers are only used within the empty_folder_cleanup package.
Diffstat (limited to 'weed/filer/empty_folder_cleanup/cleanup_queue.go')
-rw-r--r-- weed/filer/empty_folder_cleanup/cleanup_queue.go 206
1 files changed, 206 insertions, 0 deletions
diff --git a/weed/filer/empty_folder_cleanup/cleanup_queue.go b/weed/filer/empty_folder_cleanup/cleanup_queue.go
new file mode 100644
index 000000000..66889e930
--- /dev/null
+++ b/weed/filer/empty_folder_cleanup/cleanup_queue.go
@@ -0,0 +1,206 @@
+package empty_folder_cleanup
+
+import (
+ "container/list"
+ "sync"
+ "time"
+)
+
+// CleanupQueue is a mutex-guarded, deduplicated queue of folders awaiting cleanup.
+// Entries sit in a doubly-linked list sorted by event time (oldest at front); a map keyed by folder gives O(1) lookup of each entry.
+// ShouldProcess reports that the queue is due for processing when either:
+// - the queue holds at least maxSize folders, OR
+// - the oldest entry is more than maxAge old
+type CleanupQueue struct {
+ mu sync.Mutex // Guards items and itemsMap; every exported method locks it
+ items *list.List // List of *queueItem sorted by queueTime (front = oldest)
+ itemsMap map[string]*list.Element // folder -> its element in items, for O(1) lookup
+ maxSize int // Queue length at which ShouldProcess reports true
+ maxAge time.Duration // Age of the oldest entry beyond which ShouldProcess reports true
+}
+
+// queueItem is the value stored in each element of the CleanupQueue list.
+type queueItem struct {
+ folder string // Folder identifier; the same string is the key in itemsMap
+ queueTime time.Time // Event time passed to Add; determines the entry's list position
+}
+
+// NewCleanupQueue returns an empty CleanupQueue.
+// maxSize is the queue length, and maxAge the age of the oldest entry,
+// at which ShouldProcess starts reporting true.
+func NewCleanupQueue(maxSize int, maxAge time.Duration) *CleanupQueue {
+	q := new(CleanupQueue)
+	q.maxSize, q.maxAge = maxSize, maxAge
+	q.itemsMap = map[string]*list.Element{}
+	q.items = list.New()
+	return q
+}
+
+// Add records folder in the queue under the given event time, keeping the
+// list sorted oldest-first so that out-of-order events land in the right place.
+//
+// A folder that is not yet queued is inserted and Add returns true.
+// A folder that is already queued is repositioned only when eventTime is
+// strictly later than its stored time; an equal or earlier eventTime leaves
+// the entry untouched. Add returns false in both of those cases.
+func (q *CleanupQueue) Add(folder string, eventTime time.Time) bool {
+	q.mu.Lock()
+	defer q.mu.Unlock()
+
+	current, queued := q.itemsMap[folder]
+	if !queued {
+		// First sighting of this folder: place it by event time.
+		q.itemsMap[folder] = q.insertSorted(folder, eventTime)
+		return true
+	}
+
+	// Already queued: a stale or equal event time must not move the entry.
+	if prev := current.Value.(*queueItem); !eventTime.After(prev.queueTime) {
+		return false
+	}
+
+	// Newer event: unlink the old element and re-insert at the new sorted position.
+	q.items.Remove(current)
+	q.itemsMap[folder] = q.insertSorted(folder, eventTime)
+	return false
+}
+
+// insertSorted builds a queueItem for folder and links it into the list so
+// that queue times stay ascending from front to back. It returns the new list
+// element and does not touch itemsMap. It does not lock q.mu itself; Add calls
+// it with the lock already held.
+func (q *CleanupQueue) insertSorted(folder string, eventTime time.Time) *list.Element {
+	entry := &queueItem{folder: folder, queueTime: eventTime}
+
+	// Walk from the back past every element strictly later than eventTime.
+	// Stopping at the first element that is not later means entries with
+	// equal times keep their insertion order.
+	cursor := q.items.Back()
+	for cursor != nil && eventTime.Before(cursor.Value.(*queueItem).queueTime) {
+		cursor = cursor.Prev()
+	}
+
+	if cursor == nil {
+		// The list is empty or every existing entry is later: new entry is the oldest.
+		return q.items.PushFront(entry)
+	}
+	return q.items.InsertAfter(entry, cursor)
+}
+
+// Remove drops folder from the queue if it is present (e.g., when a file is
+// created in it). It reports whether an entry was actually removed.
+func (q *CleanupQueue) Remove(folder string) bool {
+	q.mu.Lock()
+	defer q.mu.Unlock()
+
+	if target, found := q.itemsMap[folder]; found {
+		// Keep the list and the lookup map in step.
+		delete(q.itemsMap, folder)
+		q.items.Remove(target)
+		return true
+	}
+	return false
+}
+
+// ShouldProcess reports whether the queue is due for processing, which is
+// the case when it is non-empty and either:
+// - its length is >= maxSize, OR
+// - its oldest entry is older than maxAge
+func (q *CleanupQueue) ShouldProcess() bool {
+	q.mu.Lock()
+	defer q.mu.Unlock()
+
+	due := q.shouldProcessLocked()
+	return due
+}
+
+// shouldProcessLocked is the lock-free core of ShouldProcess; the caller must
+// already hold q.mu. An empty queue is never due; otherwise the size limit is
+// checked first and the age of the front (oldest) entry second.
+func (q *CleanupQueue) shouldProcessLocked() bool {
+	size := q.items.Len()
+	switch {
+	case size == 0:
+		return false
+	case size >= q.maxSize:
+		// Queue is full.
+		return true
+	}
+
+	// Not full: due only if the oldest entry has outlived maxAge.
+	oldest := q.items.Front()
+	return oldest != nil && time.Since(oldest.Value.(*queueItem).queueTime) > q.maxAge
+}
+
+// Pop takes the oldest folder off the front of the queue.
+// It returns the folder and true, or "" and false when the queue is empty.
+func (q *CleanupQueue) Pop() (string, bool) {
+	q.mu.Lock()
+	defer q.mu.Unlock()
+
+	head := q.items.Front()
+	if head == nil {
+		return "", false
+	}
+
+	// list.Remove hands back the stored value, so unlink and read in one step.
+	folder := q.items.Remove(head).(*queueItem).folder
+	delete(q.itemsMap, folder)
+	return folder, true
+}
+
+// Peek reports the oldest folder and its queue time without removing it.
+// ok is false, and the other results are zero values, when the queue is empty.
+func (q *CleanupQueue) Peek() (folder string, queueTime time.Time, ok bool) {
+	q.mu.Lock()
+	defer q.mu.Unlock()
+
+	if head := q.items.Front(); head != nil {
+		oldest := head.Value.(*queueItem)
+		folder, queueTime, ok = oldest.folder, oldest.queueTime, true
+	}
+	return folder, queueTime, ok
+}
+
+// Len reports how many folders are currently queued.
+func (q *CleanupQueue) Len() int {
+	q.mu.Lock()
+	defer q.mu.Unlock()
+
+	size := q.items.Len()
+	return size
+}
+
+// Contains reports whether folder currently has an entry in the queue.
+func (q *CleanupQueue) Contains(folder string) bool {
+	q.mu.Lock()
+	defer q.mu.Unlock()
+
+	if _, queued := q.itemsMap[folder]; queued {
+		return true
+	}
+	return false
+}
+
+// Clear empties the queue: the lookup map is replaced with a fresh one and
+// the list is reset in place.
+func (q *CleanupQueue) Clear() {
+	q.mu.Lock()
+	defer q.mu.Unlock()
+
+	q.itemsMap = make(map[string]*list.Element)
+	q.items.Init()
+}
+
+// OldestAge reports how long ago the oldest queued entry's event time was,
+// measured against the current time, or 0 when the queue is empty.
+func (q *CleanupQueue) OldestAge() time.Duration {
+	q.mu.Lock()
+	defer q.mu.Unlock()
+
+	if head := q.items.Front(); head != nil {
+		return time.Since(head.Value.(*queueItem).queueTime)
+	}
+	return 0
+}
+