author     Chris Lu <chrislusf@users.noreply.github.com>  2025-12-03 21:12:19 -0800
committer  GitHub <noreply@github.com>  2025-12-03 21:12:19 -0800
commit     39ba19eea6d47a5d35c67064d560fb569c6c5baf (patch)
tree       6213a3d8acd5698964eb200555cd276c3c3285fe /weed/s3api
parent     268cc84e8c8629c4824d4cc30c79cc8dac0a5142 (diff)
filer: async empty folder cleanup via metadata events (#7614)
* filer: async empty folder cleanup via metadata events

Implements asynchronous empty folder cleanup when files are deleted in S3.

Key changes:

1. EmptyFolderCleaner - New component that handles folder cleanup:
   - Uses consistent hashing (LockRing) to determine folder ownership
   - Each filer owns specific folders, avoiding duplicate cleanup work
   - Debounces delete events (10s delay) to batch multiple deletes
   - Caches rough folder counts to skip unnecessary checks
   - Cancels pending cleanup when new files are created
   - Handles both file and subdirectory deletions

2. Integration with metadata events:
   - Listens to both local and remote filer metadata events
   - Processes create/delete/rename events to track folder state
   - Only processes folders under /buckets/<bucket>/...

3. Removed synchronous empty folder cleanup from S3 handlers:
   - DeleteObjectHandler no longer calls DoDeleteEmptyParentDirectories
   - DeleteMultipleObjectsHandler no longer tracks/cleans directories
   - Cleanup now happens asynchronously via metadata events

Benefits:
- Non-blocking: S3 delete requests return immediately
- Coordinated: Only one filer (the owner) cleans each folder
- Efficient: Batching and caching reduce unnecessary checks
- Event-driven: Folder deletion triggers parent folder check automatically

* filer: add CleanupQueue data structure for deduplicated folder cleanup

CleanupQueue uses a linked list for FIFO ordering and a hashmap for O(1)
deduplication. Processing is triggered when:
- Queue size reaches maxSize (default 1000), OR
- Oldest item exceeds maxAge (default 10 minutes)

Key features:
- O(1) Add, Remove, Pop, Contains operations
- Duplicate folders are ignored (keeps original position/time)
- Testable with injectable time function
- Thread-safe with mutex protection

* filer: use CleanupQueue for empty folder cleanup

Replace the timer-per-folder approach with queue-based processing:
- Use CleanupQueue for deduplication and ordered processing
- Process queue when full (1000 items) or oldest item exceeds 10 minutes
- Background processor checks queue every 10 seconds
- Remove from queue on create events to cancel pending cleanup

Benefits:
- Bounded memory: queue has a max size, not unlimited timers
- Efficient: O(1) add/remove/contains operations
- Batch processing: handle many folders efficiently
- Better for high-volume delete scenarios

* filer: CleanupQueue.Add moves duplicate to back with updated time

When adding a folder that already exists in the queue:
- Remove it from its current position
- Add it to the back of the queue
- Update the queue time to the current time

This ensures that folders with recent delete activity are processed later,
giving more time for additional deletes to occur.

* filer: CleanupQueue uses event time and inserts in sorted order

Changes:
- Add() now takes an eventTime parameter instead of using the current time
- Insert items in time-sorted order (oldest at front) to handle
  out-of-order events
- When updating a duplicate with a newer time, reposition it to maintain
  sort order
- Ignore updates with an older time (keep the existing later time)

This ensures proper ordering when processing events from distributed
filers, where event arrival order may not match event occurrence order.
(A sketch of the resulting queue design follows this message.)
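To make concrete the queue design these commits converge on, here is a
minimal Go sketch: a map gives O(1) deduplication, a linked list keeps
items sorted by event time (oldest at the front), and processing triggers
when the queue is full or the oldest item has aged out. Every name and
signature below is an illustrative assumption, not the actual SeaweedFS
CleanupQueue.

package cleanup

import (
	"container/list"
	"sync"
	"time"
)

type queueItem struct {
	folder    string
	eventTime time.Time
}

// cleanupQueue: linked list sorted by event time plus a map from folder
// path to list element for O(1) dedup, as described in the commits above.
type cleanupQueue struct {
	mu      sync.Mutex
	items   *list.List               // oldest event time at the front
	index   map[string]*list.Element // folder path -> element
	maxSize int                      // e.g. 1000
	maxAge  time.Duration            // e.g. 10 * time.Minute
}

func newCleanupQueue(maxSize int, maxAge time.Duration) *cleanupQueue {
	return &cleanupQueue{
		items:   list.New(),
		index:   make(map[string]*list.Element),
		maxSize: maxSize,
		maxAge:  maxAge,
	}
}

// Add inserts a folder in event-time-sorted order so out-of-order events
// from distributed filers are still processed oldest-first. A duplicate
// with a newer event time is repositioned; an older time is ignored.
func (q *cleanupQueue) Add(folder string, eventTime time.Time) {
	q.mu.Lock()
	defer q.mu.Unlock()
	if el, ok := q.index[folder]; ok {
		if !eventTime.After(el.Value.(*queueItem).eventTime) {
			return // keep the existing, later time
		}
		q.items.Remove(el) // reposition with the newer time
	}
	item := &queueItem{folder: folder, eventTime: eventTime}
	// Walk from the back: events mostly arrive in order, so the
	// insertion point is usually found immediately.
	for el := q.items.Back(); el != nil; el = el.Prev() {
		if !el.Value.(*queueItem).eventTime.After(eventTime) {
			q.index[folder] = q.items.InsertAfter(item, el)
			return
		}
	}
	q.index[folder] = q.items.PushFront(item)
}

// Remove cancels a pending cleanup, e.g. when a create event arrives.
func (q *cleanupQueue) Remove(folder string) {
	q.mu.Lock()
	defer q.mu.Unlock()
	if el, ok := q.index[folder]; ok {
		q.items.Remove(el)
		delete(q.index, folder)
	}
}

// PopIfDue returns the oldest folder when the queue is full or the oldest
// item exceeds maxAge; a background processor would poll this periodically
// (the commits mention a 10-second check interval).
func (q *cleanupQueue) PopIfDue(now time.Time) (string, bool) {
	q.mu.Lock()
	defer q.mu.Unlock()
	front := q.items.Front()
	if front == nil {
		return "", false
	}
	item := front.Value.(*queueItem)
	if q.items.Len() < q.maxSize && now.Sub(item.eventTime) < q.maxAge {
		return "", false
	}
	q.items.Remove(front)
	delete(q.index, item.folder)
	return item.folder, true
}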
* filer: remove unused CleanupQueue functions (SetNowFunc, GetAll)

Removed test-only functions:
- SetNowFunc: tests now use real time with past event times
- GetAll: tests now use Pop() to verify order

Kept functions used in production:
- Peek: used in filer_notify_read.go
- OldestAge: used in empty_folder_cleaner.go logging

* filer: initialize cache entry on first delete/create event

Previously, roughCount was only updated if the cache entry already
existed, but entries were only created during executeCleanup. This meant
delete/create events before the first cleanup didn't track the count.
Now create the cache entry on the first event, so roughCount properly
tracks all changes from the start.

* filer: skip adding to cleanup queue if roughCount > 0

If the cached roughCount indicates there are still items in the folder,
don't bother adding it to the cleanup queue. This avoids unnecessary
queue entries and reduces wasted cleanup checks.

* filer: don't create cache entry on create event

Only update roughCount if the folder is already being tracked. New
folders don't need tracking until we see a delete event.
(The event-handling sketch after this message shows these three rules
together.)

* filer: move empty folder cleanup to its own package

- Created weed/filer/empty_folder_cleanup package
- Defined FilerOperations interface to break circular dependency
- Added CountDirectoryEntries method to Filer
- Exported IsUnderPath and IsUnderBucketPath helper functions

* filer: make isUnderPath and isUnderBucketPath private

These helpers are only used within the empty_folder_cleanup package.
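The roughCount commits above reduce to a small decision rule per metadata
event. Continuing the sketch in the same assumed package: ownsFolder
stands in for the LockRing consistent-hashing ownership check, and all
names here are assumptions rather than the actual SeaweedFS API.

// Assumes a single event-processing goroutine; the real component also
// guards its state (the message mentions mutex protection).
type folderCleaner struct {
	queue      *cleanupQueue
	roughCount map[string]int           // folder -> approximate entry count
	ownsFolder func(folder string) bool // consistent hashing over filers
}

func newFolderCleaner(q *cleanupQueue, owns func(string) bool) *folderCleaner {
	return &folderCleaner{
		queue:      q,
		roughCount: make(map[string]int),
		ownsFolder: owns,
	}
}

// onDeleteEvent handles a delete under /buckets/<bucket>/... . The map
// decrement creates the cache entry on the first delete event, so counting
// starts before any cleanup pass has run.
func (c *folderCleaner) onDeleteEvent(folder string, eventTime time.Time) {
	if !c.ownsFolder(folder) {
		return // another filer owns this folder's cleanup
	}
	c.roughCount[folder]--
	if c.roughCount[folder] > 0 {
		return // folder still has entries; skip the queue entirely
	}
	c.queue.Add(folder, eventTime) // may just reposition a duplicate
}

// onCreateEvent cancels any pending cleanup, but only updates roughCount
// for folders that are already tracked; new folders are not tracked until
// their first delete event.
func (c *folderCleaner) onCreateEvent(folder string) {
	if _, tracked := c.roughCount[folder]; tracked {
		c.roughCount[folder]++
	}
	c.queue.Remove(folder)
}

Since the count is only rough, a stale or missing entry costs at most a
wasted check: before actually deleting, the queued cleanup can verify
emptiness against real entries (the commits add CountDirectoryEntries to
Filer for this purpose).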
Diffstat (limited to 'weed/s3api')
-rw-r--r--  weed/s3api/s3api_object_handlers_delete.go  57
1 file changed, 5 insertions(+), 52 deletions(-)
diff --git a/weed/s3api/s3api_object_handlers_delete.go b/weed/s3api/s3api_object_handlers_delete.go
index f779a6edc..6e373bb4e 100644
--- a/weed/s3api/s3api_object_handlers_delete.go
+++ b/weed/s3api/s3api_object_handlers_delete.go
@@ -1,12 +1,10 @@
package s3api
import (
- "context"
"encoding/xml"
"fmt"
"io"
"net/http"
- "slices"
"strings"
"github.com/seaweedfs/seaweedfs/weed/filer"
@@ -127,22 +125,9 @@ func (s3a *S3ApiServer) DeleteObjectHandler(w http.ResponseWriter, r *http.Reque
dir, name := target.DirAndName()
err := s3a.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
- // Use operation context that won't be cancelled if request terminates
- // This ensures deletion completes atomically to avoid inconsistent state
- opCtx := context.WithoutCancel(r.Context())
-
- if err := doDeleteEntry(client, dir, name, true, false); err != nil {
- return err
- }
-
- // Cleanup empty directories
- if !s3a.option.AllowEmptyFolder && strings.LastIndex(object, "/") > 0 {
- bucketPath := fmt.Sprintf("%s/%s", s3a.option.BucketsPath, bucket)
- // Recursively delete empty parent directories, stop at bucket path
- filer_pb.DoDeleteEmptyParentDirectories(opCtx, client, util.FullPath(dir), util.FullPath(bucketPath), nil)
- }
-
- return nil
+ return doDeleteEntry(client, dir, name, true, false)
+ // Note: Empty folder cleanup is now handled asynchronously by EmptyFolderCleaner
+ // which listens to metadata events and uses consistent hashing for coordination
})
if err != nil {
s3err.WriteErrorResponse(w, r, s3err.ErrInternalError)
@@ -222,8 +207,6 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
var deleteErrors []DeleteError
var auditLog *s3err.AccessLog
- directoriesWithDeletion := make(map[string]bool)
-
if s3err.Logger != nil {
auditLog = s3err.GetAccessLog(r, http.StatusNoContent, s3err.ErrNone)
}
@@ -245,10 +228,6 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
versioningConfigured := (versioningState != "")
s3a.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
- // Use operation context that won't be cancelled if request terminates
- // This ensures batch deletion completes atomically to avoid inconsistent state
- opCtx := context.WithoutCancel(r.Context())
-
// delete file entries
for _, object := range deleteObjects.Objects {
if object.Key == "" {
@@ -357,10 +336,6 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
err := doDeleteEntry(client, parentDirectoryPath, entryName, isDeleteData, isRecursive)
if err == nil {
- // Track directory for empty directory cleanup
- if !s3a.option.AllowEmptyFolder {
- directoriesWithDeletion[parentDirectoryPath] = true
- }
deletedObjects = append(deletedObjects, object)
} else if strings.Contains(err.Error(), filer.MsgFailDelNonEmptyFolder) {
deletedObjects = append(deletedObjects, object)
@@ -380,30 +355,8 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
}
}
- // Cleanup empty directories - optimize by processing deepest first
- if !s3a.option.AllowEmptyFolder && len(directoriesWithDeletion) > 0 {
- bucketPath := fmt.Sprintf("%s/%s", s3a.option.BucketsPath, bucket)
-
- // Collect and sort directories by depth (deepest first) to avoid redundant checks
- var allDirs []string
- for dirPath := range directoriesWithDeletion {
- allDirs = append(allDirs, dirPath)
- }
- // Sort by depth (deeper directories first)
- slices.SortFunc(allDirs, func(a, b string) int {
- return strings.Count(b, "/") - strings.Count(a, "/")
- })
-
- // Track already-checked directories to avoid redundant work
- checked := make(map[string]bool)
- for _, dirPath := range allDirs {
- if !checked[dirPath] {
- // Recursively delete empty parent directories, stop at bucket path
- // Mark this directory and all its parents as checked during recursion
- filer_pb.DoDeleteEmptyParentDirectories(opCtx, client, util.FullPath(dirPath), util.FullPath(bucketPath), checked)
- }
- }
- }
+ // Note: Empty folder cleanup is now handled asynchronously by EmptyFolderCleaner
+ // which listens to metadata events and uses consistent hashing for coordination
return nil
})