aboutsummaryrefslogtreecommitdiff
path: root/weed/s3api/s3api_implicit_directory_test.go
diff options
context:
space:
mode:
Diffstat (limited to 'weed/s3api/s3api_implicit_directory_test.go')
-rw-r--r--weed/s3api/s3api_implicit_directory_test.go285
1 files changed, 285 insertions, 0 deletions
diff --git a/weed/s3api/s3api_implicit_directory_test.go b/weed/s3api/s3api_implicit_directory_test.go
new file mode 100644
index 000000000..e7c3633fc
--- /dev/null
+++ b/weed/s3api/s3api_implicit_directory_test.go
@@ -0,0 +1,285 @@
+package s3api
+
+import (
+ "io"
+ "testing"
+
+ "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
+)
+
+// TestImplicitDirectoryBehaviorLogic tests the core logic for implicit directory detection
+// This tests the decision logic without requiring a full S3 server setup
+func TestImplicitDirectoryBehaviorLogic(t *testing.T) {
+ tests := []struct {
+ name string
+ objectPath string
+ hasTrailingSlash bool
+ fileSize uint64
+ isDirectory bool
+ hasChildren bool
+ versioningEnabled bool
+ shouldReturn404 bool
+ description string
+ }{
+ {
+ name: "Implicit directory: 0-byte file with children, no trailing slash",
+ objectPath: "dataset",
+ hasTrailingSlash: false,
+ fileSize: 0,
+ isDirectory: false,
+ hasChildren: true,
+ versioningEnabled: false,
+ shouldReturn404: true,
+ description: "Should return 404 to force s3fs LIST-based discovery",
+ },
+ {
+ name: "Implicit directory: actual directory with children, no trailing slash",
+ objectPath: "dataset",
+ hasTrailingSlash: false,
+ fileSize: 0,
+ isDirectory: true,
+ hasChildren: true,
+ versioningEnabled: false,
+ shouldReturn404: true,
+ description: "Should return 404 for directory with children",
+ },
+ {
+ name: "Explicit directory request: trailing slash",
+ objectPath: "dataset/",
+ hasTrailingSlash: true,
+ fileSize: 0,
+ isDirectory: true,
+ hasChildren: true,
+ versioningEnabled: false,
+ shouldReturn404: false,
+ description: "Should return 200 for explicit directory request (trailing slash)",
+ },
+ {
+ name: "Empty file: 0-byte file without children",
+ objectPath: "empty.txt",
+ hasTrailingSlash: false,
+ fileSize: 0,
+ isDirectory: false,
+ hasChildren: false,
+ versioningEnabled: false,
+ shouldReturn404: false,
+ description: "Should return 200 for legitimate empty file",
+ },
+ {
+ name: "Empty directory: 0-byte directory without children",
+ objectPath: "empty-dir",
+ hasTrailingSlash: false,
+ fileSize: 0,
+ isDirectory: true,
+ hasChildren: false,
+ versioningEnabled: false,
+ shouldReturn404: false,
+ description: "Should return 200 for empty directory",
+ },
+ {
+ name: "Regular file: non-zero size",
+ objectPath: "file.txt",
+ hasTrailingSlash: false,
+ fileSize: 100,
+ isDirectory: false,
+ hasChildren: false,
+ versioningEnabled: false,
+ shouldReturn404: false,
+ description: "Should return 200 for regular file with content",
+ },
+ {
+ name: "Versioned bucket: implicit directory should return 200",
+ objectPath: "dataset",
+ hasTrailingSlash: false,
+ fileSize: 0,
+ isDirectory: false,
+ hasChildren: true,
+ versioningEnabled: true,
+ shouldReturn404: false,
+ description: "Should return 200 for versioned buckets (skip implicit dir check)",
+ },
+ {
+ name: "PyArrow directory marker: 0-byte with children",
+ objectPath: "dataset",
+ hasTrailingSlash: false,
+ fileSize: 0,
+ isDirectory: false,
+ hasChildren: true,
+ versioningEnabled: false,
+ shouldReturn404: true,
+ description: "Should return 404 for PyArrow-created directory markers",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // Test the logic: should we return 404?
+ // Logic from HeadObjectHandler:
+ // if !versioningConfigured && !strings.HasSuffix(object, "/") {
+ // if isZeroByteFile || isActualDirectory {
+ // if hasChildren {
+ // return 404
+ // }
+ // }
+ // }
+
+ isZeroByteFile := tt.fileSize == 0 && !tt.isDirectory
+ isActualDirectory := tt.isDirectory
+
+ shouldReturn404 := false
+ if !tt.versioningEnabled && !tt.hasTrailingSlash {
+ if isZeroByteFile || isActualDirectory {
+ if tt.hasChildren {
+ shouldReturn404 = true
+ }
+ }
+ }
+
+ if shouldReturn404 != tt.shouldReturn404 {
+ t.Errorf("Logic mismatch for %s:\n Expected shouldReturn404=%v\n Got shouldReturn404=%v\n Description: %s",
+ tt.name, tt.shouldReturn404, shouldReturn404, tt.description)
+ } else {
+ t.Logf("✓ %s: correctly returns %d", tt.name, map[bool]int{true: 404, false: 200}[shouldReturn404])
+ }
+ })
+ }
+}
+
+// TestHasChildrenLogic tests the hasChildren helper function logic
+func TestHasChildrenLogic(t *testing.T) {
+ tests := []struct {
+ name string
+ bucket string
+ prefix string
+ listResponse *filer_pb.ListEntriesResponse
+ listError error
+ expectedResult bool
+ description string
+ }{
+ {
+ name: "Directory with children",
+ bucket: "test-bucket",
+ prefix: "dataset",
+ listResponse: &filer_pb.ListEntriesResponse{
+ Entry: &filer_pb.Entry{
+ Name: "file.parquet",
+ IsDirectory: false,
+ },
+ },
+ listError: nil,
+ expectedResult: true,
+ description: "Should return true when at least one child exists",
+ },
+ {
+ name: "Empty directory",
+ bucket: "test-bucket",
+ prefix: "empty-dir",
+ listResponse: nil,
+ listError: io.EOF,
+ expectedResult: false,
+ description: "Should return false when no children exist (EOF)",
+ },
+ {
+ name: "Directory with leading slash in prefix",
+ bucket: "test-bucket",
+ prefix: "/dataset",
+ listResponse: &filer_pb.ListEntriesResponse{
+ Entry: &filer_pb.Entry{
+ Name: "file.parquet",
+ IsDirectory: false,
+ },
+ },
+ listError: nil,
+ expectedResult: true,
+ description: "Should handle leading slashes correctly",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // Test the hasChildren logic:
+ // 1. It should trim leading slashes from prefix
+ // 2. It should list with Limit=1
+ // 3. It should return true if any entry is received
+ // 4. It should return false if EOF is received
+
+ hasChildren := false
+ if tt.listError == nil && tt.listResponse != nil {
+ hasChildren = true
+ } else if tt.listError == io.EOF {
+ hasChildren = false
+ }
+
+ if hasChildren != tt.expectedResult {
+ t.Errorf("hasChildren logic mismatch for %s:\n Expected: %v\n Got: %v\n Description: %s",
+ tt.name, tt.expectedResult, hasChildren, tt.description)
+ } else {
+ t.Logf("✓ %s: correctly returns %v", tt.name, hasChildren)
+ }
+ })
+ }
+}
+
+// TestImplicitDirectoryEdgeCases tests edge cases in the implicit directory detection
+func TestImplicitDirectoryEdgeCases(t *testing.T) {
+ tests := []struct {
+ name string
+ scenario string
+ expectation string
+ }{
+ {
+ name: "PyArrow write_dataset creates 0-byte files",
+ scenario: "PyArrow creates 'dataset' as 0-byte file, then writes 'dataset/file.parquet'",
+ expectation: "HEAD dataset → 404 (has children), s3fs uses LIST → correctly identifies as directory",
+ },
+ {
+ name: "Filer creates actual directories",
+ scenario: "Filer creates 'dataset' as actual directory with IsDirectory=true",
+ expectation: "HEAD dataset → 404 (has children), s3fs uses LIST → correctly identifies as directory",
+ },
+ {
+ name: "Empty file edge case",
+ scenario: "User creates 'empty.txt' as 0-byte file with no children",
+ expectation: "HEAD empty.txt → 200 (no children), s3fs correctly reports as file",
+ },
+ {
+ name: "Explicit directory request",
+ scenario: "User requests 'dataset/' with trailing slash",
+ expectation: "HEAD dataset/ → 200 (explicit directory request), normal directory behavior",
+ },
+ {
+ name: "Versioned bucket",
+ scenario: "Bucket has versioning enabled",
+ expectation: "HEAD dataset → 200 (skip implicit dir check), versioned semantics apply",
+ },
+ {
+ name: "AWS S3 compatibility",
+ scenario: "Only 'dataset/file.txt' exists, no marker at 'dataset'",
+ expectation: "HEAD dataset → 404 (object doesn't exist), matches AWS S3 behavior",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ t.Logf("Scenario: %s", tt.scenario)
+ t.Logf("Expected: %s", tt.expectation)
+ })
+ }
+}
+
+// TestImplicitDirectoryIntegration is an integration test placeholder
+// Run with: cd test/s3/parquet && make test-implicit-dir-with-server
+func TestImplicitDirectoryIntegration(t *testing.T) {
+ if testing.Short() {
+ t.Skip("Skipping integration test in short mode")
+ }
+
+ t.Skip("Integration test - run manually with: cd test/s3/parquet && make test-implicit-dir-with-server")
+}
+
+// Benchmark for hasChildren performance
+func BenchmarkHasChildrenCheck(b *testing.B) {
+ // This benchmark would measure the performance impact of the hasChildren check
+ // Expected: ~1-5ms per call (one gRPC LIST request with Limit=1)
+ b.Skip("Benchmark - requires full filer setup")
+}