diff options
Diffstat (limited to 'weed/s3api/s3api_implicit_directory_test.go')
| -rw-r--r-- | weed/s3api/s3api_implicit_directory_test.go | 285 |
1 files changed, 285 insertions, 0 deletions
diff --git a/weed/s3api/s3api_implicit_directory_test.go b/weed/s3api/s3api_implicit_directory_test.go new file mode 100644 index 000000000..e7c3633fc --- /dev/null +++ b/weed/s3api/s3api_implicit_directory_test.go @@ -0,0 +1,285 @@ +package s3api + +import ( + "io" + "testing" + + "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" +) + +// TestImplicitDirectoryBehaviorLogic tests the core logic for implicit directory detection +// This tests the decision logic without requiring a full S3 server setup +func TestImplicitDirectoryBehaviorLogic(t *testing.T) { + tests := []struct { + name string + objectPath string + hasTrailingSlash bool + fileSize uint64 + isDirectory bool + hasChildren bool + versioningEnabled bool + shouldReturn404 bool + description string + }{ + { + name: "Implicit directory: 0-byte file with children, no trailing slash", + objectPath: "dataset", + hasTrailingSlash: false, + fileSize: 0, + isDirectory: false, + hasChildren: true, + versioningEnabled: false, + shouldReturn404: true, + description: "Should return 404 to force s3fs LIST-based discovery", + }, + { + name: "Implicit directory: actual directory with children, no trailing slash", + objectPath: "dataset", + hasTrailingSlash: false, + fileSize: 0, + isDirectory: true, + hasChildren: true, + versioningEnabled: false, + shouldReturn404: true, + description: "Should return 404 for directory with children", + }, + { + name: "Explicit directory request: trailing slash", + objectPath: "dataset/", + hasTrailingSlash: true, + fileSize: 0, + isDirectory: true, + hasChildren: true, + versioningEnabled: false, + shouldReturn404: false, + description: "Should return 200 for explicit directory request (trailing slash)", + }, + { + name: "Empty file: 0-byte file without children", + objectPath: "empty.txt", + hasTrailingSlash: false, + fileSize: 0, + isDirectory: false, + hasChildren: false, + versioningEnabled: false, + shouldReturn404: false, + description: "Should return 200 for legitimate empty file", + }, + { + name: "Empty directory: 0-byte directory without children", + objectPath: "empty-dir", + hasTrailingSlash: false, + fileSize: 0, + isDirectory: true, + hasChildren: false, + versioningEnabled: false, + shouldReturn404: false, + description: "Should return 200 for empty directory", + }, + { + name: "Regular file: non-zero size", + objectPath: "file.txt", + hasTrailingSlash: false, + fileSize: 100, + isDirectory: false, + hasChildren: false, + versioningEnabled: false, + shouldReturn404: false, + description: "Should return 200 for regular file with content", + }, + { + name: "Versioned bucket: implicit directory should return 200", + objectPath: "dataset", + hasTrailingSlash: false, + fileSize: 0, + isDirectory: false, + hasChildren: true, + versioningEnabled: true, + shouldReturn404: false, + description: "Should return 200 for versioned buckets (skip implicit dir check)", + }, + { + name: "PyArrow directory marker: 0-byte with children", + objectPath: "dataset", + hasTrailingSlash: false, + fileSize: 0, + isDirectory: false, + hasChildren: true, + versioningEnabled: false, + shouldReturn404: true, + description: "Should return 404 for PyArrow-created directory markers", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Test the logic: should we return 404? + // Logic from HeadObjectHandler: + // if !versioningConfigured && !strings.HasSuffix(object, "/") { + // if isZeroByteFile || isActualDirectory { + // if hasChildren { + // return 404 + // } + // } + // } + + isZeroByteFile := tt.fileSize == 0 && !tt.isDirectory + isActualDirectory := tt.isDirectory + + shouldReturn404 := false + if !tt.versioningEnabled && !tt.hasTrailingSlash { + if isZeroByteFile || isActualDirectory { + if tt.hasChildren { + shouldReturn404 = true + } + } + } + + if shouldReturn404 != tt.shouldReturn404 { + t.Errorf("Logic mismatch for %s:\n Expected shouldReturn404=%v\n Got shouldReturn404=%v\n Description: %s", + tt.name, tt.shouldReturn404, shouldReturn404, tt.description) + } else { + t.Logf("✓ %s: correctly returns %d", tt.name, map[bool]int{true: 404, false: 200}[shouldReturn404]) + } + }) + } +} + +// TestHasChildrenLogic tests the hasChildren helper function logic +func TestHasChildrenLogic(t *testing.T) { + tests := []struct { + name string + bucket string + prefix string + listResponse *filer_pb.ListEntriesResponse + listError error + expectedResult bool + description string + }{ + { + name: "Directory with children", + bucket: "test-bucket", + prefix: "dataset", + listResponse: &filer_pb.ListEntriesResponse{ + Entry: &filer_pb.Entry{ + Name: "file.parquet", + IsDirectory: false, + }, + }, + listError: nil, + expectedResult: true, + description: "Should return true when at least one child exists", + }, + { + name: "Empty directory", + bucket: "test-bucket", + prefix: "empty-dir", + listResponse: nil, + listError: io.EOF, + expectedResult: false, + description: "Should return false when no children exist (EOF)", + }, + { + name: "Directory with leading slash in prefix", + bucket: "test-bucket", + prefix: "/dataset", + listResponse: &filer_pb.ListEntriesResponse{ + Entry: &filer_pb.Entry{ + Name: "file.parquet", + IsDirectory: false, + }, + }, + listError: nil, + expectedResult: true, + description: "Should handle leading slashes correctly", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Test the hasChildren logic: + // 1. It should trim leading slashes from prefix + // 2. It should list with Limit=1 + // 3. It should return true if any entry is received + // 4. It should return false if EOF is received + + hasChildren := false + if tt.listError == nil && tt.listResponse != nil { + hasChildren = true + } else if tt.listError == io.EOF { + hasChildren = false + } + + if hasChildren != tt.expectedResult { + t.Errorf("hasChildren logic mismatch for %s:\n Expected: %v\n Got: %v\n Description: %s", + tt.name, tt.expectedResult, hasChildren, tt.description) + } else { + t.Logf("✓ %s: correctly returns %v", tt.name, hasChildren) + } + }) + } +} + +// TestImplicitDirectoryEdgeCases tests edge cases in the implicit directory detection +func TestImplicitDirectoryEdgeCases(t *testing.T) { + tests := []struct { + name string + scenario string + expectation string + }{ + { + name: "PyArrow write_dataset creates 0-byte files", + scenario: "PyArrow creates 'dataset' as 0-byte file, then writes 'dataset/file.parquet'", + expectation: "HEAD dataset → 404 (has children), s3fs uses LIST → correctly identifies as directory", + }, + { + name: "Filer creates actual directories", + scenario: "Filer creates 'dataset' as actual directory with IsDirectory=true", + expectation: "HEAD dataset → 404 (has children), s3fs uses LIST → correctly identifies as directory", + }, + { + name: "Empty file edge case", + scenario: "User creates 'empty.txt' as 0-byte file with no children", + expectation: "HEAD empty.txt → 200 (no children), s3fs correctly reports as file", + }, + { + name: "Explicit directory request", + scenario: "User requests 'dataset/' with trailing slash", + expectation: "HEAD dataset/ → 200 (explicit directory request), normal directory behavior", + }, + { + name: "Versioned bucket", + scenario: "Bucket has versioning enabled", + expectation: "HEAD dataset → 200 (skip implicit dir check), versioned semantics apply", + }, + { + name: "AWS S3 compatibility", + scenario: "Only 'dataset/file.txt' exists, no marker at 'dataset'", + expectation: "HEAD dataset → 404 (object doesn't exist), matches AWS S3 behavior", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Logf("Scenario: %s", tt.scenario) + t.Logf("Expected: %s", tt.expectation) + }) + } +} + +// TestImplicitDirectoryIntegration is an integration test placeholder +// Run with: cd test/s3/parquet && make test-implicit-dir-with-server +func TestImplicitDirectoryIntegration(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + t.Skip("Integration test - run manually with: cd test/s3/parquet && make test-implicit-dir-with-server") +} + +// Benchmark for hasChildren performance +func BenchmarkHasChildrenCheck(b *testing.B) { + // This benchmark would measure the performance impact of the hasChildren check + // Expected: ~1-5ms per call (one gRPC LIST request with Limit=1) + b.Skip("Benchmark - requires full filer setup") +} |
