aboutsummaryrefslogtreecommitdiff
path: root/weed/s3api/s3api_implicit_directory_test.go
blob: e7c3633fcffbfbc6c5db0791d2affd958fd306dc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
package s3api

import (
	"io"
	"testing"

	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
)

// TestImplicitDirectoryBehaviorLogic walks a decision table describing when a
// HEAD on an object should be answered with 404 because the object is really an
// implicit directory. The rule is re-evaluated inline from plain booleans, so
// no S3 server or filer is required to run it.
func TestImplicitDirectoryBehaviorLogic(t *testing.T) {
	cases := []struct {
		name          string
		objectPath    string
		trailingSlash bool
		size          uint64
		isDir         bool
		hasChildren   bool
		versioned     bool
		want404       bool
		why           string
	}{
		{
			name:          "Implicit directory: 0-byte file with children, no trailing slash",
			objectPath:    "dataset",
			trailingSlash: false,
			size:          0,
			isDir:         false,
			hasChildren:   true,
			versioned:     false,
			want404:       true,
			why:           "Should return 404 to force s3fs LIST-based discovery",
		},
		{
			name:          "Implicit directory: actual directory with children, no trailing slash",
			objectPath:    "dataset",
			trailingSlash: false,
			size:          0,
			isDir:         true,
			hasChildren:   true,
			versioned:     false,
			want404:       true,
			why:           "Should return 404 for directory with children",
		},
		{
			name:          "Explicit directory request: trailing slash",
			objectPath:    "dataset/",
			trailingSlash: true,
			size:          0,
			isDir:         true,
			hasChildren:   true,
			versioned:     false,
			want404:       false,
			why:           "Should return 200 for explicit directory request (trailing slash)",
		},
		{
			name:          "Empty file: 0-byte file without children",
			objectPath:    "empty.txt",
			trailingSlash: false,
			size:          0,
			isDir:         false,
			hasChildren:   false,
			versioned:     false,
			want404:       false,
			why:           "Should return 200 for legitimate empty file",
		},
		{
			name:          "Empty directory: 0-byte directory without children",
			objectPath:    "empty-dir",
			trailingSlash: false,
			size:          0,
			isDir:         true,
			hasChildren:   false,
			versioned:     false,
			want404:       false,
			why:           "Should return 200 for empty directory",
		},
		{
			name:          "Regular file: non-zero size",
			objectPath:    "file.txt",
			trailingSlash: false,
			size:          100,
			isDir:         false,
			hasChildren:   false,
			versioned:     false,
			want404:       false,
			why:           "Should return 200 for regular file with content",
		},
		{
			name:          "Versioned bucket: implicit directory should return 200",
			objectPath:    "dataset",
			trailingSlash: false,
			size:          0,
			isDir:         false,
			hasChildren:   true,
			versioned:     true,
			want404:       false,
			why:           "Should return 200 for versioned buckets (skip implicit dir check)",
		},
		{
			name:          "PyArrow directory marker: 0-byte with children",
			objectPath:    "dataset",
			trailingSlash: false,
			size:          0,
			isDir:         false,
			hasChildren:   true,
			versioned:     false,
			want404:       true,
			why:           "Should return 404 for PyArrow-created directory markers",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Rule under test (described in the original as coming from
			// HeadObjectHandler): on a non-versioned bucket, a request without
			// a trailing slash yields 404 when the entry is a zero-byte file
			// or a real directory AND it has at least one child.
			//
			// "zero-byte non-directory OR directory" collapses to the
			// expression below.
			markerLike := tc.isDir || tc.size == 0
			got404 := markerLike && tc.hasChildren && !tc.versioned && !tc.trailingSlash

			if got404 != tc.want404 {
				t.Errorf("Logic mismatch for %s:\n  Expected shouldReturn404=%v\n  Got shouldReturn404=%v\n  Description: %s",
					tc.name, tc.want404, got404, tc.why)
				return
			}

			// Report the HTTP status the decision corresponds to.
			status := 200
			if got404 {
				status = 404
			}
			t.Logf("✓ %s: correctly returns %d", tc.name, status)
		})
	}
}

// TestHasChildrenLogic checks how the outcome of a single list call is
// interpreted when deciding whether a path has children: a received entry
// means "yes", io.EOF means "no". The interpretation is evaluated inline; the
// bucket and prefix values only label the scenarios and are not consulted.
func TestHasChildrenLogic(t *testing.T) {
	// singleChild builds a list response carrying one regular-file entry.
	singleChild := func() *filer_pb.ListEntriesResponse {
		return &filer_pb.ListEntriesResponse{
			Entry: &filer_pb.Entry{
				Name:        "file.parquet",
				IsDirectory: false,
			},
		}
	}

	cases := []struct {
		name   string
		bucket string
		prefix string
		resp   *filer_pb.ListEntriesResponse
		err    error
		want   bool
		why    string
	}{
		{
			name:   "Directory with children",
			bucket: "test-bucket",
			prefix: "dataset",
			resp:   singleChild(),
			err:    nil,
			want:   true,
			why:    "Should return true when at least one child exists",
		},
		{
			name:   "Empty directory",
			bucket: "test-bucket",
			prefix: "empty-dir",
			resp:   nil,
			err:    io.EOF,
			want:   false,
			why:    "Should return false when no children exist (EOF)",
		},
		{
			name:   "Directory with leading slash in prefix",
			bucket: "test-bucket",
			prefix: "/dataset",
			resp:   singleChild(),
			err:    nil,
			want:   true,
			why:    "Should handle leading slashes correctly",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// The hasChildren helper is expected to:
			//   1. trim leading slashes from the prefix
			//   2. list with Limit=1
			//   3. report true if any entry is received
			//   4. report false if EOF is received
			// Only points 3 and 4 are modelled by the check below.
			var got bool
			switch {
			case tc.err == io.EOF:
				got = false
			case tc.err == nil && tc.resp != nil:
				got = true
			}

			if got != tc.want {
				t.Errorf("hasChildren logic mismatch for %s:\n  Expected: %v\n  Got: %v\n  Description: %s",
					tc.name, tc.want, got, tc.why)
				return
			}
			t.Logf("✓ %s: correctly returns %v", tc.name, got)
		})
	}
}

// TestImplicitDirectoryEdgeCases records the edge-case scenarios for implicit
// directory detection together with the behaviour expected for each one. It
// makes no assertions: every scenario is only written to the test log.
func TestImplicitDirectoryEdgeCases(t *testing.T) {
	type edgeCase struct {
		name   string
		given  string
		expect string
	}

	catalogue := []edgeCase{
		{
			name:   "PyArrow write_dataset creates 0-byte files",
			given:  "PyArrow creates 'dataset' as 0-byte file, then writes 'dataset/file.parquet'",
			expect: "HEAD dataset → 404 (has children), s3fs uses LIST → correctly identifies as directory",
		},
		{
			name:   "Filer creates actual directories",
			given:  "Filer creates 'dataset' as actual directory with IsDirectory=true",
			expect: "HEAD dataset → 404 (has children), s3fs uses LIST → correctly identifies as directory",
		},
		{
			name:   "Empty file edge case",
			given:  "User creates 'empty.txt' as 0-byte file with no children",
			expect: "HEAD empty.txt → 200 (no children), s3fs correctly reports as file",
		},
		{
			name:   "Explicit directory request",
			given:  "User requests 'dataset/' with trailing slash",
			expect: "HEAD dataset/ → 200 (explicit directory request), normal directory behavior",
		},
		{
			name:   "Versioned bucket",
			given:  "Bucket has versioning enabled",
			expect: "HEAD dataset → 200 (skip implicit dir check), versioned semantics apply",
		},
		{
			name:   "AWS S3 compatibility",
			given:  "Only 'dataset/file.txt' exists, no marker at 'dataset'",
			expect: "HEAD dataset → 404 (object doesn't exist), matches AWS S3 behavior",
		},
	}

	for i := range catalogue {
		ec := catalogue[i]
		t.Run(ec.name, func(t *testing.T) {
			t.Logf("Scenario: %s", ec.given)
			t.Logf("Expected: %s", ec.expect)
		})
	}
}

// TestImplicitDirectoryIntegration is a placeholder for the server-backed
// integration test and always skips; only the skip reason depends on whether
// the test binary runs in short mode.
// Run with: cd test/s3/parquet && make test-implicit-dir-with-server
func TestImplicitDirectoryIntegration(t *testing.T) {
	reason := "Integration test - run manually with: cd test/s3/parquet && make test-implicit-dir-with-server"
	if testing.Short() {
		reason = "Skipping integration test in short mode"
	}
	t.Skip(reason)
}

// BenchmarkHasChildrenCheck is a placeholder for measuring the cost of the
// hasChildren check. It skips unconditionally because it needs a full filer
// setup to measure anything.
func BenchmarkHasChildrenCheck(b *testing.B) {
	// Intended measurement: the overhead added by the hasChildren check.
	// Expected: ~1-5ms per call (one gRPC LIST request with Limit=1)
	b.Log("Benchmark - requires full filer setup")
	b.SkipNow()
}