path: root/weed/s3api/s3api_sse_chunk_metadata_test.go
package s3api

import (
	"bytes"
	"crypto/aes"
	"crypto/cipher"
	"crypto/rand"
	"encoding/json"
	"io"
	"testing"

	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
)
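
// Background for these tests: in AES-CTR mode the keystream position is a
// function of the IV's counter value, so a chunk beginning at byte offset N
// must be decrypted with the base IV advanced by N/aes.BlockSize blocks.
// calculateIVWithOffset (defined elsewhere in this package) is assumed to
// perform that counter addition. SSE-KMS metadata records the raw chunk
// offset, while SSE-S3 metadata records the pre-adjusted IV itself.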

// TestSSEKMSChunkMetadataAssignment tests that SSE-KMS creates per-chunk metadata
// with correct ChunkOffset values for each chunk (matching the fix in putToFiler)
func TestSSEKMSChunkMetadataAssignment(t *testing.T) {
	kmsKey := SetupTestKMS(t)
	defer kmsKey.Cleanup()

	// Generate SSE-KMS key by encrypting test data (this gives us a real SSEKMSKey)
	encryptionContext := BuildEncryptionContext("test-bucket", "test-object", false)
	testData := "Test data for SSE-KMS chunk metadata validation"
	encryptedReader, sseKMSKey, err := CreateSSEKMSEncryptedReader(bytes.NewReader([]byte(testData)), kmsKey.KeyID, encryptionContext)
	if err != nil {
		t.Fatalf("Failed to create encrypted reader: %v", err)
	}
	// Drain the reader to complete encryption setup
	if _, err := io.ReadAll(encryptedReader); err != nil {
		t.Fatalf("Failed to read encrypted data: %v", err)
	}

	// Serialize the base metadata (what putToFiler receives before chunking)
	baseMetadata, err := SerializeSSEKMSMetadata(sseKMSKey)
	if err != nil {
		t.Fatalf("Failed to serialize base SSE-KMS metadata: %v", err)
	}

	// Simulate multi-chunk upload scenario (what putToFiler does after UploadReaderInChunks)
	simulatedChunks := []*filer_pb.FileChunk{
		{FileId: "chunk1", Offset: 0, Size: 8 * 1024 * 1024},                // 8MB chunk at offset 0
		{FileId: "chunk2", Offset: 8 * 1024 * 1024, Size: 8 * 1024 * 1024},  // 8MB chunk at offset 8MB
		{FileId: "chunk3", Offset: 16 * 1024 * 1024, Size: 4 * 1024 * 1024}, // 4MB chunk at offset 16MB
	}

	// THIS IS THE CRITICAL FIX: Create per-chunk metadata (lines 421-443 in putToFiler)
	for _, chunk := range simulatedChunks {
		chunk.SseType = filer_pb.SSEType_SSE_KMS

		// Create a copy of the SSE-KMS key with chunk-specific offset
		chunkSSEKey := &SSEKMSKey{
			KeyID:             sseKMSKey.KeyID,
			EncryptedDataKey:  sseKMSKey.EncryptedDataKey,
			EncryptionContext: sseKMSKey.EncryptionContext,
			BucketKeyEnabled:  sseKMSKey.BucketKeyEnabled,
			IV:                sseKMSKey.IV,
			ChunkOffset:       chunk.Offset, // Set chunk-specific offset
		}

		// Serialize per-chunk metadata
		chunkMetadata, serErr := SerializeSSEKMSMetadata(chunkSSEKey)
		if serErr != nil {
			t.Fatalf("Failed to serialize SSE-KMS metadata for chunk at offset %d: %v", chunk.Offset, serErr)
		}
		chunk.SseMetadata = chunkMetadata
	}

	// VERIFICATION 1: Each chunk should have different metadata (due to different ChunkOffset)
	metadataSet := make(map[string]bool)
	for i, chunk := range simulatedChunks {
		metadataStr := string(chunk.SseMetadata)
		if metadataSet[metadataStr] {
			t.Errorf("Chunk %d has duplicate metadata (should be unique per chunk)", i)
		}
		metadataSet[metadataStr] = true

		// Deserialize and verify the stored chunk offset (serialized as PartOffset)
		var metadata SSEKMSMetadata
		if err := json.Unmarshal(chunk.SseMetadata, &metadata); err != nil {
			t.Fatalf("Failed to deserialize chunk %d metadata: %v", i, err)
		}

		expectedOffset := chunk.Offset
		if metadata.PartOffset != expectedOffset {
			t.Errorf("Chunk %d: expected PartOffset=%d, got %d", i, expectedOffset, metadata.PartOffset)
		}

		t.Logf("✓ Chunk %d: PartOffset=%d (correct)", i, metadata.PartOffset)
	}

	// VERIFICATION 2: Verify metadata can be deserialized and has correct ChunkOffset
	for i, chunk := range simulatedChunks {
		// Deserialize chunk metadata
		deserializedKey, err := DeserializeSSEKMSMetadata(chunk.SseMetadata)
		if err != nil {
			t.Fatalf("Failed to deserialize chunk %d metadata: %v", i, err)
		}

		// Verify the deserialized key has correct ChunkOffset
		if deserializedKey.ChunkOffset != chunk.Offset {
			t.Errorf("Chunk %d: deserialized ChunkOffset=%d, expected %d",
				i, deserializedKey.ChunkOffset, chunk.Offset)
		}

		// Verify IV is set (should be inherited from base)
		if len(deserializedKey.IV) != aes.BlockSize {
			t.Errorf("Chunk %d: invalid IV length: %d", i, len(deserializedKey.IV))
		}

		// Verify KeyID matches
		if deserializedKey.KeyID != sseKMSKey.KeyID {
			t.Errorf("Chunk %d: KeyID mismatch", i)
		}

		t.Logf("✓ Chunk %d: metadata deserialized successfully (ChunkOffset=%d, KeyID=%s)",
			i, deserializedKey.ChunkOffset, deserializedKey.KeyID)
	}

	// VERIFICATION 3: Ensure base metadata is NOT reused (the bug we're preventing)
	var baseMetadataStruct SSEKMSMetadata
	if err := json.Unmarshal(baseMetadata, &baseMetadataStruct); err != nil {
		t.Fatalf("Failed to deserialize base metadata: %v", err)
	}

	// Base metadata should have ChunkOffset=0
	if baseMetadataStruct.PartOffset != 0 {
		t.Errorf("Base metadata should have PartOffset=0, got %d", baseMetadataStruct.PartOffset)
	}

	// Chunks 2 and 3 should NOT have the same metadata as base (proving we're not reusing)
	for i := 1; i < len(simulatedChunks); i++ {
		if bytes.Equal(simulatedChunks[i].SseMetadata, baseMetadata) {
			t.Errorf("CRITICAL BUG: Chunk %d reuses base metadata (should have per-chunk metadata)", i)
		}
	}

	t.Log("✓ All chunks have unique per-chunk metadata (bug prevented)")
}
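
// Note the contrast between the two schemes exercised here: SSE-KMS (above)
// stores the chunk's byte offset (PartOffset) in metadata and leaves the IV
// untouched, while SSE-S3 (below) pre-computes the offset-adjusted IV and
// stores that directly in each chunk's metadata.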

// TestSSES3ChunkMetadataAssignment tests that SSE-S3 creates per-chunk metadata
// with offset-adjusted IVs for each chunk (matching the fix in putToFiler)
func TestSSES3ChunkMetadataAssignment(t *testing.T) {
	// Initialize global SSE-S3 key manager
	globalSSES3KeyManager = NewSSES3KeyManager()
	defer func() {
		globalSSES3KeyManager = NewSSES3KeyManager()
	}()

	keyManager := GetSSES3KeyManager()
	keyManager.superKey = make([]byte, 32)
	if _, err := rand.Read(keyManager.superKey); err != nil {
		t.Fatalf("Failed to generate super key: %v", err)
	}

	// Generate SSE-S3 key
	sseS3Key, err := GenerateSSES3Key()
	if err != nil {
		t.Fatalf("Failed to generate SSE-S3 key: %v", err)
	}

	// Generate a random base IV
	baseIV := make([]byte, aes.BlockSize)
	if _, err := rand.Read(baseIV); err != nil {
		t.Fatalf("Failed to generate base IV: %v", err)
	}
	sseS3Key.IV = baseIV

	// Serialize base metadata (what putToFiler receives)
	baseMetadata, err := SerializeSSES3Metadata(sseS3Key)
	if err != nil {
		t.Fatalf("Failed to serialize base SSE-S3 metadata: %v", err)
	}

	// Simulate multi-chunk upload scenario (what putToFiler does after UploadReaderInChunks)
	simulatedChunks := []*filer_pb.FileChunk{
		{FileId: "chunk1", Offset: 0, Size: 8 * 1024 * 1024},                // 8MB chunk at offset 0
		{FileId: "chunk2", Offset: 8 * 1024 * 1024, Size: 8 * 1024 * 1024},  // 8MB chunk at offset 8MB
		{FileId: "chunk3", Offset: 16 * 1024 * 1024, Size: 4 * 1024 * 1024}, // 4MB chunk at offset 16MB
	}

	// THIS IS THE CRITICAL FIX: Create per-chunk metadata (lines 444-468 in putToFiler)
	for _, chunk := range simulatedChunks {
		chunk.SseType = filer_pb.SSEType_SSE_S3

		// Calculate the chunk-specific IV from the base IV and chunk offset
		chunkIV, ivErr := calculateIVWithOffset(sseS3Key.IV, chunk.Offset)
		if ivErr != nil {
			t.Fatalf("Failed to calculate IV for chunk at offset %d: %v", chunk.Offset, ivErr)
		}

		// Create a copy of the SSE-S3 key with chunk-specific IV
		chunkSSEKey := &SSES3Key{
			Key:       sseS3Key.Key,
			KeyID:     sseS3Key.KeyID,
			Algorithm: sseS3Key.Algorithm,
			IV:        chunkIV, // Use chunk-specific IV
		}

		// Serialize per-chunk metadata
		chunkMetadata, serErr := SerializeSSES3Metadata(chunkSSEKey)
		if serErr != nil {
			t.Fatalf("Failed to serialize SSE-S3 metadata for chunk at offset %d: %v", chunk.Offset, serErr)
		}
		chunk.SseMetadata = chunkMetadata
	}

	// VERIFICATION 1: Each chunk should have different metadata (due to different IVs)
	metadataSet := make(map[string]bool)
	for i, chunk := range simulatedChunks {
		metadataStr := string(chunk.SseMetadata)
		if metadataSet[metadataStr] {
			t.Errorf("Chunk %d has duplicate metadata (should be unique per chunk)", i)
		}
		metadataSet[metadataStr] = true

		// Deserialize and verify IV
		deserializedKey, err := DeserializeSSES3Metadata(chunk.SseMetadata, keyManager)
		if err != nil {
			t.Fatalf("Failed to deserialize chunk %d metadata: %v", i, err)
		}

		// Calculate the expected IV for this chunk
		expectedIV, ivErr := calculateIVWithOffset(baseIV, chunk.Offset)
		if ivErr != nil {
			t.Fatalf("Failed to calculate expected IV for chunk %d: %v", i, ivErr)
		}
		if !bytes.Equal(deserializedKey.IV, expectedIV) {
			t.Errorf("Chunk %d: IV mismatch\nExpected: %x\nGot: %x",
				i, expectedIV, deserializedKey.IV)
		}

		t.Logf("✓ Chunk %d: IV correctly adjusted for offset=%d", i, chunk.Offset)
	}

	// VERIFICATION 2: Verify decryption works with per-chunk IVs
	for i, chunk := range simulatedChunks {
		// Deserialize chunk metadata
		deserializedKey, err := DeserializeSSES3Metadata(chunk.SseMetadata, keyManager)
		if err != nil {
			t.Fatalf("Failed to deserialize chunk %d metadata: %v", i, err)
		}

		// Simulate encryption/decryption with the chunk's IV
		testData := []byte("Test data for SSE-S3 chunk decryption verification")
		block, err := aes.NewCipher(deserializedKey.Key)
		if err != nil {
			t.Fatalf("Failed to create cipher: %v", err)
		}

		// Encrypt with chunk's IV
		ciphertext := make([]byte, len(testData))
		stream := cipher.NewCTR(block, deserializedKey.IV)
		stream.XORKeyStream(ciphertext, testData)

		// Decrypt with the chunk's IV; the same block can be reused since
		// cipher.Block is stateless and each CTR stream keeps its own counter
		plaintext := make([]byte, len(ciphertext))
		stream2 := cipher.NewCTR(block, deserializedKey.IV)
		stream2.XORKeyStream(plaintext, ciphertext)

		if !bytes.Equal(plaintext, testData) {
			t.Errorf("Chunk %d: decryption failed", i)
		}

		t.Logf("✓ Chunk %d: encryption/decryption successful with chunk-specific IV", i)
	}

	// VERIFICATION 3: Ensure base IV is NOT reused for non-zero offset chunks (the bug we're preventing)
	for i := 1; i < len(simulatedChunks); i++ {
		if bytes.Equal(simulatedChunks[i].SseMetadata, baseMetadata) {
			t.Errorf("CRITICAL BUG: Chunk %d reuses base metadata (should have per-chunk metadata)", i)
		}

		// Verify the chunk metadata carries a different IV than the base IV
		deserializedKey, desErr := DeserializeSSES3Metadata(simulatedChunks[i].SseMetadata, keyManager)
		if desErr != nil {
			t.Fatalf("Failed to deserialize chunk %d metadata: %v", i, desErr)
		}
		if bytes.Equal(deserializedKey.IV, baseIV) {
			t.Errorf("CRITICAL BUG: Chunk %d uses base IV (should use offset-adjusted IV)", i)
		}
	}

	t.Log("✓ All chunks have unique per-chunk IVs (bug prevented)")
}

// TestSSEChunkMetadataComparison tests that the bug (reusing same metadata for all chunks)
// would cause decryption failures, while the fix (per-chunk metadata) works correctly
func TestSSEChunkMetadataComparison(t *testing.T) {
	// Generate a random test key and base IV
	key := make([]byte, 32)
	if _, err := rand.Read(key); err != nil {
		t.Fatalf("Failed to generate key: %v", err)
	}
	baseIV := make([]byte, aes.BlockSize)
	if _, err := rand.Read(baseIV); err != nil {
		t.Fatalf("Failed to generate base IV: %v", err)
	}

	// Create test data for 3 chunks
	chunk0Data := []byte("Chunk 0 data at offset 0")
	chunk1Data := []byte("Chunk 1 data at offset 8MB")
	chunk2Data := []byte("Chunk 2 data at offset 16MB")

	chunkOffsets := []int64{0, 8 * 1024 * 1024, 16 * 1024 * 1024}
	chunkDataList := [][]byte{chunk0Data, chunk1Data, chunk2Data}

	// Scenario 1: BUG - Using same IV for all chunks (what the old code did)
	t.Run("Bug: Reusing base IV causes decryption failures", func(t *testing.T) {
		var encryptedChunks [][]byte

		// Encrypt each chunk with offset-adjusted IV (what encryption does)
		for i, offset := range chunkOffsets {
			adjustedIV, _ := calculateIVWithOffset(baseIV, offset)
			block, _ := aes.NewCipher(key)
			stream := cipher.NewCTR(block, adjustedIV)

			ciphertext := make([]byte, len(chunkDataList[i]))
			stream.XORKeyStream(ciphertext, chunkDataList[i])
			encryptedChunks = append(encryptedChunks, ciphertext)
		}

		// Try to decrypt with base IV (THE BUG)
		for i := range encryptedChunks {
			block, _ := aes.NewCipher(key)
			stream := cipher.NewCTR(block, baseIV) // BUG: Always using base IV

			plaintext := make([]byte, len(encryptedChunks[i]))
			stream.XORKeyStream(plaintext, encryptedChunks[i])

			if i == 0 {
				// Chunk 0 should work (offset 0 means base IV = adjusted IV)
				if !bytes.Equal(plaintext, chunkDataList[i]) {
					t.Errorf("Chunk 0 decryption failed (unexpected)")
				}
			} else {
				// Chunks 1 and 2 should FAIL (wrong IV)
				if bytes.Equal(plaintext, chunkDataList[i]) {
					t.Errorf("BUG NOT REPRODUCED: Chunk %d decrypted correctly with base IV (should fail)", i)
				} else {
					t.Logf("✓ Chunk %d: Correctly failed to decrypt with base IV (bug reproduced)", i)
				}
			}
		}
	})

	// Scenario 2: FIX - Using per-chunk offset-adjusted IVs (what the new code does)
	t.Run("Fix: Per-chunk IVs enable correct decryption", func(t *testing.T) {
		var encryptedChunks [][]byte
		var chunkIVs [][]byte

		// Encrypt each chunk with offset-adjusted IV
		for i, offset := range chunkOffsets {
			adjustedIV, _ := calculateIVWithOffset(baseIV, offset)
			chunkIVs = append(chunkIVs, adjustedIV)

			block, _ := aes.NewCipher(key)
			stream := cipher.NewCTR(block, adjustedIV)

			ciphertext := make([]byte, len(chunkDataList[i]))
			stream.XORKeyStream(ciphertext, chunkDataList[i])
			encryptedChunks = append(encryptedChunks, ciphertext)
		}

		// Decrypt with per-chunk IVs (THE FIX)
		for i := range encryptedChunks {
			block, _ := aes.NewCipher(key)
			stream := cipher.NewCTR(block, chunkIVs[i]) // FIX: Using per-chunk IV

			plaintext := make([]byte, len(encryptedChunks[i]))
			stream.XORKeyStream(plaintext, encryptedChunks[i])

			if !bytes.Equal(plaintext, chunkDataList[i]) {
				t.Errorf("Chunk %d decryption failed with per-chunk IV (unexpected)", i)
			} else {
				t.Logf("✓ Chunk %d: Successfully decrypted with per-chunk IV", i)
			}
		}
	})
}
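
// TestCTROffsetEquivalenceSketch is a minimal, self-contained sketch (not tied
// to putToFiler or calculateIVWithOffset) of the property the tests above rely
// on: for a block-aligned offset N, encrypting with the base IV and skipping N
// bytes of keystream is equivalent to starting a fresh CTR stream whose IV was
// advanced, as a big-endian counter, by N/aes.BlockSize blocks. The local
// counter addition below is an illustrative assumption, not the package helper.
func TestCTROffsetEquivalenceSketch(t *testing.T) {
	key := make([]byte, 32)
	if _, err := rand.Read(key); err != nil {
		t.Fatalf("Failed to generate key: %v", err)
	}
	baseIV := make([]byte, aes.BlockSize)
	if _, err := rand.Read(baseIV); err != nil {
		t.Fatalf("Failed to generate IV: %v", err)
	}

	const offset = 4 * aes.BlockSize // block-aligned offset for this sketch
	data := []byte("CTR offset equivalence: same keystream either way")

	// Method 1: run one CTR stream from the base IV over offset+len(data)
	// bytes and keep only the tail that starts at `offset`.
	full := make([]byte, offset+len(data))
	copy(full[offset:], data)
	block, err := aes.NewCipher(key)
	if err != nil {
		t.Fatalf("Failed to create cipher: %v", err)
	}
	out := make([]byte, len(full))
	cipher.NewCTR(block, baseIV).XORKeyStream(out, full)

	// Method 2: advance the IV by offset/aes.BlockSize blocks (big-endian
	// addition over the full 16-byte IV, matching how Go's CTR mode
	// increments its counter) and encrypt the chunk directly.
	adjusted := make([]byte, aes.BlockSize)
	copy(adjusted, baseIV)
	carry := uint64(offset / aes.BlockSize)
	for i := aes.BlockSize - 1; i >= 0 && carry > 0; i-- {
		sum := uint64(adjusted[i]) + (carry & 0xff)
		adjusted[i] = byte(sum)
		carry = (carry >> 8) + (sum >> 8)
	}
	direct := make([]byte, len(data))
	cipher.NewCTR(block, adjusted).XORKeyStream(direct, data)

	if !bytes.Equal(direct, out[offset:]) {
		t.Error("offset-adjusted IV did not reproduce the keystream tail")
	}
}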