path: root/test/s3
author    Chris Lu <chrislusf@users.noreply.github.com>  2025-12-02 09:24:31 -0800
committer GitHub <noreply@github.com>  2025-12-02 09:24:31 -0800
commit    733ca8e6df3de57e2dcc0923fb2f166d3222921d (patch)
tree      89fe56d8a72775ba40afb2c2066464f9995dbf61 /test/s3
parent    099a351f3b084454f0d437b2d967c0f4a3bb9e1f (diff)
Fix SSE-S3 copy: preserve encryption metadata and set chunk SSE type (#7598)
* Fix SSE-S3 copy: preserve encryption metadata and set chunk SSE type

  Fixes GitHub #7562: copying objects between encrypted buckets was failing.

  Root causes:
  1. processMetadataBytes was re-adding SSE headers from the source entry, undoing the encryption header filtering. It now uses dstEntry.Extended, which is already filtered.
  2. The SSE-S3 streaming copy returned nil metadata. It now generates and returns SSE-S3 destination metadata (SeaweedFSSSES3Key, AES256 header) via ExecuteStreamingCopyWithMetadata.
  3. Chunks created during the streaming copy did not have SseType set. SseType and per-chunk SseMetadata with chunk-specific IVs are now set for SSE-S3, enabling proper decryption on GetObject.

* Address review: make SSE-S3 metadata serialization failures fatal errors
  - In executeEncryptCopy: return an error instead of just logging if SerializeSSES3Metadata fails.
  - In createChunkFromData: return an error if chunk SSE-S3 metadata serialization fails.
  This ensures objects/chunks are never created without proper encryption metadata, preventing unreadable/corrupted data.

* fmt

* Refactor: reuse function names instead of creating WithMetadata variants
  - Change ExecuteStreamingCopy to return (*EncryptionSpec, error) directly.
  - Remove the ExecuteStreamingCopyWithMetadata wrapper.
  - Change executeStreamingReencryptCopy to return (*EncryptionSpec, error).
  - Remove the executeStreamingReencryptCopyWithMetadata wrapper.
  - Update callers to ignore the encryption spec with _ where it is not needed.

* Add TODO documenting the large-file SSE-S3 copy limitation
  The streaming copy approach encrypts the entire stream with a single IV but stores the data in chunks with per-chunk IVs. This causes decryption issues for large files; small inline files work correctly. This is a known architectural issue that needs separate work to fix.

* Use chunk-by-chunk encryption for SSE-S3 copy (consistent with SSE-C/SSE-KMS)
  Instead of streaming encryption (which had IV mismatch issues for multi-chunk files), SSE-S3 now uses the same chunk-by-chunk approach as SSE-C and SSE-KMS:
  1. Extended copyMultipartCrossEncryption to handle SSE-S3:
     - Added SSE-S3 source decryption in copyCrossEncryptionChunk.
     - Added SSE-S3 destination encryption with per-chunk IVs.
     - Added object-level metadata generation for SSE-S3 destinations.
  2. Updated routing in executeEncryptCopy/executeDecryptCopy/executeReencryptCopy to use copyMultipartCrossEncryption for all SSE-S3 scenarios.
  3. Removed the streaming copy functions (shouldUseStreamingCopy, executeStreamingReencryptCopy) as they are no longer used.
  4. Added a large-file (1MB) integration test to verify that the chunk-by-chunk copy works.
  This ensures consistent behavior across all SSE types and fixes the data corruption that occurred with large files under the streaming copy approach.

* fmt

* fmt

* Address review: fail explicitly if SSE-S3 metadata is missing
  Instead of silently ignoring missing SSE-S3 metadata (which could create unreadable objects), the copy operation now fails with a clear error message if:
  - the first chunk is missing
  - the first chunk does not have the SSE-S3 type
  - the first chunk has empty SSE metadata
  - deserialization fails

* Address review: improve comment to reflect the full scope of chunk creation

* Address review: fail explicitly if baseIV is empty for SSE-S3 chunk encryption
  If DestinationIV is not set when encrypting SSE-S3 chunks, the chunk would be created without SseMetadata, causing GetObject decryption to fail later. This now fails explicitly with a clear error message.
  Note: calculateIVWithOffset returns ([]byte, int), not ([]byte, error) - the int is a skip amount for intra-block alignment, not an error code.

* Address review: handle 0-byte files in SSE-S3 copy
  For 0-byte files there are no chunks to take metadata from, so an IV is generated for the object-level metadata to ensure even empty files are properly marked as SSE-S3 encrypted. Also validate that a non-empty file does not end up with no chunks (which would indicate an internal error).
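The note about calculateIVWithOffset returning ([]byte, int) benefits from a concrete illustration. The sketch below is not SeaweedFS's implementation; deriveIVWithOffset and the zero base IV in main are assumptions made purely for illustration. It shows how a CTR-style base IV can be advanced by the whole AES blocks covered by a chunk's byte offset, with the remainder returned as an intra-block skip amount rather than an error.

package main

import (
	"crypto/aes"
	"fmt"
)

// deriveIVWithOffset illustrates the idea behind a helper like
// calculateIVWithOffset: treat the base IV as a big-endian counter,
// advance it by the number of whole AES blocks covered by offset,
// and report how many bytes inside the first block must be skipped
// so the CTR keystream lines up with the chunk's position.
// (Hypothetical sketch, not the SeaweedFS function.)
func deriveIVWithOffset(baseIV []byte, offset int64) ([]byte, int) {
	iv := make([]byte, len(baseIV))
	copy(iv, baseIV)

	blocks := uint64(offset / aes.BlockSize) // whole blocks to advance
	skip := int(offset % aes.BlockSize)      // intra-block alignment, not an error code

	// Add blocks to the IV as a big-endian integer, propagating carries.
	for i := len(iv) - 1; i >= 0 && blocks > 0; i-- {
		sum := uint64(iv[i]) + (blocks & 0xff)
		iv[i] = byte(sum)
		blocks = (blocks >> 8) + (sum >> 8)
	}
	return iv, skip
}

func main() {
	base := make([]byte, aes.BlockSize) // all-zero base IV, for illustration only
	iv, skip := deriveIVWithOffset(base, 4*1024*1024+5)
	fmt.Printf("iv=%x skip=%d\n", iv, skip)
}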
Diffstat (limited to 'test/s3')
-rw-r--r--  test/s3/sse/s3_sse_integration_test.go  |  72
1 file changed, 72 insertions, 0 deletions
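As background for the test diff below, here is a minimal sketch of the chunk-by-chunk SSE-S3 re-encryption idea described in the commit message. It is not the actual copyMultipartCrossEncryption code: Chunk, EncryptedChunk, and reencryptChunksSSES3 are hypothetical stand-ins, and the zero AES-256 key in main is for illustration only. The point it demonstrates is that each chunk gets its own random IV, recorded alongside the ciphertext, instead of one streaming IV shared across chunks.

package main

import (
	"crypto/aes"
	"crypto/cipher"
	"crypto/rand"
	"fmt"
)

// Chunk is a stand-in for a stored object chunk; the real SeaweedFS
// chunk type carries SseType and serialized SseMetadata per chunk.
type Chunk struct {
	Offset    int64
	Plaintext []byte // already decrypted according to the source's SSE type
}

// EncryptedChunk is what the copy would write to the destination:
// ciphertext plus the per-chunk IV recorded so GetObject can decrypt.
type EncryptedChunk struct {
	Offset     int64
	Ciphertext []byte
	IV         []byte
}

// reencryptChunksSSES3 sketches the chunk-by-chunk SSE-S3 encrypt step:
// every chunk gets its own random IV, so multi-chunk objects never share
// a single streaming IV (the mismatch the commit removes).
func reencryptChunksSSES3(key []byte, chunks []Chunk) ([]EncryptedChunk, error) {
	block, err := aes.NewCipher(key)
	if err != nil {
		return nil, err
	}
	out := make([]EncryptedChunk, 0, len(chunks))
	for _, c := range chunks {
		iv := make([]byte, aes.BlockSize)
		if _, err := rand.Read(iv); err != nil {
			return nil, fmt.Errorf("generate chunk IV: %w", err)
		}
		ct := make([]byte, len(c.Plaintext))
		cipher.NewCTR(block, iv).XORKeyStream(ct, c.Plaintext)
		out = append(out, EncryptedChunk{Offset: c.Offset, Ciphertext: ct, IV: iv})
	}
	return out, nil
}

func main() {
	key := make([]byte, 32) // AES-256 key; zero key for illustration only
	chunks := []Chunk{{Offset: 0, Plaintext: []byte("hello")}, {Offset: 5, Plaintext: []byte("world")}}
	enc, err := reencryptChunksSSES3(key, chunks)
	if err != nil {
		panic(err)
	}
	fmt.Printf("encrypted %d chunks, first IV %x\n", len(enc), enc[0].IV)
}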
diff --git a/test/s3/sse/s3_sse_integration_test.go b/test/s3/sse/s3_sse_integration_test.go
index 7b939ea76..4b7eb0ddc 100644
--- a/test/s3/sse/s3_sse_integration_test.go
+++ b/test/s3/sse/s3_sse_integration_test.go
@@ -2082,6 +2082,78 @@ func TestCopyToBucketDefaultEncryptedRegression(t *testing.T) {
require.NoError(t, err, "Failed to read object")
assertDataEqual(t, testData, data, "Data mismatch")
})
+
+ t.Run("LargeFileCopyEncrypted_ToTemp_ToEncrypted", func(t *testing.T) {
+ // Test with large file (1MB) to exercise chunk-by-chunk copy path
+ // This verifies consistent behavior with SSE-C and SSE-KMS
+ largeTestData := generateTestData(1024 * 1024) // 1MB
+ objectKey := "large-file-test.bin"
+
+ // Step 1: Upload large object to source bucket (will be automatically encrypted)
+ _, err = client.PutObject(ctx, &s3.PutObjectInput{
+ Bucket: aws.String(srcBucket),
+ Key: aws.String(objectKey),
+ Body: bytes.NewReader(largeTestData),
+ })
+ require.NoError(t, err, "Failed to upload large file to source bucket")
+
+ // Verify source object is encrypted
+ srcHead, err := client.HeadObject(ctx, &s3.HeadObjectInput{
+ Bucket: aws.String(srcBucket),
+ Key: aws.String(objectKey),
+ })
+ require.NoError(t, err, "Failed to HEAD source object")
+ assert.Equal(t, types.ServerSideEncryptionAes256, srcHead.ServerSideEncryption,
+ "Source object should be SSE-S3 encrypted")
+
+ // Step 2: Copy to temp bucket (unencrypted) - exercises chunk-by-chunk decrypt
+ _, err = client.CopyObject(ctx, &s3.CopyObjectInput{
+ Bucket: aws.String(tempBucket),
+ Key: aws.String(objectKey),
+ CopySource: aws.String(fmt.Sprintf("%s/%s", srcBucket, objectKey)),
+ })
+ require.NoError(t, err, "Failed to copy large file to temp bucket")
+
+ // Verify temp object is unencrypted and data is correct
+ tempGet, err := client.GetObject(ctx, &s3.GetObjectInput{
+ Bucket: aws.String(tempBucket),
+ Key: aws.String(objectKey),
+ })
+ require.NoError(t, err, "Failed to GET temp object")
+ tempData, err := io.ReadAll(tempGet.Body)
+ tempGet.Body.Close()
+ require.NoError(t, err, "Failed to read temp object")
+ assertDataEqual(t, largeTestData, tempData, "Temp object data mismatch after decrypt")
+
+ // Step 3: Copy from temp bucket to dest bucket (with default encryption)
+ // This exercises chunk-by-chunk encrypt copy
+ _, err = client.CopyObject(ctx, &s3.CopyObjectInput{
+ Bucket: aws.String(dstBucket),
+ Key: aws.String(objectKey),
+ CopySource: aws.String(fmt.Sprintf("%s/%s", tempBucket, objectKey)),
+ })
+ require.NoError(t, err, "Failed to copy large file to destination bucket")
+
+ // Verify destination object is encrypted
+ dstHead, err := client.HeadObject(ctx, &s3.HeadObjectInput{
+ Bucket: aws.String(dstBucket),
+ Key: aws.String(objectKey),
+ })
+ require.NoError(t, err, "Failed to HEAD destination object")
+ assert.Equal(t, types.ServerSideEncryptionAes256, dstHead.ServerSideEncryption,
+ "Destination object should be SSE-S3 encrypted via bucket default")
+
+ // Verify destination object content is correct after re-encryption
+ dstGet, err := client.GetObject(ctx, &s3.GetObjectInput{
+ Bucket: aws.String(dstBucket),
+ Key: aws.String(objectKey),
+ })
+ require.NoError(t, err, "Failed to GET destination object")
+ dstData, err := io.ReadAll(dstGet.Body)
+ dstGet.Body.Close()
+ require.NoError(t, err, "Failed to read destination object")
+ assertDataEqual(t, largeTestData, dstData, "Large file data mismatch after re-encryption")
+ })
}
// REGRESSION TESTS FOR CRITICAL BUGS FIXED