path: root/weed/s3api/s3api_streaming_copy.go
author    Chris Lu <chrislusf@users.noreply.github.com>    2025-12-02 09:24:31 -0800
committer GitHub <noreply@github.com>    2025-12-02 09:24:31 -0800
commit    733ca8e6df3de57e2dcc0923fb2f166d3222921d (patch)
tree      89fe56d8a72775ba40afb2c2066464f9995dbf61 /weed/s3api/s3api_streaming_copy.go
parent    099a351f3b084454f0d437b2d967c0f4a3bb9e1f (diff)
Fix SSE-S3 copy: preserve encryption metadata and set chunk SSE type (#7598)
* Fix SSE-S3 copy: preserve encryption metadata and set chunk SSE type

  Fixes GitHub #7562: copying objects between encrypted buckets was failing. Root causes:
  1. processMetadataBytes was re-adding SSE headers from the source entry, undoing the encryption header filtering. Now uses dstEntry.Extended, which is already filtered.
  2. SSE-S3 streaming copy returned nil metadata. Now properly generates and returns SSE-S3 destination metadata (SeaweedFSSSES3Key, AES256 header) via ExecuteStreamingCopyWithMetadata.
  3. Chunks created during streaming copy didn't have SseType set. Now sets SseType and per-chunk SseMetadata with chunk-specific IVs for SSE-S3, enabling proper decryption on GetObject.

* Address review: make SSE-S3 metadata serialization failures fatal errors
  - In executeEncryptCopy: return an error instead of just logging if SerializeSSES3Metadata fails
  - In createChunkFromData: return an error if chunk SSE-S3 metadata serialization fails
  This ensures objects/chunks are never created without proper encryption metadata, preventing unreadable/corrupted data.

* fmt

* Refactor: reuse function names instead of creating WithMetadata variants
  - Change ExecuteStreamingCopy to return (*EncryptionSpec, error) directly
  - Remove the ExecuteStreamingCopyWithMetadata wrapper
  - Change executeStreamingReencryptCopy to return (*EncryptionSpec, error)
  - Remove the executeStreamingReencryptCopyWithMetadata wrapper
  - Update callers to ignore the encryption spec with _ where it is not needed

* Add TODO documenting the large-file SSE-S3 copy limitation
  The streaming copy approach encrypts the entire stream with a single IV but stores the data in chunks with per-chunk IVs. This causes decryption issues for large files; small inline files work correctly. This is a known architectural issue that needs separate work to fix.

* Use chunk-by-chunk encryption for SSE-S3 copy (consistent with SSE-C/SSE-KMS)
  Instead of streaming encryption (which had IV mismatch issues for multi-chunk files), SSE-S3 now uses the same chunk-by-chunk approach as SSE-C and SSE-KMS:
  1. Extended copyMultipartCrossEncryption to handle SSE-S3:
     - Added SSE-S3 source decryption in copyCrossEncryptionChunk
     - Added SSE-S3 destination encryption with per-chunk IVs
     - Added object-level metadata generation for SSE-S3 destinations
  2. Updated routing in executeEncryptCopy/executeDecryptCopy/executeReencryptCopy to use copyMultipartCrossEncryption for all SSE-S3 scenarios
  3. Removed the streaming copy functions (shouldUseStreamingCopy, executeStreamingReencryptCopy), as they are no longer used
  4. Added a large-file (1MB) integration test to verify that chunk-by-chunk copy works
  This ensures consistent behavior across all SSE types and fixes the data corruption that occurred with large files in the streaming copy approach.

* fmt

* fmt

* Address review: fail explicitly if SSE-S3 metadata is missing
  Instead of silently ignoring missing SSE-S3 metadata (which could create unreadable objects), the copy operation now fails with a clear error message if:
  - the first chunk is missing
  - the first chunk does not have the SSE-S3 type
  - the first chunk has empty SSE metadata
  - deserialization fails

* Address review: improve comment to reflect the full scope of chunk creation

* Address review: fail explicitly if baseIV is empty for SSE-S3 chunk encryption
  If DestinationIV is not set when encrypting SSE-S3 chunks, the chunk would be created without SseMetadata, causing GetObject decryption to fail later. Now fails explicitly with a clear error message.
  Note: calculateIVWithOffset returns ([]byte, int), not ([]byte, error) - the int is a skip amount for intra-block alignment, not an error code.

* Address review: handle 0-byte files in SSE-S3 copy
  For 0-byte files there are no chunks to get metadata from, so generate an IV for the object-level metadata to ensure even empty files are properly marked as SSE-S3 encrypted. Also validate that we do not have a non-empty file with no chunks (which would indicate an internal error).
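The per-chunk IV derivation mentioned above can be illustrated with a minimal, self-contained sketch. It approximates what calculateIVWithOffset is described as doing for CTR-style encryption (advance the block counter by offset/16 and report the intra-block remainder as an int, not an error); the name deriveIVWithOffset and the "low 8 bytes as big-endian counter" layout are assumptions for illustration, not the actual implementation in weed/s3api.

    package main

    import (
        "encoding/binary"
        "fmt"
    )

    const aesBlockSize = 16

    // deriveIVWithOffset mimics the described contract of calculateIVWithOffset:
    // it returns a chunk-specific IV plus an intra-block skip count (not an error).
    // Assumption: the low 8 bytes of the 16-byte IV act as a big-endian block counter.
    func deriveIVWithOffset(baseIV []byte, offset int64) ([]byte, int) {
        iv := make([]byte, len(baseIV))
        copy(iv, baseIV)

        blocks := uint64(offset) / aesBlockSize
        skip := int(uint64(offset) % aesBlockSize)

        // Advance the counter portion by the number of whole AES blocks before offset.
        ctr := binary.BigEndian.Uint64(iv[8:])
        binary.BigEndian.PutUint64(iv[8:], ctr+blocks)
        return iv, skip
    }

    func main() {
        baseIV := make([]byte, aesBlockSize)                 // all-zero base IV, demo only
        iv, skip := deriveIVWithOffset(baseIV, 4*1024*1024)  // chunk starting at 4 MiB
        fmt.Printf("chunk IV: %x, intra-block skip: %d\n", iv, skip)
    }

In the diff below the skip value is discarded with _, and the base IV itself is validated up front (hence the explicit error when DestinationIV is empty).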
Diffstat (limited to 'weed/s3api/s3api_streaming_copy.go')
-rw-r--r--   weed/s3api/s3api_streaming_copy.go   60
1 file changed, 51 insertions(+), 9 deletions(-)
diff --git a/weed/s3api/s3api_streaming_copy.go b/weed/s3api/s3api_streaming_copy.go
index 457986858..94729c003 100644
--- a/weed/s3api/s3api_streaming_copy.go
+++ b/weed/s3api/s3api_streaming_copy.go
@@ -59,18 +59,19 @@ func NewStreamingCopyManager(s3a *S3ApiServer) *StreamingCopyManager {
}
}
-// ExecuteStreamingCopy performs a streaming copy operation
-func (scm *StreamingCopyManager) ExecuteStreamingCopy(ctx context.Context, entry *filer_pb.Entry, r *http.Request, dstPath string, state *EncryptionState) ([]*filer_pb.FileChunk, error) {
+// ExecuteStreamingCopy performs a streaming copy operation and returns the encryption spec
+// The encryption spec is needed for SSE-S3 to properly set destination metadata (fixes GitHub #7562)
+func (scm *StreamingCopyManager) ExecuteStreamingCopy(ctx context.Context, entry *filer_pb.Entry, r *http.Request, dstPath string, state *EncryptionState) ([]*filer_pb.FileChunk, *EncryptionSpec, error) {
// Create streaming copy specification
spec, err := scm.createStreamingSpec(entry, r, state)
if err != nil {
- return nil, fmt.Errorf("create streaming spec: %w", err)
+ return nil, nil, fmt.Errorf("create streaming spec: %w", err)
}
// Create source reader from entry
sourceReader, err := scm.createSourceReader(entry)
if err != nil {
- return nil, fmt.Errorf("create source reader: %w", err)
+ return nil, nil, fmt.Errorf("create source reader: %w", err)
}
defer sourceReader.Close()
@@ -79,11 +80,16 @@ func (scm *StreamingCopyManager) ExecuteStreamingCopy(ctx context.Context, entry
// Create processing pipeline
processedReader, err := scm.createProcessingPipeline(spec)
if err != nil {
- return nil, fmt.Errorf("create processing pipeline: %w", err)
+ return nil, nil, fmt.Errorf("create processing pipeline: %w", err)
}
// Stream to destination
- return scm.streamToDestination(ctx, processedReader, spec, dstPath)
+ chunks, err := scm.streamToDestination(ctx, processedReader, spec, dstPath)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ return chunks, spec.EncryptionSpec, nil
}
// createStreamingSpec creates a streaming specification based on copy parameters
@@ -453,8 +459,8 @@ func (scm *StreamingCopyManager) streamToChunks(ctx context.Context, reader io.R
for {
n, err := reader.Read(buffer)
if n > 0 {
- // Create chunk for this data
- chunk, chunkErr := scm.createChunkFromData(buffer[:n], offset, dstPath)
+ // Create chunk for this data, setting SSE type and per-chunk metadata (including chunk-specific IVs for SSE-S3)
+ chunk, chunkErr := scm.createChunkFromData(buffer[:n], offset, dstPath, spec.EncryptionSpec)
if chunkErr != nil {
return nil, fmt.Errorf("create chunk from data: %w", chunkErr)
}
@@ -474,7 +480,7 @@ func (scm *StreamingCopyManager) streamToChunks(ctx context.Context, reader io.R
}
// createChunkFromData creates a chunk from streaming data
-func (scm *StreamingCopyManager) createChunkFromData(data []byte, offset int64, dstPath string) (*filer_pb.FileChunk, error) {
+func (scm *StreamingCopyManager) createChunkFromData(data []byte, offset int64, dstPath string, encSpec *EncryptionSpec) (*filer_pb.FileChunk, error) {
// Assign new volume
assignResult, err := scm.s3a.assignNewVolume(dstPath)
if err != nil {
@@ -487,6 +493,42 @@ func (scm *StreamingCopyManager) createChunkFromData(data []byte, offset int64,
Size: uint64(len(data)),
}
+ // Set SSE type and metadata on chunk if destination is encrypted
+ // This is critical for GetObject to know to decrypt the data - fixes GitHub #7562
+ if encSpec != nil && encSpec.NeedsEncryption {
+ switch encSpec.DestinationType {
+ case EncryptionTypeSSEC:
+ chunk.SseType = filer_pb.SSEType_SSE_C
+ // SSE-C metadata is handled at object level, not per-chunk for streaming copy
+ case EncryptionTypeSSEKMS:
+ chunk.SseType = filer_pb.SSEType_SSE_KMS
+ // SSE-KMS metadata is handled at object level, not per-chunk for streaming copy
+ case EncryptionTypeSSES3:
+ chunk.SseType = filer_pb.SSEType_SSE_S3
+ // Create per-chunk SSE-S3 metadata with chunk-specific IV
+ if sseKey, ok := encSpec.DestinationKey.(*SSES3Key); ok {
+ // Calculate chunk-specific IV using base IV and chunk offset
+ baseIV := encSpec.DestinationIV
+ if len(baseIV) == 0 {
+ return nil, fmt.Errorf("SSE-S3 encryption requires DestinationIV to be set for chunk at offset %d", offset)
+ }
+ chunkIV, _ := calculateIVWithOffset(baseIV, offset)
+ // Create chunk key with the chunk-specific IV
+ chunkSSEKey := &SSES3Key{
+ Key: sseKey.Key,
+ KeyID: sseKey.KeyID,
+ Algorithm: sseKey.Algorithm,
+ IV: chunkIV,
+ }
+ chunkMetadata, serErr := SerializeSSES3Metadata(chunkSSEKey)
+ if serErr != nil {
+ return nil, fmt.Errorf("failed to serialize chunk SSE-S3 metadata: %w", serErr)
+ }
+ chunk.SseMetadata = chunkMetadata
+ }
+ }
+ }
+
// Set file ID
if err := scm.s3a.setChunkFileId(chunk, assignResult); err != nil {
return nil, err
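For context, here is a hedged sketch of how a caller might consume the new three-value return of ExecuteStreamingCopy. Type and field names (StreamingCopyManager, EncryptionSpec.NeedsEncryption, DestinationType, DestinationKey, EncryptionTypeSSES3, SSES3Key, SerializeSSES3Metadata) are taken from the diff above; the wrapper name copySSES3Streaming and the Extended header keys are placeholders for illustration only, not the handler code from this commit.

    // Sketch only - assumes it sits inside the s3api package with its existing imports.
    func (s3a *S3ApiServer) copySSES3Streaming(ctx context.Context, scm *StreamingCopyManager,
        srcEntry, dstEntry *filer_pb.Entry, r *http.Request, dstPath string, state *EncryptionState) error {

        chunks, encSpec, err := scm.ExecuteStreamingCopy(ctx, srcEntry, r, dstPath, state)
        if err != nil {
            return fmt.Errorf("streaming copy: %w", err)
        }
        dstEntry.Chunks = chunks

        // For SSE-S3 destinations, persist object-level metadata so GetObject can decrypt.
        if encSpec != nil && encSpec.NeedsEncryption && encSpec.DestinationType == EncryptionTypeSSES3 {
            sseKey, ok := encSpec.DestinationKey.(*SSES3Key)
            if !ok {
                return fmt.Errorf("SSE-S3 copy: unexpected destination key type %T", encSpec.DestinationKey)
            }
            keyData, err := SerializeSSES3Metadata(sseKey)
            if err != nil {
                return fmt.Errorf("serialize SSE-S3 metadata: %w", err)
            }
            if dstEntry.Extended == nil {
                dstEntry.Extended = make(map[string][]byte)
            }
            // Placeholder key names: the commit message refers to the SeaweedFSSSES3Key
            // entry and the AES256 server-side-encryption header, whose exact constants
            // live elsewhere in the package.
            dstEntry.Extended["seaweedfs-sse-s3-key"] = keyData
            dstEntry.Extended["x-amz-server-side-encryption"] = []byte("AES256")
        }
        return nil
    }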