aboutsummaryrefslogtreecommitdiff
path: root/weed/s3api
diff options
context:
space:
mode:
authorChris Lu <chrislusf@users.noreply.github.com>2025-12-01 17:41:42 -0800
committerGitHub <noreply@github.com>2025-12-01 17:41:42 -0800
commit099a351f3b084454f0d437b2d967c0f4a3bb9e1f (patch)
tree61428c436867a8ccc21335fdbba7c1285dc28276 /weed/s3api
parent1a67e6118ef5953bfd98d29d8c0b59a53f5484ff (diff)
downloadseaweedfs-099a351f3b084454f0d437b2d967c0f4a3bb9e1f.tar.xz
seaweedfs-099a351f3b084454f0d437b2d967c0f4a3bb9e1f.zip
Fix issue #6847: S3 chunked encoding includes headers in stored content (#7595)
* Fix issue #6847: S3 chunked encoding includes headers in stored content - Add hasTrailer flag to s3ChunkedReader to track trailer presence - Update state transition logic to properly handle trailers in unsigned streaming - Enhance parseChunkChecksum to handle multiple trailer lines - Skip checksum verification for unsigned streaming uploads - Add test case for mixed format handling (unsigned headers with signed chunks) - Remove redundant CRLF reading in trailer processing This fixes the issue where chunk-signature and x-amz headers were appearing in stored file content when using chunked encoding with newer AWS SDKs. * Fix checksum validation for unsigned streaming uploads - Always validate checksum for data integrity regardless of signing - Correct checksum value in test case - Addresses PR review feedback about checksum verification * Add warning log when multiple checksum headers found in trailer - Log a warning when multiple valid checksum headers appear in trailers - Uses last checksum header as suggested by CodeRabbit reviewer - Improves debugging for edge cases with multiple checksum algorithms * Improve trailer parsing robustness in parseChunkChecksum - Remove redundant trimTrailingWhitespace call since readChunkLine already trims - Use bytes.TrimSpace for both key and value to handle whitespace around colon separator - Follows HTTP header specifications for optional whitespace around separators - Addresses Gemini Code Assist review feedback
Diffstat (limited to 'weed/s3api')
-rw-r--r--weed/s3api/chunked_reader_v4.go77
1 files changed, 38 insertions, 39 deletions
diff --git a/weed/s3api/chunked_reader_v4.go b/weed/s3api/chunked_reader_v4.go
index c21b57009..f841c3e1e 100644
--- a/weed/s3api/chunked_reader_v4.go
+++ b/weed/s3api/chunked_reader_v4.go
@@ -116,6 +116,7 @@ func (iam *IdentityAccessManagement) newChunkedReader(req *http.Request) (io.Rea
}
checkSumWriter := getCheckSumWriter(checksumAlgorithm)
+ hasTrailer := amzTrailerHeader != ""
return &s3ChunkedReader{
cred: credential,
@@ -129,6 +130,7 @@ func (iam *IdentityAccessManagement) newChunkedReader(req *http.Request) (io.Rea
checkSumWriter: checkSumWriter,
state: readChunkHeader,
iam: iam,
+ hasTrailer: hasTrailer,
}, s3err.ErrNone
}
@@ -170,6 +172,7 @@ type s3ChunkedReader struct {
n uint64 // Unread bytes in chunk
err error
iam *IdentityAccessManagement
+ hasTrailer bool
}
// Read chunk reads the chunk token signature portion.
@@ -281,10 +284,10 @@ func (cr *s3ChunkedReader) Read(buf []byte) (n int, err error) {
}
// If we're using unsigned streaming upload, there is no signature to verify at each chunk.
- if cr.chunkSignature != "" {
- cr.state = verifyChunk
- } else if cr.lastChunk {
+ if cr.lastChunk && cr.hasTrailer {
cr.state = readTrailerChunk
+ } else if cr.chunkSignature != "" {
+ cr.state = verifyChunk
} else {
cr.state = readChunkHeader
}
@@ -304,7 +307,11 @@ func (cr *s3ChunkedReader) Read(buf []byte) (n int, err error) {
// This implementation currently only supports the first case.
// TODO: Implement the second case (signed upload with additional checksum computation for each chunk)
- extractedCheckSumAlgorithm, extractedChecksum := parseChunkChecksum(cr.reader)
+ extractedCheckSumAlgorithm, extractedChecksum, err := parseChunkChecksum(cr.reader)
+ if err != nil {
+ cr.err = err
+ return 0, err
+ }
if extractedCheckSumAlgorithm.String() != cr.checkSumAlgorithm {
errorMessage := fmt.Sprintf("checksum algorithm in trailer '%s' does not match the one advertised in the header '%s'", extractedCheckSumAlgorithm.String(), cr.checkSumAlgorithm)
@@ -313,6 +320,7 @@ func (cr *s3ChunkedReader) Read(buf []byte) (n int, err error) {
return 0, cr.err
}
+ // Validate checksum for data integrity (required for both signed and unsigned streaming with trailers)
computedChecksum := cr.checkSumWriter.Sum(nil)
base64Checksum := base64.StdEncoding.EncodeToString(computedChecksum)
if string(extractedChecksum) != base64Checksum {
@@ -324,11 +332,6 @@ func (cr *s3ChunkedReader) Read(buf []byte) (n int, err error) {
// TODO: Extract signature from trailer chunk and verify it.
// For now, we just read the trailer chunk and discard it.
- // Reading remaining CRLF.
- for i := 0; i < 2; i++ {
- cr.err = readCRLF(cr.reader)
- }
-
cr.state = eofChunk
case readChunk:
@@ -506,41 +509,37 @@ func parseS3ChunkExtension(buf []byte) ([]byte, []byte) {
return buf[:semi], parseChunkSignature(buf[semi:])
}
-func parseChunkChecksum(b *bufio.Reader) (ChecksumAlgorithm, []byte) {
- // When using unsigned upload, this would be the raw contents of the trailer chunk:
- //
- // x-amz-checksum-crc32:YABb/g==\n\r\n\r\n // Trailer chunk (note optional \n character)
- // \r\n // CRLF
- //
- // When using signed upload with an additional checksum algorithm, this would be the raw contents of the trailer chunk:
- //
- // x-amz-checksum-crc32:YABb/g==\n\r\n // Trailer chunk (note optional \n character)
- // trailer-signature\r\n
- // \r\n // CRLF
- //
-
- // x-amz-checksum-crc32:YABb/g==\n
- bytesRead, err := readChunkLine(b)
- if err != nil {
- return ChecksumAlgorithmNone, nil
- }
+func parseChunkChecksum(b *bufio.Reader) (ChecksumAlgorithm, []byte, error) {
+ // Read trailer lines until empty line
+ var checksumAlgorithm ChecksumAlgorithm
+ var checksum []byte
- // Split on ':'
- parts := bytes.SplitN(bytesRead, []byte(":"), 2)
- checksumKey := string(parts[0])
- checksumValue := parts[1]
+ for {
+ bytesRead, err := readChunkLine(b)
+ if err != nil {
+ return ChecksumAlgorithmNone, nil, err
+ }
- // Discard all trailing whitespace characters
- checksumValue = trimTrailingWhitespace(checksumValue)
+ if len(bytesRead) == 0 {
+ break
+ }
- // If the checksum key is not a supported checksum algorithm, return an error.
- // TODO: Bubble that error up to the caller
- extractedAlgorithm, err := extractChecksumAlgorithm(checksumKey)
- if err != nil {
- return ChecksumAlgorithmNone, nil
+ parts := bytes.SplitN(bytesRead, []byte(":"), 2)
+ if len(parts) == 2 {
+ key := string(bytes.TrimSpace(parts[0]))
+ value := bytes.TrimSpace(parts[1])
+ if alg, err := extractChecksumAlgorithm(key); err == nil {
+ if checksumAlgorithm != ChecksumAlgorithmNone {
+ glog.V(3).Infof("multiple checksum headers found in trailer, using last: %s", key)
+ }
+ checksumAlgorithm = alg
+ checksum = value
+ }
+ // Ignore other trailer headers like x-amz-trailer-signature
+ }
}
- return extractedAlgorithm, checksumValue
+ return checksumAlgorithm, checksum, nil
}
func parseChunkSignature(chunk []byte) []byte {