diff options
| author | Chris Lu <chrislusf@users.noreply.github.com> | 2025-11-21 14:46:32 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-11-21 14:46:32 -0800 |
| commit | f125a013a8eefd15cc26b01a1a88a45381a772f9 (patch) | |
| tree | 4102feba79ebbdf5b52f66d1005c1f65c9497492 /weed/s3api | |
| parent | a9fefcd22cc7e35afa6c632ea307d1ae28eb7f03 (diff) | |
| download | seaweedfs-f125a013a8eefd15cc26b01a1a88a45381a772f9.tar.xz seaweedfs-f125a013a8eefd15cc26b01a1a88a45381a772f9.zip | |
S3: set identity to request context, and remove obsolete code (#7523)
* list owned buckets
* simplify
* add unit tests
* no-owner buckets
* set identity id
* fallback to request header if iam is not enabled
* refactor to test
* fix comparing
* fix security vulnerability
* Update s3api_bucket_handlers.go
* Update s3api_bucket_handlers.go
* Update s3api_bucket_handlers.go
* set identity to request context
* remove SeaweedFSIsDirectoryKey
* remove obsolete
* simplify
* reuse
* refactor or remove obsolete logic on filer
* Removed the redundant check in GetOrHeadHandler
* surfacing invalid X-Amz-Tagging as a client error
* clean up
* constant
* reuse
* multiple header values
* code reuse
* err on duplicated tag key
Diffstat (limited to 'weed/s3api')
| -rw-r--r-- | weed/s3api/auth_credentials.go | 4 | ||||
| -rw-r--r-- | weed/s3api/s3_constants/header.go | 29 | ||||
| -rw-r--r-- | weed/s3api/s3_metadata_util.go | 94 | ||||
| -rw-r--r-- | weed/s3api/s3api_bucket_handlers.go | 20 | ||||
| -rw-r--r-- | weed/s3api/s3api_object_handlers_multipart.go | 8 | ||||
| -rw-r--r-- | weed/s3api/s3err/audit_fluent.go | 2 |
6 files changed, 141 insertions, 16 deletions
diff --git a/weed/s3api/auth_credentials.go b/weed/s3api/auth_credentials.go index 289fbd556..0d99e43eb 100644 --- a/weed/s3api/auth_credentials.go +++ b/weed/s3api/auth_credentials.go @@ -421,8 +421,10 @@ func (iam *IdentityAccessManagement) Auth(f http.HandlerFunc, action Action) htt glog.V(3).Infof("auth error: %v", errCode) if errCode == s3err.ErrNone { + // Store the authenticated identity in request context (secure, cannot be spoofed) if identity != nil && identity.Name != "" { - r.Header.Set(s3_constants.AmzIdentityId, identity.Name) + ctx := s3_constants.SetIdentityNameInContext(r.Context(), identity.Name) + r = r.WithContext(ctx) } f(w, r) return diff --git a/weed/s3api/s3_constants/header.go b/weed/s3api/s3_constants/header.go index 1ef6f62c5..a232eb189 100644 --- a/weed/s3api/s3_constants/header.go +++ b/weed/s3api/s3_constants/header.go @@ -17,6 +17,7 @@ package s3_constants import ( + "context" "net/http" "strings" @@ -44,8 +45,6 @@ const ( AmzObjectTaggingDirective = "X-Amz-Tagging-Directive" AmzTagCount = "x-amz-tagging-count" - SeaweedFSIsDirectoryKey = "X-Seaweedfs-Is-Directory-Key" - SeaweedFSPartNumber = "X-Seaweedfs-Part-Number" SeaweedFSUploadId = "X-Seaweedfs-Upload-Id" SeaweedFSMultipartPartsCount = "X-Seaweedfs-Multipart-Parts-Count" SeaweedFSMultipartPartBoundaries = "X-Seaweedfs-Multipart-Part-Boundaries" // JSON: [{part:1,start:0,end:2,etag:"abc"},{part:2,start:2,end:3,etag:"def"}] @@ -174,3 +173,29 @@ var PassThroughHeaders = map[string]string{ func IsSeaweedFSInternalHeader(headerKey string) bool { return strings.HasPrefix(strings.ToLower(headerKey), SeaweedFSInternalPrefix) } + +// Context keys for storing authenticated identity information +type contextKey string + +const ( + contextKeyIdentityName contextKey = "s3-identity-name" +) + +// SetIdentityNameInContext stores the authenticated identity name in the request context +// This is the secure way to propagate identity - headers can be spoofed, context cannot +func SetIdentityNameInContext(ctx context.Context, identityName string) context.Context { + if identityName != "" { + return context.WithValue(ctx, contextKeyIdentityName, identityName) + } + return ctx +} + +// GetIdentityNameFromContext retrieves the authenticated identity name from the request context +// Returns empty string if no identity is set (unauthenticated request) +// This is the secure way to retrieve identity - never read from headers directly +func GetIdentityNameFromContext(r *http.Request) string { + if name, ok := r.Context().Value(contextKeyIdentityName).(string); ok { + return name + } + return "" +} diff --git a/weed/s3api/s3_metadata_util.go b/weed/s3api/s3_metadata_util.go new file mode 100644 index 000000000..37363752a --- /dev/null +++ b/weed/s3api/s3_metadata_util.go @@ -0,0 +1,94 @@ +package s3api + +import ( + "net/http" + "net/url" + "strings" + + "github.com/seaweedfs/seaweedfs/weed/glog" + "github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants" + "github.com/seaweedfs/seaweedfs/weed/s3api/s3err" +) + +// ParseS3Metadata extracts S3-specific metadata from HTTP request headers +// This includes: storage class, tags, user metadata, SSE headers, and ACL headers +// Used by S3 API handlers to prepare metadata before saving to filer +// Returns an S3 error code if tag parsing fails +func ParseS3Metadata(r *http.Request, existing map[string][]byte, isReplace bool) (metadata map[string][]byte, errCode s3err.ErrorCode) { + metadata = make(map[string][]byte) + + // Copy existing metadata unless replacing + if !isReplace { + for k, v := range existing { + metadata[k] = v + } + } + + // Storage class + if sc := r.Header.Get(s3_constants.AmzStorageClass); sc != "" { + metadata[s3_constants.AmzStorageClass] = []byte(sc) + } + + // Content-Encoding (standard HTTP header used by S3) + if ce := r.Header.Get("Content-Encoding"); ce != "" { + metadata["Content-Encoding"] = []byte(ce) + } + + // Object tagging + if tags := r.Header.Get(s3_constants.AmzObjectTagging); tags != "" { + // Use url.ParseQuery for robust parsing and automatic URL decoding + parsedTags, err := url.ParseQuery(tags) + if err != nil { + // Return proper S3 error instead of silently dropping tags + glog.Warningf("Invalid S3 tag format in header '%s': %v", tags, err) + return nil, s3err.ErrInvalidTag + } + + // Validate: S3 spec does not allow duplicate tag keys + for key, values := range parsedTags { + if len(values) > 1 { + glog.Warningf("Duplicate tag key '%s' in header '%s'", key, tags) + return nil, s3err.ErrInvalidTag + } + // Tag value can be an empty string but not nil + value := "" + if len(values) > 0 { + value = values[0] + } + metadata[s3_constants.AmzObjectTagging+"-"+key] = []byte(value) + } + } + + // User-defined metadata (x-amz-meta-* headers) + for header, values := range r.Header { + if strings.HasPrefix(header, s3_constants.AmzUserMetaPrefix) { + // Go's HTTP server canonicalizes headers (e.g., x-amz-meta-foo → X-Amz-Meta-Foo) + // Per HTTP and S3 spec: multiple header values are concatenated with commas + // This ensures no metadata is lost when clients send duplicate header names + metadata[header] = []byte(strings.Join(values, ",")) + } + } + + // SSE-C headers + if algorithm := r.Header.Get(s3_constants.AmzServerSideEncryptionCustomerAlgorithm); algorithm != "" { + metadata[s3_constants.AmzServerSideEncryptionCustomerAlgorithm] = []byte(algorithm) + } + if keyMD5 := r.Header.Get(s3_constants.AmzServerSideEncryptionCustomerKeyMD5); keyMD5 != "" { + // Store as-is; SSE-C MD5 is base64 and case-sensitive + metadata[s3_constants.AmzServerSideEncryptionCustomerKeyMD5] = []byte(keyMD5) + } + + // ACL owner + acpOwner := r.Header.Get(s3_constants.ExtAmzOwnerKey) + if len(acpOwner) > 0 { + metadata[s3_constants.ExtAmzOwnerKey] = []byte(acpOwner) + } + + // ACL grants + acpGrants := r.Header.Get(s3_constants.ExtAmzAclKey) + if len(acpGrants) > 0 { + metadata[s3_constants.ExtAmzAclKey] = []byte(acpGrants) + } + + return metadata, s3err.ErrNone +} diff --git a/weed/s3api/s3api_bucket_handlers.go b/weed/s3api/s3api_bucket_handlers.go index 4222c911e..eaff6d442 100644 --- a/weed/s3api/s3api_bucket_handlers.go +++ b/weed/s3api/s3api_bucket_handlers.go @@ -59,12 +59,9 @@ func (s3a *S3ApiServer) ListBucketsHandler(w http.ResponseWriter, r *http.Reques return } - identityId := "" - if identity != nil { - identityId = identity.Name - } - // Note: For unauthenticated requests, identityId remains empty. - // We never read from request headers to prevent reflecting unvalidated user input. + // Get authenticated identity from context (secure, cannot be spoofed) + // For unauthenticated requests, this returns empty string + identityId := s3_constants.GetIdentityNameFromContext(r) var listBuckets ListAllMyBucketsList for _, entry := range entries { @@ -164,7 +161,8 @@ func (s3a *S3ApiServer) PutBucketHandler(w http.ResponseWriter, r *http.Request) } // Check if bucket already exists and handle ownership/settings - currentIdentityId := r.Header.Get(s3_constants.AmzIdentityId) + // Get authenticated identity from context (secure, cannot be spoofed) + currentIdentityId := s3_constants.GetIdentityNameFromContext(r) // Check collection existence first collectionExists := false @@ -247,11 +245,12 @@ func (s3a *S3ApiServer) PutBucketHandler(w http.ResponseWriter, r *http.Request) } fn := func(entry *filer_pb.Entry) { - if identityId := r.Header.Get(s3_constants.AmzIdentityId); identityId != "" { + // Reuse currentIdentityId from above (already retrieved from context) + if currentIdentityId != "" { if entry.Extended == nil { entry.Extended = make(map[string][]byte) } - entry.Extended[s3_constants.AmzIdentityId] = []byte(identityId) + entry.Extended[s3_constants.AmzIdentityId] = []byte(currentIdentityId) } } @@ -576,7 +575,8 @@ func (s3a *S3ApiServer) hasAccess(r *http.Request, entry *filer_pb.Entry) bool { return true } - identityId := r.Header.Get(s3_constants.AmzIdentityId) + // Get authenticated identity from context (secure, cannot be spoofed) + identityId := s3_constants.GetIdentityNameFromContext(r) if id, ok := entry.Extended[s3_constants.AmzIdentityId]; ok { if identityId != string(id) { glog.V(3).Infof("hasAccess: %s != %s (entry.Extended = %v)", identityId, id, entry.Extended) diff --git a/weed/s3api/s3api_object_handlers_multipart.go b/weed/s3api/s3api_object_handlers_multipart.go index 3ea709b31..ba9886d66 100644 --- a/weed/s3api/s3api_object_handlers_multipart.go +++ b/weed/s3api/s3api_object_handlers_multipart.go @@ -20,7 +20,6 @@ import ( "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" "github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants" "github.com/seaweedfs/seaweedfs/weed/s3api/s3err" - weed_server "github.com/seaweedfs/seaweedfs/weed/server" stats_collect "github.com/seaweedfs/seaweedfs/weed/stats" ) @@ -65,7 +64,12 @@ func (s3a *S3ApiServer) NewMultipartUploadHandler(w http.ResponseWriter, r *http Metadata: make(map[string]*string), } - metadata := weed_server.SaveAmzMetaData(r, nil, false) + // Parse S3 metadata from request headers + metadata, errCode := ParseS3Metadata(r, nil, false) + if errCode != s3err.ErrNone { + s3err.WriteErrorResponse(w, r, errCode) + return + } for k, v := range metadata { createMultipartUploadInput.Metadata[k] = aws.String(string(v)) } diff --git a/weed/s3api/s3err/audit_fluent.go b/weed/s3api/s3err/audit_fluent.go index ef2459eac..5d617ce1c 100644 --- a/weed/s3api/s3err/audit_fluent.go +++ b/weed/s3api/s3err/audit_fluent.go @@ -152,7 +152,7 @@ func GetAccessLog(r *http.Request, HTTPStatusCode int, s3errCode ErrorCode) *Acc HostHeader: hostHeader, RequestID: r.Header.Get("X-Request-ID"), RemoteIP: remoteIP, - Requester: r.Header.Get(s3_constants.AmzIdentityId), + Requester: s3_constants.GetIdentityNameFromContext(r), // Get from context, not header (secure) SignatureVersion: r.Header.Get(s3_constants.AmzAuthType), UserAgent: r.Header.Get("user-agent"), HostId: hostname, |
