aboutsummaryrefslogtreecommitdiff
path: root/weed/operation/upload_chunked.go
diff options
context:
space:
mode:
Diffstat (limited to 'weed/operation/upload_chunked.go')
-rw-r--r-- weed/operation/upload_chunked.go 267
1 files changed, 267 insertions, 0 deletions
diff --git a/weed/operation/upload_chunked.go b/weed/operation/upload_chunked.go
new file mode 100644
index 000000000..352b329f8
--- /dev/null
+++ b/weed/operation/upload_chunked.go
@@ -0,0 +1,267 @@
+package operation
+
+import (
+ "bytes"
+ "context"
+ "crypto/md5"
+ "fmt"
+ "hash"
+ "io"
+ "sort"
+ "sync"
+ "time"
+
+ "github.com/seaweedfs/seaweedfs/weed/glog"
+ "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
+ "github.com/seaweedfs/seaweedfs/weed/security"
+)
+
+// ChunkedUploadResult contains the result of a chunked upload, as produced
+// by UploadReaderInChunks.
+type ChunkedUploadResult struct {
+ // FileChunks lists the chunks that were uploaded, sorted by Offset.
+ // It is nil when the content was returned inline in SmallContent, and it
+ // may hold only part of the stream when the upload returned an error.
+ FileChunks []*filer_pb.FileChunk
+ // Md5Hash is the MD5 hash that was fed every byte read from the input.
+ Md5Hash hash.Hash
+ // TotalSize is the number of input bytes covered by FileChunks or
+ // SmallContent.
+ TotalSize int64
+ // SmallContent holds the bytes kept inline instead of being uploaded as
+ // chunks; it is nil whenever chunks were uploaded.
+ SmallContent []byte
+}
+
+// ChunkedUploadOption contains options for chunked uploads performed by
+// UploadReaderInChunks.
+type ChunkedUploadOption struct {
+ // ChunkSize is the maximum number of bytes read into a single chunk.
+ ChunkSize int32
+ // SmallFileLimit is the size below which a first chunk is kept inline
+ // when SaveSmallInline is set.
+ SmallFileLimit int64
+ // Collection, Replication and DataCenter are not read by
+ // UploadReaderInChunks itself.
+ // NOTE(review): presumably placement hints consumed by the caller's
+ // AssignFunc - confirm against callers.
+ Collection string
+ Replication string
+ DataCenter string
+ // SaveSmallInline enables returning small content inline instead of
+ // uploading it.
+ SaveSmallInline bool
+ // Jwt is used for a chunk upload when the assign result carries no Auth
+ // token of its own.
+ Jwt security.EncodedJwt
+ // MimeType is passed through to the upload option of every chunk.
+ MimeType string
+ // AssignFunc is called once per chunk with count 1; the Url, Fid and Auth
+ // of the returned AssignResult determine where and how the chunk is
+ // uploaded.
+ AssignFunc func(ctx context.Context, count int) (*VolumeAssignRequest, *AssignResult, error)
+ // UploadFunc, when non-nil, replaces the uploader obtained from
+ // NewUploader for sending chunk data.
+ UploadFunc func(ctx context.Context, data []byte, option *UploadOption) (*UploadResult, error) // Optional: for testing
+}
+
+// chunkBufferPool recycles the *bytes.Buffer values that hold one chunk of
+// input while it is being read and uploaded. Buffers taken from the pool may
+// still contain old data and must be Reset before use.
+var chunkBufferPool = sync.Pool{
+	New: func() interface{} { return &bytes.Buffer{} },
+}
+
+// UploadReaderInChunks reads reader to EOF and uploads its content to volume
+// servers in chunks of at most opt.ChunkSize bytes.
+//
+// Each chunk is read into a pooled buffer and uploaded by its own goroutine.
+// At most bytesBufferCounter buffers are held at once, which bounds memory use
+// regardless of the stream length. Every byte read from reader is also fed
+// into an MD5 hash that is returned with the result.
+//
+// When opt.SaveSmallInline is set and the whole input is shorter than both
+// opt.SmallFileLimit and opt.ChunkSize, nothing is uploaded and the bytes are
+// returned in SmallContent instead.
+//
+// On a read, assign, upload or context error the first recorded error is
+// returned together with a non-nil result listing the chunks uploaded so far,
+// so the caller can clean them up. A nil opt or a non-positive opt.ChunkSize
+// is rejected with a nil result before anything is read.
+func UploadReaderInChunks(ctx context.Context, reader io.Reader, opt *ChunkedUploadOption) (*ChunkedUploadResult, error) {
+	if opt == nil {
+		return nil, fmt.Errorf("chunked upload: nil option")
+	}
+	// A non-positive chunk size makes every limited read return 0 bytes, which
+	// would silently turn any input into an empty file.
+	if opt.ChunkSize <= 0 {
+		return nil, fmt.Errorf("chunked upload: invalid chunk size %d", opt.ChunkSize)
+	}
+	chunkSize := int64(opt.ChunkSize)
+
+	md5Hash := md5.New()
+	partReader := io.TeeReader(reader, md5Hash)
+
+	var (
+		fileChunks     []*filer_pb.FileChunk
+		fileChunksLock sync.Mutex
+		uploadErr      error
+		uploadErrLock  sync.Mutex
+		chunkOffset    int64
+		wg             sync.WaitGroup
+	)
+
+	// recordErr keeps only the first error reported by the read loop or by
+	// any upload goroutine.
+	recordErr := func(err error) {
+		uploadErrLock.Lock()
+		if uploadErr == nil {
+			uploadErr = err
+		}
+		uploadErrLock.Unlock()
+	}
+	// firstErr returns the error recorded so far, if any.
+	firstErr := func() error {
+		uploadErrLock.Lock()
+		defer uploadErrLock.Unlock()
+		return uploadErr
+	}
+
+	// The channel acts as a semaphore limiting how many chunk buffers exist
+	// at the same time.
+	const bytesBufferCounter = 4
+	bytesBufferLimitChan := make(chan struct{}, bytesBufferCounter)
+	// releaseBuffer returns a buffer to the pool and frees its semaphore slot.
+	releaseBuffer := func(buf *bytes.Buffer) {
+		chunkBufferPool.Put(buf)
+		<-bytesBufferLimitChan
+	}
+
+	// uploadChunk assigns a file id, uploads data to it and builds the chunk
+	// entry describing the uploaded range starting at offset.
+	uploadChunk := func(offset int64, data []byte) (*filer_pb.FileChunk, error) {
+		_, assignResult, assignErr := opt.AssignFunc(ctx, 1)
+		if assignErr != nil {
+			return nil, fmt.Errorf("assign volume: %w", assignErr)
+		}
+		if assignResult == nil {
+			return nil, fmt.Errorf("assign volume: empty assign result")
+		}
+
+		uploadUrl := fmt.Sprintf("http://%s/%s", assignResult.Url, assignResult.Fid)
+
+		// Use per-assignment JWT if present, otherwise fall back to the original JWT
+		// This is critical for secured clusters where each volume assignment has its own JWT
+		jwt := opt.Jwt
+		if assignResult.Auth != "" {
+			jwt = assignResult.Auth
+		}
+
+		uploadOption := &UploadOption{
+			UploadUrl:         uploadUrl,
+			Cipher:            false,
+			IsInputCompressed: false,
+			MimeType:          opt.MimeType,
+			PairMap:           nil,
+			Jwt:               jwt,
+		}
+
+		var uploadResult *UploadResult
+		var uploadResultErr error
+		// Use mock upload function if provided (for testing), otherwise use real uploader
+		if opt.UploadFunc != nil {
+			uploadResult, uploadResultErr = opt.UploadFunc(ctx, data, uploadOption)
+		} else {
+			uploader, uploaderErr := NewUploader()
+			if uploaderErr != nil {
+				return nil, fmt.Errorf("create uploader: %w", uploaderErr)
+			}
+			uploadResult, uploadResultErr = uploader.UploadData(ctx, data, uploadOption)
+		}
+		if uploadResultErr != nil {
+			return nil, fmt.Errorf("upload chunk: %w", uploadResultErr)
+		}
+		if uploadResult == nil {
+			return nil, fmt.Errorf("upload chunk: empty upload result")
+		}
+
+		// The data is already stored at this point, so a file id that cannot
+		// be parsed is only logged: the chunk is still reported to the caller
+		// (with whatever Fid the parser returned) so it can be cleaned up.
+		fid, fidErr := filer_pb.ToFileIdObject(assignResult.Fid)
+		if fidErr != nil {
+			glog.Warningf("UploadReaderInChunks: parse file id %q: %v", assignResult.Fid, fidErr)
+		}
+
+		// Set ModifiedTsNs to current time (nanoseconds) to track when upload completed
+		// This is critical for multipart uploads where the same part may be uploaded multiple times
+		// The part with the latest ModifiedTsNs is selected as the authoritative version
+		return &filer_pb.FileChunk{
+			FileId:       assignResult.Fid,
+			Offset:       offset,
+			Size:         uint64(uploadResult.Size),
+			ModifiedTsNs: time.Now().UnixNano(),
+			ETag:         uploadResult.ContentMd5,
+			Fid:          fid,
+			CipherKey:    uploadResult.CipherKey,
+		}, nil
+	}
+
+	for {
+		// Throttle buffer usage
+		bytesBufferLimitChan <- struct{}{}
+
+		// Stop reading as soon as a parallel upload has failed.
+		if firstErr() != nil {
+			<-bytesBufferLimitChan
+			break
+		}
+
+		// Stop reading once the context is cancelled or expired.
+		if ctxErr := ctx.Err(); ctxErr != nil {
+			<-bytesBufferLimitChan
+			recordErr(ctxErr)
+			break
+		}
+
+		// Read one chunk into a pooled buffer.
+		bytesBuffer := chunkBufferPool.Get().(*bytes.Buffer)
+		bytesBuffer.Reset()
+		dataSize, err := bytesBuffer.ReadFrom(io.LimitReader(partReader, chunkSize))
+		if err != nil {
+			glog.V(2).Infof("UploadReaderInChunks: read error at offset %d: %v", chunkOffset, err)
+			releaseBuffer(bytesBuffer)
+			recordErr(err)
+			break
+		}
+
+		// No data means EOF. At offset 0 this is an empty input, which is
+		// valid (e.g., touch command creates 0-byte files).
+		if dataSize == 0 {
+			if chunkOffset == 0 {
+				glog.Warningf("UploadReaderInChunks: received 0 bytes on first read - creating empty file")
+			}
+			releaseBuffer(bytesBuffer)
+			break
+		}
+
+		// For small files at offset 0, store inline instead of uploading.
+		// dataSize < chunkSize guarantees the reader hit EOF, so the inline
+		// content is the complete input even when ChunkSize is smaller than
+		// SmallFileLimit.
+		if chunkOffset == 0 && opt.SaveSmallInline && dataSize < opt.SmallFileLimit && dataSize < chunkSize {
+			smallContent := make([]byte, dataSize)
+			copy(smallContent, bytesBuffer.Bytes())
+			releaseBuffer(bytesBuffer)
+
+			return &ChunkedUploadResult{
+				FileChunks:   nil,
+				Md5Hash:      md5Hash,
+				TotalSize:    dataSize,
+				SmallContent: smallContent,
+			}, nil
+		}
+
+		// Calling a nil AssignFunc inside the goroutine would crash the whole
+		// process, so report it as an ordinary error instead.
+		if opt.AssignFunc == nil {
+			releaseBuffer(bytesBuffer)
+			recordErr(fmt.Errorf("assign volume: AssignFunc is not set"))
+			break
+		}
+
+		// Upload chunk in parallel goroutine
+		wg.Add(1)
+		go func(offset int64, buf *bytes.Buffer) {
+			defer func() {
+				chunkBufferPool.Put(buf)
+				<-bytesBufferLimitChan
+				wg.Done()
+			}()
+
+			chunk, chunkErr := uploadChunk(offset, buf.Bytes())
+			if chunkErr != nil {
+				recordErr(chunkErr)
+				return
+			}
+
+			fileChunksLock.Lock()
+			fileChunks = append(fileChunks, chunk)
+			glog.V(4).Infof("uploaded chunk %d to %s [%d,%d)", len(fileChunks), chunk.FileId, offset, offset+int64(chunk.Size))
+			fileChunksLock.Unlock()
+		}(chunkOffset, bytesBuffer)
+
+		// Update offset for next chunk
+		chunkOffset += dataSize
+
+		// If this was a partial chunk, we're done
+		if dataSize < chunkSize {
+			break
+		}
+	}
+
+	// Wait for all uploads to complete
+	wg.Wait()
+
+	// Sort chunks by offset (do this even if there's an error, for cleanup purposes)
+	sort.Slice(fileChunks, func(i, j int) bool {
+		return fileChunks[i].Offset < fileChunks[j].Offset
+	})
+
+	result := &ChunkedUploadResult{
+		FileChunks:   fileChunks,
+		Md5Hash:      md5Hash,
+		TotalSize:    chunkOffset,
+		SmallContent: nil,
+	}
+
+	// IMPORTANT: Return partial results even on error so caller can cleanup orphaned chunks
+	if finalErr := firstErr(); finalErr != nil {
+		glog.Errorf("chunked upload failed: %v (returning %d partial chunks for cleanup)", finalErr, len(fileChunks))
+		return result, finalErr
+	}
+
+	return result, nil
+}