| author | Chris Lu <chrislusf@users.noreply.github.com> | 2023-01-02 23:20:45 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-01-02 23:20:45 -0800 |
| commit | d4566d4aaa426b33015780c7cc18f887fc07cca4 (patch) | |
| tree | 7c3b5cb3d9e54297b9d4213b67408f86149013f7 /weed/filer/stream.go | |
| parent | 367353b936c450906e88e850c7d1e804f97c3560 (diff) | |
| download | seaweedfs-d4566d4aaa426b33015780c7cc18f887fc07cca4.tar.xz seaweedfs-d4566d4aaa426b33015780c7cc18f887fc07cca4.zip | |
more solid weed mount (#4089)
* compare chunks by timestamp
* fix slab clearing error
* fix test compilation
* move oldest chunk to sealed, instead of by fullness
* lock on fh.entryViewCache
* remove verbose logs
* revert slab clearing
* less logs
* less logs
* track write and read by timestamp
* remove useless logic
* add entry lock on file handle release
* use mem chunk only, swap file chunk has problems
* comment out code that maybe used later
* add debug mode to compare data read and write
* more efficient readResolvedChunks with linked list
* small optimization
* fix test compilation
* minor fix on writer
* add SeparateGarbageChunks
* group chunks into sections
* turn off debug mode
* fix tests
* fix tests
* tmp enable swap file chunk
* Revert "tmp enable swap file chunk"
This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7.
* simple refactoring
* simple refactoring
* do not re-use swap file chunk. Sealed chunks should not be re-used.
* comment out debugging facilities
* either mem chunk or swap file chunk is fine now
* remove orderedMutex as *semaphore.Weighted
not found to be impactful
* optimize size calculation for changing large files
* optimize performance to avoid going through the long list of chunks
* still problems with swap file chunk
* rename
* tiny optimization
* swap file chunk save only successfully read data
* fix
* enable both mem and swap file chunk
* resolve chunks with range
* rename
* fix chunk interval list
* also change file handle chunk group when adding chunks
* pick inactive chunk with time-decayed counter (see the counter sketch after this list)
* fix compilation
* avoid nil with empty fh.entry
* refactoring
* rename
* rename
* refactor visible intervals to *list.List
* refactor chunkViews to *list.List
* add IntervalList for generic interval list (see the sketch after this list)
* change visible interval to use IntervalList in generics
* change chunkViews to *IntervalList[*ChunkView]
* use NewFileChunkSection to create
* rename variables
* refactor
* fix renaming leftover
* renaming
* renaming
* add insert interval
* interval list adds lock
* incrementally add chunks to readers
Fixes:
1. set start and stop offset for the value object
2. clone the value object
3. use pointer instead of copy-by-value when passing to interval.Value
4. use insert interval since adding chunk could be out of order
* fix tests compilation
* fix tests compilation
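
The recurring theme in the bullets above (IntervalList for a generic interval list, insert interval, incrementally adding chunks) is replacing the sorted `[]*ChunkView` slice with a linked interval list, so chunks that arrive out of order can be spliced in at their sorted position without re-sorting. A minimal sketch of that idea; the node layout and the `InsertInterval` helper are illustrative assumptions, not SeaweedFS's actual types:

```go
package main

import "fmt"

// Interval is one node in a doubly-linked interval list, keyed by its
// view offset range [StartOffset, StopOffset).
type Interval[T any] struct {
	StartOffset int64
	StopOffset  int64
	Value       T
	Prev        *Interval[T]
	Next        *Interval[T]
}

// IntervalList keeps intervals sorted by StartOffset.
type IntervalList[T any] struct {
	head *Interval[T]
}

// Front returns the first interval, or nil for an empty list.
func (l *IntervalList[T]) Front() *Interval[T] { return l.head }

// InsertInterval walks the list and splices the new interval in at its
// sorted position, so out-of-order additions still yield a sorted list.
func (l *IntervalList[T]) InsertInterval(start, stop int64, v T) {
	n := &Interval[T]{StartOffset: start, StopOffset: stop, Value: v}
	if l.head == nil || start < l.head.StartOffset {
		n.Next = l.head
		if l.head != nil {
			l.head.Prev = n
		}
		l.head = n
		return
	}
	cur := l.head
	for cur.Next != nil && cur.Next.StartOffset <= start {
		cur = cur.Next
	}
	n.Next = cur.Next
	n.Prev = cur
	if cur.Next != nil {
		cur.Next.Prev = n
	}
	cur.Next = n
}

func main() {
	var l IntervalList[string]
	// Chunks may arrive out of order; insertion keeps them sorted.
	l.InsertInterval(100, 200, "b")
	l.InsertInterval(0, 100, "a")
	for x := l.Front(); x != nil; x = x.Next {
		fmt.Printf("[%d,%d) %s\n", x.StartOffset, x.StopOffset, x.Value)
	}
}
```

Note how this matches the "Fixes" list above: the list stores a pointer in `Value` rather than a copy, and insertion rather than append tolerates out-of-order chunks.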
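The "pick inactive chunk with time-decayed counter" bullet names a standard technique the commit message does not spell out: score each chunk's recent activity with a counter that decays as time passes, then seal or evict the chunk with the lowest score. A generic sketch under that assumption; `decayedCounter` and the half-life value are hypothetical, not taken from the commit:

```go
package main

import (
	"fmt"
	"math"
	"time"
)

// decayedCounter approximates recent activity: each access first decays
// the stored count by how long it has been since the last update, so a
// chunk that was busy long ago scores lower than one that is busy now.
type decayedCounter struct {
	count      float64
	lastUpdate time.Time
	halfLife   time.Duration
}

func (c *decayedCounter) decayTo(now time.Time) {
	elapsed := now.Sub(c.lastUpdate)
	c.count *= math.Pow(0.5, elapsed.Seconds()/c.halfLife.Seconds())
	c.lastUpdate = now
}

// Add records n units of activity at time now.
func (c *decayedCounter) Add(now time.Time, n float64) {
	c.decayTo(now)
	c.count += n
}

// Value returns the decayed activity score at time now.
func (c *decayedCounter) Value(now time.Time) float64 {
	c.decayTo(now)
	return c.count
}

func main() {
	now := time.Now()
	c := &decayedCounter{lastUpdate: now, halfLife: 10 * time.Second}
	c.Add(now, 100)
	// One half-life later the score has halved; the chunk looks less
	// active and becomes a candidate for sealing.
	fmt.Printf("%.1f\n", c.Value(now.Add(10*time.Second)))
}
```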
Diffstat (limited to 'weed/filer/stream.go')
| -rw-r--r-- | weed/filer/stream.go | 90 |
1 file changed, 32 insertions, 58 deletions
```diff
diff --git a/weed/filer/stream.go b/weed/filer/stream.go
index f28341be4..d49784686 100644
--- a/weed/filer/stream.go
+++ b/weed/filer/stream.go
@@ -6,7 +6,6 @@ import (
 	"golang.org/x/exp/slices"
 	"io"
 	"math"
-	"sort"
 	"strings"
 	"sync"
 	"time"
@@ -78,7 +77,8 @@ func StreamContentWithThrottler(masterClient wdclient.HasLookupFileIdFunction, w
 
 	fileId2Url := make(map[string][]string)
 
-	for _, chunkView := range chunkViews {
+	for x := chunkViews.Front(); x != nil; x = x.Next {
+		chunkView := x.Value
 		var urlStrings []string
 		var err error
 		for _, backoff := range getLookupFileIdBackoffSchedule {
@@ -102,29 +102,30 @@ func StreamContentWithThrottler(masterClient wdclient.HasLookupFileIdFunction, w
 	downloadThrottler := util.NewWriteThrottler(downloadMaxBytesPs)
 	remaining := size
 
-	for _, chunkView := range chunkViews {
-		if offset < chunkView.LogicOffset {
-			gap := chunkView.LogicOffset - offset
+	for x := chunkViews.Front(); x != nil; x = x.Next {
+		chunkView := x.Value
+		if offset < chunkView.ViewOffset {
+			gap := chunkView.ViewOffset - offset
 			remaining -= gap
-			glog.V(4).Infof("zero [%d,%d)", offset, chunkView.LogicOffset)
+			glog.V(4).Infof("zero [%d,%d)", offset, chunkView.ViewOffset)
 			err := writeZero(writer, gap)
 			if err != nil {
-				return fmt.Errorf("write zero [%d,%d)", offset, chunkView.LogicOffset)
+				return fmt.Errorf("write zero [%d,%d)", offset, chunkView.ViewOffset)
 			}
-			offset = chunkView.LogicOffset
+			offset = chunkView.ViewOffset
 		}
 		urlStrings := fileId2Url[chunkView.FileId]
 		start := time.Now()
-		err := retriedStreamFetchChunkData(writer, urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size))
-		offset += int64(chunkView.Size)
-		remaining -= int64(chunkView.Size)
+		err := retriedStreamFetchChunkData(writer, urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk, int(chunkView.ViewSize))
+		offset += int64(chunkView.ViewSize)
+		remaining -= int64(chunkView.ViewSize)
 		stats.FilerRequestHistogram.WithLabelValues("chunkDownload").Observe(time.Since(start).Seconds())
 		if err != nil {
 			stats.FilerRequestCounter.WithLabelValues("chunkDownloadError").Inc()
 			return fmt.Errorf("read chunk: %v", err)
 		}
 		stats.FilerRequestCounter.WithLabelValues("chunkDownload").Inc()
-		downloadThrottler.MaybeSlowdown(int64(chunkView.Size))
+		downloadThrottler.MaybeSlowdown(int64(chunkView.ViewSize))
 	}
 	if remaining > 0 {
 		glog.V(4).Infof("zero [%d,%d)", offset, offset+remaining)
@@ -167,14 +168,15 @@ func ReadAll(buffer []byte, masterClient *wdclient.MasterClient, chunks []*filer
 
 	idx := 0
 
-	for _, chunkView := range chunkViews {
+	for x := chunkViews.Front(); x != nil; x = x.Next {
+		chunkView := x.Value
 		urlStrings, err := lookupFileIdFn(chunkView.FileId)
 		if err != nil {
 			glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
 			return err
 		}
 
-		n, err := retriedFetchChunkData(buffer[idx:idx+int(chunkView.Size)], urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset)
+		n, err := retriedFetchChunkData(buffer[idx:idx+int(chunkView.ViewSize)], urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk)
 		if err != nil {
 			return err
 		}
@@ -185,7 +187,7 @@ func ReadAll(buffer []byte, masterClient *wdclient.MasterClient, chunks []*filer
 
 // ---------------- ChunkStreamReader ----------------------------------
 type ChunkStreamReader struct {
-	chunkViews  []*ChunkView
+	chunkView   *Interval[*ChunkView]
 	totalSize   int64
 	logicOffset int64
 	buffer      []byte
@@ -201,17 +203,15 @@ var _ = io.ReaderAt(&ChunkStreamReader{})
 
 func doNewChunkStreamReader(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
 	chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, math.MaxInt64)
-	slices.SortFunc(chunkViews, func(a, b *ChunkView) bool {
-		return a.LogicOffset < b.LogicOffset
-	})
 
 	var totalSize int64
-	for _, chunk := range chunkViews {
-		totalSize += int64(chunk.Size)
+	for x := chunkViews.Front(); x != nil; x = x.Next {
+		chunk := x.Value
+		totalSize += int64(chunk.ViewSize)
 	}
 
 	return &ChunkStreamReader{
-		chunkViews:   chunkViews,
+		chunkView:    chunkViews.Front(),
 		lookupFileId: lookupFileIdFn,
 		totalSize:    totalSize,
 	}
@@ -290,7 +290,7 @@ func (c *ChunkStreamReader) Seek(offset int64, whence int) (int64, error) {
 }
 
 func insideChunk(offset int64, chunk *ChunkView) bool {
-	return chunk.LogicOffset <= offset && offset < chunk.LogicOffset+int64(chunk.Size)
+	return chunk.ViewOffset <= offset && offset < chunk.ViewOffset+int64(chunk.ViewSize)
 }
 
 func (c *ChunkStreamReader) prepareBufferFor(offset int64) (err error) {
@@ -300,48 +300,22 @@ func (c *ChunkStreamReader) prepareBufferFor(offset int64) (err error) {
 	}
 
 	// fmt.Printf("fetch for offset %d\n", offset)
-
-	// need to seek to a different chunk
-	currentChunkIndex := sort.Search(len(c.chunkViews), func(i int) bool {
-		return offset < c.chunkViews[i].LogicOffset
-	})
-	if currentChunkIndex == len(c.chunkViews) {
-		// not found
-		if insideChunk(offset, c.chunkViews[0]) {
-			// fmt.Printf("select0 chunk %d %s\n", currentChunkIndex, c.chunkViews[currentChunkIndex].FileId)
-			currentChunkIndex = 0
-		} else if insideChunk(offset, c.chunkViews[len(c.chunkViews)-1]) {
-			currentChunkIndex = len(c.chunkViews) - 1
-			// fmt.Printf("select last chunk %d %s\n", currentChunkIndex, c.chunkViews[currentChunkIndex].FileId)
-		} else {
-			return io.EOF
-		}
-	} else if currentChunkIndex > 0 {
-		if insideChunk(offset, c.chunkViews[currentChunkIndex]) {
-			// good hit
-		} else if insideChunk(offset, c.chunkViews[currentChunkIndex-1]) {
-			currentChunkIndex -= 1
-			// fmt.Printf("select -1 chunk %d %s\n", currentChunkIndex, c.chunkViews[currentChunkIndex].FileId)
-		} else {
-			// glog.Fatalf("unexpected1 offset %d", offset)
-			return fmt.Errorf("unexpected1 offset %d", offset)
-		}
-	} else {
-		// glog.Fatalf("unexpected2 offset %d", offset)
-		return fmt.Errorf("unexpected2 offset %d", offset)
+	c.chunkView = c.chunkView.Next
+	if c.chunkView == nil {
+		return io.EOF
 	}
 
 	// positioning within the new chunk
-	chunk := c.chunkViews[currentChunkIndex]
+	chunk := c.chunkView.Value
 	if insideChunk(offset, chunk) {
-		if c.isBufferEmpty() || c.bufferOffset != chunk.LogicOffset {
+		if c.isBufferEmpty() || c.bufferOffset != chunk.ViewOffset {
 			if err = c.fetchChunkToBuffer(chunk); err != nil {
 				return
 			}
 		}
 	} else {
-		// glog.Fatalf("unexpected3 offset %d in %s [%d,%d)", offset, chunk.FileId, chunk.LogicOffset, chunk.LogicOffset+int64(chunk.Size))
-		return fmt.Errorf("unexpected3 offset %d in %s [%d,%d)", offset, chunk.FileId, chunk.LogicOffset, chunk.LogicOffset+int64(chunk.Size))
+		// glog.Fatalf("unexpected3 offset %d in %s [%d,%d)", offset, chunk.FileId, chunk.ViewOffset, chunk.ViewOffset+int64(chunk.ViewSize))
+		return fmt.Errorf("unexpected3 offset %d in %s [%d,%d)", offset, chunk.FileId, chunk.ViewOffset, chunk.ViewOffset+int64(chunk.ViewSize))
 	}
 	return
 }
@@ -355,7 +329,7 @@ func (c *ChunkStreamReader) fetchChunkToBuffer(chunkView *ChunkView) error {
 	var buffer bytes.Buffer
 	var shouldRetry bool
 	for _, urlString := range urlStrings {
-		shouldRetry, err = util.ReadUrlAsStream(urlString+"?readDeleted=true", chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
+		shouldRetry, err = util.ReadUrlAsStream(urlString+"?readDeleted=true", chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk, int(chunkView.ViewSize), func(data []byte) {
 			buffer.Write(data)
 		})
 		if !shouldRetry {
@@ -372,10 +346,10 @@ func (c *ChunkStreamReader) fetchChunkToBuffer(chunkView *ChunkView) error {
 		return err
 	}
 	c.buffer = buffer.Bytes()
-	c.bufferOffset = chunkView.LogicOffset
+	c.bufferOffset = chunkView.ViewOffset
 	c.chunk = chunkView.FileId
 
-	// glog.V(0).Infof("fetched %s [%d,%d)", chunkView.FileId, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size))
+	// glog.V(0).Infof("fetched %s [%d,%d)", chunkView.FileId, chunkView.ViewOffset, chunkView.ViewOffset+int64(chunkView.ViewSize))
 
 	return nil
 }
```
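
Two mechanical themes run through the diff: every `for _, chunkView := range chunkViews` loop becomes a walk of the interval list via `Front()` and `Next`, and `ChunkView`'s fields are renamed to say which coordinate space each number lives in. A hedged reading of the renames; only the three renamed fields are confirmed by the diff, while the remaining fields and the exact types are assumptions:

```go
package filer

// Illustrative sketch of the ChunkView renames visible in the diff.
type ChunkView struct {
	FileId        string
	CipherKey     []byte
	IsGzipped     bool
	OffsetInChunk int64  // was Offset: where this view starts inside the stored chunk
	ViewSize      uint64 // was Size: how many bytes of the chunk this view exposes
	ViewOffset    int64  // was LogicOffset: where this view starts in the logical file
}

// Old slice iteration:
//   for _, chunkView := range chunkViews { ... }
// New list iteration, as in the diff:
//   for x := chunkViews.Front(); x != nil; x = x.Next {
//       chunkView := x.Value
//       ...
//   }
```

The list also explains the `prepareBufferFor` simplification: the reader now holds its position as a list node, so seeking forward is `c.chunkView = c.chunkView.Next` instead of a `sort.Search` over a slice, which is why the `sort` import is dropped.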
