| author | Chris Lu <chrislusf@users.noreply.github.com> | 2023-01-02 23:20:45 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-01-02 23:20:45 -0800 |
| commit | d4566d4aaa426b33015780c7cc18f887fc07cca4 | |
| tree | 7c3b5cb3d9e54297b9d4213b67408f86149013f7 /weed/filer/filechunks.go | |
| parent | 367353b936c450906e88e850c7d1e804f97c3560 | |
more solid weed mount (#4089)
* compare chunks by timestamp
* fix slab clearing error
* fix test compilation
* move the oldest chunk to sealed, instead of choosing by fullness
* lock on fh.entryViewCache
* remove verbose logs
* revert slab clearing
* less logs
* less logs
* track write and read by timestamp
* remove useless logic
* add entry lock on file handle release
* use mem chunk only, swap file chunk has problems
* comment out code that may be used later
* add debug mode to compare data read and write
* more efficient readResolvedChunks with linked list
* small optimization
* fix test compilation
* minor fix on writer
* add SeparateGarbageChunks
* group chunks into sections (see the section-size sketch after this list)
* turn off debug mode
* fix tests
* fix tests
* tmp enable swap file chunk
* Revert "tmp enable swap file chunk"
This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7.
* simple refactoring
* simple refactoring
* do not re-use swap file chunk. Sealed chunks should not be re-used.
* comment out debugging facilities
* either mem chunk or swap file chunk is fine now
* remove orderedMutex as *semaphore.Weighted
it was not found to be impactful
* optimize size calculation for changing large files
* optimize performance to avoid going through the long list of chunks
* still problems with swap file chunk
* rename
* tiny optimization
* swap file chunk saves only successfully read data
* fix
* enable both mem and swap file chunk
* resolve chunks with range
* rename
* fix chunk interval list
* also change file handle chunk group when adding chunks
* pick inactive chunk with a time-decayed counter (see the decayed-counter sketch after this list)
* fix compilation
* avoid nil with empty fh.entry
* refactoring
* rename
* rename
* refactor visible intervals to *list.List
* refactor chunkViews to *list.List
* add IntervalList for a generic interval list (see the IntervalList sketch after this list)
* change visible interval to use IntervalList in generics
* change chunkViews to *IntervalList[*ChunkView]
* use NewFileChunkSection to create
* rename variables
* refactor
* fix renaming leftover
* renaming
* renaming
* add insert interval
* interval list adds lock
* incrementally add chunks to readers
Fixes (illustrated by the SetStartStop check after the diff):
1. set start and stop offset for the value object
2. clone the value object
3. use pointer instead of copy-by-value when passing to interval.Value
4. use insert interval since adding chunk could be out of order
* fix tests compilation
* fix tests compilation
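
Several items above (the generic IntervalList, "add insert interval", and the refactors of visible intervals and chunk views to interval lists) revolve around one data structure: an offset-ordered list of non-overlapping intervals where a newer write overrides whatever it overlaps, and the surviving pieces are trimmed via SetStartStop/Clone. The sketch below is not the SeaweedFS implementation, only a minimal illustration of that idea; the chunkRef type and the newest-wins simplification are assumptions for the demo.

```go
package main

import "fmt"

// IntervalValue is what a stored value must implement so the list can trim
// a surviving piece in place, or duplicate it when an insert splits it.
type IntervalValue interface {
	SetStartStop(start, stop int64)
	Clone() IntervalValue
}

// Interval is one node of an offset-ordered singly linked list of
// non-overlapping [Start, Stop) ranges.
type Interval[T IntervalValue] struct {
	Start, Stop int64
	Value       T
	Next        *Interval[T]
}

// IntervalList keeps intervals sorted by offset; InsertInterval lets the
// newly inserted range override anything it overlaps (newest wins).
type IntervalList[T IntervalValue] struct {
	head Interval[T] // sentinel; head.Next is the first real node
}

func (l *IntervalList[T]) Front() *Interval[T] { return l.head.Next }

func (l *IntervalList[T]) InsertInterval(start, stop int64, v T) {
	prev := &l.head
	cur := prev.Next

	// Skip nodes entirely to the left of the new interval.
	for cur != nil && cur.Stop <= start {
		prev, cur = cur, cur.Next
	}

	// Resolve every existing node that overlaps [start, stop).
	for cur != nil && cur.Start < stop {
		next := cur.Next
		if cur.Stop > stop {
			// The old interval sticks out on the right: keep a trimmed clone.
			right := &Interval[T]{Start: stop, Stop: cur.Stop, Value: cur.Value.Clone().(T), Next: next}
			right.Value.SetStartStop(stop, cur.Stop)
			next = right
		}
		if cur.Start < start {
			// The old interval sticks out on the left: shrink it in place.
			cur.Stop = start
			cur.Value.SetStartStop(cur.Start, start)
			cur.Next = next
			prev = cur
		} else {
			// Fully covered: unlink the old node.
			prev.Next = next
		}
		cur = next
	}

	// Link the new node between prev and whatever survived on the right.
	prev.Next = &Interval[T]{Start: start, Stop: stop, Value: v, Next: cur}
}

// chunkRef is a toy value for the demo: a file id plus the byte offset
// inside that chunk where this interval's data begins.
type chunkRef struct {
	fileId        string
	offsetInChunk int64
	viewOffset    int64 // file offset this interval currently starts at
}

func (c *chunkRef) SetStartStop(start, stop int64) {
	c.offsetInChunk += start - c.viewOffset
	c.viewOffset = start
	_ = stop // the size is implied by the node's [Start, Stop)
}

func (c *chunkRef) Clone() IntervalValue {
	cp := *c
	return &cp
}

func main() {
	l := &IntervalList[*chunkRef]{}
	l.InsertInterval(0, 100, &chunkRef{fileId: "chunk-a"})
	l.InsertInterval(30, 60, &chunkRef{fileId: "chunk-b", viewOffset: 30}) // later, out-of-order write
	for x := l.Front(); x != nil; x = x.Next {
		fmt.Printf("[%d,%d) %s@%d\n", x.Start, x.Stop, x.Value.fileId, x.Value.offsetInChunk)
	}
	// Output:
	// [0,30) chunk-a@0
	// [30,60) chunk-b@0
	// [60,100) chunk-a@60
}
```

This is why the Fixes list insists on cloning the value and inserting by interval: the right-hand survivor must not share state with the left-hand one, and chunks can arrive out of offset order.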
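
"pick inactive chunk with a time-decayed counter" refers to choosing which in-memory chunk to seal or swap out based on how recently it was read or written. The PR's counter lives elsewhere in the mount code; the sketch below is only a generic illustration of the technique (exponential decay with an assumed half-life), not SeaweedFS's actual type.

```go
package main

import (
	"fmt"
	"math"
	"time"
)

// decayedCounter tracks "recent activity": each update first decays the
// stored value by the elapsed time (halving once per half-life), then adds
// the new weight, so old traffic fades instead of counting forever.
type decayedCounter struct {
	value    float64
	lastTsNs int64
	halfLife time.Duration
}

func (c *decayedCounter) decayTo(nowNs int64) {
	if c.lastTsNs != 0 {
		dt := float64(nowNs - c.lastTsNs)
		c.value *= math.Exp(-dt * math.Ln2 / float64(c.halfLife.Nanoseconds()))
	}
	c.lastTsNs = nowNs
}

func (c *decayedCounter) Add(weight float64, nowNs int64) {
	c.decayTo(nowNs)
	c.value += weight
}

func (c *decayedCounter) ValueAt(nowNs int64) float64 {
	c.decayTo(nowNs)
	return c.value
}

func main() {
	c := &decayedCounter{halfLife: time.Second}
	now := time.Now().UnixNano()
	c.Add(1, now)
	// One half-life later the recorded activity has halved; among several
	// chunks, the one with the smallest current value is the inactive
	// candidate to move to sealed.
	fmt.Printf("%.2f\n", c.ValueAt(now+time.Second.Nanoseconds())) // ~0.50
}
```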
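
"group chunks into sections" partitions a file's offset space so an operation touches only the chunks of the affected region instead of scanning the whole chunk list (see also "use NewFileChunkSection to create"). Below is a minimal sketch of the offset-to-section arithmetic; the section size and function name are illustrative assumptions, not the PR's actual constants.

```go
package main

import "fmt"

// Illustrative section size (64 MiB); the real constant lives in the PR's
// FileChunkSection code and may differ.
const sectionSize = 2 * 1024 * 1024 * 32

// sectionsSpanned reports the first and last section index touched by a
// chunk occupying the file range [offset, offset+size).
func sectionsSpanned(offset, size int64) (from, to int64) {
	return offset / sectionSize, (offset + size - 1) / sectionSize
}

func main() {
	// A 20 MB chunk written at offset 60 MB straddles sections 0 and 1,
	// so it must be registered with both sections.
	from, to := sectionsSpanned(60_000_000, 20_000_000)
	fmt.Println(from, to) // 0 1
}
```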
Diffstat (limited to 'weed/filer/filechunks.go')
| -rw-r--r-- | weed/filer/filechunks.go | 248 |
1 file changed, 110 insertions, 138 deletions
```diff
diff --git a/weed/filer/filechunks.go b/weed/filer/filechunks.go
index 061e0757a..d872bd22d 100644
--- a/weed/filer/filechunks.go
+++ b/weed/filer/filechunks.go
@@ -4,7 +4,6 @@ import (
 	"bytes"
 	"fmt"
 	"github.com/seaweedfs/seaweedfs/weed/wdclient"
-	"golang.org/x/exp/slices"
 	"math"
 
 	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
@@ -42,7 +41,7 @@ func ETag(entry *filer_pb.Entry) (etag string) {
 }
 
 func ETagEntry(entry *Entry) (etag string) {
-	if entry.IsInRemoteOnly() {
+	if entry.IsInRemoteOnly() {
 		return entry.Remote.RemoteETag
 	}
 	if entry.Attr.Md5 == nil {
@@ -66,8 +65,15 @@ func CompactFileChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks
 
 	visibles, _ := NonOverlappingVisibleIntervals(lookupFileIdFn, chunks, 0, math.MaxInt64)
 
+	compacted, garbage = SeparateGarbageChunks(visibles, chunks)
+
+	return
+}
+
+func SeparateGarbageChunks(visibles *IntervalList[*VisibleInterval], chunks []*filer_pb.FileChunk) (compacted []*filer_pb.FileChunk, garbage []*filer_pb.FileChunk) {
 	fileIds := make(map[string]bool)
-	for _, interval := range visibles {
+	for x := visibles.Front(); x != nil; x = x.Next {
+		interval := x.Value
 		fileIds[interval.fileId] = true
 	}
 	for _, chunk := range chunks {
@@ -77,8 +83,7 @@ func CompactFileChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks
 			garbage = append(garbage, chunk)
 		}
 	}
-
-	return
+	return compacted, garbage
 }
 
 func MinusChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, as, bs []*filer_pb.FileChunk) (delta []*filer_pb.FileChunk, err error) {
@@ -131,20 +136,39 @@ func DoMinusChunksBySourceFileId(as, bs []*filer_pb.FileChunk) (delta []*filer_p
 }
 
 type ChunkView struct {
-	FileId      string
-	Offset      int64
-	Size        uint64
-	LogicOffset int64 // actual offset in the file, for the data specified via [offset, offset+size) in current chunk
-	ChunkSize   uint64
-	CipherKey   []byte
-	IsGzipped   bool
+	FileId        string
+	OffsetInChunk int64 // offset within the chunk
+	ViewSize      uint64
+	ViewOffset    int64 // actual offset in the file, for the data specified via [offset, offset+size) in current chunk
+	ChunkSize     uint64
+	CipherKey     []byte
+	IsGzipped     bool
+	ModifiedTsNs  int64
+}
+
+func (cv *ChunkView) SetStartStop(start, stop int64) {
+	cv.OffsetInChunk += start - cv.ViewOffset
+	cv.ViewOffset = start
+	cv.ViewSize = uint64(stop - start)
+}
+func (cv *ChunkView) Clone() IntervalValue {
+	return &ChunkView{
+		FileId:        cv.FileId,
+		OffsetInChunk: cv.OffsetInChunk,
+		ViewSize:      cv.ViewSize,
+		ViewOffset:    cv.ViewOffset,
+		ChunkSize:     cv.ChunkSize,
+		CipherKey:     cv.CipherKey,
+		IsGzipped:     cv.IsGzipped,
+		ModifiedTsNs:  cv.ModifiedTsNs,
+	}
 }
 
 func (cv *ChunkView) IsFullChunk() bool {
-	return cv.Size == cv.ChunkSize
+	return cv.ViewSize == cv.ChunkSize
 }
 
-func ViewFromChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, offset int64, size int64) (views []*ChunkView) {
+func ViewFromChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, offset int64, size int64) (chunkViews *IntervalList[*ChunkView]) {
 
 	visibles, _ := NonOverlappingVisibleIntervals(lookupFileIdFn, chunks, offset, offset+size)
 
@@ -152,7 +176,7 @@ func ViewFromChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*
 
 }
 
-func ViewFromVisibleIntervals(visibles []VisibleInterval, offset int64, size int64) (views []*ChunkView) {
+func ViewFromVisibleIntervals(visibles *IntervalList[*VisibleInterval], offset int64, size int64) (chunkViews *IntervalList[*ChunkView]) {
 
 	stop := offset + size
 	if size == math.MaxInt64 {
@@ -162,164 +186,112 @@ func ViewFromVisibleIntervals(visibles []VisibleInterval, offset int64, size int
 		stop = math.MaxInt64
 	}
 
-	for _, chunk := range visibles {
+	chunkViews = NewIntervalList[*ChunkView]()
+	for x := visibles.Front(); x != nil; x = x.Next {
+		chunk := x.Value
 
 		chunkStart, chunkStop := max(offset, chunk.start), min(stop, chunk.stop)
 
 		if chunkStart < chunkStop {
-			views = append(views, &ChunkView{
-				FileId:      chunk.fileId,
-				Offset:      chunkStart - chunk.start + chunk.chunkOffset,
-				Size:        uint64(chunkStop - chunkStart),
-				LogicOffset: chunkStart,
-				ChunkSize:   chunk.chunkSize,
-				CipherKey:   chunk.cipherKey,
-				IsGzipped:   chunk.isGzipped,
+			chunkView := &ChunkView{
+				FileId:        chunk.fileId,
+				OffsetInChunk: chunkStart - chunk.start + chunk.offsetInChunk,
+				ViewSize:      uint64(chunkStop - chunkStart),
+				ViewOffset:    chunkStart,
+				ChunkSize:     chunk.chunkSize,
+				CipherKey:     chunk.cipherKey,
+				IsGzipped:     chunk.isGzipped,
+				ModifiedTsNs:  chunk.modifiedTsNs,
+			}
+			chunkViews.AppendInterval(&Interval[*ChunkView]{
+				StartOffset: chunkStart,
+				StopOffset:  chunkStop,
+				TsNs:        chunk.modifiedTsNs,
+				Value:       chunkView,
+				Prev:        nil,
+				Next:        nil,
 			})
 		}
 	}
 
-	return views
+	return chunkViews
 }
 
-func logPrintf(name string, visibles []VisibleInterval) {
-
-	/*
-		glog.V(0).Infof("%s len %d", name, len(visibles))
-		for _, v := range visibles {
-			glog.V(0).Infof("%s: [%d,%d) %s %d", name, v.start, v.stop, v.fileId, v.chunkOffset)
-		}
-	*/
-}
-
-func MergeIntoVisibles(visibles []VisibleInterval, chunk *filer_pb.FileChunk) (newVisibles []VisibleInterval) {
-
-	newV := newVisibleInterval(chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.GetFileIdString(), chunk.ModifiedTsNs, 0, chunk.Size, chunk.CipherKey, chunk.IsCompressed)
-
-	length := len(visibles)
-	if length == 0 {
-		return append(visibles, newV)
-	}
-	last := visibles[length-1]
-	if last.stop <= chunk.Offset {
-		return append(visibles, newV)
+func MergeIntoVisibles(visibles *IntervalList[*VisibleInterval], start int64, stop int64, chunk *filer_pb.FileChunk) {
+
+	newV := &VisibleInterval{
+		start:         start,
+		stop:          stop,
+		fileId:        chunk.GetFileIdString(),
+		modifiedTsNs:  chunk.ModifiedTsNs,
+		offsetInChunk: start - chunk.Offset, // the starting position in the chunk
+		chunkSize:     chunk.Size, // size of the chunk
+		cipherKey:     chunk.CipherKey,
+		isGzipped:     chunk.IsCompressed,
 	}
 
-	logPrintf("  before", visibles)
-
-	// glog.V(0).Infof("newVisibles %d adding chunk [%d,%d) %s size:%d", len(newVisibles), chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.GetFileIdString(), chunk.Size)
-	chunkStop := chunk.Offset + int64(chunk.Size)
-	for _, v := range visibles {
-		if v.start < chunk.Offset && chunk.Offset < v.stop {
-			t := newVisibleInterval(v.start, chunk.Offset, v.fileId, v.modifiedTsNs, v.chunkOffset, v.chunkSize, v.cipherKey, v.isGzipped)
-			newVisibles = append(newVisibles, t)
-			// glog.V(0).Infof("visible %d [%d,%d) =1> [%d,%d)", i, v.start, v.stop, t.start, t.stop)
-		}
-		if v.start < chunkStop && chunkStop < v.stop {
-			t := newVisibleInterval(chunkStop, v.stop, v.fileId, v.modifiedTsNs, v.chunkOffset+(chunkStop-v.start), v.chunkSize, v.cipherKey, v.isGzipped)
-			newVisibles = append(newVisibles, t)
-			// glog.V(0).Infof("visible %d [%d,%d) =2> [%d,%d)", i, v.start, v.stop, t.start, t.stop)
-		}
-		if chunkStop <= v.start || v.stop <= chunk.Offset {
-			newVisibles = append(newVisibles, v)
-			// glog.V(0).Infof("visible %d [%d,%d) =3> [%d,%d)", i, v.start, v.stop, v.start, v.stop)
-		}
-	}
-	newVisibles = append(newVisibles, newV)
-
-	logPrintf("  append", newVisibles)
+	visibles.InsertInterval(start, stop, chunk.ModifiedTsNs, newV)
+}
 
-	for i := len(newVisibles) - 1; i >= 0; i-- {
-		if i > 0 && newV.start < newVisibles[i-1].start {
-			newVisibles[i] = newVisibles[i-1]
-		} else {
-			newVisibles[i] = newV
-			break
-		}
+func MergeIntoChunkViews(chunkViews *IntervalList[*ChunkView], start int64, stop int64, chunk *filer_pb.FileChunk) {
+
+	chunkView := &ChunkView{
+		FileId:        chunk.GetFileIdString(),
+		OffsetInChunk: start - chunk.Offset,
+		ViewSize:      uint64(stop - start),
+		ViewOffset:    start,
+		ChunkSize:     chunk.Size,
+		CipherKey:     chunk.CipherKey,
+		IsGzipped:     chunk.IsCompressed,
+		ModifiedTsNs:  chunk.ModifiedTsNs,
 	}
 
-	logPrintf("  sorted", newVisibles)
-
-	return newVisibles
+	chunkViews.InsertInterval(start, stop, chunk.ModifiedTsNs, chunkView)
 }
 
 // NonOverlappingVisibleIntervals translates the file chunk into VisibleInterval in memory
 // If the file chunk content is a chunk manifest
-func NonOverlappingVisibleIntervals(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, startOffset int64, stopOffset int64) (visibles []VisibleInterval, err error) {
+func NonOverlappingVisibleIntervals(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, startOffset int64, stopOffset int64) (visibles *IntervalList[*VisibleInterval], err error) {
 
 	chunks, _, err = ResolveChunkManifest(lookupFileIdFn, chunks, startOffset, stopOffset)
 	if err != nil {
 		return
 	}
 
-	visibles2 := readResolvedChunks(chunks)
-
-	if true {
-		return visibles2, err
-	}
-	slices.SortFunc(chunks, func(a, b *filer_pb.FileChunk) bool {
-		if a.ModifiedTsNs == b.ModifiedTsNs {
-			filer_pb.EnsureFid(a)
-			filer_pb.EnsureFid(b)
-			if a.Fid == nil || b.Fid == nil {
-				return true
-			}
-			return a.Fid.FileKey < b.Fid.FileKey
-		}
-		return a.ModifiedTsNs < b.ModifiedTsNs
-	})
-	for _, chunk := range chunks {
-
-		// glog.V(0).Infof("merge [%d,%d)", chunk.Offset, chunk.Offset+int64(chunk.Size))
-		visibles = MergeIntoVisibles(visibles, chunk)
-
-		logPrintf("add", visibles)
+	visibles2 := readResolvedChunks(chunks, 0, math.MaxInt64)
 
-	}
-
-	if len(visibles) != len(visibles2) {
-		fmt.Printf("different visibles size %d : %d\n", len(visibles), len(visibles2))
-	} else {
-		for i := 0; i < len(visibles); i++ {
-			checkDifference(visibles[i], visibles2[i])
-		}
-	}
-
-	return
-}
-
-func checkDifference(x, y VisibleInterval) {
-	if x.start != y.start ||
-		x.stop != y.stop ||
-		x.fileId != y.fileId ||
-		x.modifiedTsNs != y.modifiedTsNs {
-		fmt.Printf("different visible %+v : %+v\n", x, y)
-	}
+	return visibles2, err
 }
 
 // find non-overlapping visible intervals
 // visible interval map to one file chunk
 type VisibleInterval struct {
-	start        int64
-	stop         int64
-	modifiedTsNs int64
-	fileId       string
-	chunkOffset  int64
-	chunkSize    uint64
-	cipherKey    []byte
-	isGzipped    bool
+	start         int64
+	stop          int64
+	modifiedTsNs  int64
+	fileId        string
+	offsetInChunk int64
+	chunkSize     uint64
+	cipherKey     []byte
+	isGzipped     bool
 }
 
-func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64, chunkOffset int64, chunkSize uint64, cipherKey []byte, isGzipped bool) VisibleInterval {
-	return VisibleInterval{
-		start:        start,
-		stop:         stop,
-		fileId:       fileId,
-		modifiedTsNs: modifiedTime,
-		chunkOffset:  chunkOffset, // the starting position in the chunk
-		chunkSize:    chunkSize,
-		cipherKey:    cipherKey,
-		isGzipped:    isGzipped,
+func (v *VisibleInterval) SetStartStop(start, stop int64) {
+	v.offsetInChunk += start - v.start
+	v.start, v.stop = start, stop
+}
+func (v *VisibleInterval) Clone() IntervalValue {
+	return &VisibleInterval{
+		start:         v.start,
+		stop:          v.stop,
+		modifiedTsNs:  v.modifiedTsNs,
+		fileId:        v.fileId,
+		offsetInChunk: v.offsetInChunk,
+		chunkSize:     v.chunkSize,
+		cipherKey:     v.cipherKey,
+		isGzipped:     v.isGzipped,
 	}
 }
```
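
The "Fixes" list in the commit message shows up in this diff as ChunkView.SetStartStop plus Clone: when an insert splits an existing view, the surviving piece is cloned (so the two halves share no state) and then re-ranged. The standalone check below mirrors the diff's SetStartStop arithmetic to show why OffsetInChunk must shift together with ViewOffset; the struct is a trimmed copy for illustration, not the full type.

```go
package main

import "fmt"

// Trimmed copy of the diff's ChunkView fields that SetStartStop touches.
type chunkView struct {
	OffsetInChunk int64  // where the view starts inside the chunk
	ViewSize      uint64 // bytes visible through this view
	ViewOffset    int64  // file offset where the view starts
}

// Same arithmetic as the diff: narrowing the view's file range must move
// the in-chunk offset by the same delta, or the view reads the wrong bytes.
func (cv *chunkView) SetStartStop(start, stop int64) {
	cv.OffsetInChunk += start - cv.ViewOffset
	cv.ViewOffset = start
	cv.ViewSize = uint64(stop - start)
}

func main() {
	// A 100-byte chunk fully visible at file offset 1000.
	cv := &chunkView{OffsetInChunk: 0, ViewSize: 100, ViewOffset: 1000}
	// A newer write covers [1000,1030); only [1030,1100) remains visible.
	cv.SetStartStop(1030, 1100)
	// File offset 1030 now maps to byte 30 of the chunk.
	fmt.Println(cv.OffsetInChunk, cv.ViewOffset, cv.ViewSize) // 30 1030 70
}
```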
