aboutsummaryrefslogtreecommitdiff
path: root/weed/filer/filechunks.go
diff options
context:
space:
mode:
authorChris Lu <chrislusf@users.noreply.github.com>2023-01-02 23:20:45 -0800
committerGitHub <noreply@github.com>2023-01-02 23:20:45 -0800
commitd4566d4aaa426b33015780c7cc18f887fc07cca4 (patch)
tree7c3b5cb3d9e54297b9d4213b67408f86149013f7 /weed/filer/filechunks.go
parent367353b936c450906e88e850c7d1e804f97c3560 (diff)
downloadseaweedfs-d4566d4aaa426b33015780c7cc18f887fc07cca4.tar.xz
seaweedfs-d4566d4aaa426b33015780c7cc18f887fc07cca4.zip
more solid weed mount (#4089)
* compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slab clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that may be used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * change chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add insert interval * interval list 
adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
Diffstat (limited to 'weed/filer/filechunks.go')
-rw-r--r--weed/filer/filechunks.go248
1 file changed, 110 insertions, 138 deletions
diff --git a/weed/filer/filechunks.go b/weed/filer/filechunks.go
index 061e0757a..d872bd22d 100644
--- a/weed/filer/filechunks.go
+++ b/weed/filer/filechunks.go
@@ -4,7 +4,6 @@ import (
"bytes"
"fmt"
"github.com/seaweedfs/seaweedfs/weed/wdclient"
- "golang.org/x/exp/slices"
"math"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
@@ -42,7 +41,7 @@ func ETag(entry *filer_pb.Entry) (etag string) {
}
func ETagEntry(entry *Entry) (etag string) {
- if entry.IsInRemoteOnly() {
+ if entry.IsInRemoteOnly() {
return entry.Remote.RemoteETag
}
if entry.Attr.Md5 == nil {
@@ -66,8 +65,15 @@ func CompactFileChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks
visibles, _ := NonOverlappingVisibleIntervals(lookupFileIdFn, chunks, 0, math.MaxInt64)
+ compacted, garbage = SeparateGarbageChunks(visibles, chunks)
+
+ return
+}
+
+func SeparateGarbageChunks(visibles *IntervalList[*VisibleInterval], chunks []*filer_pb.FileChunk) (compacted []*filer_pb.FileChunk, garbage []*filer_pb.FileChunk) {
fileIds := make(map[string]bool)
- for _, interval := range visibles {
+ for x := visibles.Front(); x != nil; x = x.Next {
+ interval := x.Value
fileIds[interval.fileId] = true
}
for _, chunk := range chunks {
@@ -77,8 +83,7 @@ func CompactFileChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks
garbage = append(garbage, chunk)
}
}
-
- return
+ return compacted, garbage
}
func MinusChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, as, bs []*filer_pb.FileChunk) (delta []*filer_pb.FileChunk, err error) {
@@ -131,20 +136,39 @@ func DoMinusChunksBySourceFileId(as, bs []*filer_pb.FileChunk) (delta []*filer_p
}
type ChunkView struct {
- FileId string
- Offset int64
- Size uint64
- LogicOffset int64 // actual offset in the file, for the data specified via [offset, offset+size) in current chunk
- ChunkSize uint64
- CipherKey []byte
- IsGzipped bool
+ FileId string
+ OffsetInChunk int64 // offset within the chunk
+ ViewSize uint64
+ ViewOffset int64 // actual offset in the file, for the data specified via [offset, offset+size) in current chunk
+ ChunkSize uint64
+ CipherKey []byte
+ IsGzipped bool
+ ModifiedTsNs int64
+}
+
+func (cv *ChunkView) SetStartStop(start, stop int64) {
+ cv.OffsetInChunk += start - cv.ViewOffset
+ cv.ViewOffset = start
+ cv.ViewSize = uint64(stop - start)
+}
+func (cv *ChunkView) Clone() IntervalValue {
+ return &ChunkView{
+ FileId: cv.FileId,
+ OffsetInChunk: cv.OffsetInChunk,
+ ViewSize: cv.ViewSize,
+ ViewOffset: cv.ViewOffset,
+ ChunkSize: cv.ChunkSize,
+ CipherKey: cv.CipherKey,
+ IsGzipped: cv.IsGzipped,
+ ModifiedTsNs: cv.ModifiedTsNs,
+ }
}
func (cv *ChunkView) IsFullChunk() bool {
- return cv.Size == cv.ChunkSize
+ return cv.ViewSize == cv.ChunkSize
}
-func ViewFromChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, offset int64, size int64) (views []*ChunkView) {
+func ViewFromChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, offset int64, size int64) (chunkViews *IntervalList[*ChunkView]) {
visibles, _ := NonOverlappingVisibleIntervals(lookupFileIdFn, chunks, offset, offset+size)
@@ -152,7 +176,7 @@ func ViewFromChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*
}
-func ViewFromVisibleIntervals(visibles []VisibleInterval, offset int64, size int64) (views []*ChunkView) {
+func ViewFromVisibleIntervals(visibles *IntervalList[*VisibleInterval], offset int64, size int64) (chunkViews *IntervalList[*ChunkView]) {
stop := offset + size
if size == math.MaxInt64 {
@@ -162,164 +186,112 @@ func ViewFromVisibleIntervals(visibles []VisibleInterval, offset int64, size int
stop = math.MaxInt64
}
- for _, chunk := range visibles {
+ chunkViews = NewIntervalList[*ChunkView]()
+ for x := visibles.Front(); x != nil; x = x.Next {
+ chunk := x.Value
chunkStart, chunkStop := max(offset, chunk.start), min(stop, chunk.stop)
if chunkStart < chunkStop {
- views = append(views, &ChunkView{
- FileId: chunk.fileId,
- Offset: chunkStart - chunk.start + chunk.chunkOffset,
- Size: uint64(chunkStop - chunkStart),
- LogicOffset: chunkStart,
- ChunkSize: chunk.chunkSize,
- CipherKey: chunk.cipherKey,
- IsGzipped: chunk.isGzipped,
+ chunkView := &ChunkView{
+ FileId: chunk.fileId,
+ OffsetInChunk: chunkStart - chunk.start + chunk.offsetInChunk,
+ ViewSize: uint64(chunkStop - chunkStart),
+ ViewOffset: chunkStart,
+ ChunkSize: chunk.chunkSize,
+ CipherKey: chunk.cipherKey,
+ IsGzipped: chunk.isGzipped,
+ ModifiedTsNs: chunk.modifiedTsNs,
+ }
+ chunkViews.AppendInterval(&Interval[*ChunkView]{
+ StartOffset: chunkStart,
+ StopOffset: chunkStop,
+ TsNs: chunk.modifiedTsNs,
+ Value: chunkView,
+ Prev: nil,
+ Next: nil,
})
}
}
- return views
+ return chunkViews
}
-func logPrintf(name string, visibles []VisibleInterval) {
-
- /*
- glog.V(0).Infof("%s len %d", name, len(visibles))
- for _, v := range visibles {
- glog.V(0).Infof("%s: [%d,%d) %s %d", name, v.start, v.stop, v.fileId, v.chunkOffset)
- }
- */
-}
-
-func MergeIntoVisibles(visibles []VisibleInterval, chunk *filer_pb.FileChunk) (newVisibles []VisibleInterval) {
-
- newV := newVisibleInterval(chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.GetFileIdString(), chunk.ModifiedTsNs, 0, chunk.Size, chunk.CipherKey, chunk.IsCompressed)
-
- length := len(visibles)
- if length == 0 {
- return append(visibles, newV)
- }
- last := visibles[length-1]
- if last.stop <= chunk.Offset {
- return append(visibles, newV)
+func MergeIntoVisibles(visibles *IntervalList[*VisibleInterval], start int64, stop int64, chunk *filer_pb.FileChunk) {
+
+ newV := &VisibleInterval{
+ start: start,
+ stop: stop,
+ fileId: chunk.GetFileIdString(),
+ modifiedTsNs: chunk.ModifiedTsNs,
+ offsetInChunk: start - chunk.Offset, // the starting position in the chunk
+ chunkSize: chunk.Size, // size of the chunk
+ cipherKey: chunk.CipherKey,
+ isGzipped: chunk.IsCompressed,
}
- logPrintf(" before", visibles)
- // glog.V(0).Infof("newVisibles %d adding chunk [%d,%d) %s size:%d", len(newVisibles), chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.GetFileIdString(), chunk.Size)
- chunkStop := chunk.Offset + int64(chunk.Size)
- for _, v := range visibles {
- if v.start < chunk.Offset && chunk.Offset < v.stop {
- t := newVisibleInterval(v.start, chunk.Offset, v.fileId, v.modifiedTsNs, v.chunkOffset, v.chunkSize, v.cipherKey, v.isGzipped)
- newVisibles = append(newVisibles, t)
- // glog.V(0).Infof("visible %d [%d,%d) =1> [%d,%d)", i, v.start, v.stop, t.start, t.stop)
- }
- if v.start < chunkStop && chunkStop < v.stop {
- t := newVisibleInterval(chunkStop, v.stop, v.fileId, v.modifiedTsNs, v.chunkOffset+(chunkStop-v.start), v.chunkSize, v.cipherKey, v.isGzipped)
- newVisibles = append(newVisibles, t)
- // glog.V(0).Infof("visible %d [%d,%d) =2> [%d,%d)", i, v.start, v.stop, t.start, t.stop)
- }
- if chunkStop <= v.start || v.stop <= chunk.Offset {
- newVisibles = append(newVisibles, v)
- // glog.V(0).Infof("visible %d [%d,%d) =3> [%d,%d)", i, v.start, v.stop, v.start, v.stop)
- }
- }
- newVisibles = append(newVisibles, newV)
-
- logPrintf(" append", newVisibles)
+ visibles.InsertInterval(start, stop, chunk.ModifiedTsNs, newV)
+}
- for i := len(newVisibles) - 1; i >= 0; i-- {
- if i > 0 && newV.start < newVisibles[i-1].start {
- newVisibles[i] = newVisibles[i-1]
- } else {
- newVisibles[i] = newV
- break
- }
+func MergeIntoChunkViews(chunkViews *IntervalList[*ChunkView], start int64, stop int64, chunk *filer_pb.FileChunk) {
+
+ chunkView := &ChunkView{
+ FileId: chunk.GetFileIdString(),
+ OffsetInChunk: start - chunk.Offset,
+ ViewSize: uint64(stop - start),
+ ViewOffset: start,
+ ChunkSize: chunk.Size,
+ CipherKey: chunk.CipherKey,
+ IsGzipped: chunk.IsCompressed,
+ ModifiedTsNs: chunk.ModifiedTsNs,
}
- logPrintf(" sorted", newVisibles)
- return newVisibles
+ chunkViews.InsertInterval(start, stop, chunk.ModifiedTsNs, chunkView)
}
// NonOverlappingVisibleIntervals translates the file chunk into VisibleInterval in memory
// If the file chunk content is a chunk manifest
-func NonOverlappingVisibleIntervals(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, startOffset int64, stopOffset int64) (visibles []VisibleInterval, err error) {
+func NonOverlappingVisibleIntervals(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, startOffset int64, stopOffset int64) (visibles *IntervalList[*VisibleInterval], err error) {
chunks, _, err = ResolveChunkManifest(lookupFileIdFn, chunks, startOffset, stopOffset)
if err != nil {
return
}
- visibles2 := readResolvedChunks(chunks)
-
- if true {
- return visibles2, err
- }
- slices.SortFunc(chunks, func(a, b *filer_pb.FileChunk) bool {
- if a.ModifiedTsNs == b.ModifiedTsNs {
- filer_pb.EnsureFid(a)
- filer_pb.EnsureFid(b)
- if a.Fid == nil || b.Fid == nil {
- return true
- }
- return a.Fid.FileKey < b.Fid.FileKey
- }
- return a.ModifiedTsNs < b.ModifiedTsNs
- })
- for _, chunk := range chunks {
-
- // glog.V(0).Infof("merge [%d,%d)", chunk.Offset, chunk.Offset+int64(chunk.Size))
- visibles = MergeIntoVisibles(visibles, chunk)
-
- logPrintf("add", visibles)
+ visibles2 := readResolvedChunks(chunks, 0, math.MaxInt64)
- }
-
- if len(visibles) != len(visibles2) {
- fmt.Printf("different visibles size %d : %d\n", len(visibles), len(visibles2))
- } else {
- for i := 0; i < len(visibles); i++ {
- checkDifference(visibles[i], visibles2[i])
- }
- }
-
- return
-}
-
-func checkDifference(x, y VisibleInterval) {
- if x.start != y.start ||
- x.stop != y.stop ||
- x.fileId != y.fileId ||
- x.modifiedTsNs != y.modifiedTsNs {
- fmt.Printf("different visible %+v : %+v\n", x, y)
- }
+ return visibles2, err
}
// find non-overlapping visible intervals
// visible interval map to one file chunk
type VisibleInterval struct {
- start int64
- stop int64
- modifiedTsNs int64
- fileId string
- chunkOffset int64
- chunkSize uint64
- cipherKey []byte
- isGzipped bool
+ start int64
+ stop int64
+ modifiedTsNs int64
+ fileId string
+ offsetInChunk int64
+ chunkSize uint64
+ cipherKey []byte
+ isGzipped bool
}
-func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64, chunkOffset int64, chunkSize uint64, cipherKey []byte, isGzipped bool) VisibleInterval {
- return VisibleInterval{
- start: start,
- stop: stop,
- fileId: fileId,
- modifiedTsNs: modifiedTime,
- chunkOffset: chunkOffset, // the starting position in the chunk
- chunkSize: chunkSize,
- cipherKey: cipherKey,
- isGzipped: isGzipped,
+func (v *VisibleInterval) SetStartStop(start, stop int64) {
+ v.offsetInChunk += start - v.start
+ v.start, v.stop = start, stop
+}
+func (v *VisibleInterval) Clone() IntervalValue {
+ return &VisibleInterval{
+ start: v.start,
+ stop: v.stop,
+ modifiedTsNs: v.modifiedTsNs,
+ fileId: v.fileId,
+ offsetInChunk: v.offsetInChunk,
+ chunkSize: v.chunkSize,
+ cipherKey: v.cipherKey,
+ isGzipped: v.isGzipped,
}
}