aboutsummaryrefslogtreecommitdiff
path: root/weed/filer/filechunks.go
diff options
context:
space:
mode:
authorChris Lu <chrislusf@users.noreply.github.com>2023-01-02 23:20:45 -0800
committerGitHub <noreply@github.com>2023-01-02 23:20:45 -0800
commitd4566d4aaa426b33015780c7cc18f887fc07cca4 (patch)
tree7c3b5cb3d9e54297b9d4213b67408f86149013f7 /weed/filer/filechunks.go
parent367353b936c450906e88e850c7d1e804f97c3560 (diff)
downloadseaweedfs-d4566d4aaa426b33015780c7cc18f887fc07cca4.tar.xz
seaweedfs-d4566d4aaa426b33015780c7cc18f887fc07cca4.zip
more solid weed mount (#4089)
* compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slab clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that may be used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * change chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add insert interval * interval list 
adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
Diffstat (limited to 'weed/filer/filechunks.go')
-rw-r--r--weed/filer/filechunks.go248
1 file changed, 110 insertions, 138 deletions
diff --git a/weed/filer/filechunks.go b/weed/filer/filechunks.go
index 061e0757a..d872bd22d 100644
--- a/weed/filer/filechunks.go
+++ b/weed/filer/filechunks.go
@@ -4,7 +4,6 @@ import (
"bytes"
"fmt"
"github.com/seaweedfs/seaweedfs/weed/wdclient"
- "golang.org/x/exp/slices"
"math"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
@@ -42,7 +41,7 @@ func ETag(entry *filer_pb.Entry) (etag string) {
}
func ETagEntry(entry *Entry) (etag string) {
- if entry.IsInRemoteOnly() {
+ if entry.IsInRemoteOnly() {
return entry.Remote.RemoteETag
}
if entry.Attr.Md5 == nil {
@@ -66,8 +65,15 @@ func CompactFileChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks
visibles, _ := NonOverlappingVisibleIntervals(lookupFileIdFn, chunks, 0, math.MaxInt64)
+ compacted, garbage = SeparateGarbageChunks(visibles, chunks)
+
+ return
+}
+
+func SeparateGarbageChunks(visibles *IntervalList[*VisibleInterval], chunks []*filer_pb.FileChunk) (compacted []*filer_pb.FileChunk, garbage []*filer_pb.FileChunk) {
fileIds := make(map[string]bool)
- for _, interval := range visibles {
+ for x := visibles.Front(); x != nil; x = x.Next {
+ interval := x.Value
fileIds[interval.fileId] = true
}
for _, chunk := range chunks {
@@ -77,8 +83,7 @@ func CompactFileChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks
garbage = append(garbage, chunk)
}
}
-
- return
+ return compacted, garbage
}
func MinusChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, as, bs []*filer_pb.FileChunk) (delta []*filer_pb.FileChunk, err error) {
@@ -131,20 +136,39 @@ func DoMinusChunksBySourceFileId(as, bs []*filer_pb.FileChunk) (delta []*filer_p
}
type ChunkView struct {
- FileId string
- Offset int64
- Size uint64
- LogicOffset int64 // actual offset in the file, for the data specified via [offset, offset+size) in current chunk
- ChunkSize uint64
- CipherKey []byte
- IsGzipped bool
+ FileId string
+ OffsetInChunk int64 // offset within the chunk
+ ViewSize uint64
+ ViewOffset int64 // actual offset in the file, for the data specified via [offset, offset+size) in current chunk
+ ChunkSize uint64
+ CipherKey []byte
+ IsGzipped bool
+ ModifiedTsNs int64
+}
+
+func (cv *ChunkView) SetStartStop(start, stop int64) {
+ cv.OffsetInChunk += start - cv.ViewOffset
+ cv.ViewOffset = start
+ cv.ViewSize = uint64(stop - start)
+}
+func (cv *ChunkView) Clone() IntervalValue {
+ return &ChunkView{
+ FileId: cv.FileId,
+ OffsetInChunk: cv.OffsetInChunk,
+ ViewSize: cv.ViewSize,
+ ViewOffset: cv.ViewOffset,
+ ChunkSize: cv.ChunkSize,
+ CipherKey: cv.CipherKey,
+ IsGzipped: cv.IsGzipped,
+ ModifiedTsNs: cv.ModifiedTsNs,
+ }
}
func (cv *ChunkView) IsFullChunk() bool {
- return cv.Size == cv.ChunkSize
+ return cv.ViewSize == cv.ChunkSize
}
-func ViewFromChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, offset int64, size int64) (views []*ChunkView) {
+func ViewFromChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, offset int64, size int64) (chunkViews *IntervalList[*ChunkView]) {
visibles, _ := NonOverlappingVisibleIntervals(lookupFileIdFn, chunks, offset, offset+size)
@@ -152,7 +176,7 @@ func ViewFromChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*
}
-func ViewFromVisibleIntervals(visibles []VisibleInterval, offset int64, size int64) (views []*ChunkView) {
+func ViewFromVisibleIntervals(visibles *IntervalList[*VisibleInterval], offset int64, size int64) (chunkViews *IntervalList[*ChunkView]) {
stop := offset + size
if size == math.MaxInt64 {
@@ -162,164 +186,112 @@ func ViewFromVisibleIntervals(visibles []VisibleInterval, offset int64, size int
stop = math.MaxInt64
}
- for _, chunk := range visibles {
+ chunkViews = NewIntervalList[*ChunkView]()
+ for x := visibles.Front(); x != nil; x = x.Next {
+ chunk := x.Value
chunkStart, chunkStop := max(offset, chunk.start), min(stop, chunk.stop)
if chunkStart < chunkStop {
- views = append(views, &ChunkView{
- FileId: chunk.fileId,
- Offset: chunkStart - chunk.start + chunk.chunkOffset,
- Size: uint64(chunkStop - chunkStart),
- LogicOffset: chunkStart,
- ChunkSize: chunk.chunkSize,
- CipherKey: chunk.cipherKey,
- IsGzipped: chunk.isGzipped,
+ chunkView := &ChunkView{
+ FileId: chunk.fileId,
+ OffsetInChunk: chunkStart - chunk.start + chunk.offsetInChunk,
+ ViewSize: uint64(chunkStop - chunkStart),
+ ViewOffset: chunkStart,
+ ChunkSize: chunk.chunkSize,
+ CipherKey: chunk.cipherKey,
+ IsGzipped: chunk.isGzipped,
+ ModifiedTsNs: chunk.modifiedTsNs,
+ }
+ chunkViews.AppendInterval(&Interval[*ChunkView]{
+ StartOffset: chunkStart,
+ StopOffset: chunkStop,
+ TsNs: chunk.modifiedTsNs,
+ Value: chunkView,
+ Prev: nil,
+ Next: nil,
})
}
}
- return views
+ return chunkViews
}
-func logPrintf(name string, visibles []VisibleInterval) {
-
- /*
- glog.V(0).Infof("%s len %d", name, len(visibles))
- for _, v := range visibles {
- glog.V(0).Infof("%s: [%d,%d) %s %d", name, v.start, v.stop, v.fileId, v.chunkOffset)
- }
- */
-}
-
-func MergeIntoVisibles(visibles []VisibleInterval, chunk *filer_pb.FileChunk) (newVisibles []VisibleInterval) {
-
- newV := newVisibleInterval(chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.GetFileIdString(), chunk.ModifiedTsNs, 0, chunk.Size, chunk.CipherKey, chunk.IsCompressed)
-
- length := len(visibles)
- if length == 0 {
- return append(visibles, newV)
- }
- last := visibles[length-1]
- if last.stop <= chunk.Offset {
- return append(visibles, newV)
+func MergeIntoVisibles(visibles *IntervalList[*VisibleInterval], start int64, stop int64, chunk *filer_pb.FileChunk) {
+
+ newV := &VisibleInterval{
+ start: start,
+ stop: stop,
+ fileId: chunk.GetFileIdString(),
+ modifiedTsNs: chunk.ModifiedTsNs,
+ offsetInChunk: start - chunk.Offset, // the starting position in the chunk
+ chunkSize: chunk.Size, // size of the chunk
+ cipherKey: chunk.CipherKey,
+ isGzipped: chunk.IsCompressed,
}
- logPrintf(" before", visibles)
- // glog.V(0).Infof("newVisibles %d adding chunk [%d,%d) %s size:%d", len(newVisibles), chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.GetFileIdString(), chunk.Size)
- chunkStop := chunk.Offset + int64(chunk.Size)
- for _, v := range visibles {
- if v.start < chunk.Offset && chunk.Offset < v.stop {
- t := newVisibleInterval(v.start, chunk.Offset, v.fileId, v.modifiedTsNs, v.chunkOffset, v.chunkSize, v.cipherKey, v.isGzipped)
- newVisibles = append(newVisibles, t)
- // glog.V(0).Infof("visible %d [%d,%d) =1> [%d,%d)", i, v.start, v.stop, t.start, t.stop)
- }
- if v.start < chunkStop && chunkStop < v.stop {
- t := newVisibleInterval(chunkStop, v.stop, v.fileId, v.modifiedTsNs, v.chunkOffset+(chunkStop-v.start), v.chunkSize, v.cipherKey, v.isGzipped)
- newVisibles = append(newVisibles, t)
- // glog.V(0).Infof("visible %d [%d,%d) =2> [%d,%d)", i, v.start, v.stop, t.start, t.stop)
- }
- if chunkStop <= v.start || v.stop <= chunk.Offset {
- newVisibles = append(newVisibles, v)
- // glog.V(0).Infof("visible %d [%d,%d) =3> [%d,%d)", i, v.start, v.stop, v.start, v.stop)
- }
- }
- newVisibles = append(newVisibles, newV)
-
- logPrintf(" append", newVisibles)
+ visibles.InsertInterval(start, stop, chunk.ModifiedTsNs, newV)
+}
- for i := len(newVisibles) - 1; i >= 0; i-- {
- if i > 0 && newV.start < newVisibles[i-1].start {
- newVisibles[i] = newVisibles[i-1]
- } else {
- newVisibles[i] = newV
- break
- }
+func MergeIntoChunkViews(chunkViews *IntervalList[*ChunkView], start int64, stop int64, chunk *filer_pb.FileChunk) {
+
+ chunkView := &ChunkView{
+ FileId: chunk.GetFileIdString(),
+ OffsetInChunk: start - chunk.Offset,
+ ViewSize: uint64(stop - start),
+ ViewOffset: start,
+ ChunkSize: chunk.Size,
+ CipherKey: chunk.CipherKey,
+ IsGzipped: chunk.IsCompressed,
+ ModifiedTsNs: chunk.ModifiedTsNs,
}
- logPrintf(" sorted", newVisibles)
- return newVisibles
+ chunkViews.InsertInterval(start, stop, chunk.ModifiedTsNs, chunkView)
}
// NonOverlappingVisibleIntervals translates the file chunk into VisibleInterval in memory
// If the file chunk content is a chunk manifest
-func NonOverlappingVisibleIntervals(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, startOffset int64, stopOffset int64) (visibles []VisibleInterval, err error) {
+func NonOverlappingVisibleIntervals(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, startOffset int64, stopOffset int64) (visibles *IntervalList[*VisibleInterval], err error) {
chunks, _, err = ResolveChunkManifest(lookupFileIdFn, chunks, startOffset, stopOffset)
if err != nil {
return
}
- visibles2 := readResolvedChunks(chunks)
-
- if true {
- return visibles2, err
- }
- slices.SortFunc(chunks, func(a, b *filer_pb.FileChunk) bool {
- if a.ModifiedTsNs == b.ModifiedTsNs {
- filer_pb.EnsureFid(a)
- filer_pb.EnsureFid(b)
- if a.Fid == nil || b.Fid == nil {
- return true
- }
- return a.Fid.FileKey < b.Fid.FileKey
- }
- return a.ModifiedTsNs < b.ModifiedTsNs
- })
- for _, chunk := range chunks {
-
- // glog.V(0).Infof("merge [%d,%d)", chunk.Offset, chunk.Offset+int64(chunk.Size))
- visibles = MergeIntoVisibles(visibles, chunk)
-
- logPrintf("add", visibles)
+ visibles2 := readResolvedChunks(chunks, 0, math.MaxInt64)
- }
-
- if len(visibles) != len(visibles2) {
- fmt.Printf("different visibles size %d : %d\n", len(visibles), len(visibles2))
- } else {
- for i := 0; i < len(visibles); i++ {
- checkDifference(visibles[i], visibles2[i])
- }
- }
-
- return
-}
-
-func checkDifference(x, y VisibleInterval) {
- if x.start != y.start ||
- x.stop != y.stop ||
- x.fileId != y.fileId ||
- x.modifiedTsNs != y.modifiedTsNs {
- fmt.Printf("different visible %+v : %+v\n", x, y)
- }
+ return visibles2, err
}
// find non-overlapping visible intervals
// visible interval map to one file chunk
type VisibleInterval struct {
- start int64
- stop int64
- modifiedTsNs int64
- fileId string
- chunkOffset int64
- chunkSize uint64
- cipherKey []byte
- isGzipped bool
+ start int64
+ stop int64
+ modifiedTsNs int64
+ fileId string
+ offsetInChunk int64
+ chunkSize uint64
+ cipherKey []byte
+ isGzipped bool
}
-func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64, chunkOffset int64, chunkSize uint64, cipherKey []byte, isGzipped bool) VisibleInterval {
- return VisibleInterval{
- start: start,
- stop: stop,
- fileId: fileId,
- modifiedTsNs: modifiedTime,
- chunkOffset: chunkOffset, // the starting position in the chunk
- chunkSize: chunkSize,
- cipherKey: cipherKey,
- isGzipped: isGzipped,
+func (v *VisibleInterval) SetStartStop(start, stop int64) {
+ v.offsetInChunk += start - v.start
+ v.start, v.stop = start, stop
+}
+func (v *VisibleInterval) Clone() IntervalValue {
+ return &VisibleInterval{
+ start: v.start,
+ stop: v.stop,
+ modifiedTsNs: v.modifiedTsNs,
+ fileId: v.fileId,
+ offsetInChunk: v.offsetInChunk,
+ chunkSize: v.chunkSize,
+ cipherKey: v.cipherKey,
+ isGzipped: v.isGzipped,
}
}