path: root/weed/util/chunk_cache/chunk_cache.go
author    Eugeniy E. Mikhailov <evgmik@gmail.com>  2024-09-12 00:09:20 -0400
committer GitHub <noreply@github.com>  2024-09-11 21:09:20 -0700
commit    dab0bb809766fd24fe243ff9abb2ff94c8076e41 (patch)
tree      d18066093fbddabb613a226e25e08b2657f13f1c /weed/util/chunk_cache/chunk_cache.go
parent    151f2ff7a9f551e713ff9894348a9bac138d5247 (diff)
Feature: limit caching to a prescribed number of bytes per file (#6009)
* feature: we can check whether a fileId is already in the cache. We use this to protect the cache from adding the same needle over and over.
* fuse mount: do not start the downloader if the needle is already in the cache.
* added a maxFilePartSizeInCache property to ChunkCache. If a file is very large, only its first maxFilePartSizeInCache bytes are put in the cache (subject to the needle size constraints).
* feature: for large files, put no more than the prescribed number of bytes in the cache. Before this patch, only the first needle of a large file was intended for caching. This patch caches up to the prescribed maximum number of bytes, which allows bypassing the default 2MB maximum for a file part stored in the cache.
* added dummy mock methods to satisfy the interfaces of ChunkCache.
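For context, here is a minimal caller-side sketch of how the new interface methods combine. The cacheFilePart function and the fetch callback are hypothetical, not part of this patch, and the import path assumes the current module layout:

package example

import "github.com/seaweedfs/seaweedfs/weed/util/chunk_cache"

// cacheFilePart skips the download entirely when the needle is already
// cached, and caps how many bytes of a large file are handed to the cache.
func cacheFilePart(cache chunk_cache.ChunkCache, fileId string, fetch func(limit uint64) ([]byte, error)) error {
	if cache.IsInCache(fileId, true) { // lockNeeded=true: this caller does not hold the cache lock
		return nil // already cached; do not start a downloader
	}
	limit := cache.GetMaxFilePartSizeInCache() // cache at most this many bytes of the file
	data, err := fetch(limit)
	if err != nil {
		return err
	}
	cache.SetChunk(fileId, data) // SetChunk re-checks IsInCache under its own write lock
	return nil
}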
Diffstat (limited to 'weed/util/chunk_cache/chunk_cache.go')
-rw-r--r--  weed/util/chunk_cache/chunk_cache.go | 46
1 file changed, 46 insertions(+), 0 deletions(-)
diff --git a/weed/util/chunk_cache/chunk_cache.go b/weed/util/chunk_cache/chunk_cache.go
index 866455a24..37dde1950 100644
--- a/weed/util/chunk_cache/chunk_cache.go
+++ b/weed/util/chunk_cache/chunk_cache.go
@@ -13,6 +13,8 @@ var ErrorOutOfBounds = errors.New("attempt to read out of bounds")
type ChunkCache interface {
ReadChunkAt(data []byte, fileId string, offset uint64) (n int, err error)
SetChunk(fileId string, data []byte)
+ IsInCache(fileId string, lockNeeded bool) (answer bool)
+ GetMaxFilePartSizeInCache() (answer uint64)
}
// a global cache for recently accessed file chunks
@@ -23,6 +25,7 @@ type TieredChunkCache struct {
onDiskCacheSizeLimit0 uint64
onDiskCacheSizeLimit1 uint64
onDiskCacheSizeLimit2 uint64
+ maxFilePartSizeInCache uint64
}
var _ ChunkCache = &TieredChunkCache{}
@@ -39,10 +42,49 @@ func NewTieredChunkCache(maxEntries int64, dir string, diskSizeInUnit int64, uni
c.diskCaches[0] = NewOnDiskCacheLayer(dir, "c0_2", diskSizeInUnit*unitSize/8, 2)
c.diskCaches[1] = NewOnDiskCacheLayer(dir, "c1_3", diskSizeInUnit*unitSize/4+diskSizeInUnit*unitSize/8, 3)
c.diskCaches[2] = NewOnDiskCacheLayer(dir, "c2_2", diskSizeInUnit*unitSize/2, 2)
+ c.maxFilePartSizeInCache = uint64(unitSize*diskSizeInUnit)/4
return c
}
+func (c *TieredChunkCache) GetMaxFilePartSizeInCache() (answer uint64) {
+ return c.maxFilePartSizeInCache
+}
+
+func (c *TieredChunkCache) IsInCache(fileId string, lockNeeded bool) (answer bool) {
+ if c == nil {
+ return false
+ }
+
+ if lockNeeded {
+ c.RLock()
+ defer c.RUnlock()
+ }
+
+ item := c.memCache.cache.Get(fileId)
+ if item != nil {
+ glog.V(4).Infof("fileId %s is in memcache", fileId)
+ return true
+ }
+
+ fid, err := needle.ParseFileIdFromString(fileId)
+ if err != nil {
+ glog.V(4).Infof("failed to parse file id %s", fileId)
+ return false
+ }
+
+ for i, diskCacheLayer := range c.diskCaches {
+ for k, v := range diskCacheLayer.diskCaches {
+ _, ok := v.nm.Get(fid.Key)
+ if ok {
+ glog.V(4).Infof("fileId %s is in diskCaches[%d].volume[%d]", fileId, i, k)
+ return true
+ }
+ }
+ }
+ return false
+}
+
func (c *TieredChunkCache) ReadChunkAt(data []byte, fileId string, offset uint64) (n int, err error) {
if c == nil {
return 0, nil
@@ -99,6 +141,10 @@ func (c *TieredChunkCache) SetChunk(fileId string, data []byte) {
defer c.Unlock()
glog.V(4).Infof("SetChunk %s size %d\n", fileId, len(data))
+ if c.IsInCache(fileId, false) {
+ glog.V(4).Infof("fileId %s is already in cache", fileId)
+ return
+ }
c.doSetChunk(fileId, data)
}
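Two details of the patch are worth noting. First, the lockNeeded flag exists because SetChunk already holds the cache's write lock when it re-checks IsInCache, so it passes false to avoid self-deadlock on the embedded sync.RWMutex; external callers, which hold no lock, should pass true. Second, NewTieredChunkCache derives the per-file cap as a quarter of the total on-disk cache size. A worked example of that arithmetic under assumed sizes (the 1 GiB unit and 4-unit disk below are illustrative, not defaults):

package main

import "fmt"

func main() {
	const (
		unitSize       = 1024 * 1024 * 1024 // assumed 1 GiB unit
		diskSizeInUnit = 4                  // assumed 4 units, i.e. a 4 GiB disk cache
	)
	// Mirrors c.maxFilePartSizeInCache = uint64(unitSize*diskSizeInUnit)/4
	maxFilePartSizeInCache := uint64(unitSize*diskSizeInUnit) / 4
	fmt.Printf("max cached bytes per file: %d (1 GiB)\n", maxFilePartSizeInCache)
}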