path: root/weed/filer
author     Eugeniy E. Mikhailov <evgmik@gmail.com>  2024-09-12 00:09:20 -0400
committer  GitHub <noreply@github.com>              2024-09-11 21:09:20 -0700
commit     dab0bb809766fd24fe243ff9abb2ff94c8076e41 (patch)
tree       d18066093fbddabb613a226e25e08b2657f13f1c /weed/filer
parent     151f2ff7a9f551e713ff9894348a9bac138d5247 (diff)
download   seaweedfs-dab0bb809766fd24fe243ff9abb2ff94c8076e41.tar.xz
           seaweedfs-dab0bb809766fd24fe243ff9abb2ff94c8076e41.zip
Feature: limit caching to prescribed number of bytes per file (#6009)
* feature: we can check whether a fileId is already in the cache. We use this to protect the cache from adding the same needle over and over.
* fuse mount: do not start the downloader if the needle is already in the cache.
* added a maxFilePartSizeInCache property to ChunkCache. If a file is very large, only its first maxFilePartSizeInCache bytes are put in the cache (subject to the needle size constraints).
* feature: for large files, put no more than the prescribed number of bytes in the cache. Before this patch only the first needle of a large file was intended for caching; this patch caches up to the prescribed maximum number of bytes, which allows bypassing the default 2MB maximum for a file part stored in the cache.
* added dummy mock methods to satisfy the interfaces of ChunkCache.
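The heart of the change is the boolean passed down to the cache layer: instead of "cache only the chunk at the start of the file" (ViewOffset == 0), the rule becomes "cache every chunk that still ends within the per-file budget". A minimal, runnable sketch of the two rules side by side (not the SeaweedFS code itself; the 4 MiB budget and 2 MiB chunk size below are made-up values, not SeaweedFS defaults):

```go
package main

import "fmt"

func main() {
	const chunkSize uint64 = 2 << 20 // 2 MiB chunks (illustrative)
	const budget uint64 = 4 << 20    // hypothetical GetMaxFilePartSizeInCache() value

	// Walk the chunks of a 10 MiB file and show which ones each rule caches.
	for viewOffset := uint64(0); viewOffset < 10<<20; viewOffset += chunkSize {
		oldRule := viewOffset == 0                // old: only the first chunk
		newRule := viewOffset+chunkSize <= budget // new: anything ending within budget
		fmt.Printf("chunk at %2d MiB: old=%v new=%v\n", viewOffset>>20, oldRule, newRule)
	}
}
```

Under the old rule only the chunk at offset 0 is cached; under the new rule the chunks at 0 MiB and 2 MiB both qualify, since each ends at or before the 4 MiB budget.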
Diffstat (limited to 'weed/filer')
-rw-r--r--  weed/filer/reader_at.go        2
-rw-r--r--  weed/filer/reader_at_test.go   8
-rw-r--r--  weed/filer/reader_cache.go     7
3 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/weed/filer/reader_at.go b/weed/filer/reader_at.go
index d475e6e11..b70942edc 100644
--- a/weed/filer/reader_at.go
+++ b/weed/filer/reader_at.go
@@ -199,7 +199,7 @@ func (c *ChunkReadAt) readChunkSliceAt(buffer []byte, chunkView *ChunkView, next
 		return fetchChunkRange(buffer, c.readerCache.lookupFileIdFn, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped, int64(offset))
 	}
 
-	n, err = c.readerCache.ReadChunkAt(buffer, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped, int64(offset), int(chunkView.ChunkSize), chunkView.ViewOffset == 0)
+	n, err = c.readerCache.ReadChunkAt(buffer, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped, int64(offset), int(chunkView.ChunkSize), (uint64(chunkView.ViewOffset)+chunkView.ChunkSize) <= c.readerCache.chunkCache.GetMaxFilePartSizeInCache())
 	if c.lastChunkFid != chunkView.FileId {
 		if chunkView.OffsetInChunk == 0 { // start of a new chunk
 			if c.lastChunkFid != "" {
diff --git a/weed/filer/reader_at_test.go b/weed/filer/reader_at_test.go
index 8bc383184..0d95d1aad 100644
--- a/weed/filer/reader_at_test.go
+++ b/weed/filer/reader_at_test.go
@@ -31,6 +31,14 @@ func (m *mockChunkCache) ReadChunkAt(data []byte, fileId string, offset uint64)
 func (m *mockChunkCache) SetChunk(fileId string, data []byte) {
 }
 
+func (m *mockChunkCache) GetMaxFilePartSizeInCache() (uint64) {
+	return 0
+}
+
+func (m *mockChunkCache) IsInCache(fileId string, lockNeeded bool) (answer bool) {
+	return false
+}
+
 func TestReaderAt(t *testing.T) {
 	visibles := NewIntervalList[*VisibleInterval]()
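The two added mock methods exist only to keep mockChunkCache satisfying the chunk-cache interface consumed by ReaderCache. A sketch of that interface as implied by the signatures in this diff (the return values of ReadChunkAt are truncated in the hunk header above, so (n int, err error) is an assumption; the authoritative definition lives in weed/util/chunk_cache and may carry more methods):

```go
package chunkcache_sketch

// ChunkCache as implied by the mock in this diff.
type ChunkCache interface {
	ReadChunkAt(data []byte, fileId string, offset uint64) (n int, err error) // return values assumed
	SetChunk(fileId string, data []byte)
	GetMaxFilePartSizeInCache() uint64
	IsInCache(fileId string, lockNeeded bool) bool
}
```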
diff --git a/weed/filer/reader_cache.go b/weed/filer/reader_cache.go
index a3df8e0bd..716e796c9 100644
--- a/weed/filer/reader_cache.go
+++ b/weed/filer/reader_cache.go
@@ -6,6 +6,7 @@ import (
 	"sync/atomic"
 	"time"
 
+	"github.com/seaweedfs/seaweedfs/weed/glog"
 	"github.com/seaweedfs/seaweedfs/weed/util/chunk_cache"
 	util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
 	"github.com/seaweedfs/seaweedfs/weed/util/mem"
@@ -61,6 +62,10 @@ func (rc *ReaderCache) MaybeCache(chunkViews *Interval[*ChunkView]) {
 		if _, found := rc.downloaders[chunkView.FileId]; found {
 			continue
 		}
+		if rc.chunkCache.IsInCache(chunkView.FileId, true) {
+			glog.V(4).Infof("%s is in cache", chunkView.FileId)
+			continue
+		}
 
 		if len(rc.downloaders) >= rc.limit {
 			// abort when slots are filled
@@ -69,7 +74,7 @@ func (rc *ReaderCache) MaybeCache(chunkViews *Interval[*ChunkView]) {
 		// glog.V(4).Infof("prefetch %s offset %d", chunkView.FileId, chunkView.ViewOffset)
 		// cache this chunk if not yet
-		cacher := newSingleChunkCacher(rc, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped, int(chunkView.ChunkSize), chunkView.ViewOffset == 0)
+		cacher := newSingleChunkCacher(rc, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped, int(chunkView.ChunkSize), (uint64(chunkView.ViewOffset)+chunkView.ChunkSize) <= rc.chunkCache.GetMaxFilePartSizeInCache())
 		go cacher.startCaching()
 		<-cacher.cacheStartedCh
 		rc.downloaders[chunkView.FileId] = cacher
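The new IsInCache check makes the prefetch path idempotent: a chunk is skipped when a downloader for it is already registered, and now also when the cache already holds it, so the same needle is never fetched twice just to warm the cache. A runnable toy of that guard pattern (toyCache and inFlight are illustrative names, not SeaweedFS types; the lockNeeded parameter is omitted in this toy):

```go
package main

import "fmt"

// toyCache stands in for the real chunk cache.
type toyCache struct{ chunks map[string][]byte }

func (c *toyCache) IsInCache(fileId string) bool {
	_, ok := c.chunks[fileId]
	return ok
}

func main() {
	cache := &toyCache{chunks: map[string][]byte{"3,0001": []byte("cached")}}
	inFlight := map[string]bool{"3,0002": true} // downloader already registered

	for _, fileId := range []string{"3,0001", "3,0002", "3,0003"} {
		if inFlight[fileId] {
			fmt.Println(fileId, "skip: downloader already running")
			continue
		}
		if cache.IsInCache(fileId) {
			fmt.Println(fileId, "skip: already in cache")
			continue
		}
		fmt.Println(fileId, "schedule prefetch")
	}
}
```

Only "3,0003" is scheduled; the other two are filtered by the in-flight and in-cache guards, mirroring the two continue branches in MaybeCache.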