| field | value | date |
|---|---|---|
| author | Eugeniy E. Mikhailov <evgmik@gmail.com> | 2024-09-12 00:09:20 -0400 |
| committer | GitHub <noreply@github.com> | 2024-09-11 21:09:20 -0700 |
| commit | dab0bb809766fd24fe243ff9abb2ff94c8076e41 (patch) | |
| tree | d18066093fbddabb613a226e25e08b2657f13f1c /weed/util/chunk_cache/chunk_cache.go | |
| parent | 151f2ff7a9f551e713ff9894348a9bac138d5247 (diff) | |
| download | seaweedfs-dab0bb809766fd24fe243ff9abb2ff94c8076e41.tar.xz, seaweedfs-dab0bb809766fd24fe243ff9abb2ff94c8076e41.zip | |
Feature: limit caching to a prescribed number of bytes per file (#6009)
* feature: we can check if a fileId is already in the cache
  We use this to keep the same needle from being added to the cache
  over and over.
* fuse mount: do not start the downloader if the needle is already in the cache (see the caller-side sketch after this list)
* added maxFilePartSizeInCache property to ChunkCache
  If a file is very large, only its first maxFilePartSizeInCache bytes
  are put into the cache (subject to the needle size constraints).
* feature: for large files, put no more than the prescribed number of bytes into the cache
  Before this patch, only the first needle of a large file was intended for
  caching. This patch caches up to the prescribed maximum number of bytes,
  which bypasses the default 2 MB maximum for a file part stored in the cache.
* added dummy mock methods to satisfy the ChunkCache interface
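
As an aside (not part of the commit), here is a minimal caller-side sketch of how the two new ChunkCache methods could be combined on the fuse-mount read path. Only `IsInCache`, `GetMaxFilePartSizeInCache` and `SetChunk` come from this patch; `cacheLike`, `maybeCacheChunk`, `fileOffset` and `download` are hypothetical names used for illustration.

```go
package chunkcache_example

// cacheLike mirrors just the ChunkCache methods used here; the signatures
// are copied from the interface change in the diff below.
type cacheLike interface {
	IsInCache(fileId string, lockNeeded bool) (answer bool)
	GetMaxFilePartSizeInCache() (answer uint64)
	SetChunk(fileId string, data []byte)
}

// maybeCacheChunk is a hypothetical helper: skip the downloader when the
// needle is already cached, and cache only bytes that fall within the first
// GetMaxFilePartSizeInCache() bytes of the file.
func maybeCacheChunk(cache cacheLike, fileId string, fileOffset uint64, download func() ([]byte, error)) error {
	if cache.IsInCache(fileId, true) {
		return nil // already cached, do not start the downloader
	}
	data, err := download()
	if err != nil {
		return err
	}
	limit := cache.GetMaxFilePartSizeInCache()
	if fileOffset >= limit {
		return nil // chunk lies entirely past the cached prefix of the file
	}
	if fileOffset+uint64(len(data)) > limit {
		data = data[:limit-fileOffset] // trim to the prescribed number of bytes
	}
	cache.SetChunk(fileId, data)
	return nil
}
```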
Diffstat (limited to 'weed/util/chunk_cache/chunk_cache.go')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | weed/util/chunk_cache/chunk_cache.go | 46 |

1 file changed, 46 insertions, 0 deletions
```diff
diff --git a/weed/util/chunk_cache/chunk_cache.go b/weed/util/chunk_cache/chunk_cache.go
index 866455a24..37dde1950 100644
--- a/weed/util/chunk_cache/chunk_cache.go
+++ b/weed/util/chunk_cache/chunk_cache.go
@@ -13,6 +13,8 @@ var ErrorOutOfBounds = errors.New("attempt to read out of bounds")
 type ChunkCache interface {
 	ReadChunkAt(data []byte, fileId string, offset uint64) (n int, err error)
 	SetChunk(fileId string, data []byte)
+	IsInCache(fileId string, lockNeeded bool) (answer bool)
+	GetMaxFilePartSizeInCache() (answer uint64)
 }
 
 // a global cache for recently accessed file chunks
@@ -23,6 +25,7 @@ type TieredChunkCache struct {
 	onDiskCacheSizeLimit0 uint64
 	onDiskCacheSizeLimit1 uint64
 	onDiskCacheSizeLimit2 uint64
+	maxFilePartSizeInCache uint64
 }
 
 var _ ChunkCache = &TieredChunkCache{}
@@ -39,10 +42,49 @@ func NewTieredChunkCache(maxEntries int64, dir string, diskSizeInUnit int64, uni
 	c.diskCaches[0] = NewOnDiskCacheLayer(dir, "c0_2", diskSizeInUnit*unitSize/8, 2)
 	c.diskCaches[1] = NewOnDiskCacheLayer(dir, "c1_3", diskSizeInUnit*unitSize/4+diskSizeInUnit*unitSize/8, 3)
 	c.diskCaches[2] = NewOnDiskCacheLayer(dir, "c2_2", diskSizeInUnit*unitSize/2, 2)
+	c.maxFilePartSizeInCache = uint64(unitSize*diskSizeInUnit)/4
 
 	return c
 }
 
+func (c *TieredChunkCache) GetMaxFilePartSizeInCache() (answer uint64) {
+	return c.maxFilePartSizeInCache
+}
+
+func (c *TieredChunkCache) IsInCache(fileId string, lockNeeded bool) (answer bool) {
+	if c == nil {
+		return false
+	}
+
+	if lockNeeded {
+		c.RLock()
+		defer c.RUnlock()
+	}
+
+	item := c.memCache.cache.Get(fileId)
+	if item != nil {
+		glog.V(4).Infof("fileId %s is in memcache", fileId)
+		return true
+	}
+
+	fid, err := needle.ParseFileIdFromString(fileId)
+	if err != nil {
+		glog.V(4).Infof("failed to parse file id %s", fileId)
+		return false
+	}
+
+	for i, diskCacheLayer := range c.diskCaches {
+		for k, v := range diskCacheLayer.diskCaches {
+			_, ok := v.nm.Get(fid.Key)
+			if ok {
+				glog.V(4).Infof("fileId %s is in diskCaches[%d].volume[%d]", fileId, i, k)
+				return true
+			}
+		}
+	}
+	return false
+}
+
 func (c *TieredChunkCache) ReadChunkAt(data []byte, fileId string, offset uint64) (n int, err error) {
 	if c == nil {
 		return 0, nil
@@ -99,6 +141,10 @@ func (c *TieredChunkCache) SetChunk(fileId string, data []byte) {
 	defer c.Unlock()
 
 	glog.V(4).Infof("SetChunk %s size %d\n", fileId, len(data))
+	if c.IsInCache(fileId, false) {
+		glog.V(4).Infof("fileId %s is already in cache", fileId)
+		return
+	}
 
 	c.doSetChunk(fileId, data)
 }
```
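
A note on the `lockNeeded` flag in the diff above: `SetChunk` already holds the write lock when it performs the duplicate check, and Go's `sync.RWMutex` is not reentrant, so taking the read lock again from the same goroutine would deadlock. The toy cache below is only a sketch of that convention under those assumptions, not code from the patch.

```go
package chunkcache_example

import "sync"

// toyCache imitates the locking pattern of TieredChunkCache: an embedded
// sync.RWMutex, a write-locked setter, and a lookup that is reused both
// with and without locking.
type toyCache struct {
	sync.RWMutex
	entries map[string][]byte
}

// isInCache mirrors IsInCache(fileId, lockNeeded): callers that already hold
// the write lock must pass lockNeeded=false to avoid self-deadlock.
func (c *toyCache) isInCache(fileId string, lockNeeded bool) bool {
	if lockNeeded {
		c.RLock()
		defer c.RUnlock()
	}
	_, ok := c.entries[fileId]
	return ok
}

func (c *toyCache) setChunk(fileId string, data []byte) {
	c.Lock()
	defer c.Unlock()
	// The write lock is held here, so the duplicate check runs without locking.
	if c.isInCache(fileId, false) {
		return
	}
	if c.entries == nil {
		c.entries = make(map[string][]byte)
	}
	c.entries[fileId] = data
}
```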
