about summary refs log tree commit diff
path: root/weed/mount/ml/cache_policy.go
diff options
context:
space:
mode:
Diffstat (limited to 'weed/mount/ml/cache_policy.go')
-rw-r--r-- weed/mount/ml/cache_policy.go 124
1 files changed, 62 insertions, 62 deletions
diff --git a/weed/mount/ml/cache_policy.go b/weed/mount/ml/cache_policy.go
index 256b36dc9..44650a44d 100644
--- a/weed/mount/ml/cache_policy.go
+++ b/weed/mount/ml/cache_policy.go
@@ -17,36 +17,36 @@ type CacheEntry struct {
Pattern AccessPattern // Detected access pattern
FileType MLFileType // Type of ML file
IsHot bool // Whether this is a hot chunk
-
+
// ML-specific metadata
- IsTrainingData bool // Whether this is training data
- IsModel bool // Whether this is a model file
- PredictedReuse float64 // Predicted reuse probability (0.0-1.0)
- EpochRelevance float64 // Relevance for current training epoch
+ IsTrainingData bool // Whether this is training data
+ IsModel bool // Whether this is a model file
+ PredictedReuse float64 // Predicted reuse probability (0.0-1.0)
+ EpochRelevance float64 // Relevance for current training epoch
}
// MLCachePolicy implements ML-aware cache eviction policy
type MLCachePolicy struct {
// Weights for different factors (sum should be 1.0)
accessFrequencyWeight float64 // Weight for access frequency
- recencyWeight float64 // Weight for access recency
+ recencyWeight float64 // Weight for access recency
sizeWeight float64 // Weight for item size
mlWeight float64 // Weight for ML-specific factors
-
+
// ML-specific parameters
- trainingDataBoost float64 // Boost factor for training data
- modelFileBoost float64 // Boost factor for model files
- sequentialBoost float64 // Boost factor for sequential access
- epochRelevanceBoost float64 // Boost factor for epoch-relevant data
-
+ trainingDataBoost float64 // Boost factor for training data
+ modelFileBoost float64 // Boost factor for model files
+ sequentialBoost float64 // Boost factor for sequential access
+ epochRelevanceBoost float64 // Boost factor for epoch-relevant data
+
// Time-based parameters
- hotThreshold time.Duration // Threshold for considering item "hot"
- coldThreshold time.Duration // Threshold for considering item "cold"
-
+ hotThreshold time.Duration // Threshold for considering item "hot"
+ coldThreshold time.Duration // Threshold for considering item "cold"
+
// Size-based parameters
- largeFileThreshold uint64 // Threshold for large files
- smallFilePreference float64 // Preference for keeping small files
-
+ largeFileThreshold uint64 // Threshold for large files
+ smallFilePreference float64 // Preference for keeping small files
+
// Statistics
totalEvictions int64
mlFileEvictions int64
@@ -60,19 +60,19 @@ func NewMLCachePolicy() *MLCachePolicy {
// Balanced weights
accessFrequencyWeight: 0.3,
recencyWeight: 0.3,
- sizeWeight: 0.2,
- mlWeight: 0.2,
-
+ sizeWeight: 0.2,
+ mlWeight: 0.2,
+
// ML-specific boosts
trainingDataBoost: 1.5, // 50% boost for training data
modelFileBoost: 2.0, // 100% boost for model files
sequentialBoost: 1.3, // 30% boost for sequential access
epochRelevanceBoost: 1.4, // 40% boost for epoch-relevant data
-
+
// Time thresholds
hotThreshold: 1 * time.Minute,
coldThreshold: 10 * time.Minute,
-
+
// Size parameters
largeFileThreshold: 10 * 1024 * 1024, // 10MB
smallFilePreference: 1.2, // 20% preference for small files
@@ -84,32 +84,32 @@ func NewMLCachePolicy() *MLCachePolicy {
func (policy *MLCachePolicy) CalculateEvictionScore(entry *CacheEntry) float64 {
now := time.Now()
timeSinceAccess := now.Sub(entry.LastAccess)
-
+
// Base factors
accessFrequencyScore := policy.calculateAccessFrequencyScore(entry)
recencyScore := policy.calculateRecencyScore(timeSinceAccess)
sizeScore := policy.calculateSizeScore(entry.Size)
mlScore := policy.calculateMLScore(entry)
-
+
// Weighted combination
totalScore := policy.accessFrequencyWeight*accessFrequencyScore +
policy.recencyWeight*recencyScore +
policy.sizeWeight*sizeScore +
policy.mlWeight*mlScore
-
- glog.V(4).Infof("Eviction score for inode=%d: total=%.3f (freq=%.3f, recency=%.3f, size=%.3f, ml=%.3f)",
+
+ glog.V(4).Infof("Eviction score for inode=%d: total=%.3f (freq=%.3f, recency=%.3f, size=%.3f, ml=%.3f)",
entry.Inode, totalScore, accessFrequencyScore, recencyScore, sizeScore, mlScore)
-
+
return totalScore
}
// ShouldEvict determines if a cache entry should be evicted
func (policy *MLCachePolicy) ShouldEvict(entry *CacheEntry) bool {
score := policy.CalculateEvictionScore(entry)
-
+
// Different thresholds based on ML file type
threshold := 0.3 // Default threshold
-
+
switch entry.FileType {
case MLFileModel:
threshold = 0.1 // Very low threshold - keep models cached longer
@@ -126,9 +126,9 @@ func (policy *MLCachePolicy) ShouldEvict(entry *CacheEntry) bool {
default:
threshold = 0.3 // Default for unknown files
}
-
+
shouldEvict := score < threshold
-
+
if shouldEvict {
policy.totalEvictions++
if entry.IsTrainingData {
@@ -140,11 +140,11 @@ func (policy *MLCachePolicy) ShouldEvict(entry *CacheEntry) bool {
if entry.FileType != MLFileUnknown {
policy.mlFileEvictions++
}
-
- glog.V(4).Infof("Evicting: inode=%d, score=%.3f < threshold=%.3f, type=%v",
+
+ glog.V(4).Infof("Evicting: inode=%d, score=%.3f < threshold=%.3f, type=%v",
entry.Inode, score, threshold, entry.FileType)
}
-
+
return shouldEvict
}
@@ -153,10 +153,10 @@ func (policy *MLCachePolicy) calculateAccessFrequencyScore(entry *CacheEntry) fl
if entry.AccessCount == 0 {
return 0.0
}
-
+
// Logarithmic scaling for access count
base := math.Log(float64(entry.AccessCount) + 1)
-
+
// Apply ML-specific boosts
boost := 1.0
if entry.IsTrainingData {
@@ -171,7 +171,7 @@ func (policy *MLCachePolicy) calculateAccessFrequencyScore(entry *CacheEntry) fl
if entry.EpochRelevance > 0.5 {
boost *= policy.epochRelevanceBoost
}
-
+
return base * boost
}
@@ -180,11 +180,11 @@ func (policy *MLCachePolicy) calculateRecencyScore(timeSinceAccess time.Duration
if timeSinceAccess <= policy.hotThreshold {
return 1.0 // Very recent access
}
-
+
if timeSinceAccess >= policy.coldThreshold {
return 0.1 // Very old access
}
-
+
// Linear decay between hot and cold thresholds
ratio := float64(timeSinceAccess-policy.hotThreshold) / float64(policy.coldThreshold-policy.hotThreshold)
return 1.0 - ratio*0.9 // Decay from 1.0 to 0.1
@@ -196,7 +196,7 @@ func (policy *MLCachePolicy) calculateSizeScore(size uint64) float64 {
// Prefer keeping smaller files (higher score)
return policy.smallFilePreference
}
-
+
// Larger files get lower score (more likely to be evicted)
// But not too low since they might be important model files
ratio := float64(size) / float64(policy.largeFileThreshold)
@@ -206,7 +206,7 @@ func (policy *MLCachePolicy) calculateSizeScore(size uint64) float64 {
// calculateMLScore calculates ML-specific factors
func (policy *MLCachePolicy) calculateMLScore(entry *CacheEntry) float64 {
score := 0.5 // Base score for non-ML files
-
+
// File type bonuses
switch entry.FileType {
case MLFileModel:
@@ -222,7 +222,7 @@ func (policy *MLCachePolicy) calculateMLScore(entry *CacheEntry) float64 {
default:
score = 0.5 // Default for unknown files
}
-
+
// Access pattern bonuses
switch entry.Pattern {
case SequentialAccess:
@@ -234,22 +234,22 @@ func (policy *MLCachePolicy) calculateMLScore(entry *CacheEntry) float64 {
case BatchAccess:
score *= 1.1 // Small boost for batch access
}
-
+
// Predicted reuse bonus
if entry.PredictedReuse > 0.7 {
score *= 1.2 // Boost for high predicted reuse
}
-
+
// Epoch relevance bonus
if entry.EpochRelevance > 0.5 {
score *= (1.0 + entry.EpochRelevance*0.3) // Up to 30% boost for epoch relevance
}
-
+
// Hot chunk bonus
if entry.IsHot {
score *= 1.1
}
-
+
return score
}
@@ -260,27 +260,27 @@ func (policy *MLCachePolicy) GetEvictionMetrics() MLCachePolicyMetrics {
MLFileEvictions: policy.mlFileEvictions,
TrainingDataEvictions: policy.trainingDataEvictions,
ModelFileEvictions: policy.modelFileEvictions,
-
+
// Configuration
AccessFrequencyWeight: policy.accessFrequencyWeight,
RecencyWeight: policy.recencyWeight,
- SizeWeight: policy.sizeWeight,
- MLWeight: policy.mlWeight,
+ SizeWeight: policy.sizeWeight,
+ MLWeight: policy.mlWeight,
}
}
// MLCachePolicyMetrics holds metrics for the ML cache policy
type MLCachePolicyMetrics struct {
- TotalEvictions int64 `json:"total_evictions"`
- MLFileEvictions int64 `json:"ml_file_evictions"`
- TrainingDataEvictions int64 `json:"training_data_evictions"`
- ModelFileEvictions int64 `json:"model_file_evictions"`
-
+ TotalEvictions int64 `json:"total_evictions"`
+ MLFileEvictions int64 `json:"ml_file_evictions"`
+ TrainingDataEvictions int64 `json:"training_data_evictions"`
+ ModelFileEvictions int64 `json:"model_file_evictions"`
+
// Configuration weights
AccessFrequencyWeight float64 `json:"access_frequency_weight"`
RecencyWeight float64 `json:"recency_weight"`
- SizeWeight float64 `json:"size_weight"`
- MLWeight float64 `json:"ml_weight"`
+ SizeWeight float64 `json:"size_weight"`
+ MLWeight float64 `json:"ml_weight"`
}
// SetWeights updates the eviction policy weights
@@ -290,14 +290,14 @@ func (policy *MLCachePolicy) SetWeights(frequency, recency, size, ml float64) {
glog.Warningf("Invalid weights provided, using defaults")
return
}
-
+
// Normalize weights to sum to 1.0
policy.accessFrequencyWeight = frequency / total
policy.recencyWeight = recency / total
policy.sizeWeight = size / total
policy.mlWeight = ml / total
-
- glog.V(2).Infof("Updated eviction policy weights: freq=%.2f, recency=%.2f, size=%.2f, ml=%.2f",
+
+ glog.V(2).Infof("Updated eviction policy weights: freq=%.2f, recency=%.2f, size=%.2f, ml=%.2f",
policy.accessFrequencyWeight, policy.recencyWeight, policy.sizeWeight, policy.mlWeight)
}
@@ -307,7 +307,7 @@ func (policy *MLCachePolicy) SetMLBoosts(trainingData, model, sequential, epochR
policy.modelFileBoost = model
policy.sequentialBoost = sequential
policy.epochRelevanceBoost = epochRelevance
-
- glog.V(2).Infof("Updated ML boost factors: training=%.2f, model=%.2f, sequential=%.2f, epoch=%.2f",
+
+ glog.V(2).Infof("Updated ML boost factors: training=%.2f, model=%.2f, sequential=%.2f, epoch=%.2f",
trainingData, model, sequential, epochRelevance)
}