diff options
Diffstat (limited to 'weed/mount/ml/cache_policy.go')
| -rw-r--r-- | weed/mount/ml/cache_policy.go | 124 |
1 files changed, 62 insertions, 62 deletions
diff --git a/weed/mount/ml/cache_policy.go b/weed/mount/ml/cache_policy.go index 256b36dc9..44650a44d 100644 --- a/weed/mount/ml/cache_policy.go +++ b/weed/mount/ml/cache_policy.go @@ -17,36 +17,36 @@ type CacheEntry struct { Pattern AccessPattern // Detected access pattern FileType MLFileType // Type of ML file IsHot bool // Whether this is a hot chunk - + // ML-specific metadata - IsTrainingData bool // Whether this is training data - IsModel bool // Whether this is a model file - PredictedReuse float64 // Predicted reuse probability (0.0-1.0) - EpochRelevance float64 // Relevance for current training epoch + IsTrainingData bool // Whether this is training data + IsModel bool // Whether this is a model file + PredictedReuse float64 // Predicted reuse probability (0.0-1.0) + EpochRelevance float64 // Relevance for current training epoch } // MLCachePolicy implements ML-aware cache eviction policy type MLCachePolicy struct { // Weights for different factors (sum should be 1.0) accessFrequencyWeight float64 // Weight for access frequency - recencyWeight float64 // Weight for access recency + recencyWeight float64 // Weight for access recency sizeWeight float64 // Weight for item size mlWeight float64 // Weight for ML-specific factors - + // ML-specific parameters - trainingDataBoost float64 // Boost factor for training data - modelFileBoost float64 // Boost factor for model files - sequentialBoost float64 // Boost factor for sequential access - epochRelevanceBoost float64 // Boost factor for epoch-relevant data - + trainingDataBoost float64 // Boost factor for training data + modelFileBoost float64 // Boost factor for model files + sequentialBoost float64 // Boost factor for sequential access + epochRelevanceBoost float64 // Boost factor for epoch-relevant data + // Time-based parameters - hotThreshold time.Duration // Threshold for considering item "hot" - coldThreshold time.Duration // Threshold for considering item "cold" - + hotThreshold time.Duration // Threshold for considering item "hot" + coldThreshold time.Duration // Threshold for considering item "cold" + // Size-based parameters - largeFileThreshold uint64 // Threshold for large files - smallFilePreference float64 // Preference for keeping small files - + largeFileThreshold uint64 // Threshold for large files + smallFilePreference float64 // Preference for keeping small files + // Statistics totalEvictions int64 mlFileEvictions int64 @@ -60,19 +60,19 @@ func NewMLCachePolicy() *MLCachePolicy { // Balanced weights accessFrequencyWeight: 0.3, recencyWeight: 0.3, - sizeWeight: 0.2, - mlWeight: 0.2, - + sizeWeight: 0.2, + mlWeight: 0.2, + // ML-specific boosts trainingDataBoost: 1.5, // 50% boost for training data modelFileBoost: 2.0, // 100% boost for model files sequentialBoost: 1.3, // 30% boost for sequential access epochRelevanceBoost: 1.4, // 40% boost for epoch-relevant data - + // Time thresholds hotThreshold: 1 * time.Minute, coldThreshold: 10 * time.Minute, - + // Size parameters largeFileThreshold: 10 * 1024 * 1024, // 10MB smallFilePreference: 1.2, // 20% preference for small files @@ -84,32 +84,32 @@ func NewMLCachePolicy() *MLCachePolicy { func (policy *MLCachePolicy) CalculateEvictionScore(entry *CacheEntry) float64 { now := time.Now() timeSinceAccess := now.Sub(entry.LastAccess) - + // Base factors accessFrequencyScore := policy.calculateAccessFrequencyScore(entry) recencyScore := policy.calculateRecencyScore(timeSinceAccess) sizeScore := policy.calculateSizeScore(entry.Size) mlScore := policy.calculateMLScore(entry) - + // Weighted combination totalScore := policy.accessFrequencyWeight*accessFrequencyScore + policy.recencyWeight*recencyScore + policy.sizeWeight*sizeScore + policy.mlWeight*mlScore - - glog.V(4).Infof("Eviction score for inode=%d: total=%.3f (freq=%.3f, recency=%.3f, size=%.3f, ml=%.3f)", + + glog.V(4).Infof("Eviction score for inode=%d: total=%.3f (freq=%.3f, recency=%.3f, size=%.3f, ml=%.3f)", entry.Inode, totalScore, accessFrequencyScore, recencyScore, sizeScore, mlScore) - + return totalScore } // ShouldEvict determines if a cache entry should be evicted func (policy *MLCachePolicy) ShouldEvict(entry *CacheEntry) bool { score := policy.CalculateEvictionScore(entry) - + // Different thresholds based on ML file type threshold := 0.3 // Default threshold - + switch entry.FileType { case MLFileModel: threshold = 0.1 // Very low threshold - keep models cached longer @@ -126,9 +126,9 @@ func (policy *MLCachePolicy) ShouldEvict(entry *CacheEntry) bool { default: threshold = 0.3 // Default for unknown files } - + shouldEvict := score < threshold - + if shouldEvict { policy.totalEvictions++ if entry.IsTrainingData { @@ -140,11 +140,11 @@ func (policy *MLCachePolicy) ShouldEvict(entry *CacheEntry) bool { if entry.FileType != MLFileUnknown { policy.mlFileEvictions++ } - - glog.V(4).Infof("Evicting: inode=%d, score=%.3f < threshold=%.3f, type=%v", + + glog.V(4).Infof("Evicting: inode=%d, score=%.3f < threshold=%.3f, type=%v", entry.Inode, score, threshold, entry.FileType) } - + return shouldEvict } @@ -153,10 +153,10 @@ func (policy *MLCachePolicy) calculateAccessFrequencyScore(entry *CacheEntry) fl if entry.AccessCount == 0 { return 0.0 } - + // Logarithmic scaling for access count base := math.Log(float64(entry.AccessCount) + 1) - + // Apply ML-specific boosts boost := 1.0 if entry.IsTrainingData { @@ -171,7 +171,7 @@ func (policy *MLCachePolicy) calculateAccessFrequencyScore(entry *CacheEntry) fl if entry.EpochRelevance > 0.5 { boost *= policy.epochRelevanceBoost } - + return base * boost } @@ -180,11 +180,11 @@ func (policy *MLCachePolicy) calculateRecencyScore(timeSinceAccess time.Duration if timeSinceAccess <= policy.hotThreshold { return 1.0 // Very recent access } - + if timeSinceAccess >= policy.coldThreshold { return 0.1 // Very old access } - + // Linear decay between hot and cold thresholds ratio := float64(timeSinceAccess-policy.hotThreshold) / float64(policy.coldThreshold-policy.hotThreshold) return 1.0 - ratio*0.9 // Decay from 1.0 to 0.1 @@ -196,7 +196,7 @@ func (policy *MLCachePolicy) calculateSizeScore(size uint64) float64 { // Prefer keeping smaller files (higher score) return policy.smallFilePreference } - + // Larger files get lower score (more likely to be evicted) // But not too low since they might be important model files ratio := float64(size) / float64(policy.largeFileThreshold) @@ -206,7 +206,7 @@ func (policy *MLCachePolicy) calculateSizeScore(size uint64) float64 { // calculateMLScore calculates ML-specific factors func (policy *MLCachePolicy) calculateMLScore(entry *CacheEntry) float64 { score := 0.5 // Base score for non-ML files - + // File type bonuses switch entry.FileType { case MLFileModel: @@ -222,7 +222,7 @@ func (policy *MLCachePolicy) calculateMLScore(entry *CacheEntry) float64 { default: score = 0.5 // Default for unknown files } - + // Access pattern bonuses switch entry.Pattern { case SequentialAccess: @@ -234,22 +234,22 @@ func (policy *MLCachePolicy) calculateMLScore(entry *CacheEntry) float64 { case BatchAccess: score *= 1.1 // Small boost for batch access } - + // Predicted reuse bonus if entry.PredictedReuse > 0.7 { score *= 1.2 // Boost for high predicted reuse } - + // Epoch relevance bonus if entry.EpochRelevance > 0.5 { score *= (1.0 + entry.EpochRelevance*0.3) // Up to 30% boost for epoch relevance } - + // Hot chunk bonus if entry.IsHot { score *= 1.1 } - + return score } @@ -260,27 +260,27 @@ func (policy *MLCachePolicy) GetEvictionMetrics() MLCachePolicyMetrics { MLFileEvictions: policy.mlFileEvictions, TrainingDataEvictions: policy.trainingDataEvictions, ModelFileEvictions: policy.modelFileEvictions, - + // Configuration AccessFrequencyWeight: policy.accessFrequencyWeight, RecencyWeight: policy.recencyWeight, - SizeWeight: policy.sizeWeight, - MLWeight: policy.mlWeight, + SizeWeight: policy.sizeWeight, + MLWeight: policy.mlWeight, } } // MLCachePolicyMetrics holds metrics for the ML cache policy type MLCachePolicyMetrics struct { - TotalEvictions int64 `json:"total_evictions"` - MLFileEvictions int64 `json:"ml_file_evictions"` - TrainingDataEvictions int64 `json:"training_data_evictions"` - ModelFileEvictions int64 `json:"model_file_evictions"` - + TotalEvictions int64 `json:"total_evictions"` + MLFileEvictions int64 `json:"ml_file_evictions"` + TrainingDataEvictions int64 `json:"training_data_evictions"` + ModelFileEvictions int64 `json:"model_file_evictions"` + // Configuration weights AccessFrequencyWeight float64 `json:"access_frequency_weight"` RecencyWeight float64 `json:"recency_weight"` - SizeWeight float64 `json:"size_weight"` - MLWeight float64 `json:"ml_weight"` + SizeWeight float64 `json:"size_weight"` + MLWeight float64 `json:"ml_weight"` } // SetWeights updates the eviction policy weights @@ -290,14 +290,14 @@ func (policy *MLCachePolicy) SetWeights(frequency, recency, size, ml float64) { glog.Warningf("Invalid weights provided, using defaults") return } - + // Normalize weights to sum to 1.0 policy.accessFrequencyWeight = frequency / total policy.recencyWeight = recency / total policy.sizeWeight = size / total policy.mlWeight = ml / total - - glog.V(2).Infof("Updated eviction policy weights: freq=%.2f, recency=%.2f, size=%.2f, ml=%.2f", + + glog.V(2).Infof("Updated eviction policy weights: freq=%.2f, recency=%.2f, size=%.2f, ml=%.2f", policy.accessFrequencyWeight, policy.recencyWeight, policy.sizeWeight, policy.mlWeight) } @@ -307,7 +307,7 @@ func (policy *MLCachePolicy) SetMLBoosts(trainingData, model, sequential, epochR policy.modelFileBoost = model policy.sequentialBoost = sequential policy.epochRelevanceBoost = epochRelevance - - glog.V(2).Infof("Updated ML boost factors: training=%.2f, model=%.2f, sequential=%.2f, epoch=%.2f", + + glog.V(2).Infof("Updated ML boost factors: training=%.2f, model=%.2f, sequential=%.2f, epoch=%.2f", trainingData, model, sequential, epochRelevance) } |
