aboutsummaryrefslogtreecommitdiff
path: root/weed/mount/ml/cache_policy.go
diff options
context:
space:
mode:
Diffstat (limited to 'weed/mount/ml/cache_policy.go')
-rw-r--r--weed/mount/ml/cache_policy.go313
1 files changed, 313 insertions, 0 deletions
diff --git a/weed/mount/ml/cache_policy.go b/weed/mount/ml/cache_policy.go
new file mode 100644
index 000000000..256b36dc9
--- /dev/null
+++ b/weed/mount/ml/cache_policy.go
@@ -0,0 +1,313 @@
+package ml
+
+import (
+ "math"
+ "time"
+
+ "github.com/seaweedfs/seaweedfs/weed/glog"
+)
+
+// CacheEntry represents a cached item with ML-aware metadata
+type CacheEntry struct {
+ Inode uint64 // File inode
+ Size uint64 // Size of cached data
+ LastAccess time.Time // Last access time
+ AccessCount int64 // Total access count
+ CacheLevel int // Cache level (0=memory, 1=disk, etc.)
+ Pattern AccessPattern // Detected access pattern
+ FileType MLFileType // Type of ML file
+ IsHot bool // Whether this is a hot chunk
+
+ // ML-specific metadata
+ IsTrainingData bool // Whether this is training data
+ IsModel bool // Whether this is a model file
+ PredictedReuse float64 // Predicted reuse probability (0.0-1.0)
+ EpochRelevance float64 // Relevance for current training epoch
+}
+
+// MLCachePolicy implements ML-aware cache eviction policy
+type MLCachePolicy struct {
+ // Weights for different factors (sum should be 1.0)
+ accessFrequencyWeight float64 // Weight for access frequency
+ recencyWeight float64 // Weight for access recency
+ sizeWeight float64 // Weight for item size
+ mlWeight float64 // Weight for ML-specific factors
+
+ // ML-specific parameters
+ trainingDataBoost float64 // Boost factor for training data
+ modelFileBoost float64 // Boost factor for model files
+ sequentialBoost float64 // Boost factor for sequential access
+ epochRelevanceBoost float64 // Boost factor for epoch-relevant data
+
+ // Time-based parameters
+ hotThreshold time.Duration // Threshold for considering item "hot"
+ coldThreshold time.Duration // Threshold for considering item "cold"
+
+ // Size-based parameters
+ largeFileThreshold uint64 // Threshold for large files
+ smallFilePreference float64 // Preference for keeping small files
+
+ // Statistics
+ totalEvictions int64
+ mlFileEvictions int64
+ trainingDataEvictions int64
+ modelFileEvictions int64
+}
+
+// NewMLCachePolicy creates a new ML-aware cache eviction policy
+func NewMLCachePolicy() *MLCachePolicy {
+ return &MLCachePolicy{
+ // Balanced weights
+ accessFrequencyWeight: 0.3,
+ recencyWeight: 0.3,
+ sizeWeight: 0.2,
+ mlWeight: 0.2,
+
+ // ML-specific boosts
+ trainingDataBoost: 1.5, // 50% boost for training data
+ modelFileBoost: 2.0, // 100% boost for model files
+ sequentialBoost: 1.3, // 30% boost for sequential access
+ epochRelevanceBoost: 1.4, // 40% boost for epoch-relevant data
+
+ // Time thresholds
+ hotThreshold: 1 * time.Minute,
+ coldThreshold: 10 * time.Minute,
+
+ // Size parameters
+ largeFileThreshold: 10 * 1024 * 1024, // 10MB
+ smallFilePreference: 1.2, // 20% preference for small files
+ }
+}
+
+// CalculateEvictionScore calculates an eviction score for a cache entry
+// Lower scores indicate higher priority for eviction
+func (policy *MLCachePolicy) CalculateEvictionScore(entry *CacheEntry) float64 {
+ now := time.Now()
+ timeSinceAccess := now.Sub(entry.LastAccess)
+
+ // Base factors
+ accessFrequencyScore := policy.calculateAccessFrequencyScore(entry)
+ recencyScore := policy.calculateRecencyScore(timeSinceAccess)
+ sizeScore := policy.calculateSizeScore(entry.Size)
+ mlScore := policy.calculateMLScore(entry)
+
+ // Weighted combination
+ totalScore := policy.accessFrequencyWeight*accessFrequencyScore +
+ policy.recencyWeight*recencyScore +
+ policy.sizeWeight*sizeScore +
+ policy.mlWeight*mlScore
+
+ glog.V(4).Infof("Eviction score for inode=%d: total=%.3f (freq=%.3f, recency=%.3f, size=%.3f, ml=%.3f)",
+ entry.Inode, totalScore, accessFrequencyScore, recencyScore, sizeScore, mlScore)
+
+ return totalScore
+}
+
+// ShouldEvict determines if a cache entry should be evicted
+func (policy *MLCachePolicy) ShouldEvict(entry *CacheEntry) bool {
+ score := policy.CalculateEvictionScore(entry)
+
+ // Different thresholds based on ML file type
+ threshold := 0.3 // Default threshold
+
+ switch entry.FileType {
+ case MLFileModel:
+ threshold = 0.1 // Very low threshold - keep models cached longer
+ case MLFileDataset:
+ if entry.Pattern == SequentialAccess || entry.Pattern == EpochAccess {
+ threshold = 0.2 // Lower threshold for sequential dataset access
+ } else {
+ threshold = 0.4 // Higher threshold for random dataset access
+ }
+ case MLFileTensor:
+ threshold = 0.25 // Medium threshold for tensor files
+ case MLFileConfig:
+ threshold = 0.5 // Higher threshold for config files (less critical)
+ default:
+ threshold = 0.3 // Default for unknown files
+ }
+
+ shouldEvict := score < threshold
+
+ if shouldEvict {
+ policy.totalEvictions++
+ if entry.IsTrainingData {
+ policy.trainingDataEvictions++
+ }
+ if entry.IsModel {
+ policy.modelFileEvictions++
+ }
+ if entry.FileType != MLFileUnknown {
+ policy.mlFileEvictions++
+ }
+
+ glog.V(4).Infof("Evicting: inode=%d, score=%.3f < threshold=%.3f, type=%v",
+ entry.Inode, score, threshold, entry.FileType)
+ }
+
+ return shouldEvict
+}
+
+// calculateAccessFrequencyScore calculates score based on access frequency
+func (policy *MLCachePolicy) calculateAccessFrequencyScore(entry *CacheEntry) float64 {
+ if entry.AccessCount == 0 {
+ return 0.0
+ }
+
+ // Logarithmic scaling for access count
+ base := math.Log(float64(entry.AccessCount) + 1)
+
+ // Apply ML-specific boosts
+ boost := 1.0
+ if entry.IsTrainingData {
+ boost *= policy.trainingDataBoost
+ }
+ if entry.IsModel {
+ boost *= policy.modelFileBoost
+ }
+ if entry.Pattern == SequentialAccess {
+ boost *= policy.sequentialBoost
+ }
+ if entry.EpochRelevance > 0.5 {
+ boost *= policy.epochRelevanceBoost
+ }
+
+ return base * boost
+}
+
+// calculateRecencyScore calculates score based on access recency
+func (policy *MLCachePolicy) calculateRecencyScore(timeSinceAccess time.Duration) float64 {
+ if timeSinceAccess <= policy.hotThreshold {
+ return 1.0 // Very recent access
+ }
+
+ if timeSinceAccess >= policy.coldThreshold {
+ return 0.1 // Very old access
+ }
+
+ // Linear decay between hot and cold thresholds
+ ratio := float64(timeSinceAccess-policy.hotThreshold) / float64(policy.coldThreshold-policy.hotThreshold)
+ return 1.0 - ratio*0.9 // Decay from 1.0 to 0.1
+}
+
+// calculateSizeScore calculates score based on item size
+func (policy *MLCachePolicy) calculateSizeScore(size uint64) float64 {
+ if size < policy.largeFileThreshold {
+ // Prefer keeping smaller files (higher score)
+ return policy.smallFilePreference
+ }
+
+ // Larger files get lower score (more likely to be evicted)
+ // But not too low since they might be important model files
+ ratio := float64(size) / float64(policy.largeFileThreshold)
+ return math.Max(0.3, 1.0/math.Sqrt(ratio))
+}
+
+// calculateMLScore calculates ML-specific factors
+func (policy *MLCachePolicy) calculateMLScore(entry *CacheEntry) float64 {
+ score := 0.5 // Base score for non-ML files
+
+ // File type bonuses
+ switch entry.FileType {
+ case MLFileModel:
+ score = 1.0 // Highest priority for model files
+ case MLFileDataset:
+ score = 0.8 // High priority for datasets
+ case MLFileTensor:
+ score = 0.7 // Good priority for tensor files
+ case MLFileConfig:
+ score = 0.4 // Lower priority for config files
+ case MLFileLog:
+ score = 0.3 // Lowest priority for log files
+ default:
+ score = 0.5 // Default for unknown files
+ }
+
+ // Access pattern bonuses
+ switch entry.Pattern {
+ case SequentialAccess:
+ score *= 1.2 // Boost for sequential access
+ case ModelAccess:
+ score *= 1.5 // Strong boost for model access
+ case EpochAccess:
+ score *= 1.3 // Boost for epoch access
+ case BatchAccess:
+ score *= 1.1 // Small boost for batch access
+ }
+
+ // Predicted reuse bonus
+ if entry.PredictedReuse > 0.7 {
+ score *= 1.2 // Boost for high predicted reuse
+ }
+
+ // Epoch relevance bonus
+ if entry.EpochRelevance > 0.5 {
+ score *= (1.0 + entry.EpochRelevance*0.3) // Up to 30% boost for epoch relevance
+ }
+
+ // Hot chunk bonus
+ if entry.IsHot {
+ score *= 1.1
+ }
+
+ return score
+}
+
+// GetEvictionMetrics returns eviction policy metrics
+func (policy *MLCachePolicy) GetEvictionMetrics() MLCachePolicyMetrics {
+ return MLCachePolicyMetrics{
+ TotalEvictions: policy.totalEvictions,
+ MLFileEvictions: policy.mlFileEvictions,
+ TrainingDataEvictions: policy.trainingDataEvictions,
+ ModelFileEvictions: policy.modelFileEvictions,
+
+ // Configuration
+ AccessFrequencyWeight: policy.accessFrequencyWeight,
+ RecencyWeight: policy.recencyWeight,
+ SizeWeight: policy.sizeWeight,
+ MLWeight: policy.mlWeight,
+ }
+}
+
+// MLCachePolicyMetrics holds metrics for the ML cache policy
+type MLCachePolicyMetrics struct {
+ TotalEvictions int64 `json:"total_evictions"`
+ MLFileEvictions int64 `json:"ml_file_evictions"`
+ TrainingDataEvictions int64 `json:"training_data_evictions"`
+ ModelFileEvictions int64 `json:"model_file_evictions"`
+
+ // Configuration weights
+ AccessFrequencyWeight float64 `json:"access_frequency_weight"`
+ RecencyWeight float64 `json:"recency_weight"`
+ SizeWeight float64 `json:"size_weight"`
+ MLWeight float64 `json:"ml_weight"`
+}
+
+// SetWeights updates the eviction policy weights
+func (policy *MLCachePolicy) SetWeights(frequency, recency, size, ml float64) {
+ total := frequency + recency + size + ml
+ if total == 0 {
+ glog.Warningf("Invalid weights provided, using defaults")
+ return
+ }
+
+ // Normalize weights to sum to 1.0
+ policy.accessFrequencyWeight = frequency / total
+ policy.recencyWeight = recency / total
+ policy.sizeWeight = size / total
+ policy.mlWeight = ml / total
+
+ glog.V(2).Infof("Updated eviction policy weights: freq=%.2f, recency=%.2f, size=%.2f, ml=%.2f",
+ policy.accessFrequencyWeight, policy.recencyWeight, policy.sizeWeight, policy.mlWeight)
+}
+
+// SetMLBoosts updates the ML-specific boost factors
+func (policy *MLCachePolicy) SetMLBoosts(trainingData, model, sequential, epochRelevance float64) {
+ policy.trainingDataBoost = trainingData
+ policy.modelFileBoost = model
+ policy.sequentialBoost = sequential
+ policy.epochRelevanceBoost = epochRelevance
+
+ glog.V(2).Infof("Updated ML boost factors: training=%.2f, model=%.2f, sequential=%.2f, epoch=%.2f",
+ trainingData, model, sequential, epochRelevance)
+}