1 files changed, 313 insertions, 0 deletions
diff --git a/weed/mount/ml/cache_policy.go b/weed/mount/ml/cache_policy.go
new file mode 100644
index 000000000..256b36dc9
--- /dev/null
+++ b/weed/mount/ml/cache_policy.go
@@ -0,0 +1,313 @@
+package ml
+
+import (
+	"math"
+	"time"
+
+	"github.com/seaweedfs/seaweedfs/weed/glog"
+)
+
+// CacheEntry describes one cached item together with the metadata that the
+// ML-aware eviction policy reads when it scores the item.
+type CacheEntry struct {
+	// Identity and generic access bookkeeping.
+	Inode       uint64        // inode of the cached file
+	Size        uint64        // size of the cached data
+	LastAccess  time.Time     // time of the most recent access
+	AccessCount int64         // total number of accesses
+	CacheLevel  int           // cache level (0=memory, 1=disk, etc.)
+	Pattern     AccessPattern // access pattern detected for this item
+	FileType    MLFileType    // kind of ML file
+	IsHot       bool          // true when this is a hot chunk
+
+	// ML-specific hints.
+	IsTrainingData bool    // true when the item is training data
+	IsModel        bool    // true when the item is a model file
+	PredictedReuse float64 // predicted reuse probability (0.0-1.0)
+	EpochRelevance float64 // relevance for the current training epoch
+}
+
+// MLCachePolicy is an ML-aware cache eviction policy. It holds the tunable
+// parameters used to score cache entries and counters describing the
+// evictions it has approved.
+type MLCachePolicy struct {
+	// Factor weights used in the weighted eviction score (intended to sum to 1.0).
+	accessFrequencyWeight float64 // share given to the access-frequency score
+	recencyWeight         float64 // share given to the recency score
+	sizeWeight            float64 // share given to the size score
+	mlWeight              float64 // share given to the ML-specific score
+
+	// Multipliers applied to the access-frequency score.
+	trainingDataBoost   float64 // applied when the entry is training data
+	modelFileBoost      float64 // applied when the entry is a model file
+	sequentialBoost     float64 // applied for sequential access
+	epochRelevanceBoost float64 // applied for epoch-relevant data
+
+	// Recency window.
+	hotThreshold  time.Duration // accesses at most this old count as "hot"
+	coldThreshold time.Duration // accesses at least this old count as "cold"
+
+	// Size handling.
+	largeFileThreshold  uint64  // sizes at or above this are treated as large
+	smallFilePreference float64 // size score given to items below the threshold
+
+	// Eviction counters.
+	totalEvictions        int64
+	mlFileEvictions       int64
+	trainingDataEvictions int64
+	modelFileEvictions    int64
+}
+
+// NewMLCachePolicy returns a policy populated with the default weights, boost
+// factors and thresholds. All eviction counters start at zero.
+func NewMLCachePolicy() *MLCachePolicy {
+	policy := new(MLCachePolicy)
+
+	// Balanced factor weights.
+	policy.accessFrequencyWeight = 0.3
+	policy.recencyWeight = 0.3
+	policy.sizeWeight = 0.2
+	policy.mlWeight = 0.2
+
+	// ML-specific boosts.
+	policy.trainingDataBoost = 1.5   // 50% boost for training data
+	policy.modelFileBoost = 2.0      // 100% boost for model files
+	policy.sequentialBoost = 1.3     // 30% boost for sequential access
+	policy.epochRelevanceBoost = 1.4 // 40% boost for epoch-relevant data
+
+	// Recency window: hot within a minute, cold after ten.
+	policy.hotThreshold = 1 * time.Minute
+	policy.coldThreshold = 10 * time.Minute
+
+	// Size handling.
+	policy.largeFileThreshold = 10 * 1024 * 1024 // 10MB
+	policy.smallFilePreference = 1.2             // 20% preference for small files
+
+	return policy
+}
+
+// CalculateEvictionScore combines the access-frequency, recency, size and ML
+// scores of entry into one weighted value. The lower the result, the sooner
+// the entry should be evicted.
+func (policy *MLCachePolicy) CalculateEvictionScore(entry *CacheEntry) float64 {
+	idle := time.Since(entry.LastAccess)
+
+	// Individual factor scores.
+	freq := policy.calculateAccessFrequencyScore(entry)
+	recency := policy.calculateRecencyScore(idle)
+	size := policy.calculateSizeScore(entry.Size)
+	ml := policy.calculateMLScore(entry)
+
+	// Weighted sum of the four factors.
+	score := policy.accessFrequencyWeight*freq +
+		policy.recencyWeight*recency +
+		policy.sizeWeight*size +
+		policy.mlWeight*ml
+
+	glog.V(4).Infof("Eviction score for inode=%d: total=%.3f (freq=%.3f, recency=%.3f, size=%.3f, ml=%.3f)",
+		entry.Inode, score, freq, recency, size, ml)
+
+	return score
+}
+
+// ShouldEvict reports whether entry scores below the eviction threshold that
+// applies to its file type. When it does, the policy's eviction counters are
+// incremented as a side effect before true is returned.
+func (policy *MLCachePolicy) ShouldEvict(entry *CacheEntry) bool {
+	score := policy.CalculateEvictionScore(entry)
+
+	// Pick the cut-off for this file type; a lower cut-off keeps entries
+	// cached longer. File types not listed below use the 0.3 default.
+	threshold := 0.3
+	switch entry.FileType {
+	case MLFileModel:
+		threshold = 0.1 // very low: keep models cached longer
+	case MLFileTensor:
+		threshold = 0.25 // medium threshold for tensor files
+	case MLFileConfig:
+		threshold = 0.5 // higher threshold for config files (less critical)
+	case MLFileDataset:
+		threshold = 0.4 // random dataset access
+		if entry.Pattern == SequentialAccess || entry.Pattern == EpochAccess {
+			threshold = 0.2 // sequential or epoch dataset access
+		}
+	}
+
+	// Written as a negated "<" so that a NaN score is never evicted.
+	if !(score < threshold) {
+		return false
+	}
+
+	// NOTE(review): the counters are updated without synchronization here;
+	// confirm that callers serialize access to the policy.
+	policy.totalEvictions++
+	if entry.IsTrainingData {
+		policy.trainingDataEvictions++
+	}
+	if entry.IsModel {
+		policy.modelFileEvictions++
+	}
+	if entry.FileType != MLFileUnknown {
+		policy.mlFileEvictions++
+	}
+
+	glog.V(4).Infof("Evicting: inode=%d, score=%.3f < threshold=%.3f, type=%v",
+		entry.Inode, score, threshold, entry.FileType)
+
+	return true
+}
+
+// calculateAccessFrequencyScore returns the access-frequency component of the
+// eviction score: the natural log of (AccessCount + 1), multiplied by every
+// boost factor that applies to entry. Entries with no recorded accesses score
+// zero. The result is not capped.
+func (policy *MLCachePolicy) calculateAccessFrequencyScore(entry *CacheEntry) float64 {
+	// AccessCount is signed. A negative value would make math.Log return
+	// -Inf or NaN and poison the combined score, so treat it like zero.
+	if entry.AccessCount <= 0 {
+		return 0.0
+	}
+
+	// Logarithmic scaling keeps very frequently used entries from dominating.
+	base := math.Log(float64(entry.AccessCount) + 1)
+
+	// Apply ML-specific boosts; each one that matches multiplies the result.
+	boost := 1.0
+	if entry.IsTrainingData {
+		boost *= policy.trainingDataBoost
+	}
+	if entry.IsModel {
+		boost *= policy.modelFileBoost
+	}
+	if entry.Pattern == SequentialAccess {
+		boost *= policy.sequentialBoost
+	}
+	if entry.EpochRelevance > 0.5 {
+		boost *= policy.epochRelevanceBoost
+	}
+
+	return base * boost
+}
+
+// calculateRecencyScore maps the time elapsed since the last access to a
+// score: 1.0 up to the hot threshold, 0.1 from the cold threshold onwards,
+// and a straight-line decay between the two.
+func (policy *MLCachePolicy) calculateRecencyScore(timeSinceAccess time.Duration) float64 {
+	switch {
+	case timeSinceAccess <= policy.hotThreshold:
+		return 1.0 // very recent access
+	case timeSinceAccess >= policy.coldThreshold:
+		return 0.1 // very old access
+	}
+
+	// Position inside the hot..cold window, as a fraction of its width.
+	elapsed := float64(timeSinceAccess - policy.hotThreshold)
+	window := float64(policy.coldThreshold - policy.hotThreshold)
+	fraction := elapsed / window
+
+	// Decay from 1.0 down to 0.1.
+	return 1.0 - fraction*0.9
+}
+
+// calculateSizeScore returns the size component of the eviction score. Items
+// below the large-file threshold get the small-file preference; larger items
+// get a score that shrinks with the square root of their size but never
+// drops under 0.3.
+func (policy *MLCachePolicy) calculateSizeScore(size uint64) float64 {
+	if size >= policy.largeFileThreshold {
+		// Large items are more likely to be evicted, but the 0.3 floor keeps
+		// them from scoring too low since they might be important model files.
+		multiple := float64(size) / float64(policy.largeFileThreshold)
+		return math.Max(0.3, 1.0/math.Sqrt(multiple))
+	}
+
+	// Small items are preferred for retention (higher score).
+	return policy.smallFilePreference
+}
+
+// calculateMLScore calculates ML-specific factors
+func (policy *MLCachePolicy) calculateMLScore(entry *CacheEntry) float64 {
+	score := 0.5 // Base score for non-ML files
+	
+	// File type bonuses
+	switch entry.FileType {
+	case MLFileModel:
+		score = 1.0 // Highest priority for model files
+	case MLFileDataset:
+		score = 0.8 // High priority for datasets
+	case MLFileTensor:
+		score = 0.7 // Good priority for tensor files
+	case MLFileConfig:
+		score = 0.4 // Lower priority for config files
+	case MLFileLog:
+		score = 0.3 // Lowest priority for log files
+	default:
+		score = 0.5 // Default for unknown files
+	}
+	
+	// Access pattern bonuses
+	switch entry.Pattern {
+	case SequentialAccess:
+		score *= 1.2 // Boost for sequential access
+	case ModelAccess:
+		score *= 1.5 // Strong boost for model access
+	case EpochAccess:
+		score *= 1.3 // Boost for epoch access
+	case BatchAccess:
+		score *= 1.1 // Small boost for batch access
+	}
+	
+	// Predicted reuse bonus
+	if entry.PredictedReuse > 0.7 {
+		score *= 1.2 // Boost for high predicted reuse
+	}
+	
+	// Epoch relevance bonus
+	if entry.EpochRelevance > 0.5 {
+		score *= (1.0 + entry.EpochRelevance*0.3) // Up to 30% boost for epoch relevance
+	}
+	
+	// Hot chunk bonus
+	if entry.IsHot {
+		score *= 1.1
+	}
+	
+	return score
+}
+
+// GetEvictionMetrics returns eviction policy metrics
+func (policy *MLCachePolicy) GetEvictionMetrics() MLCachePolicyMetrics {
+	return MLCachePolicyMetrics{
+		TotalEvictions:        policy.totalEvictions,
+		MLFileEvictions:       policy.mlFileEvictions,
+		TrainingDataEvictions: policy.trainingDataEvictions,
+		ModelFileEvictions:    policy.modelFileEvictions,
+		
+		// Configuration
+		AccessFrequencyWeight: policy.accessFrequencyWeight,
+		RecencyWeight:         policy.recencyWeight,
+		SizeWeight:           policy.sizeWeight,
+		MLWeight:             policy.mlWeight,
+	}
+}
+
+// MLCachePolicyMetrics is the JSON-serializable report of an MLCachePolicy:
+// its eviction counters plus the factor weights it is configured with.
+type MLCachePolicyMetrics struct {
+	// Eviction counters.
+	TotalEvictions        int64 `json:"total_evictions"`
+	MLFileEvictions       int64 `json:"ml_file_evictions"`
+	TrainingDataEvictions int64 `json:"training_data_evictions"`
+	ModelFileEvictions    int64 `json:"model_file_evictions"`
+
+	// Configured factor weights.
+	AccessFrequencyWeight float64 `json:"access_frequency_weight"`
+	RecencyWeight         float64 `json:"recency_weight"`
+	SizeWeight            float64 `json:"size_weight"`
+	MLWeight              float64 `json:"ml_weight"`
+}
+
+// SetWeights replaces the four factor weights of the eviction score. The
+// values are normalized so that the stored weights sum to 1.0, which means
+// only their relative proportions matter.
+//
+// Every weight must be a finite, non-negative number and at least one must be
+// positive. Otherwise a warning is logged and the current weights are kept.
+func (policy *MLCachePolicy) SetWeights(frequency, recency, size, ml float64) {
+	total := frequency + recency + size + ml
+
+	// A NaN weight fails its ">= 0" comparison, and an infinite weight makes
+	// the total infinite, so this one condition rejects negative, NaN and
+	// infinite input as well as an all-zero set.
+	valid := frequency >= 0 && recency >= 0 && size >= 0 && ml >= 0 &&
+		total > 0 && !math.IsInf(total, 1)
+	if !valid {
+		glog.Warningf("Invalid weights provided, keeping current weights")
+		return
+	}
+
+	// Normalize weights to sum to 1.0
+	policy.accessFrequencyWeight = frequency / total
+	policy.recencyWeight = recency / total
+	policy.sizeWeight = size / total
+	policy.mlWeight = ml / total
+
+	glog.V(2).Infof("Updated eviction policy weights: freq=%.2f, recency=%.2f, size=%.2f, ml=%.2f",
+		policy.accessFrequencyWeight, policy.recencyWeight, policy.sizeWeight, policy.mlWeight)
+}
+
+// SetMLBoosts replaces the four multipliers that the access-frequency score
+// applies for training data, model files, sequential access and
+// epoch-relevant data. The values are stored as given, without validation.
+func (policy *MLCachePolicy) SetMLBoosts(trainingData, model, sequential, epochRelevance float64) {
+	policy.trainingDataBoost, policy.modelFileBoost = trainingData, model
+	policy.sequentialBoost, policy.epochRelevanceBoost = sequential, epochRelevance
+
+	glog.V(2).Infof("Updated ML boost factors: training=%.2f, model=%.2f, sequential=%.2f, epoch=%.2f",
+		policy.trainingDataBoost, policy.modelFileBoost, policy.sequentialBoost, policy.epochRelevanceBoost)
+}