// weed/mount/ml/cache_policy.go

package ml

import (
	"math"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/glog"
)

// CacheEntry describes a single cached item together with the ML-aware
// metadata that MLCachePolicy reads when scoring it for eviction.
type CacheEntry struct {
	// Inode is the inode of the file the cached data belongs to.
	Inode uint64

	// Size is the size of the cached data. The policy compares it against its
	// large-file threshold when computing the size score.
	Size uint64

	// LastAccess is the time of the most recent access.
	LastAccess time.Time

	// AccessCount is the total number of accesses recorded for the entry.
	AccessCount int64

	// CacheLevel identifies the cache tier (0=memory, 1=disk, etc.).
	CacheLevel int

	// Pattern is the access pattern detected for the entry.
	Pattern AccessPattern

	// FileType is the kind of ML file the entry belongs to.
	FileType MLFileType

	// IsHot marks the entry as a hot chunk.
	IsHot bool

	// ML-specific metadata.

	// IsTrainingData reports whether the entry holds training data.
	IsTrainingData bool

	// IsModel reports whether the entry holds a model file.
	IsModel bool

	// PredictedReuse is the predicted reuse probability (0.0-1.0).
	PredictedReuse float64

	// EpochRelevance is the relevance of the entry for the current training
	// epoch.
	EpochRelevance float64
}

// MLCachePolicy is an ML-aware cache eviction policy. It combines access
// frequency, recency, size and ML-specific signals into one weighted score
// and keeps counters of the evictions it has approved.
type MLCachePolicy struct {
	// Weights of the four score components (sum should be 1.0).

	// accessFrequencyWeight is the weight of the access-frequency score.
	accessFrequencyWeight float64

	// recencyWeight is the weight of the access-recency score.
	recencyWeight float64

	// sizeWeight is the weight of the item-size score.
	sizeWeight float64

	// mlWeight is the weight of the ML-specific score.
	mlWeight float64

	// ML-specific multipliers applied to the access-frequency score.

	// trainingDataBoost is the boost factor for training data.
	trainingDataBoost float64

	// modelFileBoost is the boost factor for model files.
	modelFileBoost float64

	// sequentialBoost is the boost factor for sequential access.
	sequentialBoost float64

	// epochRelevanceBoost is the boost factor for epoch-relevant data.
	epochRelevanceBoost float64

	// Time-based parameters.

	// hotThreshold is the idle time at or below which an item counts as "hot".
	hotThreshold time.Duration

	// coldThreshold is the idle time at or above which an item counts as "cold".
	coldThreshold time.Duration

	// Size-based parameters.

	// largeFileThreshold is the size from which an item is treated as large.
	largeFileThreshold uint64

	// smallFilePreference is the size score given to items below
	// largeFileThreshold.
	smallFilePreference float64

	// Statistics, incremented by ShouldEvict.
	// NOTE(review): these counters are updated without any visible
	// synchronization; confirm that callers serialize access.

	// totalEvictions counts every approved eviction.
	totalEvictions int64

	// mlFileEvictions counts approved evictions of entries whose file type is
	// not MLFileUnknown.
	mlFileEvictions int64

	// trainingDataEvictions counts approved evictions of training data.
	trainingDataEvictions int64

	// modelFileEvictions counts approved evictions of model files.
	modelFileEvictions int64
}

// NewMLCachePolicy returns an MLCachePolicy initialised with the default
// tuning: balanced score weights that sum to 1.0, the default ML boost
// factors, a 1 minute / 10 minute hot/cold window and a 10MB large-file
// threshold. All eviction counters start at zero.
func NewMLCachePolicy() *MLCachePolicy {
	p := new(MLCachePolicy)

	// Balanced weights for the four score components.
	p.accessFrequencyWeight = 0.3
	p.recencyWeight = 0.3
	p.sizeWeight = 0.2
	p.mlWeight = 0.2

	// ML-specific boosts.
	p.trainingDataBoost = 1.5   // 50% boost for training data
	p.modelFileBoost = 2.0      // 100% boost for model files
	p.sequentialBoost = 1.3     // 30% boost for sequential access
	p.epochRelevanceBoost = 1.4 // 40% boost for epoch-relevant data

	// Time thresholds bounding the recency decay.
	p.hotThreshold = time.Minute
	p.coldThreshold = 10 * time.Minute

	// Size parameters.
	p.largeFileThreshold = 10 << 20 // 10MB
	p.smallFilePreference = 1.2     // 20% preference for small files

	return p
}

// CalculateEvictionScore returns the weighted eviction score of entry.
// Lower scores indicate higher priority for eviction.
//
// The score is the sum of the access-frequency, recency, size and ML scores,
// each multiplied by its configured weight. Recency is measured from
// entry.LastAccess to the current time. The individual components are logged
// at verbosity level 4.
func (policy *MLCachePolicy) CalculateEvictionScore(entry *CacheEntry) float64 {
	idle := time.Since(entry.LastAccess)

	// Individual score components.
	freq := policy.calculateAccessFrequencyScore(entry)
	recency := policy.calculateRecencyScore(idle)
	size := policy.calculateSizeScore(entry.Size)
	ml := policy.calculateMLScore(entry)

	// Weighted combination of the components.
	score := policy.accessFrequencyWeight*freq +
		policy.recencyWeight*recency +
		policy.sizeWeight*size +
		policy.mlWeight*ml

	glog.V(4).Infof("Eviction score for inode=%d: total=%.3f (freq=%.3f, recency=%.3f, size=%.3f, ml=%.3f)",
		entry.Inode, score, freq, recency, size, ml)

	return score
}

// ShouldEvict reports whether entry should be evicted: it returns true when
// the entry's eviction score is strictly below a threshold chosen from the
// entry's file type (and, for datasets, its access pattern).
//
// Every call that returns true also increments the policy's eviction
// counters and logs the decision at verbosity level 4.
func (policy *MLCachePolicy) ShouldEvict(entry *CacheEntry) bool {
	score := policy.CalculateEvictionScore(entry)

	// A lower threshold keeps entries cached longer.
	var threshold float64
	switch entry.FileType {
	case MLFileModel:
		threshold = 0.1 // Very low: keep models cached longer
	case MLFileDataset:
		threshold = 0.4 // Random dataset access
		if entry.Pattern == SequentialAccess || entry.Pattern == EpochAccess {
			threshold = 0.2 // Sequential or epoch dataset access
		}
	case MLFileTensor:
		threshold = 0.25 // Medium threshold for tensor files
	case MLFileConfig:
		threshold = 0.5 // Config files are less critical
	default:
		threshold = 0.3 // Unknown files
	}

	// Written as a negated "<" on purpose: a NaN score must compare as
	// "do not evict", exactly like a score at or above the threshold.
	if !(score < threshold) {
		return false
	}

	// Record the approved eviction in the statistics.
	policy.totalEvictions++
	if entry.IsTrainingData {
		policy.trainingDataEvictions++
	}
	if entry.IsModel {
		policy.modelFileEvictions++
	}
	if entry.FileType != MLFileUnknown {
		policy.mlFileEvictions++
	}

	glog.V(4).Infof("Evicting: inode=%d, score=%.3f < threshold=%.3f, type=%v",
		entry.Inode, score, threshold, entry.FileType)

	return true
}

// calculateAccessFrequencyScore scores entry by how often it has been
// accessed; a higher score makes the entry less likely to be evicted.
//
// The base value is ln(AccessCount+1), multiplied by every boost that
// applies: trainingDataBoost when IsTrainingData is set, modelFileBoost when
// IsModel is set, sequentialBoost when Pattern is SequentialAccess, and
// epochRelevanceBoost when EpochRelevance exceeds 0.5. The result has no
// upper bound.
//
// An entry with a zero or negative AccessCount scores 0.0.
func (policy *MLCachePolicy) calculateAccessFrequencyScore(entry *CacheEntry) float64 {
	// A negative count is invalid input. Without this guard math.Log would
	// yield -Inf (count == -1) or NaN (count < -1), and a NaN total score
	// never compares below any eviction threshold.
	if entry.AccessCount <= 0 {
		return 0.0
	}

	// Logarithmic scaling so very hot entries do not dominate the total.
	base := math.Log(float64(entry.AccessCount) + 1)

	// Apply ML-specific boosts; they stack multiplicatively.
	boost := 1.0
	if entry.IsTrainingData {
		boost *= policy.trainingDataBoost
	}
	if entry.IsModel {
		boost *= policy.modelFileBoost
	}
	if entry.Pattern == SequentialAccess {
		boost *= policy.sequentialBoost
	}
	if entry.EpochRelevance > 0.5 {
		boost *= policy.epochRelevanceBoost
	}

	return base * boost
}

// calculateRecencyScore maps the time since the last access to a score
// between 0.1 and 1.0: 1.0 at or below hotThreshold, 0.1 at or above
// coldThreshold, and a linear decay in between.
func (policy *MLCachePolicy) calculateRecencyScore(timeSinceAccess time.Duration) float64 {
	hot, cold := policy.hotThreshold, policy.coldThreshold

	switch {
	case timeSinceAccess <= hot:
		return 1.0 // Very recent access
	case timeSinceAccess >= cold:
		return 0.1 // Very old access
	}

	// Position inside the hot..cold window, strictly between 0 and 1 here.
	fraction := float64(timeSinceAccess-hot) / float64(cold-hot)
	return 1.0 - fraction*0.9 // Decay from 1.0 to 0.1
}

// calculateSizeScore scores an item by its size. Items below
// largeFileThreshold receive smallFilePreference. Larger items receive
// 1/sqrt(size/largeFileThreshold), floored at 0.3.
func (policy *MLCachePolicy) calculateSizeScore(size uint64) float64 {
	limit := policy.largeFileThreshold

	if size >= limit {
		// Larger files score lower (more likely to be evicted), but never
		// below 0.3 since they might be important model files.
		relative := float64(size) / float64(limit)
		return math.Max(0.3, 1.0/math.Sqrt(relative))
	}

	// Prefer keeping smaller files (higher score).
	return policy.smallFilePreference
}

// calculateMLScore scores entry on ML-specific factors. It starts from a base
// value chosen by file type and multiplies in, in this order, a boost for the
// access pattern, for a predicted reuse above 0.7, for an epoch relevance
// above 0.5, and for hot chunks.
func (policy *MLCachePolicy) calculateMLScore(entry *CacheEntry) float64 {
	// Base value by file type.
	var score float64
	switch entry.FileType {
	case MLFileModel:
		score = 1.0 // Highest priority for model files
	case MLFileDataset:
		score = 0.8 // High priority for datasets
	case MLFileTensor:
		score = 0.7 // Good priority for tensor files
	case MLFileConfig:
		score = 0.4 // Lower priority for config files
	case MLFileLog:
		score = 0.3 // Lowest priority for log files
	default:
		score = 0.5 // Unknown and non-ML files
	}

	// Access pattern boost; patterns not listed leave the score unchanged.
	patternBoost := 1.0
	switch entry.Pattern {
	case SequentialAccess:
		patternBoost = 1.2
	case ModelAccess:
		patternBoost = 1.5
	case EpochAccess:
		patternBoost = 1.3
	case BatchAccess:
		patternBoost = 1.1
	}
	score *= patternBoost

	// Boost for high predicted reuse.
	if entry.PredictedReuse > 0.7 {
		score *= 1.2
	}

	// Boost proportional to epoch relevance (30% at a relevance of 1.0).
	if entry.EpochRelevance > 0.5 {
		score *= (1.0 + entry.EpochRelevance*0.3)
	}

	// Boost for hot chunks.
	if entry.IsHot {
		score *= 1.1
	}

	return score
}

// GetEvictionMetrics returns a snapshot of the policy's eviction counters
// together with the currently configured score weights.
func (policy *MLCachePolicy) GetEvictionMetrics() MLCachePolicyMetrics {
	var metrics MLCachePolicyMetrics

	// Eviction counters.
	metrics.TotalEvictions = policy.totalEvictions
	metrics.MLFileEvictions = policy.mlFileEvictions
	metrics.TrainingDataEvictions = policy.trainingDataEvictions
	metrics.ModelFileEvictions = policy.modelFileEvictions

	// Configuration.
	metrics.AccessFrequencyWeight = policy.accessFrequencyWeight
	metrics.RecencyWeight = policy.recencyWeight
	metrics.SizeWeight = policy.sizeWeight
	metrics.MLWeight = policy.mlWeight

	return metrics
}

// MLCachePolicyMetrics is a JSON-serialisable snapshot of the ML cache
// policy: its eviction counters and its configured score weights.
type MLCachePolicyMetrics struct {
	// TotalEvictions is the number of evictions approved by the policy.
	TotalEvictions int64 `json:"total_evictions"`

	// MLFileEvictions is the number of approved evictions of ML files.
	MLFileEvictions int64 `json:"ml_file_evictions"`

	// TrainingDataEvictions is the number of approved evictions of training
	// data.
	TrainingDataEvictions int64 `json:"training_data_evictions"`

	// ModelFileEvictions is the number of approved evictions of model files.
	ModelFileEvictions int64 `json:"model_file_evictions"`

	// Configuration weights.

	// AccessFrequencyWeight is the weight of the access-frequency score.
	AccessFrequencyWeight float64 `json:"access_frequency_weight"`

	// RecencyWeight is the weight of the recency score.
	RecencyWeight float64 `json:"recency_weight"`

	// SizeWeight is the weight of the size score.
	SizeWeight float64 `json:"size_weight"`

	// MLWeight is the weight of the ML-specific score.
	MLWeight float64 `json:"ml_weight"`
}

// SetWeights replaces the four score weights with frequency, recency, size
// and ml, normalised so that they sum to 1.0.
//
// Every weight must be a finite, non-negative number and their sum must be
// positive and finite. Otherwise a warning is logged and the current weights
// are left untouched. The new weights are logged at verbosity level 2.
func (policy *MLCachePolicy) SetWeights(frequency, recency, size, ml float64) {
	// Reject NaN, infinite and negative inputs up front: normalising them
	// would store NaN or negative weights and corrupt every later score.
	for _, w := range [...]float64{frequency, recency, size, ml} {
		if math.IsNaN(w) || math.IsInf(w, 0) || w < 0 {
			glog.Warningf("Invalid weights provided, keeping current weights")
			return
		}
	}

	// The sum can still be zero (all weights zero) or overflow to +Inf.
	total := frequency + recency + size + ml
	if total <= 0 || math.IsInf(total, 0) {
		glog.Warningf("Invalid weights provided, keeping current weights")
		return
	}

	// Normalize weights to sum to 1.0
	policy.accessFrequencyWeight = frequency / total
	policy.recencyWeight = recency / total
	policy.sizeWeight = size / total
	policy.mlWeight = ml / total

	glog.V(2).Infof("Updated eviction policy weights: freq=%.2f, recency=%.2f, size=%.2f, ml=%.2f",
		policy.accessFrequencyWeight, policy.recencyWeight, policy.sizeWeight, policy.mlWeight)
}

// SetMLBoosts replaces the four ML-specific boost factors applied to the
// access-frequency score. The values are stored as given, without validation,
// and logged at verbosity level 2.
func (policy *MLCachePolicy) SetMLBoosts(trainingData, model, sequential, epochRelevance float64) {
	policy.trainingDataBoost, policy.modelFileBoost = trainingData, model
	policy.sequentialBoost, policy.epochRelevanceBoost = sequential, epochRelevance

	glog.V(2).Infof("Updated ML boost factors: training=%.2f, model=%.2f, sequential=%.2f, epoch=%.2f",
		trainingData, model, sequential, epochRelevance)
}