aboutsummaryrefslogtreecommitdiff
path: root/weed/mount/ml/cache_policy_test.go
diff options
context:
space:
mode:
Diffstat (limited to 'weed/mount/ml/cache_policy_test.go')
-rw-r--r--weed/mount/ml/cache_policy_test.go549
1 files changed, 549 insertions, 0 deletions
diff --git a/weed/mount/ml/cache_policy_test.go b/weed/mount/ml/cache_policy_test.go
new file mode 100644
index 000000000..29df5b859
--- /dev/null
+++ b/weed/mount/ml/cache_policy_test.go
@@ -0,0 +1,549 @@
+package ml
+
+import (
+ "testing"
+ "time"
+)
+
+// TestMLCachePolicy_Basic is a smoke test: an entry of unknown file type that
+// was accessed just now must receive a strictly positive eviction score. The
+// ShouldEvict decision for the same entry is logged but not asserted.
+func TestMLCachePolicy_Basic(t *testing.T) {
+	policy := NewMLCachePolicy()
+
+	// Build the entry as a value, then hand its address to the policy.
+	sample := CacheEntry{
+		FileType:    MLFileUnknown,
+		Pattern:     RandomAccess,
+		IsHot:       false,
+		CacheLevel:  0,
+		AccessCount: 5,
+		LastAccess:  time.Now(),
+		Size:        1024,
+		Inode:       1,
+	}
+	entry := &sample
+
+	got := policy.CalculateEvictionScore(entry)
+	if got <= 0 {
+		t.Error("Eviction score should be positive")
+	}
+
+	evict := policy.ShouldEvict(entry)
+	t.Logf("Basic entry eviction: score=%.3f, shouldEvict=%v", got, evict)
+}
+
+func TestMLCachePolicy_ModelFileBoost(t *testing.T) {
+ policy := NewMLCachePolicy()
+
+ // Create two identical entries, one is a model file
+ baseEntry := &CacheEntry{
+ Inode: 1,
+ Size: 10 * 1024 * 1024, // 10MB
+ LastAccess: time.Now().Add(-5 * time.Minute),
+ AccessCount: 3,
+ CacheLevel: 0,
+ Pattern: SequentialAccess,
+ FileType: MLFileUnknown,
+ IsModel: false,
+ }
+
+ modelEntry := &CacheEntry{
+ Inode: 2,
+ Size: 10 * 1024 * 1024, // 10MB
+ LastAccess: time.Now().Add(-5 * time.Minute),
+ AccessCount: 3,
+ CacheLevel: 0,
+ Pattern: SequentialAccess,
+ FileType: MLFileModel,
+ IsModel: true,
+ }
+
+ baseScore := policy.CalculateEvictionScore(baseEntry)
+ modelScore := policy.CalculateEvictionScore(modelEntry)
+
+ if modelScore <= baseScore {
+ t.Errorf("Model file should have higher score than regular file: model=%.3f, base=%.3f",
+ modelScore, baseScore)
+ }
+
+ // Model files should be less likely to be evicted
+ baseShouldEvict := policy.ShouldEvict(baseEntry)
+ modelShouldEvict := policy.ShouldEvict(modelEntry)
+
+ if modelShouldEvict && !baseShouldEvict {
+ t.Error("Model file should not be evicted if regular file is not evicted")
+ }
+
+ t.Logf("Model vs Base eviction: model=%.3f (evict=%v), base=%.3f (evict=%v)",
+ modelScore, modelShouldEvict, baseScore, baseShouldEvict)
+}
+
+// TestMLCachePolicy_TrainingDataBoost checks that an entry marked as training
+// data (dataset file type) scores strictly higher than an otherwise equal
+// entry of unknown type.
+func TestMLCachePolicy_TrainingDataBoost(t *testing.T) {
+	policy := NewMLCachePolicy()
+
+	plain := CacheEntry{
+		IsTrainingData: false,
+		FileType:       MLFileUnknown,
+		AccessCount:    10,
+		LastAccess:     time.Now().Add(-2 * time.Minute),
+		Size:           1024,
+		Inode:          1,
+	}
+
+	training := CacheEntry{
+		IsTrainingData: true,
+		FileType:       MLFileDataset,
+		AccessCount:    10,
+		LastAccess:     time.Now().Add(-2 * time.Minute),
+		Size:           1024,
+		Inode:          2,
+	}
+
+	regularScore := policy.CalculateEvictionScore(&plain)
+	trainingScore := policy.CalculateEvictionScore(&training)
+
+	if regularScore >= trainingScore {
+		t.Errorf("Training data should have higher score: training=%.3f, regular=%.3f",
+			trainingScore, regularScore)
+	}
+}
+
+// TestMLCachePolicy_AccessPatternBoost asserts a strict score ordering across
+// access patterns: random (dataset) < sequential (dataset) < model access
+// (model file). All three entries share size, access count and a "just now"
+// last-access time.
+func TestMLCachePolicy_AccessPatternBoost(t *testing.T) {
+	policy := NewMLCachePolicy()
+
+	randomEntry := new(CacheEntry)
+	randomEntry.Inode = 1
+	randomEntry.Size = 1024
+	randomEntry.LastAccess = time.Now()
+	randomEntry.AccessCount = 5
+	randomEntry.Pattern = RandomAccess
+	randomEntry.FileType = MLFileDataset
+
+	sequentialEntry := new(CacheEntry)
+	sequentialEntry.Inode = 2
+	sequentialEntry.Size = 1024
+	sequentialEntry.LastAccess = time.Now()
+	sequentialEntry.AccessCount = 5
+	sequentialEntry.Pattern = SequentialAccess
+	sequentialEntry.FileType = MLFileDataset
+
+	modelAccessEntry := new(CacheEntry)
+	modelAccessEntry.Inode = 3
+	modelAccessEntry.Size = 1024
+	modelAccessEntry.LastAccess = time.Now()
+	modelAccessEntry.AccessCount = 5
+	modelAccessEntry.Pattern = ModelAccess
+	modelAccessEntry.FileType = MLFileModel
+
+	randomScore := policy.CalculateEvictionScore(randomEntry)
+	sequentialScore := policy.CalculateEvictionScore(sequentialEntry)
+	modelScore := policy.CalculateEvictionScore(modelAccessEntry)
+
+	if randomScore >= sequentialScore {
+		t.Errorf("Sequential access should have higher score than random: seq=%.3f, random=%.3f",
+			sequentialScore, randomScore)
+	}
+
+	if sequentialScore >= modelScore {
+		t.Errorf("Model access should have highest score: model=%.3f, seq=%.3f",
+			modelScore, sequentialScore)
+	}
+
+	t.Logf("Pattern comparison: random=%.3f, sequential=%.3f, model=%.3f",
+		randomScore, sequentialScore, modelScore)
+}
+
+func TestMLCachePolicy_SizePreference(t *testing.T) {
+ policy := NewMLCachePolicy()
+
+ smallEntry := &CacheEntry{
+ Inode: 1,
+ Size: 1024, // 1KB
+ LastAccess: time.Now().Add(-5 * time.Minute),
+ AccessCount: 3,
+ FileType: MLFileUnknown,
+ }
+
+ largeEntry := &CacheEntry{
+ Inode: 2,
+ Size: 50 * 1024 * 1024, // 50MB
+ LastAccess: time.Now().Add(-5 * time.Minute),
+ AccessCount: 3,
+ FileType: MLFileUnknown,
+ }
+
+ smallScore := policy.CalculateEvictionScore(smallEntry)
+ largeScore := policy.CalculateEvictionScore(largeEntry)
+
+ if smallScore <= largeScore {
+ t.Errorf("Small files should have higher score than large files: small=%.3f, large=%.3f",
+ smallScore, largeScore)
+ }
+}
+
+// TestMLCachePolicy_RecencyDecay checks that an entry accessed just now scores
+// strictly higher than an otherwise equal entry last accessed 20 minutes ago.
+func TestMLCachePolicy_RecencyDecay(t *testing.T) {
+	policy := NewMLCachePolicy()
+
+	// Touched at construction time.
+	fresh := new(CacheEntry)
+	fresh.Inode = 1
+	fresh.Size = 1024
+	fresh.LastAccess = time.Now()
+	fresh.AccessCount = 5
+	fresh.FileType = MLFileUnknown
+
+	// Same entry shape, but last touched 20 minutes earlier.
+	stale := new(CacheEntry)
+	stale.Inode = 2
+	stale.Size = 1024
+	stale.LastAccess = time.Now().Add(-20 * time.Minute)
+	stale.AccessCount = 5
+	stale.FileType = MLFileUnknown
+
+	recentScore := policy.CalculateEvictionScore(fresh)
+	oldScore := policy.CalculateEvictionScore(stale)
+
+	if oldScore >= recentScore {
+		t.Errorf("Recent access should have higher score: recent=%.3f, old=%.3f",
+			recentScore, oldScore)
+	}
+}
+
+// TestMLCachePolicy_EpochRelevance checks that, between two otherwise equal
+// dataset entries, the one with EpochRelevance 0.9 scores strictly higher than
+// the one with EpochRelevance 0.2.
+func TestMLCachePolicy_EpochRelevance(t *testing.T) {
+	policy := NewMLCachePolicy()
+
+	lowRelevance := CacheEntry{
+		EpochRelevance: 0.2,
+		FileType:       MLFileDataset,
+		AccessCount:    5,
+		LastAccess:     time.Now(),
+		Size:           1024,
+		Inode:          1,
+	}
+
+	highRelevance := CacheEntry{
+		EpochRelevance: 0.9,
+		FileType:       MLFileDataset,
+		AccessCount:    5,
+		LastAccess:     time.Now(),
+		Size:           1024,
+		Inode:          2,
+	}
+
+	lowScore := policy.CalculateEvictionScore(&lowRelevance)
+	highScore := policy.CalculateEvictionScore(&highRelevance)
+
+	if lowScore >= highScore {
+		t.Errorf("High epoch relevance should have higher score: high=%.3f, low=%.3f",
+			highScore, lowScore)
+	}
+}
+
+// TestMLCachePolicy_DifferentThresholds runs ShouldEvict on three 15-minute-old
+// entries (unknown, model, sequential dataset). It fails if the model entry is
+// selected for eviction while either of the other two is kept.
+func TestMLCachePolicy_DifferentThresholds(t *testing.T) {
+	policy := NewMLCachePolicy()
+
+	// All entries share size, age and access count; only the ML metadata differs.
+	unknownEntry := new(CacheEntry)
+	unknownEntry.Inode = 1
+	unknownEntry.Size = 1024
+	unknownEntry.LastAccess = time.Now().Add(-15 * time.Minute) // Old enough to potentially evict
+	unknownEntry.AccessCount = 2
+	unknownEntry.FileType = MLFileUnknown
+
+	modelEntry := new(CacheEntry)
+	modelEntry.Inode = 2
+	modelEntry.Size = 1024
+	modelEntry.LastAccess = time.Now().Add(-15 * time.Minute)
+	modelEntry.AccessCount = 2
+	modelEntry.FileType = MLFileModel
+	modelEntry.IsModel = true
+
+	datasetEntry := new(CacheEntry)
+	datasetEntry.Inode = 3
+	datasetEntry.Size = 1024
+	datasetEntry.LastAccess = time.Now().Add(-15 * time.Minute)
+	datasetEntry.AccessCount = 2
+	datasetEntry.FileType = MLFileDataset
+	datasetEntry.Pattern = SequentialAccess
+
+	unknownShouldEvict := policy.ShouldEvict(unknownEntry)
+	modelShouldEvict := policy.ShouldEvict(modelEntry)
+	datasetShouldEvict := policy.ShouldEvict(datasetEntry)
+
+	// A model may only be evicted when both other entries are evicted too.
+	othersBothEvicted := unknownShouldEvict && datasetShouldEvict
+	if modelShouldEvict && !othersBothEvicted {
+		t.Error("Model files should be least likely to be evicted")
+	}
+
+	t.Logf("Eviction by type: unknown=%v, model=%v, dataset=%v",
+		unknownShouldEvict, modelShouldEvict, datasetShouldEvict)
+}
+
+func TestMLCachePolicy_SetWeights(t *testing.T) {
+ policy := NewMLCachePolicy()
+
+ // Test setting custom weights
+ policy.SetWeights(0.4, 0.3, 0.1, 0.2)
+
+ if policy.accessFrequencyWeight != 0.4 {
+ t.Errorf("Expected frequency weight 0.4, got %.2f", policy.accessFrequencyWeight)
+ }
+
+ if policy.recencyWeight != 0.3 {
+ t.Errorf("Expected recency weight 0.3, got %.2f", policy.recencyWeight)
+ }
+
+ if policy.sizeWeight != 0.1 {
+ t.Errorf("Expected size weight 0.1, got %.2f", policy.sizeWeight)
+ }
+
+ if policy.mlWeight != 0.2 {
+ t.Errorf("Expected ML weight 0.2, got %.2f", policy.mlWeight)
+ }
+
+ // Test weight normalization
+ policy.SetWeights(2.0, 2.0, 1.0, 1.0) // Total = 6.0
+
+ expectedFreq := 2.0 / 6.0
+ if abs(policy.accessFrequencyWeight - expectedFreq) > 0.001 {
+ t.Errorf("Expected normalized frequency weight %.3f, got %.3f",
+ expectedFreq, policy.accessFrequencyWeight)
+ }
+}
+
+// TestMLCachePolicy_SetMLBoosts calls SetMLBoosts(2.0, 3.0, 1.5, 1.8) and checks
+// that the training-data, model-file, sequential and epoch-relevance boost
+// fields hold exactly those values, in that order.
+func TestMLCachePolicy_SetMLBoosts(t *testing.T) {
+	policy := NewMLCachePolicy()
+
+	policy.SetMLBoosts(2.0, 3.0, 1.5, 1.8)
+
+	if got := policy.trainingDataBoost; got != 2.0 {
+		t.Errorf("Expected training data boost 2.0, got %.2f", got)
+	}
+
+	if got := policy.modelFileBoost; got != 3.0 {
+		t.Errorf("Expected model file boost 3.0, got %.2f", got)
+	}
+
+	if got := policy.sequentialBoost; got != 1.5 {
+		t.Errorf("Expected sequential boost 1.5, got %.2f", got)
+	}
+
+	if got := policy.epochRelevanceBoost; got != 1.8 {
+		t.Errorf("Expected epoch relevance boost 1.8, got %.2f", got)
+	}
+}
+
+// TestMLCachePolicy_Metrics passes three 30-minute-old entries (a model file, a
+// training dataset file and an unknown file) through ShouldEvict, then checks
+// that GetEvictionMetrics reports a non-zero TotalEvictions and that its
+// AccessFrequencyWeight equals the policy's current frequency weight.
+func TestMLCachePolicy_Metrics(t *testing.T) {
+	policy := NewMLCachePolicy()
+
+	// Simulate some evictions
+	entries := []*CacheEntry{
+		{FileType: MLFileModel, IsModel: true},
+		{FileType: MLFileDataset, IsTrainingData: true},
+		{FileType: MLFileUnknown},
+	}
+
+	// Per the original author's note, the eviction counters are updated inside
+	// ShouldEvict. The return value is only tallied for diagnostics and does
+	// not affect pass/fail.
+	flagged := 0
+	for _, entry := range entries {
+		entry.LastAccess = time.Now().Add(-30 * time.Minute) // Old enough to evict
+		entry.AccessCount = 1
+		entry.Size = 1024
+
+		if policy.ShouldEvict(entry) {
+			flagged++
+		}
+	}
+	t.Logf("ShouldEvict flagged %d of %d entries for eviction", flagged, len(entries))
+
+	metrics := policy.GetEvictionMetrics()
+
+	if metrics.TotalEvictions == 0 {
+		t.Error("Should have some total evictions")
+	}
+
+	// Verify weight configuration in metrics
+	if metrics.AccessFrequencyWeight != policy.accessFrequencyWeight {
+		t.Error("Metrics should reflect current weight configuration")
+	}
+}
+
+// TestMLCachePolicy_HotChunkPreference checks that, between two otherwise equal
+// dataset entries, the one with IsHot set scores strictly higher.
+func TestMLCachePolicy_HotChunkPreference(t *testing.T) {
+	policy := NewMLCachePolicy()
+
+	cold := CacheEntry{
+		FileType:    MLFileDataset,
+		IsHot:       false,
+		AccessCount: 5,
+		LastAccess:  time.Now(),
+		Size:        1024,
+		Inode:       1,
+	}
+
+	hot := CacheEntry{
+		FileType:    MLFileDataset,
+		IsHot:       true,
+		AccessCount: 5,
+		LastAccess:  time.Now(),
+		Size:        1024,
+		Inode:       2,
+	}
+
+	coldScore := policy.CalculateEvictionScore(&cold)
+	hotScore := policy.CalculateEvictionScore(&hot)
+
+	if coldScore >= hotScore {
+		t.Errorf("Hot chunk should have higher score: hot=%.3f, cold=%.3f", hotScore, coldScore)
+	}
+}
+
+// TestMLCachePolicy_RecencyThresholds calls calculateRecencyScore with three
+// ages: 30 seconds must score exactly 1.0, 15 minutes exactly 0.1, and
+// 5 minutes strictly between those two results.
+func TestMLCachePolicy_RecencyThresholds(t *testing.T) {
+	policy := NewMLCachePolicy()
+
+	// Entries exist only to carry a LastAccess timestamp for each age band.
+	hotEntry := new(CacheEntry)
+	hotEntry.Inode = 1
+	hotEntry.Size = 1024
+	hotEntry.LastAccess = time.Now().Add(-30 * time.Second) // Within hot threshold
+	hotEntry.AccessCount = 1
+
+	coldEntry := new(CacheEntry)
+	coldEntry.Inode = 2
+	coldEntry.Size = 1024
+	coldEntry.LastAccess = time.Now().Add(-15 * time.Minute) // Beyond cold threshold
+	coldEntry.AccessCount = 1
+
+	middleEntry := new(CacheEntry)
+	middleEntry.Inode = 3
+	middleEntry.Size = 1024
+	middleEntry.LastAccess = time.Now().Add(-5 * time.Minute) // Between thresholds
+	middleEntry.AccessCount = 1
+
+	hotAge := time.Since(hotEntry.LastAccess)
+	hotScore := policy.calculateRecencyScore(hotAge)
+	coldAge := time.Since(coldEntry.LastAccess)
+	coldScore := policy.calculateRecencyScore(coldAge)
+	middleAge := time.Since(middleEntry.LastAccess)
+	middleScore := policy.calculateRecencyScore(middleAge)
+
+	if hotScore != 1.0 {
+		t.Errorf("Hot entry should have score 1.0, got %.3f", hotScore)
+	}
+
+	if coldScore != 0.1 {
+		t.Errorf("Cold entry should have score 0.1, got %.3f", coldScore)
+	}
+
+	if coldScore >= middleScore || hotScore <= middleScore {
+		t.Errorf("Middle entry should have score between hot and cold: %.3f not in (%.3f, %.3f)",
+			middleScore, coldScore, hotScore)
+	}
+}
+
+func TestMLCachePolicy_SizeScore(t *testing.T) {
+ policy := NewMLCachePolicy()
+
+ smallSize := uint64(1024) // 1KB
+ largeSize := uint64(100 * 1024 * 1024) // 100MB
+
+ smallScore := policy.calculateSizeScore(smallSize)
+ largeScore := policy.calculateSizeScore(largeSize)
+
+ if smallScore <= largeScore {
+ t.Errorf("Small files should have higher size score: small=%.3f, large=%.3f",
+ smallScore, largeScore)
+ }
+
+ // Large files should still have reasonable score (not too low)
+ if largeScore < 0.2 {
+ t.Errorf("Large files should have reasonable score, got %.3f", largeScore)
+ }
+}
+
+// TestMLCachePolicy_AccessFrequencyScore checks calculateAccessFrequencyScore
+// directly: an entry with 100 accesses must score strictly higher than one
+// with a single access, with file type and pattern held equal.
+func TestMLCachePolicy_AccessFrequencyScore(t *testing.T) {
+	policy := NewMLCachePolicy()
+
+	rarelyUsed := CacheEntry{
+		Pattern:     RandomAccess,
+		FileType:    MLFileUnknown,
+		AccessCount: 1,
+	}
+
+	heavilyUsed := CacheEntry{
+		Pattern:     RandomAccess,
+		FileType:    MLFileUnknown,
+		AccessCount: 100,
+	}
+
+	lowScore := policy.calculateAccessFrequencyScore(&rarelyUsed)
+	highScore := policy.calculateAccessFrequencyScore(&heavilyUsed)
+
+	if lowScore >= highScore {
+		t.Errorf("High access count should have higher score: high=%.3f, low=%.3f",
+			highScore, lowScore)
+	}
+}
+
+// abs returns the absolute value of x. Any value that does not compare less
+// than zero is returned unchanged.
+func abs(x float64) float64 {
+	switch {
+	case x < 0:
+		return -x
+	default:
+		return x
+	}
+}
+
+// Benchmark tests
+
+// BenchmarkMLCachePolicy_CalculateEvictionScore times CalculateEvictionScore on
+// a single sequentially accessed training-dataset entry. Setup is excluded
+// from the measurement via ResetTimer.
+func BenchmarkMLCachePolicy_CalculateEvictionScore(b *testing.B) {
+	policy := NewMLCachePolicy()
+
+	sample := CacheEntry{
+		EpochRelevance: 0.8,
+		IsTrainingData: true,
+		Pattern:        SequentialAccess,
+		FileType:       MLFileDataset,
+		AccessCount:    10,
+		LastAccess:     time.Now().Add(-5 * time.Minute),
+		Size:           1024,
+		Inode:          1,
+	}
+	entry := &sample
+
+	b.ResetTimer()
+
+	for remaining := b.N; remaining > 0; remaining-- {
+		policy.CalculateEvictionScore(entry)
+	}
+}
+
+// BenchmarkMLCachePolicy_ShouldEvict times ShouldEvict on a single dataset
+// entry last accessed five minutes before setup. Setup is excluded from the
+// measurement via ResetTimer.
+func BenchmarkMLCachePolicy_ShouldEvict(b *testing.B) {
+	policy := NewMLCachePolicy()
+
+	sample := CacheEntry{
+		FileType:    MLFileDataset,
+		AccessCount: 10,
+		LastAccess:  time.Now().Add(-5 * time.Minute),
+		Size:        1024,
+		Inode:       1,
+	}
+	entry := &sample
+
+	b.ResetTimer()
+
+	for remaining := b.N; remaining > 0; remaining-- {
+		policy.ShouldEvict(entry)
+	}
+}