diff options
Diffstat (limited to 'weed/mount/ml/cache_policy_test.go')
| -rw-r--r-- | weed/mount/ml/cache_policy_test.go | 549 |
1 files changed, 549 insertions, 0 deletions
diff --git a/weed/mount/ml/cache_policy_test.go b/weed/mount/ml/cache_policy_test.go new file mode 100644 index 000000000..29df5b859 --- /dev/null +++ b/weed/mount/ml/cache_policy_test.go @@ -0,0 +1,549 @@ +package ml + +import ( + "testing" + "time" +) + +func TestMLCachePolicy_Basic(t *testing.T) { + policy := NewMLCachePolicy() + + // Test basic eviction score calculation + entry := &CacheEntry{ + Inode: 1, + Size: 1024, + LastAccess: time.Now(), + AccessCount: 5, + CacheLevel: 0, + Pattern: RandomAccess, + FileType: MLFileUnknown, + IsHot: false, + } + + score := policy.CalculateEvictionScore(entry) + if score <= 0 { + t.Error("Eviction score should be positive") + } + + shouldEvict := policy.ShouldEvict(entry) + t.Logf("Basic entry eviction: score=%.3f, shouldEvict=%v", score, shouldEvict) +} + +func TestMLCachePolicy_ModelFileBoost(t *testing.T) { + policy := NewMLCachePolicy() + + // Create two identical entries, one is a model file + baseEntry := &CacheEntry{ + Inode: 1, + Size: 10 * 1024 * 1024, // 10MB + LastAccess: time.Now().Add(-5 * time.Minute), + AccessCount: 3, + CacheLevel: 0, + Pattern: SequentialAccess, + FileType: MLFileUnknown, + IsModel: false, + } + + modelEntry := &CacheEntry{ + Inode: 2, + Size: 10 * 1024 * 1024, // 10MB + LastAccess: time.Now().Add(-5 * time.Minute), + AccessCount: 3, + CacheLevel: 0, + Pattern: SequentialAccess, + FileType: MLFileModel, + IsModel: true, + } + + baseScore := policy.CalculateEvictionScore(baseEntry) + modelScore := policy.CalculateEvictionScore(modelEntry) + + if modelScore <= baseScore { + t.Errorf("Model file should have higher score than regular file: model=%.3f, base=%.3f", + modelScore, baseScore) + } + + // Model files should be less likely to be evicted + baseShouldEvict := policy.ShouldEvict(baseEntry) + modelShouldEvict := policy.ShouldEvict(modelEntry) + + if modelShouldEvict && !baseShouldEvict { + t.Error("Model file should not be evicted if regular file is not evicted") + } + + t.Logf("Model vs Base eviction: model=%.3f (evict=%v), base=%.3f (evict=%v)", + modelScore, modelShouldEvict, baseScore, baseShouldEvict) +} + +func TestMLCachePolicy_TrainingDataBoost(t *testing.T) { + policy := NewMLCachePolicy() + + regularEntry := &CacheEntry{ + Inode: 1, + Size: 1024, + LastAccess: time.Now().Add(-2 * time.Minute), + AccessCount: 10, + FileType: MLFileUnknown, + IsTrainingData: false, + } + + trainingEntry := &CacheEntry{ + Inode: 2, + Size: 1024, + LastAccess: time.Now().Add(-2 * time.Minute), + AccessCount: 10, + FileType: MLFileDataset, + IsTrainingData: true, + } + + regularScore := policy.CalculateEvictionScore(regularEntry) + trainingScore := policy.CalculateEvictionScore(trainingEntry) + + if trainingScore <= regularScore { + t.Errorf("Training data should have higher score: training=%.3f, regular=%.3f", + trainingScore, regularScore) + } +} + +func TestMLCachePolicy_AccessPatternBoost(t *testing.T) { + policy := NewMLCachePolicy() + + randomEntry := &CacheEntry{ + Inode: 1, + Size: 1024, + LastAccess: time.Now(), + AccessCount: 5, + Pattern: RandomAccess, + FileType: MLFileDataset, + } + + sequentialEntry := &CacheEntry{ + Inode: 2, + Size: 1024, + LastAccess: time.Now(), + AccessCount: 5, + Pattern: SequentialAccess, + FileType: MLFileDataset, + } + + modelAccessEntry := &CacheEntry{ + Inode: 3, + Size: 1024, + LastAccess: time.Now(), + AccessCount: 5, + Pattern: ModelAccess, + FileType: MLFileModel, + } + + randomScore := policy.CalculateEvictionScore(randomEntry) + sequentialScore := policy.CalculateEvictionScore(sequentialEntry) + modelScore := policy.CalculateEvictionScore(modelAccessEntry) + + if sequentialScore <= randomScore { + t.Errorf("Sequential access should have higher score than random: seq=%.3f, random=%.3f", + sequentialScore, randomScore) + } + + if modelScore <= sequentialScore { + t.Errorf("Model access should have highest score: model=%.3f, seq=%.3f", + modelScore, sequentialScore) + } + + t.Logf("Pattern comparison: random=%.3f, sequential=%.3f, model=%.3f", + randomScore, sequentialScore, modelScore) +} + +func TestMLCachePolicy_SizePreference(t *testing.T) { + policy := NewMLCachePolicy() + + smallEntry := &CacheEntry{ + Inode: 1, + Size: 1024, // 1KB + LastAccess: time.Now().Add(-5 * time.Minute), + AccessCount: 3, + FileType: MLFileUnknown, + } + + largeEntry := &CacheEntry{ + Inode: 2, + Size: 50 * 1024 * 1024, // 50MB + LastAccess: time.Now().Add(-5 * time.Minute), + AccessCount: 3, + FileType: MLFileUnknown, + } + + smallScore := policy.CalculateEvictionScore(smallEntry) + largeScore := policy.CalculateEvictionScore(largeEntry) + + if smallScore <= largeScore { + t.Errorf("Small files should have higher score than large files: small=%.3f, large=%.3f", + smallScore, largeScore) + } +} + +func TestMLCachePolicy_RecencyDecay(t *testing.T) { + policy := NewMLCachePolicy() + + // Create entries with different access times + recentEntry := &CacheEntry{ + Inode: 1, + + Size: 1024, + LastAccess: time.Now(), + AccessCount: 5, + FileType: MLFileUnknown, + } + + oldEntry := &CacheEntry{ + Inode: 2, + + Size: 1024, + LastAccess: time.Now().Add(-20 * time.Minute), + AccessCount: 5, + FileType: MLFileUnknown, + } + + recentScore := policy.CalculateEvictionScore(recentEntry) + oldScore := policy.CalculateEvictionScore(oldEntry) + + if recentScore <= oldScore { + t.Errorf("Recent access should have higher score: recent=%.3f, old=%.3f", + recentScore, oldScore) + } +} + +func TestMLCachePolicy_EpochRelevance(t *testing.T) { + policy := NewMLCachePolicy() + + lowRelevanceEntry := &CacheEntry{ + Inode: 1, + + Size: 1024, + LastAccess: time.Now(), + AccessCount: 5, + FileType: MLFileDataset, + EpochRelevance: 0.2, + } + + highRelevanceEntry := &CacheEntry{ + Inode: 2, + + Size: 1024, + LastAccess: time.Now(), + AccessCount: 5, + FileType: MLFileDataset, + EpochRelevance: 0.9, + } + + lowScore := policy.CalculateEvictionScore(lowRelevanceEntry) + highScore := policy.CalculateEvictionScore(highRelevanceEntry) + + if highScore <= lowScore { + t.Errorf("High epoch relevance should have higher score: high=%.3f, low=%.3f", + highScore, lowScore) + } +} + +func TestMLCachePolicy_DifferentThresholds(t *testing.T) { + policy := NewMLCachePolicy() + + // Create entries for different file types with same base score + unknownEntry := &CacheEntry{ + Inode: 1, + + Size: 1024, + LastAccess: time.Now().Add(-15 * time.Minute), // Old enough to potentially evict + AccessCount: 2, + FileType: MLFileUnknown, + } + + modelEntry := &CacheEntry{ + Inode: 2, + + Size: 1024, + LastAccess: time.Now().Add(-15 * time.Minute), + AccessCount: 2, + FileType: MLFileModel, + IsModel: true, + } + + datasetEntry := &CacheEntry{ + Inode: 3, + + Size: 1024, + LastAccess: time.Now().Add(-15 * time.Minute), + AccessCount: 2, + FileType: MLFileDataset, + Pattern: SequentialAccess, + } + + unknownShouldEvict := policy.ShouldEvict(unknownEntry) + modelShouldEvict := policy.ShouldEvict(modelEntry) + datasetShouldEvict := policy.ShouldEvict(datasetEntry) + + // Models should be least likely to be evicted + if modelShouldEvict && (!unknownShouldEvict || !datasetShouldEvict) { + t.Error("Model files should be least likely to be evicted") + } + + t.Logf("Eviction by type: unknown=%v, model=%v, dataset=%v", + unknownShouldEvict, modelShouldEvict, datasetShouldEvict) +} + +func TestMLCachePolicy_SetWeights(t *testing.T) { + policy := NewMLCachePolicy() + + // Test setting custom weights + policy.SetWeights(0.4, 0.3, 0.1, 0.2) + + if policy.accessFrequencyWeight != 0.4 { + t.Errorf("Expected frequency weight 0.4, got %.2f", policy.accessFrequencyWeight) + } + + if policy.recencyWeight != 0.3 { + t.Errorf("Expected recency weight 0.3, got %.2f", policy.recencyWeight) + } + + if policy.sizeWeight != 0.1 { + t.Errorf("Expected size weight 0.1, got %.2f", policy.sizeWeight) + } + + if policy.mlWeight != 0.2 { + t.Errorf("Expected ML weight 0.2, got %.2f", policy.mlWeight) + } + + // Test weight normalization + policy.SetWeights(2.0, 2.0, 1.0, 1.0) // Total = 6.0 + + expectedFreq := 2.0 / 6.0 + if abs(policy.accessFrequencyWeight - expectedFreq) > 0.001 { + t.Errorf("Expected normalized frequency weight %.3f, got %.3f", + expectedFreq, policy.accessFrequencyWeight) + } +} + +func TestMLCachePolicy_SetMLBoosts(t *testing.T) { + policy := NewMLCachePolicy() + + // Test setting custom boost factors + policy.SetMLBoosts(2.0, 3.0, 1.5, 1.8) + + if policy.trainingDataBoost != 2.0 { + t.Errorf("Expected training data boost 2.0, got %.2f", policy.trainingDataBoost) + } + + if policy.modelFileBoost != 3.0 { + t.Errorf("Expected model file boost 3.0, got %.2f", policy.modelFileBoost) + } + + if policy.sequentialBoost != 1.5 { + t.Errorf("Expected sequential boost 1.5, got %.2f", policy.sequentialBoost) + } + + if policy.epochRelevanceBoost != 1.8 { + t.Errorf("Expected epoch relevance boost 1.8, got %.2f", policy.epochRelevanceBoost) + } +} + +func TestMLCachePolicy_Metrics(t *testing.T) { + policy := NewMLCachePolicy() + + // Simulate some evictions + entries := []*CacheEntry{ + {FileType: MLFileModel, IsModel: true}, + {FileType: MLFileDataset, IsTrainingData: true}, + {FileType: MLFileUnknown}, + } + + for _, entry := range entries { + entry.LastAccess = time.Now().Add(-30 * time.Minute) // Old enough to evict + entry.AccessCount = 1 + entry.Size = 1024 + + if policy.ShouldEvict(entry) { + // Eviction counters are updated in ShouldEvict + } + } + + metrics := policy.GetEvictionMetrics() + + if metrics.TotalEvictions == 0 { + t.Error("Should have some total evictions") + } + + // Verify weight configuration in metrics + if metrics.AccessFrequencyWeight != policy.accessFrequencyWeight { + t.Error("Metrics should reflect current weight configuration") + } +} + +func TestMLCachePolicy_HotChunkPreference(t *testing.T) { + policy := NewMLCachePolicy() + + coldEntry := &CacheEntry{ + Inode: 1, + + Size: 1024, + LastAccess: time.Now(), + AccessCount: 5, + IsHot: false, + FileType: MLFileDataset, + } + + hotEntry := &CacheEntry{ + Inode: 2, + + Size: 1024, + LastAccess: time.Now(), + AccessCount: 5, + IsHot: true, + FileType: MLFileDataset, + } + + coldScore := policy.CalculateEvictionScore(coldEntry) + hotScore := policy.CalculateEvictionScore(hotEntry) + + if hotScore <= coldScore { + t.Errorf("Hot chunk should have higher score: hot=%.3f, cold=%.3f", hotScore, coldScore) + } +} + +func TestMLCachePolicy_RecencyThresholds(t *testing.T) { + policy := NewMLCachePolicy() + + // Test hot threshold + hotEntry := &CacheEntry{ + Inode: 1, + Size: 1024, + LastAccess: time.Now().Add(-30 * time.Second), // Within hot threshold + AccessCount: 1, + } + + // Test cold threshold + coldEntry := &CacheEntry{ + Inode: 2, + Size: 1024, + LastAccess: time.Now().Add(-15 * time.Minute), // Beyond cold threshold + AccessCount: 1, + } + + // Test middle + middleEntry := &CacheEntry{ + Inode: 3, + Size: 1024, + LastAccess: time.Now().Add(-5 * time.Minute), // Between thresholds + AccessCount: 1, + } + + hotScore := policy.calculateRecencyScore(time.Since(hotEntry.LastAccess)) + coldScore := policy.calculateRecencyScore(time.Since(coldEntry.LastAccess)) + middleScore := policy.calculateRecencyScore(time.Since(middleEntry.LastAccess)) + + if hotScore != 1.0 { + t.Errorf("Hot entry should have score 1.0, got %.3f", hotScore) + } + + if coldScore != 0.1 { + t.Errorf("Cold entry should have score 0.1, got %.3f", coldScore) + } + + if middleScore <= coldScore || middleScore >= hotScore { + t.Errorf("Middle entry should have score between hot and cold: %.3f not in (%.3f, %.3f)", + middleScore, coldScore, hotScore) + } +} + +func TestMLCachePolicy_SizeScore(t *testing.T) { + policy := NewMLCachePolicy() + + smallSize := uint64(1024) // 1KB + largeSize := uint64(100 * 1024 * 1024) // 100MB + + smallScore := policy.calculateSizeScore(smallSize) + largeScore := policy.calculateSizeScore(largeSize) + + if smallScore <= largeScore { + t.Errorf("Small files should have higher size score: small=%.3f, large=%.3f", + smallScore, largeScore) + } + + // Large files should still have reasonable score (not too low) + if largeScore < 0.2 { + t.Errorf("Large files should have reasonable score, got %.3f", largeScore) + } +} + +func TestMLCachePolicy_AccessFrequencyScore(t *testing.T) { + policy := NewMLCachePolicy() + + lowAccessEntry := &CacheEntry{ + AccessCount: 1, + FileType: MLFileUnknown, + Pattern: RandomAccess, + } + + highAccessEntry := &CacheEntry{ + AccessCount: 100, + FileType: MLFileUnknown, + Pattern: RandomAccess, + } + + lowScore := policy.calculateAccessFrequencyScore(lowAccessEntry) + highScore := policy.calculateAccessFrequencyScore(highAccessEntry) + + if highScore <= lowScore { + t.Errorf("High access count should have higher score: high=%.3f, low=%.3f", + highScore, lowScore) + } +} + +// Helper function +func abs(x float64) float64 { + if x < 0 { + return -x + } + return x +} + +// Benchmark tests + +func BenchmarkMLCachePolicy_CalculateEvictionScore(b *testing.B) { + policy := NewMLCachePolicy() + + entry := &CacheEntry{ + Inode: 1, + + Size: 1024, + LastAccess: time.Now().Add(-5 * time.Minute), + AccessCount: 10, + FileType: MLFileDataset, + Pattern: SequentialAccess, + IsTrainingData: true, + EpochRelevance: 0.8, + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + policy.CalculateEvictionScore(entry) + } +} + +func BenchmarkMLCachePolicy_ShouldEvict(b *testing.B) { + policy := NewMLCachePolicy() + + entry := &CacheEntry{ + Inode: 1, + + Size: 1024, + LastAccess: time.Now().Add(-5 * time.Minute), + AccessCount: 10, + FileType: MLFileDataset, + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + policy.ShouldEvict(entry) + } +} |
