author    chrislu <chris.lu@gmail.com>    2025-10-26 13:48:11 -0700
committer chrislu <chris.lu@gmail.com>    2025-10-26 13:48:11 -0700
commit    7b6f364e7f52424b782f41d19291355c17e4d924 (patch)
tree      31d93cff8bb572c0ace222fa6c824bd6f3183337
parent    96caa7610189f581657c02a1948ee6dec15bc61b (diff)
calculate expected shard size
-rw-r--r--  weed/storage/disk_location_ec.go                   55
-rw-r--r--  weed/storage/disk_location_ec_realworld_test.go   197
-rw-r--r--  weed/storage/disk_location_ec_shard_size_test.go  118
-rw-r--r--  weed/storage/disk_location_ec_test.go              31
4 files changed, 373 insertions(+), 28 deletions(-)
diff --git a/weed/storage/disk_location_ec.go b/weed/storage/disk_location_ec.go
index 0d39a4223..2859f7683 100644
--- a/weed/storage/disk_location_ec.go
+++ b/weed/storage/disk_location_ec.go
@@ -307,6 +307,30 @@ func (l *DiskLocation) EcShardCount() int {
return shardCount
}
+// calculateExpectedShardSize computes the exact expected shard size based on .dat file size
+// The EC encoding process is deterministic:
+// 1. Data is processed in batches of (LargeBlockSize * DataShardsCount) for large blocks
+// 2. Remaining data is processed in batches of (SmallBlockSize * DataShardsCount) for small blocks
+// 3. Each shard gets exactly its portion, with zero-padding applied to incomplete blocks
+func calculateExpectedShardSize(datFileSize int64) int64 {
+ var shardSize int64
+
+ // Process large blocks (1GB * 10 = 10GB batches)
+ largeBatchSize := int64(erasure_coding.ErasureCodingLargeBlockSize) * int64(erasure_coding.DataShardsCount)
+ numLargeBatches := datFileSize / largeBatchSize
+ shardSize = numLargeBatches * int64(erasure_coding.ErasureCodingLargeBlockSize)
+ remainingSize := datFileSize - (numLargeBatches * largeBatchSize)
+
+ // Process remaining data in small blocks (1MB * 10 = 10MB batches)
+ if remainingSize > 0 {
+ smallBatchSize := int64(erasure_coding.ErasureCodingSmallBlockSize) * int64(erasure_coding.DataShardsCount)
+ numSmallBatches := (remainingSize + smallBatchSize - 1) / smallBatchSize // Ceiling division
+ shardSize += numSmallBatches * int64(erasure_coding.ErasureCodingSmallBlockSize)
+ }
+
+ return shardSize
+}
+
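To make the arithmetic concrete, here is a minimal standalone sketch of the same calculation for an 11GB .dat file. The constants are local stand-ins for erasure_coding.ErasureCodingLargeBlockSize (1GB), erasure_coding.ErasureCodingSmallBlockSize (1MB), and erasure_coding.DataShardsCount (10), as assumed in the comments above:

package main

import "fmt"

func main() {
	const (
		largeBlock = int64(1 << 30) // stand-in for ErasureCodingLargeBlockSize (1GB)
		smallBlock = int64(1 << 20) // stand-in for ErasureCodingSmallBlockSize (1MB)
		dataShards = int64(10)      // stand-in for DataShardsCount
	)
	datSize := int64(11) << 30 // an 11GB .dat file

	largeBatch := largeBlock * dataShards      // each large batch consumes 10GB of data
	numLarge := datSize / largeBatch           // 1 full large batch
	shard := numLarge * largeBlock             // 1GB per shard so far
	remaining := datSize - numLarge*largeBatch // 1GB of data left over

	smallBatch := smallBlock * dataShards                 // each small batch consumes 10MB
	numSmall := (remaining + smallBatch - 1) / smallBatch // ceil(1024MB / 10MB) = 103
	shard += numSmall * smallBlock                        // plus 103MB per shard

	fmt.Println(shard) // 1181745152 bytes = 1GB + 103MB, matching the 11GB test case below
}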
// validateEcVolume checks if EC volume has enough shards to be functional
// For distributed EC volumes (where .dat is deleted), any number of shards is valid
// For incomplete EC encoding (where .dat still exists), we need at least DataShardsCount shards
@@ -319,21 +343,29 @@ func (l *DiskLocation) validateEcVolume(collection string, vid needle.VolumeId)
var expectedShardSize int64 = -1
datExists := false
- // If .dat file exists, compute expected shard size from it
+ // If .dat file exists, compute exact expected shard size from it
if datFileInfo, err := os.Stat(datFileName); err == nil {
datExists = true
- // Each shard should be approximately datFileSize / DataShardsCount (10 data shards)
- // Note: Due to block alignment and padding, actual shard size may be slightly larger
- expectedShardSize = datFileInfo.Size() / erasure_coding.DataShardsCount
+ expectedShardSize = calculateExpectedShardSize(datFileInfo.Size())
}
shardCount := 0
var actualShardSize int64 = -1
// Count shards and validate they all have the same size (required for Reed-Solomon EC)
+ // Check both l.Directory (where shards normally are) and l.IdxDirectory (in case of manual moves)
for i := 0; i < erasure_coding.TotalShardsCount; i++ {
+ // Check in primary directory (l.Directory)
shardFileName := baseFileName + erasure_coding.ToExt(i)
- if fi, err := os.Stat(shardFileName); err == nil {
+ fi, err := os.Stat(shardFileName)
+
+ // If not found in primary directory and IdxDirectory is different, check there too
+ if err != nil && l.Directory != l.IdxDirectory {
+ indexShardFileName := erasure_coding.EcShardFileName(collection, l.IdxDirectory, int(vid)) + erasure_coding.ToExt(i)
+ fi, err = os.Stat(indexShardFileName)
+ }
+
+ if err == nil {
// Check if file has non-zero size
if fi.Size() > 0 {
// Validate all shards are the same size (required for Reed-Solomon EC)
@@ -351,14 +383,11 @@ func (l *DiskLocation) validateEcVolume(collection string, vid needle.VolumeId)
}
}
- // If .dat file exists, validate shard size is reasonable compared to expected size
- // Due to block alignment and padding in EC encoding, actual shard size can be slightly larger
- // We allow up to SmallBlockSize (1MB) of padding per shard for block alignment
- if datExists && actualShardSize > 0 {
- maxExpectedSize := expectedShardSize + erasure_coding.ErasureCodingSmallBlockSize
- if actualShardSize < expectedShardSize || actualShardSize > maxExpectedSize {
- glog.V(0).Infof("EC volume %d: shard size %d outside expected range [%d, %d] (based on .dat file size with padding)",
- vid, actualShardSize, expectedShardSize, maxExpectedSize)
+ // If .dat file exists, validate shard size matches expected size
+ if datExists && actualShardSize > 0 && expectedShardSize > 0 {
+ if actualShardSize != expectedShardSize {
+ glog.V(0).Infof("EC volume %d: shard size %d doesn't match expected size %d (based on .dat file size)",
+ vid, actualShardSize, expectedShardSize)
return false
}
}
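For context, the decision this hunk implements can be summarized by the following hypothetical helper. This is a sketch, not the actual method: it assumes the shard sizes have already been collected, and it hard-codes DataShardsCount as 10.

// isEcVolumeUsable sketches the post-patch rule: without a .dat file the
// volume is distributed and any shard count is acceptable; with a .dat
// file present, at least DataShardsCount shards must exist and every
// shard must exactly match the size computed from the .dat file.
func isEcVolumeUsable(datExists bool, datFileSize int64, shardSizes []int64) bool {
	if !datExists {
		return true // distributed EC volume: other servers hold the remaining shards
	}
	if len(shardSizes) < 10 { // DataShardsCount
		return false // leftover from an incomplete EC encoding
	}
	expected := calculateExpectedShardSize(datFileSize)
	for _, size := range shardSizes {
		if size != expected {
			return false // padding is deterministic, so any mismatch indicates a bad shard
		}
	}
	return true
}

The exact equality check can replace the old padded range precisely because the zero-padding in the encoding is deterministic: two honest encodings of the same .dat file can never produce shards of different sizes.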
diff --git a/weed/storage/disk_location_ec_realworld_test.go b/weed/storage/disk_location_ec_realworld_test.go
new file mode 100644
index 000000000..7ebe16829
--- /dev/null
+++ b/weed/storage/disk_location_ec_realworld_test.go
@@ -0,0 +1,197 @@
+package storage
+
+import (
+ "os"
+ "testing"
+
+ "github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
+)
+
+// TestCalculateExpectedShardSizeWithRealEncoding validates our shard size calculation
+// by actually running EC encoding on real files and comparing the results
+func TestCalculateExpectedShardSizeWithRealEncoding(t *testing.T) {
+ tempDir := t.TempDir()
+
+ tests := []struct {
+ name string
+ datFileSize int64
+ description string
+ }{
+ {
+ name: "5MB file",
+ datFileSize: 5 * 1024 * 1024,
+ description: "Small file that needs 1 small block per shard",
+ },
+ {
+ name: "10MB file (exactly 10 small blocks)",
+ datFileSize: 10 * 1024 * 1024,
+ description: "Exactly fits in 1MB small blocks",
+ },
+ {
+ name: "15MB file",
+ datFileSize: 15 * 1024 * 1024,
+ description: "Requires 2 small blocks per shard",
+ },
+ {
+ name: "50MB file",
+ datFileSize: 50 * 1024 * 1024,
+ description: "Requires 5 small blocks per shard",
+ },
+ {
+ name: "100MB file",
+ datFileSize: 100 * 1024 * 1024,
+ description: "Requires 10 small blocks per shard",
+ },
+ {
+ name: "512MB file",
+ datFileSize: 512 * 1024 * 1024,
+ description: "Requires 52 small blocks per shard (rounded up)",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // Create a test .dat file with the specified size
+ baseFileName := tempDir + "/test_volume"
+ datFileName := baseFileName + ".dat"
+
+ // Create .dat file filled with a deterministic byte pattern
+ datFile, err := os.Create(datFileName)
+ if err != nil {
+ t.Fatalf("Failed to create .dat file: %v", err)
+ }
+
+ // Write some pattern data (not all zeros, to be more realistic)
+ pattern := make([]byte, 4096)
+ for i := range pattern {
+ pattern[i] = byte(i % 256)
+ }
+
+ written := int64(0)
+ for written < tt.datFileSize {
+ toWrite := tt.datFileSize - written
+ if toWrite > int64(len(pattern)) {
+ toWrite = int64(len(pattern))
+ }
+ n, err := datFile.Write(pattern[:toWrite])
+ if err != nil {
+ t.Fatalf("Failed to write to .dat file: %v", err)
+ }
+ written += int64(n)
+ }
+ datFile.Close()
+
+ // Calculate expected shard size using our function
+ expectedShardSize := calculateExpectedShardSize(tt.datFileSize)
+
+ // Run actual EC encoding
+ err = erasure_coding.WriteEcFiles(baseFileName)
+ if err != nil {
+ t.Fatalf("Failed to encode EC files: %v", err)
+ }
+
+ // Measure actual shard sizes
+ for i := 0; i < erasure_coding.TotalShardsCount; i++ {
+ shardFileName := baseFileName + erasure_coding.ToExt(i)
+ shardInfo, err := os.Stat(shardFileName)
+ if err != nil {
+ t.Fatalf("Failed to stat shard file %s: %v", shardFileName, err)
+ }
+
+ actualShardSize := shardInfo.Size()
+
+ // Verify actual size matches expected size
+ if actualShardSize != expectedShardSize {
+ t.Errorf("Shard %d size mismatch:\n"+
+ " .dat file size: %d bytes\n"+
+ " Expected shard size: %d bytes\n"+
+ " Actual shard size: %d bytes\n"+
+ " Difference: %d bytes\n"+
+ " %s",
+ i, tt.datFileSize, expectedShardSize, actualShardSize,
+ actualShardSize-expectedShardSize, tt.description)
+ }
+ }
+
+ // If we got here, all shards match!
+ t.Logf("✓ SUCCESS: .dat size %d → actual shard size %d matches calculated size (%s)",
+ tt.datFileSize, expectedShardSize, tt.description)
+
+ // Cleanup
+ os.Remove(datFileName)
+ for i := 0; i < erasure_coding.TotalShardsCount; i++ {
+ os.Remove(baseFileName + erasure_coding.ToExt(i))
+ }
+ })
+ }
+}
+
+// TestCalculateExpectedShardSizeEdgeCases tests edge cases with real encoding
+func TestCalculateExpectedShardSizeEdgeCases(t *testing.T) {
+ tempDir := t.TempDir()
+
+ tests := []struct {
+ name string
+ datFileSize int64
+ }{
+ {"1 byte file", 1},
+ {"1KB file", 1024},
+ {"10KB file", 10 * 1024},
+ {"1MB file (1 small block)", 1024 * 1024},
+ {"1MB + 1 byte", 1024*1024 + 1},
+ {"9.9MB (almost 1 small block per shard)", 9*1024*1024 + 900*1024},
+ {"10.1MB (just over 1 small block per shard)", 10*1024*1024 + 100*1024},
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ baseFileName := tempDir + "/" + tt.name
+ datFileName := baseFileName + ".dat"
+
+ // Create .dat file
+ datFile, err := os.Create(datFileName)
+ if err != nil {
+ t.Fatalf("Failed to create .dat file: %v", err)
+ }
+
+ // Write exactly the specified number of bytes
+ data := make([]byte, tt.datFileSize)
+ for i := range data {
+ data[i] = byte(i % 256)
+ }
+ datFile.Write(data)
+ datFile.Close()
+
+ // Calculate expected
+ expectedShardSize := calculateExpectedShardSize(tt.datFileSize)
+
+ // Run actual EC encoding
+ err = erasure_coding.WriteEcFiles(baseFileName)
+ if err != nil {
+ t.Fatalf("Failed to encode EC files: %v", err)
+ }
+
+ // Check first shard (all should be same size)
+ shardFileName := baseFileName + erasure_coding.ToExt(0)
+ shardInfo, err := os.Stat(shardFileName)
+ if err != nil {
+ t.Fatalf("Failed to stat shard file: %v", err)
+ }
+
+ actualShardSize := shardInfo.Size()
+
+ if actualShardSize != expectedShardSize {
+ t.Errorf("File size %d: expected shard %d, got %d (diff: %d)",
+ tt.datFileSize, expectedShardSize, actualShardSize, actualShardSize-expectedShardSize)
+ } else {
+ t.Logf("✓ File size %d → shard size %d (correct)", tt.datFileSize, actualShardSize)
+ }
+
+ // Cleanup
+ os.Remove(datFileName)
+ for i := 0; i < erasure_coding.TotalShardsCount; i++ {
+ os.Remove(baseFileName + erasure_coding.ToExt(i))
+ }
+ })
+ }
+}
diff --git a/weed/storage/disk_location_ec_shard_size_test.go b/weed/storage/disk_location_ec_shard_size_test.go
new file mode 100644
index 000000000..061783736
--- /dev/null
+++ b/weed/storage/disk_location_ec_shard_size_test.go
@@ -0,0 +1,118 @@
+package storage
+
+import (
+ "testing"
+)
+
+func TestCalculateExpectedShardSize(t *testing.T) {
+ tests := []struct {
+ name string
+ datFileSize int64
+ expectedShardSize int64
+ description string
+ }{
+ {
+ name: "Exact 10GB (1 large batch)",
+ datFileSize: 10 * 1024 * 1024 * 1024, // 10GB = 1 large batch
+ expectedShardSize: 1 * 1024 * 1024 * 1024, // 1GB per shard
+ description: "Exactly fits in large blocks",
+ },
+ {
+ name: "11GB (1 large batch + 103 small blocks)",
+ datFileSize: 11 * 1024 * 1024 * 1024, // 11GB
+ expectedShardSize: 1*1024*1024*1024 + 103*1024*1024, // 1GB + 103MB (103 small blocks for 1GB remaining)
+ description: "1GB large + 1GB remaining needs 103 small blocks",
+ },
+ {
+ name: "5MB (requires 1 small block per shard)",
+ datFileSize: 5 * 1024 * 1024, // 5MB
+ expectedShardSize: 1 * 1024 * 1024, // 1MB per shard (rounded up)
+ description: "Small file rounds up to 1MB per shard",
+ },
+ {
+ name: "15MB (requires 2 small blocks per shard)",
+ datFileSize: 15 * 1024 * 1024, // 15MB
+ expectedShardSize: 2 * 1024 * 1024, // 2MB per shard
+ description: "15MB needs 2 small blocks per shard",
+ },
+ {
+ name: "1KB (minimum size)",
+ datFileSize: 1024,
+ expectedShardSize: 1 * 1024 * 1024, // 1MB per shard (1 small block)
+ description: "Tiny file needs 1 small block",
+ },
+ {
+ name: "10.5GB (mixed)",
+ datFileSize: 10*1024*1024*1024 + 512*1024*1024, // 10.5GB
+ expectedShardSize: 1*1024*1024*1024 + 52*1024*1024, // 1GB + 52MB (52 small blocks for 512MB remaining)
+ description: "1GB large + 512MB remaining needs 52 small blocks",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ actualShardSize := calculateExpectedShardSize(tt.datFileSize)
+
+ if actualShardSize != tt.expectedShardSize {
+ t.Errorf("Expected shard size %d, got %d. %s",
+ tt.expectedShardSize, actualShardSize, tt.description)
+ }
+
+ t.Logf("✓ File size: %d → Shard size: %d (%s)",
+ tt.datFileSize, actualShardSize, tt.description)
+ })
+ }
+}
+
+// TestShardSizeValidationScenarios tests realistic scenarios
+func TestShardSizeValidationScenarios(t *testing.T) {
+ scenarios := []struct {
+ name string
+ datFileSize int64
+ actualShardSize int64
+ shouldBeValid bool
+ }{
+ {
+ name: "Valid: exact match for 10GB",
+ datFileSize: 10 * 1024 * 1024 * 1024, // 10GB
+ actualShardSize: 1 * 1024 * 1024 * 1024, // 1GB (exact)
+ shouldBeValid: true,
+ },
+ {
+ name: "Invalid: 1 byte too small",
+ datFileSize: 10 * 1024 * 1024 * 1024, // 10GB
+ actualShardSize: 1*1024*1024*1024 - 1, // 1GB - 1 byte
+ shouldBeValid: false,
+ },
+ {
+ name: "Invalid: 1 byte too large",
+ datFileSize: 10 * 1024 * 1024 * 1024, // 10GB
+ actualShardSize: 1*1024*1024*1024 + 1, // 1GB + 1 byte
+ shouldBeValid: false,
+ },
+ {
+ name: "Valid: small file exact match",
+ datFileSize: 5 * 1024 * 1024, // 5MB
+ actualShardSize: 1 * 1024 * 1024, // 1MB (exact)
+ shouldBeValid: true,
+ },
+ {
+ name: "Invalid: wrong size for small file",
+ datFileSize: 5 * 1024 * 1024, // 5MB
+ actualShardSize: 500 * 1024, // 500KB (too small)
+ shouldBeValid: false,
+ },
+ }
+
+ for _, scenario := range scenarios {
+ t.Run(scenario.name, func(t *testing.T) {
+ expectedSize := calculateExpectedShardSize(scenario.datFileSize)
+ isValid := scenario.actualShardSize == expectedSize
+
+ if isValid != scenario.shouldBeValid {
+ t.Errorf("Expected validation result %v, got %v. Actual shard: %d, Expected: %d",
+ scenario.shouldBeValid, isValid, scenario.actualShardSize, expectedSize)
+ }
+ })
+ }
+}
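One set of cases the tables above leave implicit is the exact small-batch boundaries. A further table-driven test along these lines would pin them down (a sketch for the same package, where the 10MB batch size assumes 1MB small blocks and 10 data shards):

func TestShardSizeSmallBatchBoundaries(t *testing.T) {
	const smallBatch = int64(10 * 1024 * 1024) // SmallBlockSize * DataShardsCount
	cases := []struct {
		datFileSize       int64
		expectedShardSize int64
	}{
		{smallBatch - 1, 1024 * 1024},     // just under one batch → 1 small block per shard
		{smallBatch, 1024 * 1024},         // exactly one batch → still 1 small block per shard
		{smallBatch + 1, 2 * 1024 * 1024}, // one byte over → a second small block per shard
	}
	for _, c := range cases {
		if got := calculateExpectedShardSize(c.datFileSize); got != c.expectedShardSize {
			t.Errorf("dat size %d: expected shard size %d, got %d",
				c.datFileSize, c.expectedShardSize, got)
		}
	}
}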
diff --git a/weed/storage/disk_location_ec_test.go b/weed/storage/disk_location_ec_test.go
index f5ff8b281..874e73da6 100644
--- a/weed/storage/disk_location_ec_test.go
+++ b/weed/storage/disk_location_ec_test.go
@@ -113,18 +113,17 @@ func TestIncompleteEcEncodingCleanup(t *testing.T) {
// Setup test files
baseFileName := erasure_coding.EcShardFileName(tt.collection, tempDir, int(tt.volumeId))
- // Standard shard size for test cases
- shardSize := 1024 // 1KB per shard
+ // Use deterministic sizes that match EC encoding
+ datFileSize := int64(10 * 1024 * 1024 * 1024) // 10GB
+ expectedShardSize := calculateExpectedShardSize(datFileSize)
// Create .dat file if needed
- // .dat file size should be DataShardsCount * shard size (10 shards)
if tt.createDatFile {
datFile, err := os.Create(baseFileName + ".dat")
if err != nil {
t.Fatalf("Failed to create .dat file: %v", err)
}
- datData := make([]byte, erasure_coding.DataShardsCount*shardSize)
- datFile.Write(datData)
+ datFile.Truncate(datFileSize)
datFile.Close()
}
@@ -134,8 +133,7 @@ func TestIncompleteEcEncodingCleanup(t *testing.T) {
if err != nil {
t.Fatalf("Failed to create shard file: %v", err)
}
- shardData := make([]byte, shardSize)
- shardFile.Write(shardData)
+ shardFile.Truncate(expectedShardSize)
shardFile.Close()
}
@@ -276,29 +274,32 @@ func TestValidateEcVolume(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
baseFileName := erasure_coding.EcShardFileName(tt.collection, tempDir, int(tt.volumeId))
- // Standard shard size for normal test cases
- shardSize := 1024 // 1KB per shard
+ // For proper testing, we need to use realistic sizes that match EC encoding
+ // EC uses large blocks (1GB) and small blocks (1MB)
+ // For test purposes, use a .dat file size that results in expected shard sizes
+ // 10GB .dat file = 1GB per shard (1 large block)
+ datFileSize := int64(10 * 1024 * 1024 * 1024) // 10GB
+ expectedShardSize := calculateExpectedShardSize(datFileSize)
// Create .dat file if needed
- // .dat file size should be DataShardsCount * shard size (10 shards)
if tt.createDatFile {
datFile, err := os.Create(baseFileName + ".dat")
if err != nil {
t.Fatalf("Failed to create .dat file: %v", err)
}
- datData := make([]byte, erasure_coding.DataShardsCount*shardSize)
- datFile.Write(datData)
+ // Truncate creates a sparse 10GB file; tests don't need to write real data
+ datFile.Truncate(datFileSize)
datFile.Close()
}
- // Create EC shard files
+ // Create EC shard files with correct size
for i := 0; i < tt.numShards; i++ {
shardFile, err := os.Create(baseFileName + erasure_coding.ToExt(i))
if err != nil {
t.Fatalf("Failed to create shard file: %v", err)
}
- shardData := make([]byte, shardSize)
- shardFile.Write(shardData)
+ // Use truncate to create file of correct size without allocating all the space
+ shardFile.Truncate(expectedShardSize)
shardFile.Close()
}