path: root/weed/storage/erasure_coding
author     Chris Lu <chrislusf@users.noreply.github.com>  2025-10-27 22:13:31 -0700
committer  GitHub <noreply@github.com>  2025-10-27 22:13:31 -0700
commit     208d7f24f46fead096fd92432b90ebee7c406216 (patch)
tree       823966d31ac8e4f1b32616ca3d2d09fdda402862 /weed/storage/erasure_coding
parent     decfb07eea83b125e9e09b74546c4f3611aaaf86 (diff)
download   seaweedfs-208d7f24f46fead096fd92432b90ebee7c406216.tar.xz
           seaweedfs-208d7f24f46fead096fd92432b90ebee7c406216.zip
Erasure Coding: Ec refactoring (#7396)
* refactor: add ECContext structure to encapsulate EC parameters
  - Create ec_context.go with ECContext struct
  - NewDefaultECContext() creates context with default 10+4 configuration
  - Helper methods: CreateEncoder(), ToExt(), String()
  - Foundation for cleaner function signatures
  - No behavior change, still uses hardcoded 10+4

* refactor: update ec_encoder.go to use ECContext
  - Add WriteEcFilesWithContext() and RebuildEcFilesWithContext() functions
  - Keep old functions for backward compatibility (call new versions)
  - Update all internal functions to accept ECContext parameter
  - Use ctx.DataShards, ctx.ParityShards, ctx.TotalShards consistently
  - Use ctx.CreateEncoder() instead of hardcoded reedsolomon.New()
  - Use ctx.ToExt() for shard file extensions
  - No behavior change, still uses default 10+4 configuration

* refactor: update ec_volume.go to use ECContext
  - Add ECContext field to EcVolume struct
  - Initialize ECContext with default configuration in NewEcVolume()
  - Update LocateEcShardNeedleInterval() to use ECContext.DataShards
  - Phase 1: Always uses default 10+4 configuration
  - No behavior change

* refactor: add EC shard count fields to VolumeInfo protobuf
  - Add data_shards_count field (field 8) to VolumeInfo message
  - Add parity_shards_count field (field 9) to VolumeInfo message
  - Fields are optional, 0 means use default (10+4)
  - Backward compatible: fields added at end
  - Phase 1: Foundation for future customization

* refactor: regenerate protobuf Go files with EC shard count fields
  - Regenerated volume_server_pb/*.go with new EC fields
  - DataShardsCount and ParityShardsCount accessors added to VolumeInfo
  - No behavior change, fields not yet used

* refactor: update VolumeEcShardsGenerate to use ECContext
  - Create ECContext with default configuration in VolumeEcShardsGenerate
  - Use ecCtx.TotalShards and ecCtx.ToExt() in cleanup
  - Call WriteEcFilesWithContext() instead of WriteEcFiles()
  - Save EC configuration (DataShardsCount, ParityShardsCount) to VolumeInfo
  - Log EC context being used
  - Phase 1: Always uses default 10+4 configuration
  - No behavior change

* fmt

* refactor: update ec_test.go to use ECContext
  - Update TestEncodingDecoding to create and use ECContext
  - Update validateFiles() to accept ECContext parameter
  - Update removeGeneratedFiles() to use ctx.TotalShards and ctx.ToExt()
  - Test passes with default 10+4 configuration

* refactor: use EcShardConfig message instead of separate fields

* optimize: pre-calculate row sizes in EC encoding loop

* refactor: replace TotalShards field with Total() method
  - Remove TotalShards field from ECContext to avoid field drift
  - Add Total() method that computes DataShards + ParityShards
  - Update all references to use ctx.Total() instead of ctx.TotalShards
  - Read EC config from VolumeInfo when loading EC volumes
  - Read data shard count from .vif in VolumeEcShardsToVolume
  - Use >= instead of > for exact boundary handling in encoding loops

* optimize: simplify VolumeEcShardsToVolume to use existing EC context
  - Remove redundant CollectEcShards call
  - Remove redundant .vif file loading
  - Use v.ECContext.DataShards directly (already loaded by NewEcVolume)
  - Slice tempShards instead of collecting again

* refactor: rename MaxShardId to MaxShardCount for clarity
  - Change from MaxShardId=31 to MaxShardCount=32
  - Eliminates confusing +1 arithmetic (MaxShardId+1)
  - More intuitive: MaxShardCount directly represents the limit

* fix: support custom EC ratios beyond 14 shards in VolumeEcShardsToVolume
  - Add MaxShardId constant (31, since ShardBits is uint32)
  - Use MaxShardId+1 (32) instead of TotalShardsCount (14) for tempShards buffer
  - Prevents panic when slicing for volumes with >14 total shards
  - Critical fix for custom EC configurations like 20+10

* fix: add validation for EC shard counts from VolumeInfo
  - Validate DataShards/ParityShards are positive and within MaxShardCount
  - Prevent zero or invalid values that could cause divide-by-zero
  - Fall back to defaults if validation fails, with warning log
  - VolumeEcShardsGenerate now preserves existing EC config when regenerating
  - Critical safety fix for corrupted or legacy .vif files

* fix: RebuildEcFiles now loads EC config from .vif file
  - Critical: RebuildEcFiles was always using default 10+4 config
  - Now loads actual EC config from .vif file when rebuilding shards
  - Validates config before use (positive shards, within MaxShardCount)
  - Falls back to default if .vif missing or invalid
  - Prevents data corruption when rebuilding custom EC volumes

* add: defensive validation for dataShards in VolumeEcShardsToVolume
  - Validate dataShards > 0 and <= MaxShardCount before use
  - Prevents panic from corrupted or uninitialized ECContext
  - Returns clear error message instead of panic
  - Defense-in-depth: validates even though upstream should catch issues

* fix: replace TotalShardsCount with MaxShardCount for custom EC ratio support

  Critical fixes to support custom EC ratios > 14 shards:

  disk_location_ec.go:
  - validateEcVolume: Check shards 0-31 instead of 0-13 during validation
  - removeEcVolumeFiles: Remove shards 0-31 instead of 0-13 during cleanup

  ec_volume_info.go ShardBits methods:
  - ShardIds(): Iterate up to MaxShardCount (32) instead of TotalShardsCount (14)
  - ToUint32Slice(): Iterate up to MaxShardCount (32)
  - IndexToShardId(): Iterate up to MaxShardCount (32)
  - MinusParityShards(): Remove shards 10-31 instead of 10-13 (added note about Phase 2)
  - Minus() shard size copy: Iterate up to MaxShardCount (32)
  - resizeShardSizes(): Iterate up to MaxShardCount (32)

  Without these changes:
  - Custom EC ratios > 14 total shards would fail validation on startup
  - Shards 14-31 would never be discovered or cleaned up
  - ShardBits operations would miss shards >= 14

  These changes are backward compatible - MaxShardCount (32) includes the
  default TotalShardsCount (14), so existing 10+4 volumes work as before.

* fix: replace TotalShardsCount with MaxShardCount in critical data structures

  Critical fixes for buffer allocations and loops that must support custom
  EC ratios up to 32 shards:

  Data Structures:
  - store_ec.go:354: Buffer allocation for shard recovery (bufs array)
  - topology_ec.go:14: EcShardLocations.Locations fixed array size
  - command_ec_rebuild.go:268: EC shard map allocation
  - command_ec_common.go:626: Shard-to-locations map allocation

  Shard Discovery Loops:
  - ec_task.go:378: Loop to find generated shard files
  - ec_shard_management.go: All 8 loops that check/count EC shards

  These changes are critical because:
  1. Buffer allocations sized to 14 would cause index-out-of-bounds panics when accessing shards 14-31
  2. Fixed arrays sized to 14 would truncate shard location data
  3. Loops limited to 0-13 would never discover/manage shards 14-31

  Note: command_ec_encode.go:208 intentionally NOT changed - it creates shard
  IDs to mount after encoding. In Phase 1 we always generate 14 shards, so this
  remains TotalShardsCount and will be made dynamic in Phase 2 based on actual
  EC context.

  Without these fixes, custom EC ratios > 14 total shards would cause:
  - Runtime panics (array index out of bounds)
  - Data loss (shards 14-31 never discovered/tracked)
  - Incomplete shard management (missing shards not detected)

* refactor: move MaxShardCount constant to ec_encoder.go

  Moved MaxShardCount from ec_volume_info.go to ec_encoder.go to group it with
  the other shard count constants (DataShardsCount, ParityShardsCount,
  TotalShardsCount). This improves code organization and makes it easier to
  understand the relationship between these constants.

  Location: ec_encoder.go line 22, between TotalShardsCount and MinTotalDisks

* improve: add defensive programming and better error messages for EC

  Code review improvements from CodeRabbit:

  1. ShardBits Guardrails (ec_volume_info.go):
     - AddShardId, RemoveShardId: Reject shard IDs >= MaxShardCount
     - HasShardId: Return false for out-of-range shard IDs
     - Prevents silent no-ops from bit shifts with invalid IDs

  2. Future-Proof Regex (disk_location_ec.go):
     - Updated regex from \.ec[0-9][0-9] to \.ec\d{2,3}
     - Now matches .ec00 through .ec999 (currently .ec00-.ec31 used)
     - Supports future increases to MaxShardCount beyond 99

  3. Better Error Messages (volume_grpc_erasure_coding.go):
     - Include valid range (1..32) in dataShards validation error
     - Helps operators quickly identify the problem

  4. Validation Before Save (volume_grpc_erasure_coding.go):
     - Validate ECContext (DataShards > 0, ParityShards > 0, Total <= MaxShardCount)
     - Log EC config being saved to .vif for debugging
     - Prevents writing invalid configs to disk

  These changes improve robustness and debuggability without changing core
  functionality.

* fmt

* fix: critical bugs from code review + clean up comments

  Critical bug fixes:
  1. command_ec_rebuild.go: Fixed indentation causing compilation error
     - Properly nested if/for blocks in registerEcNode
  2. ec_shard_management.go: Fixed isComplete logic incorrectly using MaxShardCount
     - Changed from MaxShardCount (32) back to TotalShardsCount (14)
     - Default 10+4 volumes were being incorrectly reported as incomplete
     - Missing shards 14-31 were being incorrectly reported as missing
     - Fixed in 4 locations: volume completeness checks and getMissingShards
  3. ec_volume_info.go: Fixed MinusParityShards removing too many shards
     - Changed from MaxShardCount (32) back to TotalShardsCount (14)
     - Was incorrectly removing shard IDs 10-31 instead of just 10-13

  Comment cleanup:
  - Removed Phase 1/Phase 2 references (development plan context)
  - Replaced with clear statements about default 10+4 configuration
  - SeaweedFS repo uses fixed 10+4 EC ratio, no phases needed

  Root cause: Over-aggressive replacement of TotalShardsCount with
  MaxShardCount. MaxShardCount (32) is the limit for buffer allocations and
  shard ID loops, but TotalShardsCount (14) must be used for default EC
  configuration logic.

* fix: add defensive bounds checks and compute actual shard counts

  Critical fixes from code review:
  1. topology_ec.go: Add defensive bounds checks to AddShard/DeleteShard
     - Prevent panic when shardId >= MaxShardCount (32)
     - Return false instead of crashing on out-of-range shard IDs
  2. command_ec_common.go: Fix doBalanceEcShardsAcrossRacks
     - Was using hardcoded TotalShardsCount (14) for all volumes
     - Now computes actual totalShardsForVolume from rackToShardCount
     - Fixes incorrect rebalancing for volumes with custom EC ratios
     - Example: 5+2=7 shards would incorrectly use 14 as the average

  These fixes improve robustness and prepare for future custom EC ratios
  without changing current behavior for default 10+4 volumes.

  Note: MinusParityShards and ec_task.go intentionally NOT changed for the
  seaweedfs repo - these will be enhanced in the seaweed-enterprise repo where
  custom EC ratio configuration is added.

* fmt

* style: make MaxShardCount type casting explicit in loops

  Improved code clarity by explicitly casting MaxShardCount to the appropriate
  type when used in loop comparisons:
  - ShardId comparisons: Cast to ShardId(MaxShardCount)
  - uint32 comparisons: Cast to uint32(MaxShardCount)

  Changed in 5 locations:
  - Minus() loop (line 90)
  - ShardIds() loop (line 143)
  - ToUint32Slice() loop (line 152)
  - IndexToShardId() loop (line 219)
  - resizeShardSizes() loop (line 248)

  This makes the intent explicit and improves readability. No functional
  changes - purely a style improvement.
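For orientation, here is a minimal sketch of how a caller is expected to use the new ECContext API introduced by this commit. The function names come from the diff below; the volume id and the base path "/data/1" are placeholder values.

package main

import (
	"fmt"

	"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
)

func main() {
	// Default 10+4 context; empty collection and volume id 1 are placeholders.
	ctx := erasure_coding.NewDefaultECContext("", 1)
	fmt.Println(ctx.String()) // 10+4 (total: 14)

	// Encodes <base>.dat into <base>.ec00 .. <base>.ec13.
	if err := erasure_coding.WriteEcFilesWithContext("/data/1", ctx); err != nil {
		fmt.Println("encode failed:", err)
	}
}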
Diffstat (limited to 'weed/storage/erasure_coding')
-rw-r--r--weed/storage/erasure_coding/ec_context.go46
-rw-r--r--weed/storage/erasure_coding/ec_encoder.go113
-rw-r--r--weed/storage/erasure_coding/ec_test.go19
-rw-r--r--weed/storage/erasure_coding/ec_volume.go28
-rw-r--r--weed/storage/erasure_coding/ec_volume_info.go21
5 files changed, 175 insertions, 52 deletions
diff --git a/weed/storage/erasure_coding/ec_context.go b/weed/storage/erasure_coding/ec_context.go
new file mode 100644
index 000000000..770fe41af
--- /dev/null
+++ b/weed/storage/erasure_coding/ec_context.go
@@ -0,0 +1,46 @@
+package erasure_coding
+
+import (
+ "fmt"
+
+ "github.com/klauspost/reedsolomon"
+ "github.com/seaweedfs/seaweedfs/weed/storage/needle"
+)
+
+// ECContext encapsulates erasure coding parameters for encoding/decoding operations
+type ECContext struct {
+ DataShards int
+ ParityShards int
+ Collection string
+ VolumeId needle.VolumeId
+}
+
+// Total returns the total number of shards (data + parity)
+func (ctx *ECContext) Total() int {
+ return ctx.DataShards + ctx.ParityShards
+}
+
+// NewDefaultECContext creates a context with default 10+4 shard configuration
+func NewDefaultECContext(collection string, volumeId needle.VolumeId) *ECContext {
+ return &ECContext{
+ DataShards: DataShardsCount,
+ ParityShards: ParityShardsCount,
+ Collection: collection,
+ VolumeId: volumeId,
+ }
+}
+
+// CreateEncoder creates a Reed-Solomon encoder for this context
+func (ctx *ECContext) CreateEncoder() (reedsolomon.Encoder, error) {
+ return reedsolomon.New(ctx.DataShards, ctx.ParityShards)
+}
+
+// ToExt returns the file extension for a given shard index
+func (ctx *ECContext) ToExt(shardIndex int) string {
+ return fmt.Sprintf(".ec%02d", shardIndex)
+}
+
+// String returns a human-readable representation of the EC configuration
+func (ctx *ECContext) String() string {
+ return fmt.Sprintf("%d+%d (total: %d)", ctx.DataShards, ctx.ParityShards, ctx.Total())
+}
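A quick illustration of the shard-name convention ToExt produces (a standalone sketch; toExt mirrors ECContext.ToExt above, and the base name "1" is arbitrary):

package main

import "fmt"

// toExt mirrors ECContext.ToExt: two-digit zero padding that grows as needed.
func toExt(shardIndex int) string {
	return fmt.Sprintf(".ec%02d", shardIndex)
}

func main() {
	fmt.Println("1" + toExt(7))   // 1.ec07
	fmt.Println("1" + toExt(31))  // 1.ec31
	fmt.Println("1" + toExt(100)) // 1.ec100; %02d grows past two digits,
	// which is why this commit widens the shard-file regex to \.ec\d{2,3}
}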
diff --git a/weed/storage/erasure_coding/ec_encoder.go b/weed/storage/erasure_coding/ec_encoder.go
index eeeb156e6..81ebffdcb 100644
--- a/weed/storage/erasure_coding/ec_encoder.go
+++ b/weed/storage/erasure_coding/ec_encoder.go
@@ -11,6 +11,7 @@ import (
"github.com/seaweedfs/seaweedfs/weed/storage/idx"
"github.com/seaweedfs/seaweedfs/weed/storage/needle_map"
"github.com/seaweedfs/seaweedfs/weed/storage/types"
+ "github.com/seaweedfs/seaweedfs/weed/storage/volume_info"
"github.com/seaweedfs/seaweedfs/weed/util"
)
@@ -18,6 +19,7 @@ const (
DataShardsCount = 10
ParityShardsCount = 4
TotalShardsCount = DataShardsCount + ParityShardsCount
+ MaxShardCount = 32 // Maximum number of shards since ShardBits is uint32 (bits 0-31)
MinTotalDisks = TotalShardsCount/ParityShardsCount + 1
ErasureCodingLargeBlockSize = 1024 * 1024 * 1024 // 1GB
ErasureCodingSmallBlockSize = 1024 * 1024 // 1MB
@@ -54,20 +56,53 @@ func WriteSortedFileFromIdx(baseFileName string, ext string) (e error) {
return nil
}
-// WriteEcFiles generates .ec00 ~ .ec13 files
+// WriteEcFiles generates .ec00 ~ .ec13 files using default EC context
func WriteEcFiles(baseFileName string) error {
- return generateEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize)
+ ctx := NewDefaultECContext("", 0)
+ return WriteEcFilesWithContext(baseFileName, ctx)
+}
+
+// WriteEcFilesWithContext generates EC files using the provided context
+func WriteEcFilesWithContext(baseFileName string, ctx *ECContext) error {
+ return generateEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, ctx)
}
func RebuildEcFiles(baseFileName string) ([]uint32, error) {
- return generateMissingEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize)
+ // Attempt to load EC config from .vif file to preserve original configuration
+ var ctx *ECContext
+ if volumeInfo, _, found, _ := volume_info.MaybeLoadVolumeInfo(baseFileName + ".vif"); found && volumeInfo.EcShardConfig != nil {
+ ds := int(volumeInfo.EcShardConfig.DataShards)
+ ps := int(volumeInfo.EcShardConfig.ParityShards)
+
+ // Validate EC config before using it
+ if ds > 0 && ps > 0 && ds+ps <= MaxShardCount {
+ ctx = &ECContext{
+ DataShards: ds,
+ ParityShards: ps,
+ }
+ glog.V(0).Infof("Rebuilding EC files for %s with config from .vif: %s", baseFileName, ctx.String())
+ } else {
+ glog.Warningf("Invalid EC config in .vif for %s (data=%d, parity=%d), using default", baseFileName, ds, ps)
+ ctx = NewDefaultECContext("", 0)
+ }
+ } else {
+ glog.V(0).Infof("Rebuilding EC files for %s with default config", baseFileName)
+ ctx = NewDefaultECContext("", 0)
+ }
+
+ return RebuildEcFilesWithContext(baseFileName, ctx)
+}
+
+// RebuildEcFilesWithContext rebuilds missing EC files using the provided context
+func RebuildEcFilesWithContext(baseFileName string, ctx *ECContext) ([]uint32, error) {
+ return generateMissingEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, ctx)
}
func ToExt(ecIndex int) string {
return fmt.Sprintf(".ec%02d", ecIndex)
}
-func generateEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64) error {
+func generateEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64, ctx *ECContext) error {
file, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
if err != nil {
return fmt.Errorf("failed to open dat file: %w", err)
@@ -79,21 +114,21 @@ func generateEcFiles(baseFileName string, bufferSize int, largeBlockSize int64,
return fmt.Errorf("failed to stat dat file: %w", err)
}
- glog.V(0).Infof("encodeDatFile %s.dat size:%d", baseFileName, fi.Size())
- err = encodeDatFile(fi.Size(), baseFileName, bufferSize, largeBlockSize, file, smallBlockSize)
+ glog.V(0).Infof("encodeDatFile %s.dat size:%d with EC context %s", baseFileName, fi.Size(), ctx.String())
+ err = encodeDatFile(fi.Size(), baseFileName, bufferSize, largeBlockSize, file, smallBlockSize, ctx)
if err != nil {
return fmt.Errorf("encodeDatFile: %w", err)
}
return nil
}
-func generateMissingEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64) (generatedShardIds []uint32, err error) {
+func generateMissingEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64, ctx *ECContext) (generatedShardIds []uint32, err error) {
- shardHasData := make([]bool, TotalShardsCount)
- inputFiles := make([]*os.File, TotalShardsCount)
- outputFiles := make([]*os.File, TotalShardsCount)
- for shardId := 0; shardId < TotalShardsCount; shardId++ {
- shardFileName := baseFileName + ToExt(shardId)
+ shardHasData := make([]bool, ctx.Total())
+ inputFiles := make([]*os.File, ctx.Total())
+ outputFiles := make([]*os.File, ctx.Total())
+ for shardId := 0; shardId < ctx.Total(); shardId++ {
+ shardFileName := baseFileName + ctx.ToExt(shardId)
if util.FileExists(shardFileName) {
shardHasData[shardId] = true
inputFiles[shardId], err = os.OpenFile(shardFileName, os.O_RDONLY, 0)
@@ -111,14 +146,14 @@ func generateMissingEcFiles(baseFileName string, bufferSize int, largeBlockSize
}
}
- err = rebuildEcFiles(shardHasData, inputFiles, outputFiles)
+ err = rebuildEcFiles(shardHasData, inputFiles, outputFiles, ctx)
if err != nil {
return nil, fmt.Errorf("rebuildEcFiles: %w", err)
}
return
}
-func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error {
+func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File, ctx *ECContext) error {
bufferSize := int64(len(buffers[0]))
if bufferSize == 0 {
@@ -131,7 +166,7 @@ func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize i
}
for b := int64(0); b < batchCount; b++ {
- err := encodeDataOneBatch(file, enc, startOffset+b*bufferSize, blockSize, buffers, outputs)
+ err := encodeDataOneBatch(file, enc, startOffset+b*bufferSize, blockSize, buffers, outputs, ctx)
if err != nil {
return err
}
@@ -140,9 +175,9 @@ func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize i
return nil
}
-func openEcFiles(baseFileName string, forRead bool) (files []*os.File, err error) {
- for i := 0; i < TotalShardsCount; i++ {
- fname := baseFileName + ToExt(i)
+func openEcFiles(baseFileName string, forRead bool, ctx *ECContext) (files []*os.File, err error) {
+ for i := 0; i < ctx.Total(); i++ {
+ fname := baseFileName + ctx.ToExt(i)
openOption := os.O_TRUNC | os.O_CREATE | os.O_WRONLY
if forRead {
openOption = os.O_RDONLY
@@ -164,10 +199,10 @@ func closeEcFiles(files []*os.File) {
}
}
-func encodeDataOneBatch(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error {
+func encodeDataOneBatch(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File, ctx *ECContext) error {
// read data into buffers
- for i := 0; i < DataShardsCount; i++ {
+ for i := 0; i < ctx.DataShards; i++ {
n, err := file.ReadAt(buffers[i], startOffset+blockSize*int64(i))
if err != nil {
if err != io.EOF {
@@ -186,7 +221,7 @@ func encodeDataOneBatch(file *os.File, enc reedsolomon.Encoder, startOffset, blo
return err
}
- for i := 0; i < TotalShardsCount; i++ {
+ for i := 0; i < ctx.Total(); i++ {
_, err := outputs[i].Write(buffers[i])
if err != nil {
return err
@@ -196,53 +231,57 @@ func encodeDataOneBatch(file *os.File, enc reedsolomon.Encoder, startOffset, blo
return nil
}
-func encodeDatFile(remainingSize int64, baseFileName string, bufferSize int, largeBlockSize int64, file *os.File, smallBlockSize int64) error {
+func encodeDatFile(remainingSize int64, baseFileName string, bufferSize int, largeBlockSize int64, file *os.File, smallBlockSize int64, ctx *ECContext) error {
var processedSize int64
- enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
+ enc, err := ctx.CreateEncoder()
if err != nil {
return fmt.Errorf("failed to create encoder: %w", err)
}
- buffers := make([][]byte, TotalShardsCount)
+ buffers := make([][]byte, ctx.Total())
for i := range buffers {
buffers[i] = make([]byte, bufferSize)
}
- outputs, err := openEcFiles(baseFileName, false)
+ outputs, err := openEcFiles(baseFileName, false, ctx)
defer closeEcFiles(outputs)
if err != nil {
return fmt.Errorf("failed to open ec files %s: %v", baseFileName, err)
}
- for remainingSize > largeBlockSize*DataShardsCount {
- err = encodeData(file, enc, processedSize, largeBlockSize, buffers, outputs)
+ // Pre-calculate row sizes to avoid redundant calculations in loops
+ largeRowSize := largeBlockSize * int64(ctx.DataShards)
+ smallRowSize := smallBlockSize * int64(ctx.DataShards)
+
+ for remainingSize >= largeRowSize {
+ err = encodeData(file, enc, processedSize, largeBlockSize, buffers, outputs, ctx)
if err != nil {
return fmt.Errorf("failed to encode large chunk data: %w", err)
}
- remainingSize -= largeBlockSize * DataShardsCount
- processedSize += largeBlockSize * DataShardsCount
+ remainingSize -= largeRowSize
+ processedSize += largeRowSize
}
for remainingSize > 0 {
- err = encodeData(file, enc, processedSize, smallBlockSize, buffers, outputs)
+ err = encodeData(file, enc, processedSize, smallBlockSize, buffers, outputs, ctx)
if err != nil {
return fmt.Errorf("failed to encode small chunk data: %w", err)
}
- remainingSize -= smallBlockSize * DataShardsCount
- processedSize += smallBlockSize * DataShardsCount
+ remainingSize -= smallRowSize
+ processedSize += smallRowSize
}
return nil
}
-func rebuildEcFiles(shardHasData []bool, inputFiles []*os.File, outputFiles []*os.File) error {
+func rebuildEcFiles(shardHasData []bool, inputFiles []*os.File, outputFiles []*os.File, ctx *ECContext) error {
- enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
+ enc, err := ctx.CreateEncoder()
if err != nil {
return fmt.Errorf("failed to create encoder: %w", err)
}
- buffers := make([][]byte, TotalShardsCount)
+ buffers := make([][]byte, ctx.Total())
for i := range buffers {
if shardHasData[i] {
buffers[i] = make([]byte, ErasureCodingSmallBlockSize)
@@ -254,7 +293,7 @@ func rebuildEcFiles(shardHasData []bool, inputFiles []*os.File, outputFiles []*o
for {
// read the input data from files
- for i := 0; i < TotalShardsCount; i++ {
+ for i := 0; i < ctx.Total(); i++ {
if shardHasData[i] {
n, _ := inputFiles[i].ReadAt(buffers[i], startOffset)
if n == 0 {
@@ -278,7 +317,7 @@ func rebuildEcFiles(shardHasData []bool, inputFiles []*os.File, outputFiles []*o
}
// write the data to output files
- for i := 0; i < TotalShardsCount; i++ {
+ for i := 0; i < ctx.Total(); i++ {
if !shardHasData[i] {
n, _ := outputFiles[i].WriteAt(buffers[i][:inputBufferDataSize], startOffset)
if inputBufferDataSize != n {
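To make the pre-calculated row sizes above concrete: each encoding pass consumes blockSize bytes of the .dat file per data shard, so the large-block loop advances in steps of largeBlockSize * DataShards and the remainder is finished in small rows. A toy calculation under the default 10+4 context (the 25GB file size is illustrative only):

package main

import "fmt"

func main() {
	const dataShards = 10
	const largeBlockSize = int64(1024 * 1024 * 1024) // ErasureCodingLargeBlockSize, 1GB
	const smallBlockSize = int64(1024 * 1024)        // ErasureCodingSmallBlockSize, 1MB

	largeRowSize := largeBlockSize * dataShards // 10GB of .dat consumed per large row
	smallRowSize := smallBlockSize * dataShards // 10MB of .dat consumed per small row

	// Hypothetical 25GB .dat file: two full large rows (20GB),
	// then the 5GB tail is encoded in 10MB small rows.
	remaining := int64(25) * largeBlockSize
	var largeRows, smallRows int
	for remaining >= largeRowSize { // >= matches the exact-boundary fix in this commit
		remaining -= largeRowSize
		largeRows++
	}
	for remaining > 0 { // the final small row may be partially padded
		remaining -= smallRowSize
		smallRows++
	}
	fmt.Println(largeRows, smallRows) // 2 512
}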
diff --git a/weed/storage/erasure_coding/ec_test.go b/weed/storage/erasure_coding/ec_test.go
index b1cc9c441..cbb20832c 100644
--- a/weed/storage/erasure_coding/ec_test.go
+++ b/weed/storage/erasure_coding/ec_test.go
@@ -23,7 +23,10 @@ func TestEncodingDecoding(t *testing.T) {
bufferSize := 50
baseFileName := "1"
- err := generateEcFiles(baseFileName, bufferSize, largeBlockSize, smallBlockSize)
+ // Create default EC context for testing
+ ctx := NewDefaultECContext("", 0)
+
+ err := generateEcFiles(baseFileName, bufferSize, largeBlockSize, smallBlockSize, ctx)
if err != nil {
t.Logf("generateEcFiles: %v", err)
}
@@ -33,16 +36,16 @@ func TestEncodingDecoding(t *testing.T) {
t.Logf("WriteSortedFileFromIdx: %v", err)
}
- err = validateFiles(baseFileName)
+ err = validateFiles(baseFileName, ctx)
if err != nil {
t.Logf("WriteSortedFileFromIdx: %v", err)
}
- removeGeneratedFiles(baseFileName)
+ removeGeneratedFiles(baseFileName, ctx)
}
-func validateFiles(baseFileName string) error {
+func validateFiles(baseFileName string, ctx *ECContext) error {
nm, err := readNeedleMap(baseFileName)
if err != nil {
return fmt.Errorf("readNeedleMap: %v", err)
@@ -60,7 +63,7 @@ func validateFiles(baseFileName string) error {
return fmt.Errorf("failed to stat dat file: %v", err)
}
- ecFiles, err := openEcFiles(baseFileName, true)
+ ecFiles, err := openEcFiles(baseFileName, true, ctx)
if err != nil {
return fmt.Errorf("error opening ec files: %w", err)
}
@@ -184,9 +187,9 @@ func readFromFile(file *os.File, data []byte, ecFileOffset int64) (err error) {
return
}
-func removeGeneratedFiles(baseFileName string) {
- for i := 0; i < DataShardsCount+ParityShardsCount; i++ {
- fname := fmt.Sprintf("%s.ec%02d", baseFileName, i)
+func removeGeneratedFiles(baseFileName string, ctx *ECContext) {
+ for i := 0; i < ctx.Total(); i++ {
+ fname := baseFileName + ctx.ToExt(i)
os.Remove(fname)
}
os.Remove(baseFileName + ".ecx")
diff --git a/weed/storage/erasure_coding/ec_volume.go b/weed/storage/erasure_coding/ec_volume.go
index 839428e7b..3e323163e 100644
--- a/weed/storage/erasure_coding/ec_volume.go
+++ b/weed/storage/erasure_coding/ec_volume.go
@@ -41,7 +41,8 @@ type EcVolume struct {
ecjFileAccessLock sync.Mutex
diskType types.DiskType
datFileSize int64
- ExpireAtSec uint64 //ec volume destroy time, calculated from the ec volume was created
+ ExpireAtSec uint64 //ec volume destroy time, calculated from the ec volume was created
+ ECContext *ECContext // EC encoding parameters
}
func NewEcVolume(diskType types.DiskType, dir string, dirIdx string, collection string, vid needle.VolumeId) (ev *EcVolume, err error) {
@@ -73,9 +74,32 @@ func NewEcVolume(diskType types.DiskType, dir string, dirIdx string, collection
ev.Version = needle.Version(volumeInfo.Version)
ev.datFileSize = volumeInfo.DatFileSize
ev.ExpireAtSec = volumeInfo.ExpireAtSec
+
+ // Initialize EC context from .vif if present; fallback to defaults
+ if volumeInfo.EcShardConfig != nil {
+ ds := int(volumeInfo.EcShardConfig.DataShards)
+ ps := int(volumeInfo.EcShardConfig.ParityShards)
+
+ // Validate shard counts to prevent zero or invalid values
+ if ds <= 0 || ps <= 0 || ds+ps > MaxShardCount {
+ glog.Warningf("Invalid EC config in VolumeInfo for volume %d (data=%d, parity=%d), using defaults", vid, ds, ps)
+ ev.ECContext = NewDefaultECContext(collection, vid)
+ } else {
+ ev.ECContext = &ECContext{
+ Collection: collection,
+ VolumeId: vid,
+ DataShards: ds,
+ ParityShards: ps,
+ }
+ glog.V(1).Infof("Loaded EC config from VolumeInfo for volume %d: %s", vid, ev.ECContext.String())
+ }
+ } else {
+ ev.ECContext = NewDefaultECContext(collection, vid)
+ }
} else {
glog.Warningf("vif file not found,volumeId:%d, filename:%s", vid, dataBaseFileName)
volume_info.SaveVolumeInfo(dataBaseFileName+".vif", &volume_server_pb.VolumeInfo{Version: uint32(ev.Version)})
+ ev.ECContext = NewDefaultECContext(collection, vid)
}
ev.ShardLocations = make(map[ShardId][]pb.ServerAddress)
@@ -260,7 +284,7 @@ func (ev *EcVolume) LocateEcShardNeedleInterval(version needle.Version, offset i
if ev.datFileSize > 0 {
// To get the correct LargeBlockRowsCount
// use datFileSize to calculate the shardSize to match the EC encoding logic.
- shardSize = ev.datFileSize / DataShardsCount
+ shardSize = ev.datFileSize / int64(ev.ECContext.DataShards)
}
// calculate the locations in the ec shards
intervals = LocateData(ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, shardSize, offset, types.Size(needle.GetActualSize(size, version)))
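The validation applied when an EC configuration is read back from a .vif file reduces to a small predicate. A self-contained sketch of the rule (the real code inlines this check in NewEcVolume and RebuildEcFiles and falls back to NewDefaultECContext when it fails):

package main

import "fmt"

const maxShardCount = 32 // mirrors MaxShardCount: ShardBits is a uint32 mask

// validEcConfig mirrors the .vif validation: both counts positive,
// and the total within the 32-shard limit.
func validEcConfig(dataShards, parityShards int) bool {
	return dataShards > 0 && parityShards > 0 && dataShards+parityShards <= maxShardCount
}

func main() {
	fmt.Println(validEcConfig(10, 4))  // true  - default layout
	fmt.Println(validEcConfig(20, 10)) // true  - custom ratio, 30 <= 32
	fmt.Println(validEcConfig(0, 4))   // false - would divide by zero downstream
	fmt.Println(validEcConfig(28, 6))  // false - 34 exceeds MaxShardCount
}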
diff --git a/weed/storage/erasure_coding/ec_volume_info.go b/weed/storage/erasure_coding/ec_volume_info.go
index 53b352168..4d34ccbde 100644
--- a/weed/storage/erasure_coding/ec_volume_info.go
+++ b/weed/storage/erasure_coding/ec_volume_info.go
@@ -87,7 +87,7 @@ func (ecInfo *EcVolumeInfo) Minus(other *EcVolumeInfo) *EcVolumeInfo {
// Copy shard sizes for remaining shards
retIndex := 0
- for shardId := ShardId(0); shardId < TotalShardsCount && retIndex < len(ret.ShardSizes); shardId++ {
+ for shardId := ShardId(0); shardId < ShardId(MaxShardCount) && retIndex < len(ret.ShardSizes); shardId++ {
if ret.ShardBits.HasShardId(shardId) {
if size, exists := ecInfo.GetShardSize(shardId); exists {
ret.ShardSizes[retIndex] = size
@@ -119,19 +119,28 @@ func (ecInfo *EcVolumeInfo) ToVolumeEcShardInformationMessage() (ret *master_pb.
type ShardBits uint32 // use bits to indicate the shard id, use 32 bits just for possible future extension
func (b ShardBits) AddShardId(id ShardId) ShardBits {
+ if id >= MaxShardCount {
+ return b // Reject out-of-range shard IDs
+ }
return b | (1 << id)
}
func (b ShardBits) RemoveShardId(id ShardId) ShardBits {
+ if id >= MaxShardCount {
+ return b // Reject out-of-range shard IDs
+ }
return b &^ (1 << id)
}
func (b ShardBits) HasShardId(id ShardId) bool {
+ if id >= MaxShardCount {
+ return false // Out-of-range shard IDs are never present
+ }
return b&(1<<id) > 0
}
func (b ShardBits) ShardIds() (ret []ShardId) {
- for i := ShardId(0); i < TotalShardsCount; i++ {
+ for i := ShardId(0); i < ShardId(MaxShardCount); i++ {
if b.HasShardId(i) {
ret = append(ret, i)
}
@@ -140,7 +149,7 @@ func (b ShardBits) ShardIds() (ret []ShardId) {
}
func (b ShardBits) ToUint32Slice() (ret []uint32) {
- for i := uint32(0); i < TotalShardsCount; i++ {
+ for i := uint32(0); i < uint32(MaxShardCount); i++ {
if b.HasShardId(ShardId(i)) {
ret = append(ret, i)
}
@@ -164,6 +173,8 @@ func (b ShardBits) Plus(other ShardBits) ShardBits {
}
func (b ShardBits) MinusParityShards() ShardBits {
+ // Removes parity shards from the bit mask
+ // Assumes default 10+4 EC layout where parity shards are IDs 10-13
for i := DataShardsCount; i < TotalShardsCount; i++ {
b = b.RemoveShardId(ShardId(i))
}
@@ -205,7 +216,7 @@ func (b ShardBits) IndexToShardId(index int) (shardId ShardId, found bool) {
}
currentIndex := 0
- for i := ShardId(0); i < TotalShardsCount; i++ {
+ for i := ShardId(0); i < ShardId(MaxShardCount); i++ {
if b.HasShardId(i) {
if currentIndex == index {
return i, true
@@ -234,7 +245,7 @@ func (ecInfo *EcVolumeInfo) resizeShardSizes(prevShardBits ShardBits) {
// Copy existing sizes to new positions based on current ShardBits
if len(ecInfo.ShardSizes) > 0 {
newIndex := 0
- for shardId := ShardId(0); shardId < TotalShardsCount && newIndex < expectedLength; shardId++ {
+ for shardId := ShardId(0); shardId < ShardId(MaxShardCount) && newIndex < expectedLength; shardId++ {
if ecInfo.ShardBits.HasShardId(shardId) {
// Try to find the size for this shard in the old array using previous ShardBits
if oldIndex, found := prevShardBits.ShardIdToIndex(shardId); found && oldIndex < len(ecInfo.ShardSizes) {
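Although the diff is truncated here, the ShardBits guardrails above give the bit mask simple, total semantics for all 32 possible shard ids. A standalone sketch mirroring the methods in ec_volume_info.go (types redeclared locally for illustration):

package main

import "fmt"

type ShardId uint8
type ShardBits uint32 // bit i set <=> shard i present; uint32 => MaxShardCount = 32

const MaxShardCount = 32

func (b ShardBits) AddShardId(id ShardId) ShardBits {
	if id >= MaxShardCount {
		return b // out-of-range ids are rejected instead of silently shifting
	}
	return b | (1 << id)
}

func (b ShardBits) HasShardId(id ShardId) bool {
	if id >= MaxShardCount {
		return false // out-of-range ids are never present
	}
	return b&(1<<id) > 0
}

func main() {
	var b ShardBits
	b = b.AddShardId(0).AddShardId(13).AddShardId(31) // 31 is valid: MaxShardCount is 32
	fmt.Println(b.HasShardId(13), b.HasShardId(31))   // true true
	b = b.AddShardId(40)                              // no-op: 40 >= MaxShardCount
	fmt.Println(b.HasShardId(40))                     // false
}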