aboutsummaryrefslogtreecommitdiff
path: root/weed/storage/disk_location.go
diff options
context:
space:
mode:
author: Chris Lu <chrislusf@users.noreply.github.com>, 2025-10-26 22:48:58 -0700
committer: GitHub <noreply@github.com>, 2025-10-26 22:48:58 -0700
commit: 0813138d578fd63b928ec1afc9ddabb191657743 (patch)
tree: 19d92182af2c5946bdafd84ea7f679b9d43c8af2 /weed/storage/disk_location.go
parent: 824dcac3bf5b75fd4b74bf83d7b08895422d4374 (diff)
download: seaweedfs-0813138d578fd63b928ec1afc9ddabb191657743.tar.xz
seaweedfs-0813138d578fd63b928ec1afc9ddabb191657743.zip
Volume Server: handle incomplete ec encoding (#7384)
* handle incomplete ec encoding
* unit tests
* simplify, and better logs
* Update disk_location_ec.go

  When loadEcShards() fails partway through, some EC shards may already be loaded into the l.ecVolumes map in memory. The previous code only cleaned up filesystem files but left orphaned in-memory state, which could cause memory leaks and inconsistent state.
* address comments
* Performance: Avoid Double os.Stat() Call
* Platform Compatibility: Use filepath.Join
* in memory cleanup
* Update disk_location_ec.go
* refactor
* Added Shard Size Validation
* check ec shard sizes
* validate shard size
* calculate expected shard size
* refactoring
* minor
* fix shard directory
* 10GB sparse files can be slow or fail on non-sparse FS. Use 10MB to hit SmallBlockSize math (1MB shards) deterministically.
* grouping logic should be updated to use both collection and volumeId to ensure correctness
* unexpected error
* handle exceptions in tests; use constants
* The check for orphaned shards should be performed for the previous volume before resetting sameVolumeShards for the new volume.
* address comments
* Eliminated Redundant Parsing in checkOrphanedShards
* minor
* Avoid misclassifying local EC as distributed when .dat stat errors occur; also standardize unload-before-remove.
* fmt
* refactor
* refactor
* adjust to warning
Diffstat (limited to 'weed/storage/disk_location.go')
-rw-r--r-- weed/storage/disk_location.go 29
1 file changed, 19 insertions, 10 deletions
diff --git a/weed/storage/disk_location.go b/weed/storage/disk_location.go
index aac824318..e8c1d10e4 100644
--- a/weed/storage/disk_location.go
+++ b/weed/storage/disk_location.go
@@ -144,10 +144,26 @@ func (l *DiskLocation) loadExistingVolume(dirEntry os.DirEntry, needleMapKind Ne
return false
}
- // skip if ec volumes exists
+ // parse out collection, volume id (moved up to use in EC validation)
+ vid, collection, err := volumeIdFromFileName(basename)
+ if err != nil {
+ glog.Warningf("get volume id failed, %s, err : %s", volumeName, err)
+ return false
+ }
+
+ // skip if ec volumes exists, but validate EC files first
if skipIfEcVolumesExists {
- if util.FileExists(l.IdxDirectory + "/" + volumeName + ".ecx") {
- return false
+ ecxFilePath := filepath.Join(l.IdxDirectory, volumeName+".ecx")
+ if util.FileExists(ecxFilePath) {
+ // Check if EC volume is valid by verifying shard count
+ if !l.validateEcVolume(collection, vid) {
+ glog.Warningf("EC volume %d validation failed, removing incomplete EC files to allow .dat file loading", vid)
+ l.removeEcVolumeFiles(collection, vid)
+ // Continue to load .dat file
+ } else {
+ // Valid EC volume exists, skip .dat file
+ return false
+ }
}
}
@@ -161,13 +177,6 @@ func (l *DiskLocation) loadExistingVolume(dirEntry os.DirEntry, needleMapKind Ne
return false
}
- // parse out collection, volume id
- vid, collection, err := volumeIdFromFileName(basename)
- if err != nil {
- glog.Warningf("get volume id failed, %s, err : %s", volumeName, err)
- return false
- }
-
// avoid loading one volume more than once
l.volumesLock.RLock()
_, found := l.volumes[vid]