aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author chrislu <chris.lu@gmail.com> 2025-10-26 21:14:10 -0700
committer chrislu <chris.lu@gmail.com> 2025-10-26 21:14:10 -0700
commit a348381a5178c61e48d15cabacdaccefe5d4cb22 (patch)
tree 1bfcb609dd0db888b049959d4373c93a29fc52f3
parent 859a30bbd8e5d71b8f2e357912455315f531dc36 (diff)
download seaweedfs-a348381a5178c61e48d15cabacdaccefe5d4cb22.tar.xz
seaweedfs-a348381a5178c61e48d15cabacdaccefe5d4cb22.zip
refactor
-rw-r--r-- weed/storage/disk_location_ec.go 75
1 files changed, 40 insertions, 35 deletions
diff --git a/weed/storage/disk_location_ec.go b/weed/storage/disk_location_ec.go
index 23fec42b6..34092ed50 100644
--- a/weed/storage/disk_location_ec.go
+++ b/weed/storage/disk_location_ec.go
@@ -220,41 +220,7 @@ func (l *DiskLocation) loadAllEcShards() (err error) {
}
if ext == ".ecx" && volumeId == prevVolumeId && collection == prevCollection {
- // Check if this is an incomplete EC encoding (not a distributed EC volume)
- // Key distinction: if .dat file still exists, EC encoding may have failed
- // If .dat file is gone, this is likely a distributed EC volume with shards on multiple servers
- baseFileName := erasure_coding.EcShardFileName(collection, l.Directory, int(volumeId))
- datFileName := baseFileName + ".dat"
-
- // Determine .dat presence robustly; unexpected errors are treated as "exists"
- datExists := l.checkDatFileExists(datFileName)
-
- // Validate EC volume if .dat file exists (incomplete EC encoding scenario)
- // This checks shard count, shard size consistency, and expected size vs .dat file
- // If .dat is gone, EC encoding completed and shards are distributed across servers
- if datExists && !l.validateEcVolume(collection, volumeId) {
- glog.Warningf("Incomplete or invalid EC volume %d: .dat exists but validation failed, cleaning up EC files...", volumeId)
- l.removeEcVolumeFiles(collection, volumeId)
- reset()
- continue
- }
-
- if err = l.loadEcShards(sameVolumeShards, collection, volumeId); err != nil {
- // If EC shards failed to load and .dat still exists, clean up EC files to allow .dat file to be used
- // If .dat is gone, log error but don't clean up (may be waiting for shards from other servers)
- if datExists {
- glog.Warningf("Failed to load EC shards for volume %d and .dat exists: %v, cleaning up EC files to use .dat...", volumeId, err)
- // Unload first to release FDs, then remove files
- l.unloadEcVolume(volumeId)
- l.removeEcVolumeFiles(collection, volumeId)
- } else {
- glog.Warningf("Failed to load EC shards for volume %d: %v (this may be normal for distributed EC volumes)", volumeId, err)
- // Clean up any partially loaded in-memory state. This does not delete files.
- l.unloadEcVolume(volumeId)
- }
- reset()
- continue
- }
+ l.handleFoundEcxFile(sameVolumeShards, collection, volumeId)
reset()
continue
}
@@ -307,6 +273,45 @@ func (l *DiskLocation) EcShardCount() int {
return shardCount
}
+// handleFoundEcxFile validates and then loads the EC shard files gathered for one volume once its .ecx file is found.
+// It returns nothing: every failure is logged as a warning and cleaned up here, so the caller simply moves on.
+func (l *DiskLocation) handleFoundEcxFile(shards []string, collection string, volumeId needle.VolumeId) {
+ // Distinguish an incomplete local EC encoding from a distributed EC volume.
+ // Key distinction: if the .dat file still exists, EC encoding may have failed part-way.
+ // If the .dat file is gone, this is likely a distributed EC volume with shards on multiple servers.
+ baseFileName := erasure_coding.EcShardFileName(collection, l.Directory, int(volumeId))
+ datFileName := baseFileName + ".dat"
+
+ // Determine .dat presence robustly; unexpected errors are treated as "exists".
+ datExists := l.checkDatFileExists(datFileName)
+
+ // Validate the EC volume only when the .dat file exists (incomplete EC encoding scenario).
+ // validateEcVolume is expected to check shard count, shard size consistency, and expected size vs the .dat file.
+ // If .dat is gone, validation is skipped: encoding is assumed complete and shards may live on other servers.
+ if datExists && !l.validateEcVolume(collection, volumeId) {
+ glog.Warningf("Incomplete or invalid EC volume %d: .dat exists but validation failed, cleaning up EC files...", volumeId)
+ l.removeEcVolumeFiles(collection, volumeId)
+ return
+ }
+
+ // Attempt to load the EC shards; err is scoped to this if statement and is only logged, never propagated.
+ if err := l.loadEcShards(shards, collection, volumeId); err != nil {
+ // If the EC shards failed to load and .dat still exists, remove the EC files so the .dat file can be used.
+ // If .dat is gone, log the error but keep the files (may be waiting for shards from other servers).
+ if datExists {
+ glog.Warningf("Failed to load EC shards for volume %d and .dat exists: %v, cleaning up EC files to use .dat...", volumeId, err)
+ // Unload first to release FDs, then remove files.
+ l.unloadEcVolume(volumeId)
+ l.removeEcVolumeFiles(collection, volumeId)
+ } else {
+ glog.Warningf("Failed to load EC shards for volume %d: %v (this may be normal for distributed EC volumes)", volumeId, err)
+ // Clean up any partially loaded in-memory state. This does not delete files.
+ l.unloadEcVolume(volumeId)
+ }
+ return
+ }
+}
+
// checkDatFileExists checks if .dat file exists with robust error handling.
// Unexpected errors (permission, I/O) are treated as "exists" to avoid misclassifying
// local EC as distributed EC, which is the safer fallback.