diff options
| author | chrislu <chris.lu@gmail.com> | 2025-10-26 20:28:47 -0700 |
|---|---|---|
| committer | chrislu <chris.lu@gmail.com> | 2025-10-26 20:28:47 -0700 |
| commit | 40f82aa806198976aa1a13c5eef3a05b73988457 (patch) | |
| tree | 13cfa62ec9449377a962562200b1292d5a356d6e | |
| parent | b5a6fa0cdf80b67df372d4fe73eff71a86b8f202 (diff) | |
| download | seaweedfs-40f82aa806198976aa1a13c5eef3a05b73988457.tar.xz seaweedfs-40f82aa806198976aa1a13c5eef3a05b73988457.zip | |
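Avoid misclassifying a local EC volume as distributed when os.Stat on its .dat file fails with an unexpected error (any error other than "not exist" is now treated as ".dat exists"); also standardize on unloading the EC volume before removing its files.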
| -rw-r--r-- | weed/storage/disk_location_ec.go | 37 |
1 file changed, 26 insertions, 11 deletions
diff --git a/weed/storage/disk_location_ec.go b/weed/storage/disk_location_ec.go index 0e57d791b..21bafeee3 100644 --- a/weed/storage/disk_location_ec.go +++ b/weed/storage/disk_location_ec.go @@ -13,7 +13,6 @@ import ( "github.com/seaweedfs/seaweedfs/weed/glog" "github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding" "github.com/seaweedfs/seaweedfs/weed/storage/needle" - "github.com/seaweedfs/seaweedfs/weed/util" ) var ( @@ -224,7 +223,15 @@ func (l *DiskLocation) loadAllEcShards() (err error) { prevCollection = "" } - datExists := util.FileExists(datFileName) + // Determine .dat presence robustly; unexpected errors are treated as "exists" + datExists := false + if _, err := os.Stat(datFileName); err == nil { + datExists = true + } else if !os.IsNotExist(err) { + glog.Warningf("Failed to stat .dat file %s: %v", datFileName, err) + // Safer to assume local .dat exists to avoid misclassifying as distributed EC + datExists = true + } // Validate EC volume if .dat file exists (incomplete EC encoding scenario) // This checks shard count, shard size consistency, and expected size vs .dat file @@ -232,8 +239,6 @@ func (l *DiskLocation) loadAllEcShards() (err error) { if datExists && !l.validateEcVolume(collection, volumeId) { glog.Warningf("Incomplete or invalid EC volume %d: .dat exists but validation failed, cleaning up EC files...", volumeId) l.removeEcVolumeFiles(collection, volumeId) - // Clean up any in-memory state. This does not delete files (already deleted by removeEcVolumeFiles). 
- l.unloadEcVolume(volumeId) reset() continue } @@ -243,12 +248,14 @@ func (l *DiskLocation) loadAllEcShards() (err error) { // If .dat is gone, log error but don't clean up (may be waiting for shards from other servers) if datExists { glog.Warningf("Failed to load EC shards for volume %d and .dat exists: %v, cleaning up EC files to use .dat...", volumeId, err) + // Unload first to release FDs, then remove files + l.unloadEcVolume(volumeId) l.removeEcVolumeFiles(collection, volumeId) } else { glog.Warningf("Failed to load EC shards for volume %d: %v (this may be normal for distributed EC volumes)", volumeId, err) + // Clean up any partially loaded in-memory state. This does not delete files. + l.unloadEcVolume(volumeId) } - // Clean up any partially loaded in-memory state. This does not delete files. - l.unloadEcVolume(volumeId) reset() continue } @@ -311,17 +318,25 @@ func (l *DiskLocation) checkOrphanedShards(shards []string, collection string, v if len(shards) == 0 || volumeId == 0 { return false } - + // Check if .dat file exists (incomplete encoding, not distributed EC) - // If .dat file exists, this is not a distributed EC volume, so cleanup the orphaned shards + // Use os.Stat for robust error handling; unexpected errors treated as "exists" baseFileName := erasure_coding.EcShardFileName(collection, l.Directory, int(volumeId)) datFileName := baseFileName + ".dat" - if util.FileExists(datFileName) { + + datExists := false + if _, err := os.Stat(datFileName); err == nil { + datExists = true + } else if !os.IsNotExist(err) { + glog.Warningf("Failed to stat .dat file %s: %v", datFileName, err) + // Safer to assume local .dat exists to avoid misclassifying as distributed EC + datExists = true + } + + if datExists { glog.Warningf("Found %d EC shards without .ecx file for volume %d (incomplete encoding interrupted before .ecx creation), cleaning up...", len(shards), volumeId) l.removeEcVolumeFiles(collection, volumeId) - // Clean up any in-memory state. 
This does not delete files (already deleted by removeEcVolumeFiles). - l.unloadEcVolume(volumeId) return true } return false |
