about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: chrislu <chris.lu@gmail.com> 2025-10-26 20:28:47 -0700
committer: chrislu <chris.lu@gmail.com> 2025-10-26 20:28:47 -0700
commit: 40f82aa806198976aa1a13c5eef3a05b73988457 (patch)
tree: 13cfa62ec9449377a962562200b1292d5a356d6e
parent: b5a6fa0cdf80b67df372d4fe73eff71a86b8f202 (diff)
download: seaweedfs-40f82aa806198976aa1a13c5eef3a05b73988457.tar.xz
download: seaweedfs-40f82aa806198976aa1a13c5eef3a05b73988457.zip
Avoid misclassifying local EC as distributed when .dat stat errors occur; also standardize unload-before-remove.
-rw-r--r-- weed/storage/disk_location_ec.go 37
1 file changed, 26 insertions, 11 deletions
diff --git a/weed/storage/disk_location_ec.go b/weed/storage/disk_location_ec.go
index 0e57d791b..21bafeee3 100644
--- a/weed/storage/disk_location_ec.go
+++ b/weed/storage/disk_location_ec.go
@@ -13,7 +13,6 @@ import (
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
- "github.com/seaweedfs/seaweedfs/weed/util"
)
var (
@@ -224,7 +223,15 @@ func (l *DiskLocation) loadAllEcShards() (err error) {
prevCollection = ""
}
- datExists := util.FileExists(datFileName)
+ // Determine .dat presence robustly; unexpected errors are treated as "exists"
+ datExists := false
+ if _, err := os.Stat(datFileName); err == nil {
+ datExists = true
+ } else if !os.IsNotExist(err) {
+ glog.Warningf("Failed to stat .dat file %s: %v", datFileName, err)
+ // Safer to assume local .dat exists to avoid misclassifying as distributed EC
+ datExists = true
+ }
// Validate EC volume if .dat file exists (incomplete EC encoding scenario)
// This checks shard count, shard size consistency, and expected size vs .dat file
@@ -232,8 +239,6 @@ func (l *DiskLocation) loadAllEcShards() (err error) {
if datExists && !l.validateEcVolume(collection, volumeId) {
glog.Warningf("Incomplete or invalid EC volume %d: .dat exists but validation failed, cleaning up EC files...", volumeId)
l.removeEcVolumeFiles(collection, volumeId)
- // Clean up any in-memory state. This does not delete files (already deleted by removeEcVolumeFiles).
- l.unloadEcVolume(volumeId)
reset()
continue
}
@@ -243,12 +248,14 @@ func (l *DiskLocation) loadAllEcShards() (err error) {
// If .dat is gone, log error but don't clean up (may be waiting for shards from other servers)
if datExists {
glog.Warningf("Failed to load EC shards for volume %d and .dat exists: %v, cleaning up EC files to use .dat...", volumeId, err)
+ // Unload first to release FDs, then remove files
+ l.unloadEcVolume(volumeId)
l.removeEcVolumeFiles(collection, volumeId)
} else {
glog.Warningf("Failed to load EC shards for volume %d: %v (this may be normal for distributed EC volumes)", volumeId, err)
+ // Clean up any partially loaded in-memory state. This does not delete files.
+ l.unloadEcVolume(volumeId)
}
- // Clean up any partially loaded in-memory state. This does not delete files.
- l.unloadEcVolume(volumeId)
reset()
continue
}
@@ -311,17 +318,25 @@ func (l *DiskLocation) checkOrphanedShards(shards []string, collection string, v
if len(shards) == 0 || volumeId == 0 {
return false
}
-
+
// Check if .dat file exists (incomplete encoding, not distributed EC)
- // If .dat file exists, this is not a distributed EC volume, so cleanup the orphaned shards
+ // Use os.Stat for robust error handling; unexpected errors treated as "exists"
baseFileName := erasure_coding.EcShardFileName(collection, l.Directory, int(volumeId))
datFileName := baseFileName + ".dat"
- if util.FileExists(datFileName) {
+
+ datExists := false
+ if _, err := os.Stat(datFileName); err == nil {
+ datExists = true
+ } else if !os.IsNotExist(err) {
+ glog.Warningf("Failed to stat .dat file %s: %v", datFileName, err)
+ // Safer to assume local .dat exists to avoid misclassifying as distributed EC
+ datExists = true
+ }
+
+ if datExists {
glog.Warningf("Found %d EC shards without .ecx file for volume %d (incomplete encoding interrupted before .ecx creation), cleaning up...",
len(shards), volumeId)
l.removeEcVolumeFiles(collection, volumeId)
- // Clean up any in-memory state. This does not delete files (already deleted by removeEcVolumeFiles).
- l.unloadEcVolume(volumeId)
return true
}
return false