aboutsummaryrefslogtreecommitdiff
path: root/pkg/driver/volume.go
diff options
context:
space:
mode:
author: chrislusf <chris.lu@gmail.com> 2025-12-03 19:31:12 -0800
committer: Chris Lu <chrislusf@users.noreply.github.com> 2025-12-03 20:52:27 -0800
commit: e76bd693e2022ac71f857548b0919155ebb04ca9 (patch)
tree: 9ecb087da5b777f5119f77cc0991ce1bb2124b95 /pkg/driver/volume.go
parent: 56b4ae6398fda983484c77b5c549a8c6bceab339 (diff)
download: seaweedfs-csi-driver-e76bd693e2022ac71f857548b0919155ebb04ca9.tar.xz
seaweedfs-csi-driver-e76bd693e2022ac71f857548b0919155ebb04ca9.zip
fix: add self-healing for volume mount failures after driver restart
This addresses issue #203 - CSI Driver Self-Healing for Volume Mount Failures.

Problem: When the CSI node driver restarts, the in-memory volume cache is lost. Kubelet then directly calls NodePublishVolume (skipping NodeStageVolume), which fails with 'volume hasn't been staged yet' error.

Solution:
1. Added isStagingPathHealthy() to detect healthy vs stale/corrupted mounts
2. Added cleanupStaleStagingPath() to clean up stale mount points
3. Enhanced NodeStageVolume to clean up stale mounts before staging
4. Implemented self-healing in NodePublishVolume:
   - If staging path is healthy: rebuild volume cache from existing mount
   - If staging path is stale: clean up and re-stage automatically
5. Updated Volume.Unstage to handle rebuilt volumes without unmounter

Benefits:
- Automatic recovery after CSI driver restarts
- No manual intervention required (no kubelet/pod restarts needed)
- Handles both live and dead FUSE mount scenarios
- Backward compatible with normal operations

Fixes #203
Diffstat (limited to 'pkg/driver/volume.go')
-rw-r--r-- pkg/driver/volume.go 23
1 files changed, 18 insertions, 5 deletions
diff --git a/pkg/driver/volume.go b/pkg/driver/volume.go
index ab0dcd4..8abf88b 100644
--- a/pkg/driver/volume.go
+++ b/pkg/driver/volume.go
@@ -114,13 +114,26 @@ func (vol *Volume) Unpublish(targetPath string) error {
func (vol *Volume) Unstage(stagingTargetPath string) error {
glog.V(0).Infof("unmounting volume %s from %s", vol.VolumeId, stagingTargetPath)
- if vol.unmounter == nil {
- glog.Errorf("volume is not mounted: %s, path: %s", vol.VolumeId, stagingTargetPath)
- return nil
+ if stagingTargetPath != vol.StagedPath && vol.StagedPath != "" {
+ glog.Warningf("staging path %s differs for volume %s at %s", stagingTargetPath, vol.VolumeId, vol.StagedPath)
}
- if stagingTargetPath != vol.StagedPath {
- glog.Warningf("staging path %s differs for volume %s at %s", stagingTargetPath, vol.VolumeId, vol.StagedPath)
+ if vol.unmounter == nil {
+ // This can happen when the volume was rebuilt from an existing staging path
+ // after a CSI driver restart. In this case, we need to force unmount.
+ glog.Infof("volume %s has no unmounter (rebuilt from existing mount), using force unmount", vol.VolumeId)
+
+ // Try to unmount the staging path
+ if err := mountutil.Unmount(stagingTargetPath); err != nil {
+ glog.Warningf("error force unmounting volume %s: %v", vol.VolumeId, err)
+ }
+
+ // Clean up using mount utilities
+ if err := mount.CleanupMountPoint(stagingTargetPath, mountutil, true); err != nil {
+ glog.Warningf("error cleaning up mount point for volume %s: %v", vol.VolumeId, err)
+ }
+
+ return nil
}
if err := vol.unmounter.Unmount(); err != nil {