about summary refs log tree commit diff
path: root/pkg/driver/volume.go
diff options
context:
space:
mode:
author chrislusf <chris.lu@gmail.com> 2025-12-03 19:31:12 -0800
committer chrislusf <chris.lu@gmail.com> 2025-12-03 19:31:12 -0800
commit 454b9651450a8feecf5896ce3b2526a442a73e33 (patch)
tree 34a9dd4579cc72d01c4400adcedf7c56f445fc65 /pkg/driver/volume.go
parent a6742a3ec78ac67f4371d59e4ecd3d15878b940a (diff)
download seaweedfs-csi-driver-454b9651450a8feecf5896ce3b2526a442a73e33.tar.xz
seaweedfs-csi-driver-454b9651450a8feecf5896ce3b2526a442a73e33.zip
fix: add self-healing for volume mount failures after driver restart
This addresses issue #203 - CSI Driver Self-Healing for Volume Mount Failures.

Problem: When the CSI node driver restarts, the in-memory volume cache is lost. Kubelet then directly calls NodePublishVolume (skipping NodeStageVolume), which fails with a 'volume hasn't been staged yet' error.

Solution:
1. Added isStagingPathHealthy() to detect healthy vs stale/corrupted mounts
2. Added cleanupStaleStagingPath() to clean up stale mount points
3. Enhanced NodeStageVolume to clean up stale mounts before staging
4. Implemented self-healing in NodePublishVolume:
   - If staging path is healthy: rebuild volume cache from existing mount
   - If staging path is stale: clean up and re-stage automatically
5. Updated Volume.Unstage to handle rebuilt volumes without unmounter

Benefits:
- Automatic recovery after CSI driver restarts
- No manual intervention required (no kubelet/pod restarts needed)
- Handles both live and dead FUSE mount scenarios
- Backward compatible with normal operations

Fixes #203
Diffstat (limited to 'pkg/driver/volume.go')
-rw-r--r-- pkg/driver/volume.go 23
1 files changed, 18 insertions, 5 deletions
diff --git a/pkg/driver/volume.go b/pkg/driver/volume.go
index ab0dcd4..8abf88b 100644
--- a/pkg/driver/volume.go
+++ b/pkg/driver/volume.go
@@ -114,13 +114,26 @@ func (vol *Volume) Unpublish(targetPath string) error {
func (vol *Volume) Unstage(stagingTargetPath string) error {
glog.V(0).Infof("unmounting volume %s from %s", vol.VolumeId, stagingTargetPath)
- if vol.unmounter == nil {
- glog.Errorf("volume is not mounted: %s, path: %s", vol.VolumeId, stagingTargetPath)
- return nil
+ if stagingTargetPath != vol.StagedPath && vol.StagedPath != "" {
+ glog.Warningf("staging path %s differs for volume %s at %s", stagingTargetPath, vol.VolumeId, vol.StagedPath)
}
- if stagingTargetPath != vol.StagedPath {
- glog.Warningf("staging path %s differs for volume %s at %s", stagingTargetPath, vol.VolumeId, vol.StagedPath)
+ if vol.unmounter == nil {
+ // This can happen when the volume was rebuilt from an existing staging path
+ // after a CSI driver restart. In this case, we need to force unmount.
+ glog.Infof("volume %s has no unmounter (rebuilt from existing mount), using force unmount", vol.VolumeId)
+
+ // Try to unmount the staging path
+ if err := mountutil.Unmount(stagingTargetPath); err != nil {
+ glog.Warningf("error force unmounting volume %s: %v", vol.VolumeId, err)
+ }
+
+ // Clean up using mount utilities
+ if err := mount.CleanupMountPoint(stagingTargetPath, mountutil, true); err != nil {
+ glog.Warningf("error cleaning up mount point for volume %s: %v", vol.VolumeId, err)
+ }
+
+ return nil
}
if err := vol.unmounter.Unmount(); err != nil {