aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorchrislu <chris.lu@gmail.com>2025-10-30 20:00:02 -0700
committerchrislu <chris.lu@gmail.com>2025-10-30 20:00:02 -0700
commitd00308399d37870a6a93272b75a76ead1847848c (patch)
tree8c8b1908d06d913c706f2ec86c4750378a5e4b64
parente8e50ebc75202fccb6a0c3554a44a7514af65844 (diff)
downloadseaweedfs-d00308399d37870a6a93272b75a76ead1847848c.tar.xz
seaweedfs-d00308399d37870a6a93272b75a76ead1847848c.zip
Fix: Critical data race in MasterClient vidMap
Fixes a critical data race where resetVidMap() was writing to the vidMap pointer while other methods were reading it concurrently without synchronization. Changes: - Removed embedded *vidMap from MasterClient - Added vidMapLock (sync.RWMutex) to protect vidMap pointer access - Created safe accessor methods (GetLocations, GetDataCenter, etc.) - Updated all direct vidMap accesses to use thread-safe methods - Updated resetVidMap() to acquire write lock during pointer swap The vidMap already has internal locking for its operations, but this fix protects the vidMap pointer itself from concurrent read/write races. Verified with: go test -race ./weed/wdclient/... Impact: - Prevents potential panics from concurrent pointer access - No performance impact - uses RWMutex for read-heavy workloads - Maintains backward compatibility through wrapper methods
-rw-r--r--weed/shell/commands.go2
-rw-r--r--weed/wdclient/masterclient.go111
2 files changed, 100 insertions, 13 deletions
diff --git a/weed/shell/commands.go b/weed/shell/commands.go
index 62dcfd7f8..55a09e392 100644
--- a/weed/shell/commands.go
+++ b/weed/shell/commands.go
@@ -116,7 +116,7 @@ func (ce *CommandEnv) AdjustedUrl(location *filer_pb.Location) string {
}
func (ce *CommandEnv) GetDataCenter() string {
- return ce.MasterClient.DataCenter
+ return ce.MasterClient.GetDataCenter()
}
func parseFilerUrl(entryPath string) (filerServer string, filerPort int64, path string, err error) {
diff --git a/weed/wdclient/masterclient.go b/weed/wdclient/masterclient.go
index f3950bc37..b063171d7 100644
--- a/weed/wdclient/masterclient.go
+++ b/weed/wdclient/masterclient.go
@@ -35,10 +35,10 @@ type MasterClient struct {
masters pb.ServerDiscovery
grpcDialOption grpc.DialOption
- // TODO: CRITICAL - Data race: resetVidMap() writes to vidMap while other methods read concurrently
- // This embedded *vidMap should be changed to a private field protected by sync.RWMutex
- // See: https://github.com/seaweedfs/seaweedfs/issues/[ISSUE_NUMBER]
- *vidMap
+ // vidMap stores volume location mappings
+ // Protected by vidMapLock to prevent race conditions during pointer swaps in resetVidMap
+ vidMap *vidMap
+ vidMapLock sync.RWMutex
vidMapCacheSize int
OnPeerUpdate func(update *master_pb.ClusterNodeUpdate, startFrom time.Time)
OnPeerUpdateLock sync.RWMutex
@@ -96,16 +96,17 @@ func (mc *MasterClient) LookupFileIdWithFallback(ctx context.Context, fileId str
}
// Build HTTP URLs from locations, preferring same data center
+ dataCenter := mc.getVidMapDataCenter()
var sameDcUrls, otherDcUrls []string
for _, loc := range locations {
httpUrl := "http://" + loc.Url + "/" + fileId
- if mc.DataCenter != "" && mc.DataCenter == loc.DataCenter {
+ if dataCenter != "" && dataCenter == loc.DataCenter {
sameDcUrls = append(sameDcUrls, httpUrl)
} else {
otherDcUrls = append(otherDcUrls, httpUrl)
}
}
-
+
// Prefer same data center
fullUrls = append(sameDcUrls, otherDcUrls...)
return fullUrls, nil
@@ -351,7 +352,7 @@ func (mc *MasterClient) tryConnectToMaster(ctx context.Context, master pb.Server
if err = stream.Send(&master_pb.KeepConnectedRequest{
FilerGroup: mc.FilerGroup,
- DataCenter: mc.DataCenter,
+ DataCenter: mc.getVidMapDataCenter(),
Rack: mc.rack,
ClientType: mc.clientType,
ClientAddress: string(mc.clientHost),
@@ -482,15 +483,101 @@ func (mc *MasterClient) WithClientCustomGetMaster(getMasterF func() pb.ServerAdd
})
}
+// getVidMap safely retrieves the current vidMap
+func (mc *MasterClient) getVidMap() *vidMap {
+ mc.vidMapLock.RLock()
+ defer mc.vidMapLock.RUnlock()
+ return mc.vidMap
+}
+
+// getVidMapDataCenter safely retrieves the data center from vidMap
+func (mc *MasterClient) getVidMapDataCenter() string {
+ mc.vidMapLock.RLock()
+ defer mc.vidMapLock.RUnlock()
+ return mc.vidMap.DataCenter
+}
+
+// GetLocations safely retrieves volume locations
+func (mc *MasterClient) GetLocations(vid uint32) (locations []Location, found bool) {
+ mc.vidMapLock.RLock()
+ defer mc.vidMapLock.RUnlock()
+ return mc.vidMap.GetLocations(vid)
+}
+
+// GetVidLocations safely retrieves volume locations by string ID
+func (mc *MasterClient) GetVidLocations(vid string) (locations []Location, err error) {
+ mc.vidMapLock.RLock()
+ defer mc.vidMapLock.RUnlock()
+ return mc.vidMap.GetVidLocations(vid)
+}
+
+// LookupFileId safely looks up URLs for a file ID
+func (mc *MasterClient) LookupFileId(ctx context.Context, fileId string) (fullUrls []string, err error) {
+ mc.vidMapLock.RLock()
+ vm := mc.vidMap
+ mc.vidMapLock.RUnlock()
+ return vm.LookupFileId(ctx, fileId)
+}
+
+// GetLocationsClone safely retrieves a clone of volume locations
+func (mc *MasterClient) GetLocationsClone(vid uint32) (locations []Location, found bool) {
+ mc.vidMapLock.RLock()
+ defer mc.vidMapLock.RUnlock()
+ return mc.vidMap.GetLocationsClone(vid)
+}
+
+// LookupVolumeServerUrl safely looks up volume server URLs
+func (mc *MasterClient) LookupVolumeServerUrl(vid string) (serverUrls []string, err error) {
+ mc.vidMapLock.RLock()
+ defer mc.vidMapLock.RUnlock()
+ return mc.vidMap.LookupVolumeServerUrl(vid)
+}
+
+// GetDataCenter safely retrieves the data center
+func (mc *MasterClient) GetDataCenter() string {
+ return mc.getVidMapDataCenter()
+}
+
+// addLocation safely adds a location to vidMap
+func (mc *MasterClient) addLocation(vid uint32, location Location) {
+ mc.vidMapLock.RLock()
+ defer mc.vidMapLock.RUnlock()
+ mc.vidMap.addLocation(vid, location)
+}
+
+// deleteLocation safely deletes a location from vidMap
+func (mc *MasterClient) deleteLocation(vid uint32, location Location) {
+ mc.vidMapLock.RLock()
+ defer mc.vidMapLock.RUnlock()
+ mc.vidMap.deleteLocation(vid, location)
+}
+
+// addEcLocation safely adds an EC location to vidMap
+func (mc *MasterClient) addEcLocation(vid uint32, location Location) {
+ mc.vidMapLock.RLock()
+ defer mc.vidMapLock.RUnlock()
+ mc.vidMap.addEcLocation(vid, location)
+}
+
+// deleteEcLocation safely deletes an EC location from vidMap
+func (mc *MasterClient) deleteEcLocation(vid uint32, location Location) {
+ mc.vidMapLock.RLock()
+ defer mc.vidMapLock.RUnlock()
+ mc.vidMap.deleteEcLocation(vid, location)
+}
+
func (mc *MasterClient) resetVidMap() {
+ mc.vidMapLock.Lock()
+ defer mc.vidMapLock.Unlock()
+
tail := &vidMap{
- vid2Locations: mc.vid2Locations,
- ecVid2Locations: mc.ecVid2Locations,
- DataCenter: mc.DataCenter,
- cache: mc.cache,
+ vid2Locations: mc.vidMap.vid2Locations,
+ ecVid2Locations: mc.vidMap.ecVid2Locations,
+ DataCenter: mc.vidMap.DataCenter,
+ cache: mc.vidMap.cache,
}
- nvm := newVidMap(mc.DataCenter)
+ nvm := newVidMap(mc.vidMap.DataCenter)
nvm.cache = tail
mc.vidMap = nvm