diff options
| author | chrislu <chris.lu@gmail.com> | 2025-10-30 20:00:02 -0700 |
|---|---|---|
| committer | chrislu <chris.lu@gmail.com> | 2025-10-30 20:00:02 -0700 |
| commit | d00308399d37870a6a93272b75a76ead1847848c (patch) | |
| tree | 8c8b1908d06d913c706f2ec86c4750378a5e4b64 | |
| parent | e8e50ebc75202fccb6a0c3554a44a7514af65844 (diff) | |
| download | seaweedfs-d00308399d37870a6a93272b75a76ead1847848c.tar.xz seaweedfs-d00308399d37870a6a93272b75a76ead1847848c.zip | |
Fix: Critical data race in MasterClient vidMap
Fixes a critical data race where resetVidMap() was writing to the vidMap
pointer while other methods were reading it concurrently without synchronization.
Changes:
- Removed embedded *vidMap from MasterClient
- Added vidMapLock (sync.RWMutex) to protect vidMap pointer access
- Created safe accessor methods (GetLocations, GetDataCenter, etc.)
- Updated all direct vidMap accesses to use thread-safe methods
- Updated resetVidMap() to acquire write lock during pointer swap
The vidMap already has internal locking for its operations, but this fix
protects the vidMap pointer itself from concurrent read/write races.
Verified with: go test -race ./weed/wdclient/...
Impact:
- Prevents potential panics from concurrent pointer access
- No performance impact - uses RWMutex for read-heavy workloads
- Maintains backward compatibility through wrapper methods
| -rw-r--r-- | weed/shell/commands.go | 2 | ||||
| -rw-r--r-- | weed/wdclient/masterclient.go | 111 |
2 files changed, 100 insertions, 13 deletions
diff --git a/weed/shell/commands.go b/weed/shell/commands.go index 62dcfd7f8..55a09e392 100644 --- a/weed/shell/commands.go +++ b/weed/shell/commands.go @@ -116,7 +116,7 @@ func (ce *CommandEnv) AdjustedUrl(location *filer_pb.Location) string { } func (ce *CommandEnv) GetDataCenter() string { - return ce.MasterClient.DataCenter + return ce.MasterClient.GetDataCenter() } func parseFilerUrl(entryPath string) (filerServer string, filerPort int64, path string, err error) { diff --git a/weed/wdclient/masterclient.go b/weed/wdclient/masterclient.go index f3950bc37..b063171d7 100644 --- a/weed/wdclient/masterclient.go +++ b/weed/wdclient/masterclient.go @@ -35,10 +35,10 @@ type MasterClient struct { masters pb.ServerDiscovery grpcDialOption grpc.DialOption - // TODO: CRITICAL - Data race: resetVidMap() writes to vidMap while other methods read concurrently - // This embedded *vidMap should be changed to a private field protected by sync.RWMutex - // See: https://github.com/seaweedfs/seaweedfs/issues/[ISSUE_NUMBER] - *vidMap + // vidMap stores volume location mappings + // Protected by vidMapLock to prevent race conditions during pointer swaps in resetVidMap + vidMap *vidMap + vidMapLock sync.RWMutex vidMapCacheSize int OnPeerUpdate func(update *master_pb.ClusterNodeUpdate, startFrom time.Time) OnPeerUpdateLock sync.RWMutex @@ -96,16 +96,17 @@ func (mc *MasterClient) LookupFileIdWithFallback(ctx context.Context, fileId str } // Build HTTP URLs from locations, preferring same data center + dataCenter := mc.getVidMapDataCenter() var sameDcUrls, otherDcUrls []string for _, loc := range locations { httpUrl := "http://" + loc.Url + "/" + fileId - if mc.DataCenter != "" && mc.DataCenter == loc.DataCenter { + if dataCenter != "" && dataCenter == loc.DataCenter { sameDcUrls = append(sameDcUrls, httpUrl) } else { otherDcUrls = append(otherDcUrls, httpUrl) } } - + // Prefer same data center fullUrls = append(sameDcUrls, otherDcUrls...) return fullUrls, nil @@ -351,7 +352,7 @@ func (mc *MasterClient) tryConnectToMaster(ctx context.Context, master pb.Server if err = stream.Send(&master_pb.KeepConnectedRequest{ FilerGroup: mc.FilerGroup, - DataCenter: mc.DataCenter, + DataCenter: mc.getVidMapDataCenter(), Rack: mc.rack, ClientType: mc.clientType, ClientAddress: string(mc.clientHost), @@ -482,15 +483,101 @@ func (mc *MasterClient) WithClientCustomGetMaster(getMasterF func() pb.ServerAdd }) } +// getVidMap safely retrieves the current vidMap +func (mc *MasterClient) getVidMap() *vidMap { + mc.vidMapLock.RLock() + defer mc.vidMapLock.RUnlock() + return mc.vidMap +} + +// getVidMapDataCenter safely retrieves the data center from vidMap +func (mc *MasterClient) getVidMapDataCenter() string { + mc.vidMapLock.RLock() + defer mc.vidMapLock.RUnlock() + return mc.vidMap.DataCenter +} + +// GetLocations safely retrieves volume locations +func (mc *MasterClient) GetLocations(vid uint32) (locations []Location, found bool) { + mc.vidMapLock.RLock() + defer mc.vidMapLock.RUnlock() + return mc.vidMap.GetLocations(vid) +} + +// GetVidLocations safely retrieves volume locations by string ID +func (mc *MasterClient) GetVidLocations(vid string) (locations []Location, err error) { + mc.vidMapLock.RLock() + defer mc.vidMapLock.RUnlock() + return mc.vidMap.GetVidLocations(vid) +} + +// LookupFileId safely looks up URLs for a file ID +func (mc *MasterClient) LookupFileId(ctx context.Context, fileId string) (fullUrls []string, err error) { + mc.vidMapLock.RLock() + vm := mc.vidMap + mc.vidMapLock.RUnlock() + return vm.LookupFileId(ctx, fileId) +} + +// GetLocationsClone safely retrieves a clone of volume locations +func (mc *MasterClient) GetLocationsClone(vid uint32) (locations []Location, found bool) { + mc.vidMapLock.RLock() + defer mc.vidMapLock.RUnlock() + return mc.vidMap.GetLocationsClone(vid) +} + +// LookupVolumeServerUrl safely looks up volume server URLs +func (mc *MasterClient) LookupVolumeServerUrl(vid string) (serverUrls []string, err error) { + mc.vidMapLock.RLock() + defer mc.vidMapLock.RUnlock() + return mc.vidMap.LookupVolumeServerUrl(vid) +} + +// GetDataCenter safely retrieves the data center +func (mc *MasterClient) GetDataCenter() string { + return mc.getVidMapDataCenter() +} + +// addLocation safely adds a location to vidMap +func (mc *MasterClient) addLocation(vid uint32, location Location) { + mc.vidMapLock.RLock() + defer mc.vidMapLock.RUnlock() + mc.vidMap.addLocation(vid, location) +} + +// deleteLocation safely deletes a location from vidMap +func (mc *MasterClient) deleteLocation(vid uint32, location Location) { + mc.vidMapLock.RLock() + defer mc.vidMapLock.RUnlock() + mc.vidMap.deleteLocation(vid, location) +} + +// addEcLocation safely adds an EC location to vidMap +func (mc *MasterClient) addEcLocation(vid uint32, location Location) { + mc.vidMapLock.RLock() + defer mc.vidMapLock.RUnlock() + mc.vidMap.addEcLocation(vid, location) +} + +// deleteEcLocation safely deletes an EC location from vidMap +func (mc *MasterClient) deleteEcLocation(vid uint32, location Location) { + mc.vidMapLock.RLock() + defer mc.vidMapLock.RUnlock() + mc.vidMap.deleteEcLocation(vid, location) +} + func (mc *MasterClient) resetVidMap() { + mc.vidMapLock.Lock() + defer mc.vidMapLock.Unlock() + tail := &vidMap{ - vid2Locations: mc.vid2Locations, - ecVid2Locations: mc.ecVid2Locations, - DataCenter: mc.DataCenter, - cache: mc.cache, + vid2Locations: mc.vidMap.vid2Locations, + ecVid2Locations: mc.vidMap.ecVid2Locations, + DataCenter: mc.vidMap.DataCenter, + cache: mc.vidMap.cache, } - nvm := newVidMap(mc.DataCenter) + nvm := newVidMap(mc.vidMap.DataCenter) nvm.cache = tail mc.vidMap = nvm |
