about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: James Hartig <fastest963@gmail.com> 2020-09-21 22:41:38 -0400
committer: James Hartig <fastest963@gmail.com> 2020-09-21 22:41:38 -0400
commit: 91e4eca1e98cb5195346d90a2cc1fb9c92557213 (patch)
tree: b62a91ceacc83df49f2562eedfa024cc0a7ed4f8
parent: 9cdbfc1a4987bdb46f16ae37624ed69ef66778a9 (diff)
download: seaweedfs-91e4eca1e98cb5195346d90a2cc1fb9c92557213.tar.xz
seaweedfs-91e4eca1e98cb5195346d90a2cc1fb9c92557213.zip
Fix deadlock with KeepConnected and SendHeartbeat
There is a potential deadlock: if we are writing to a clientConn and it goes away, we end up stuck holding a read lock on clientChansLock. KeepConnected then cannot remove the client, since that requires a write lock on clientChansLock. This in turn backs up SendHeartbeat, because it can't get a read lock while a writer is waiting (Go's sync.RWMutex blocks new readers once a Lock call is pending).
-rw-r--r-- weed/server/master_grpc_server.go 10
1 files changed, 8 insertions, 2 deletions
diff --git a/weed/server/master_grpc_server.go b/weed/server/master_grpc_server.go
index f3a2ee013..692909a29 100644
--- a/weed/server/master_grpc_server.go
+++ b/weed/server/master_grpc_server.go
@@ -187,7 +187,8 @@ func (ms *MasterServer) KeepConnected(stream master_pb.Seaweed_KeepConnectedServ
peerAddress := findClientAddress(stream.Context(), req.GrpcPort)
- stopChan := make(chan bool)
+ // buffer by 1 so we don't end up getting stuck writing to stopChan forever
+ stopChan := make(chan bool, 1)
clientName, messageChan := ms.addClient(req.Name, peerAddress)
@@ -247,7 +248,12 @@ func (ms *MasterServer) addClient(clientType string, clientAddress string) (clie
clientName = clientType + "@" + clientAddress
glog.V(0).Infof("+ client %v", clientName)
- messageChan = make(chan *master_pb.VolumeLocation)
+ // we buffer this because otherwise we end up in a potential deadlock where
+ // the KeepConnected loop is no longer listening on this channel but we're
+ // trying to send to it in SendHeartbeat and so we can't lock the
+ // clientChansLock to remove the channel and we're stuck writing to it
+ // 100 is probably overkill
+ messageChan = make(chan *master_pb.VolumeLocation, 100)
ms.clientChansLock.Lock()
ms.clientChans[clientName] = messageChan