diff options
| author | askeipx <askeipx@gmail.com> | 2022-08-24 11:18:21 +0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-08-23 23:18:21 -0700 |
| commit | 2e78a522ab3892debf9aefeb978177678ae52a83 (patch) | |
| tree | 2f10264a407fb1361c91b3f6a6e9ef06712e477a /weed/server/master_server.go | |
| parent | 762dc219a6175d5c81968d817d41a16fb3a1163f (diff) | |
| download | seaweedfs-2e78a522ab3892debf9aefeb978177678ae52a83.tar.xz seaweedfs-2e78a522ab3892debf9aefeb978177678ae52a83.zip | |
remove old raft servers if they don't respond to pings for too long (#3398)
* remove old raft servers if they don't respond to pings for too long
add ping durations as options
rename ping fields
fix some todos
get masters through masterclient
raft remove server from leader
use raft servers to ping them
CheckMastersAlive for hashicorp raft only
* prepare blocking ping
* pass waitForReady as param
* pass waitForReady through all functions
* waitForReady works
* refactor
* remove unneeded params
* rollback unneeded changes
* fix
Diffstat (limited to 'weed/server/master_server.go')
| -rw-r--r-- | weed/server/master_server.go | 33 |
1 file changed, 29 insertions, 4 deletions
diff --git a/weed/server/master_server.go b/weed/server/master_server.go index fbc27e610..9adcafc6f 100644 --- a/weed/server/master_server.go +++ b/weed/server/master_server.go @@ -1,8 +1,8 @@ package weed_server import ( + "context" "fmt" - "github.com/seaweedfs/seaweedfs/weed/stats" "net/http" "net/http/httputil" "net/url" @@ -12,6 +12,8 @@ import ( "sync" "time" + "github.com/seaweedfs/seaweedfs/weed/stats" + "github.com/seaweedfs/seaweedfs/weed/cluster" "github.com/seaweedfs/seaweedfs/weed/pb" @@ -242,7 +244,6 @@ func (ms *MasterServer) proxyToLeader(f http.HandlerFunc) http.HandlerFunc { } func (ms *MasterServer) startAdminScripts() { - v := util.GetViper() adminScripts := v.GetString("master.maintenance.scripts") if adminScripts == "" { @@ -342,8 +343,10 @@ func (ms *MasterServer) OnPeerUpdate(update *master_pb.ClusterNodeUpdate, startF peerAddress := pb.ServerAddress(update.Address) peerName := string(peerAddress) - isLeader := ms.Topo.HashicorpRaft.State() == hashicorpRaft.Leader - if update.IsAdd && isLeader { + if ms.Topo.HashicorpRaft.State() != hashicorpRaft.Leader { + return + } + if update.IsAdd { raftServerFound := false for _, server := range ms.Topo.HashicorpRaft.GetConfiguration().Configuration().Servers { if string(server.ID) == peerName { @@ -356,5 +359,27 @@ func (ms *MasterServer) OnPeerUpdate(update *master_pb.ClusterNodeUpdate, startF hashicorpRaft.ServerID(peerName), hashicorpRaft.ServerAddress(peerAddress.ToGrpcAddress()), 0, 0) } + } else { + pb.WithMasterClient(false, peerAddress, ms.grpcDialOption, true, func(client master_pb.SeaweedClient) error { + ctx, cancel := context.WithTimeout(context.TODO(), time.Minute*72) + defer cancel() + if _, err := client.Ping(ctx, &master_pb.PingRequest{Target: string(peerAddress), TargetType: cluster.MasterType}); err != nil { + glog.V(0).Infof("master %s didn't respond to pings. 
remove raft server", peerName) + if err := ms.MasterClient.WithClient(false, func(client master_pb.SeaweedClient) error { + _, err := client.RaftRemoveServer(context.Background(), &master_pb.RaftRemoveServerRequest{ + Id: peerName, + Force: false, + }) + return err + }); err != nil { + glog.Warningf("failed removing old raft server: %v", err) + return err + } + } else { + glog.V(0).Infof("master %s successfully responded to ping", peerName) + } + + return nil + }) } } |
