aboutsummaryrefslogtreecommitdiff
path: root/weed/server/master_server.go
diff options
context:
space:
mode:
author: askeipx <askeipx@gmail.com> 2022-08-24 11:18:21 +0500
committer: GitHub <noreply@github.com> 2022-08-23 23:18:21 -0700
commit: 2e78a522ab3892debf9aefeb978177678ae52a83 (patch)
tree: 2f10264a407fb1361c91b3f6a6e9ef06712e477a /weed/server/master_server.go
parent: 762dc219a6175d5c81968d817d41a16fb3a1163f (diff)
downloadseaweedfs-2e78a522ab3892debf9aefeb978177678ae52a83.tar.xz
seaweedfs-2e78a522ab3892debf9aefeb978177678ae52a83.zip
remove old raft servers if they don't answer to pings for too long (#3398)
* remove old raft servers if they don't answer to pings for too long
  - add ping durations as options
  - rename ping fields
  - fix some todos
  - get masters through masterclient
  - raft remove server from leader
  - use raft servers to ping them
  - CheckMastersAlive for hashicorp raft only
* prepare blocking ping
* pass waitForReady as param
* pass waitForReady through all functions
* waitForReady works
* refactor
* remove unneeded params
* rollback unneeded changes
* fix
Diffstat (limited to 'weed/server/master_server.go')
-rw-r--r--weed/server/master_server.go33
1 files changed, 29 insertions, 4 deletions
diff --git a/weed/server/master_server.go b/weed/server/master_server.go
index fbc27e610..9adcafc6f 100644
--- a/weed/server/master_server.go
+++ b/weed/server/master_server.go
@@ -1,8 +1,8 @@
package weed_server
import (
+ "context"
"fmt"
- "github.com/seaweedfs/seaweedfs/weed/stats"
"net/http"
"net/http/httputil"
"net/url"
@@ -12,6 +12,8 @@ import (
"sync"
"time"
+ "github.com/seaweedfs/seaweedfs/weed/stats"
+
"github.com/seaweedfs/seaweedfs/weed/cluster"
"github.com/seaweedfs/seaweedfs/weed/pb"
@@ -242,7 +244,6 @@ func (ms *MasterServer) proxyToLeader(f http.HandlerFunc) http.HandlerFunc {
}
func (ms *MasterServer) startAdminScripts() {
-
v := util.GetViper()
adminScripts := v.GetString("master.maintenance.scripts")
if adminScripts == "" {
@@ -342,8 +343,10 @@ func (ms *MasterServer) OnPeerUpdate(update *master_pb.ClusterNodeUpdate, startF
peerAddress := pb.ServerAddress(update.Address)
peerName := string(peerAddress)
- isLeader := ms.Topo.HashicorpRaft.State() == hashicorpRaft.Leader
- if update.IsAdd && isLeader {
+ if ms.Topo.HashicorpRaft.State() != hashicorpRaft.Leader {
+ return
+ }
+ if update.IsAdd {
raftServerFound := false
for _, server := range ms.Topo.HashicorpRaft.GetConfiguration().Configuration().Servers {
if string(server.ID) == peerName {
@@ -356,5 +359,27 @@ func (ms *MasterServer) OnPeerUpdate(update *master_pb.ClusterNodeUpdate, startF
hashicorpRaft.ServerID(peerName),
hashicorpRaft.ServerAddress(peerAddress.ToGrpcAddress()), 0, 0)
}
+ } else {
+ pb.WithMasterClient(false, peerAddress, ms.grpcDialOption, true, func(client master_pb.SeaweedClient) error {
+ ctx, cancel := context.WithTimeout(context.TODO(), time.Minute*72)
+ defer cancel()
+ if _, err := client.Ping(ctx, &master_pb.PingRequest{Target: string(peerAddress), TargetType: cluster.MasterType}); err != nil {
+ glog.V(0).Infof("master %s didn't respond to pings. remove raft server", peerName)
+ if err := ms.MasterClient.WithClient(false, func(client master_pb.SeaweedClient) error {
+ _, err := client.RaftRemoveServer(context.Background(), &master_pb.RaftRemoveServerRequest{
+ Id: peerName,
+ Force: false,
+ })
+ return err
+ }); err != nil {
+ glog.Warningf("failed removing old raft server: %v", err)
+ return err
+ }
+ } else {
+ glog.V(0).Infof("master %s successfully responded to ping", peerName)
+ }
+
+ return nil
+ })
}
}