diff options
Diffstat (limited to 'weed/shell/command_volume_server_evacuate.go')
| -rw-r--r-- | weed/shell/command_volume_server_evacuate.go | 221 |
1 files changed, 221 insertions, 0 deletions
diff --git a/weed/shell/command_volume_server_evacuate.go b/weed/shell/command_volume_server_evacuate.go new file mode 100644 index 000000000..f21d0334c --- /dev/null +++ b/weed/shell/command_volume_server_evacuate.go @@ -0,0 +1,221 @@ +package shell + +import ( + "flag" + "fmt" + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" + "github.com/chrislusf/seaweedfs/weed/storage/types" + "io" + "os" + "sort" +) + +func init() { + Commands = append(Commands, &commandVolumeServerEvacuate{}) +} + +type commandVolumeServerEvacuate struct { +} + +func (c *commandVolumeServerEvacuate) Name() string { + return "volumeServer.evacuate" +} + +func (c *commandVolumeServerEvacuate) Help() string { + return `move out all data on a volume server + + volumeServer.evacuate -node <host:port> + + This command moves all data away from the volume server. + The volumes on the volume servers will be redistributed. + + Usually this is used to prepare to shutdown or upgrade the volume server. + + Sometimes a volume can not be moved because there are no + good destination to meet the replication requirement. + E.g. a volume replication 001 in a cluster with 2 volume servers can not be moved. + You can use "-skipNonMoveable" to move the rest volumes. + +` +} + +func (c *commandVolumeServerEvacuate) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) { + + if err = commandEnv.confirmIsLocked(); err != nil { + return + } + + vsEvacuateCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError) + volumeServer := vsEvacuateCommand.String("node", "", "<host>:<port> of the volume server") + skipNonMoveable := vsEvacuateCommand.Bool("skipNonMoveable", false, "skip volumes that can not be moved") + applyChange := vsEvacuateCommand.Bool("force", false, "actually apply the changes") + if err = vsEvacuateCommand.Parse(args); err != nil { + return nil + } + + if *volumeServer == "" { + return fmt.Errorf("need to specify volume server by -node=<host>:<port>") + } + + return volumeServerEvacuate(commandEnv, *volumeServer, *skipNonMoveable, *applyChange, writer) + +} + +func volumeServerEvacuate(commandEnv *CommandEnv, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) (err error) { + // 1. confirm the volume server is part of the cluster + // 2. collect all other volume servers, sort by empty slots + // 3. move to any other volume server as long as it satisfy the replication requirements + + // list all the volumes + // collect topology information + topologyInfo, _, err := collectTopologyInfo(commandEnv) + if err != nil { + return err + } + + if err := evacuateNormalVolumes(commandEnv, topologyInfo, volumeServer, skipNonMoveable, applyChange, writer); err != nil { + return err + } + + if err := evacuateEcVolumes(commandEnv, topologyInfo, volumeServer, skipNonMoveable, applyChange, writer); err != nil { + return err + } + + return nil +} + +func evacuateNormalVolumes(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { + // find this volume server + volumeServers := collectVolumeServersByDc(topologyInfo, "") + thisNode, otherNodes := nodesOtherThan(volumeServers, volumeServer) + if thisNode == nil { + return fmt.Errorf("%s is not found in this cluster", volumeServer) + } + + // move away normal volumes + volumeReplicas, _ := collectVolumeReplicaLocations(topologyInfo) + for _, diskInfo := range thisNode.info.DiskInfos { + for _, vol := range diskInfo.VolumeInfos { + hasMoved, err := moveAwayOneNormalVolume(commandEnv, volumeReplicas, vol, thisNode, otherNodes, applyChange) + if err != nil { + return fmt.Errorf("move away volume %d from %s: %v", vol.Id, volumeServer, err) + } + if !hasMoved { + if skipNonMoveable { + replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(vol.ReplicaPlacement)) + fmt.Fprintf(writer, "skipping non moveable volume %d replication:%s\n", vol.Id, replicaPlacement.String()) + } else { + return fmt.Errorf("failed to move volume %d from %s", vol.Id, volumeServer) + } + } + } + } + return nil +} + +func evacuateEcVolumes(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error { + // find this ec volume server + ecNodes, _ := collectEcVolumeServersByDc(topologyInfo, "") + thisNode, otherNodes := ecNodesOtherThan(ecNodes, volumeServer) + if thisNode == nil { + return fmt.Errorf("%s is not found in this cluster\n", volumeServer) + } + + // move away ec volumes + for _, diskInfo := range thisNode.info.DiskInfos { + for _, ecShardInfo := range diskInfo.EcShardInfos { + hasMoved, err := moveAwayOneEcVolume(commandEnv, ecShardInfo, thisNode, otherNodes, applyChange) + if err != nil { + return fmt.Errorf("move away volume %d from %s: %v", ecShardInfo.Id, volumeServer, err) + } + if !hasMoved { + if skipNonMoveable { + fmt.Fprintf(writer, "failed to move away ec volume %d from %s\n", ecShardInfo.Id, volumeServer) + } else { + return fmt.Errorf("failed to move away ec volume %d from %s", ecShardInfo.Id, volumeServer) + } + } + } + } + return nil +} + +func moveAwayOneEcVolume(commandEnv *CommandEnv, ecShardInfo *master_pb.VolumeEcShardInformationMessage, thisNode *EcNode, otherNodes []*EcNode, applyChange bool) (hasMoved bool, err error) { + + for _, shardId := range erasure_coding.ShardBits(ecShardInfo.EcIndexBits).ShardIds() { + + sort.Slice(otherNodes, func(i, j int) bool { + return otherNodes[i].localShardIdCount(ecShardInfo.Id) < otherNodes[j].localShardIdCount(ecShardInfo.Id) + }) + + for i := 0; i < len(otherNodes); i++ { + emptyNode := otherNodes[i] + collectionPrefix := "" + if ecShardInfo.Collection != "" { + collectionPrefix = ecShardInfo.Collection + "_" + } + fmt.Fprintf(os.Stdout, "moving ec volume %s%d.%d %s => %s\n", collectionPrefix, ecShardInfo.Id, shardId, thisNode.info.Id, emptyNode.info.Id) + err = moveMountedShardToEcNode(commandEnv, thisNode, ecShardInfo.Collection, needle.VolumeId(ecShardInfo.Id), shardId, emptyNode, applyChange) + if err != nil { + return + } else { + hasMoved = true + break + } + } + if !hasMoved { + return + } + } + + return +} + +func moveAwayOneNormalVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, vol *master_pb.VolumeInformationMessage, thisNode *Node, otherNodes []*Node, applyChange bool) (hasMoved bool, err error) { + fn := capacityByFreeVolumeCount(types.ToDiskType(vol.DiskType)) + for _, n := range otherNodes { + n.selectVolumes(func(v *master_pb.VolumeInformationMessage) bool { + return v.DiskType == vol.DiskType + }) + } + sort.Slice(otherNodes, func(i, j int) bool { + return otherNodes[i].localVolumeRatio(fn) > otherNodes[j].localVolumeRatio(fn) + }) + + for i := 0; i < len(otherNodes); i++ { + emptyNode := otherNodes[i] + hasMoved, err = maybeMoveOneVolume(commandEnv, volumeReplicas, thisNode, vol, emptyNode, applyChange) + if err != nil { + return + } + if hasMoved { + break + } + } + return +} + +func nodesOtherThan(volumeServers []*Node, thisServer string) (thisNode *Node, otherNodes []*Node) { + for _, node := range volumeServers { + if node.info.Id == thisServer { + thisNode = node + continue + } + otherNodes = append(otherNodes, node) + } + return +} + +func ecNodesOtherThan(volumeServers []*EcNode, thisServer string) (thisNode *EcNode, otherNodes []*EcNode) { + for _, node := range volumeServers { + if node.info.Id == thisServer { + thisNode = node + continue + } + otherNodes = append(otherNodes, node) + } + return +} |
