| field | value | timestamp |
|---|---|---|
| author | bingoohuang <bingoo.huang@gmail.com> | 2021-04-26 17:19:35 +0800 |
| committer | bingoohuang <bingoo.huang@gmail.com> | 2021-04-26 17:19:35 +0800 |
| commit | d861cbd81b75b6684c971ac00e33685e6575b833 (patch) | |
| tree | 301805fef4aa5d0096bfb1510536f7a009b661e7 /weed/shell/command_volume_fix_replication.go | |
| parent | 70da715d8d917527291b35fb069fac077d17b868 (diff) | |
| parent | 4ee58922eff61a5a4ca29c0b4829b097a498549e (diff) | |
| download | seaweedfs-d861cbd81b75b6684c971ac00e33685e6575b833.tar.xz, seaweedfs-d861cbd81b75b6684c971ac00e33685e6575b833.zip | |
Merge branch 'master' of https://github.com/bingoohuang/seaweedfs
Diffstat (limited to 'weed/shell/command_volume_fix_replication.go')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | weed/shell/command_volume_fix_replication.go | 369 |

1 file changed, 293 insertions, 76 deletions
```diff
diff --git a/weed/shell/command_volume_fix_replication.go b/weed/shell/command_volume_fix_replication.go
index 6f35dd5d2..538351fd0 100644
--- a/weed/shell/command_volume_fix_replication.go
+++ b/weed/shell/command_volume_fix_replication.go
@@ -2,9 +2,12 @@ package shell
 
 import (
     "context"
+    "flag"
     "fmt"
+    "github.com/chrislusf/seaweedfs/weed/storage/needle"
+    "github.com/chrislusf/seaweedfs/weed/storage/types"
     "io"
-    "math/rand"
+    "path/filepath"
     "sort"
 
     "github.com/chrislusf/seaweedfs/weed/operation"
@@ -18,6 +21,7 @@ func init() {
 }
 
 type commandVolumeFixReplication struct {
+    collectionPattern *string
 }
 
 func (c *commandVolumeFixReplication) Name() string {
@@ -27,16 +31,19 @@ func (c *commandVolumeFixReplication) Name() string {
 func (c *commandVolumeFixReplication) Help() string {
     return `add replicas to volumes that are missing replicas
 
-    This command file all under-replicated volumes, and find volume servers with free slots.
+    This command finds all over-replicated volumes. If found, it will purge the oldest copies and stop.
+
+    This command also finds all under-replicated volumes, and finds volume servers with free slots.
     If the free slots satisfy the replication requirement, the volume content is copied over and mounted.
 
-    volume.fix.replication -n # do not take action
-    volume.fix.replication # actually copying the volume files and mount the volume
+    volume.fix.replication -n                             # do not take action
+    volume.fix.replication                                # actually deleting or copying the volume files and mount the volume
+    volume.fix.replication -collectionPattern=important*  # fix any collections with prefix "important"
 
     Note:
-        * each time this will only add back one replica for one volume id. If there are multiple replicas
-          are missing, e.g. multiple volume servers are new, you may need to run this multiple times.
-        * do not run this too quick within seconds, since the new volume replica may take a few seconds
+        * each time this will only add back one replica for each volume id that is under replicated.
+          If there are multiple replicas are missing, e.g. replica count is > 2, you may need to run this multiple times.
+        * do not run this too quickly within seconds, since the new volume replica may take a few seconds
           to register itself to the master.
 
 `
@@ -44,81 +51,151 @@ func (c *commandVolumeFixReplication) Help() string {
 
 func (c *commandVolumeFixReplication) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
 
-    takeAction := true
-    if len(args) > 0 && args[0] == "-n" {
-        takeAction = false
+    if err = commandEnv.confirmIsLocked(); err != nil {
+        return
     }
 
-    var resp *master_pb.VolumeListResponse
-    ctx := context.Background()
-    err = commandEnv.MasterClient.WithClient(ctx, func(client master_pb.SeaweedClient) error {
-        resp, err = client.VolumeList(ctx, &master_pb.VolumeListRequest{})
-        return err
-    })
+    volFixReplicationCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
+    c.collectionPattern = volFixReplicationCommand.String("collectionPattern", "", "match with wildcard characters '*' and '?'")
+    skipChange := volFixReplicationCommand.Bool("n", false, "skip the changes")
+    if err = volFixReplicationCommand.Parse(args); err != nil {
+        return nil
+    }
+
+    takeAction := !*skipChange
+
+    // collect topology information
+    topologyInfo, _, err := collectTopologyInfo(commandEnv)
     if err != nil {
         return err
     }
 
     // find all volumes that needs replication
     // collect all data nodes
-    replicatedVolumeLocations := make(map[uint32][]location)
-    replicatedVolumeInfo := make(map[uint32]*master_pb.VolumeInformationMessage)
+    volumeReplicas, allLocations := collectVolumeReplicaLocations(topologyInfo)
+
+    if len(allLocations) == 0 {
+        return fmt.Errorf("no data nodes at all")
+    }
+
+    // find all under replicated volumes
+    var underReplicatedVolumeIds, overReplicatedVolumeIds []uint32
+    for vid, replicas := range volumeReplicas {
+        replica := replicas[0]
+        replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(replica.info.ReplicaPlacement))
+        if replicaPlacement.GetCopyCount() > len(replicas) {
+            underReplicatedVolumeIds = append(underReplicatedVolumeIds, vid)
+        } else if replicaPlacement.GetCopyCount() < len(replicas) {
+            overReplicatedVolumeIds = append(overReplicatedVolumeIds, vid)
+            fmt.Fprintf(writer, "volume %d replication %s, but over replicated %+d\n", replica.info.Id, replicaPlacement, len(replicas))
+        }
+    }
+
+    if len(overReplicatedVolumeIds) > 0 {
+        return c.fixOverReplicatedVolumes(commandEnv, writer, takeAction, overReplicatedVolumeIds, volumeReplicas, allLocations)
+    }
+
+    if len(underReplicatedVolumeIds) == 0 {
+        return nil
+    }
+
+    // find the most under populated data nodes
+    return c.fixUnderReplicatedVolumes(commandEnv, writer, takeAction, underReplicatedVolumeIds, volumeReplicas, allLocations)
+
+}
+
+func collectVolumeReplicaLocations(topologyInfo *master_pb.TopologyInfo) (map[uint32][]*VolumeReplica, []location) {
+    volumeReplicas := make(map[uint32][]*VolumeReplica)
     var allLocations []location
-    eachDataNode(resp.TopologyInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
+    eachDataNode(topologyInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
         loc := newLocation(dc, string(rack), dn)
-        for _, v := range dn.VolumeInfos {
-            if v.ReplicaPlacement > 0 {
-                replicatedVolumeLocations[v.Id] = append(replicatedVolumeLocations[v.Id], loc)
-                replicatedVolumeInfo[v.Id] = v
+        for _, diskInfo := range dn.DiskInfos {
+            for _, v := range diskInfo.VolumeInfos {
+                volumeReplicas[v.Id] = append(volumeReplicas[v.Id], &VolumeReplica{
+                    location: &loc,
+                    info:     v,
+                })
             }
         }
         allLocations = append(allLocations, loc)
     })
+    return volumeReplicas, allLocations
+}
 
-    // find all under replicated volumes
-    underReplicatedVolumeLocations := make(map[uint32][]location)
-    for vid, locations := range replicatedVolumeLocations {
-        volumeInfo := replicatedVolumeInfo[vid]
-        replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(volumeInfo.ReplicaPlacement))
-        if replicaPlacement.GetCopyCount() > len(locations) {
-            underReplicatedVolumeLocations[vid] = locations
+func (c *commandVolumeFixReplication) fixOverReplicatedVolumes(commandEnv *CommandEnv, writer io.Writer, takeAction bool, overReplicatedVolumeIds []uint32, volumeReplicas map[uint32][]*VolumeReplica, allLocations []location) error {
+    for _, vid := range overReplicatedVolumeIds {
+        replicas := volumeReplicas[vid]
+        replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(replicas[0].info.ReplicaPlacement))
+
+        replica := pickOneReplicaToDelete(replicas, replicaPlacement)
+
+        // check collection name pattern
+        if *c.collectionPattern != "" {
+            matched, err := filepath.Match(*c.collectionPattern, replica.info.Collection)
+            if err != nil {
+                return fmt.Errorf("match pattern %s with collection %s: %v", *c.collectionPattern, replica.info.Collection, err)
+            }
+            if !matched {
+                break
+            }
         }
-    }
 
-    if len(underReplicatedVolumeLocations) == 0 {
-        return fmt.Errorf("no under replicated volumes")
-    }
+        fmt.Fprintf(writer, "deleting volume %d from %s ...\n", replica.info.Id, replica.location.dataNode.Id)
+
+        if !takeAction {
+            break
+        }
+
+        if err := deleteVolume(commandEnv.option.GrpcDialOption, needle.VolumeId(replica.info.Id), replica.location.dataNode.Id); err != nil {
+            return fmt.Errorf("deleting volume %d from %s : %v", replica.info.Id, replica.location.dataNode.Id, err)
+        }
 
-    if len(allLocations) == 0 {
-        return fmt.Errorf("no data nodes at all")
     }
+    return nil
+}
 
-    // find the most under populated data nodes
-    keepDataNodesSorted(allLocations)
+func (c *commandVolumeFixReplication) fixUnderReplicatedVolumes(commandEnv *CommandEnv, writer io.Writer, takeAction bool, underReplicatedVolumeIds []uint32, volumeReplicas map[uint32][]*VolumeReplica, allLocations []location) error {
 
-    for vid, locations := range underReplicatedVolumeLocations {
-        volumeInfo := replicatedVolumeInfo[vid]
-        replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(volumeInfo.ReplicaPlacement))
+    for _, vid := range underReplicatedVolumeIds {
+        replicas := volumeReplicas[vid]
+        replica := pickOneReplicaToCopyFrom(replicas)
+        replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(replica.info.ReplicaPlacement))
         foundNewLocation := false
+        hasSkippedCollection := false
+        keepDataNodesSorted(allLocations, types.ToDiskType(replica.info.DiskType))
+        fn := capacityByFreeVolumeCount(types.ToDiskType(replica.info.DiskType))
         for _, dst := range allLocations {
             // check whether data nodes satisfy the constraints
-            if dst.dataNode.FreeVolumeCount > 0 && satisfyReplicaPlacement(replicaPlacement, locations, dst) {
+            if fn(dst.dataNode) > 0 && satisfyReplicaPlacement(replicaPlacement, replicas, dst) {
+                // check collection name pattern
+                if *c.collectionPattern != "" {
+                    matched, err := filepath.Match(*c.collectionPattern, replica.info.Collection)
+                    if err != nil {
+                        return fmt.Errorf("match pattern %s with collection %s: %v", *c.collectionPattern, replica.info.Collection, err)
+                    }
+                    if !matched {
+                        hasSkippedCollection = true
+                        break
+                    }
+                }
+
                 // ask the volume server to replicate the volume
-                sourceNodes := underReplicatedVolumeLocations[vid]
-                sourceNode := sourceNodes[rand.Intn(len(sourceNodes))]
                 foundNewLocation = true
-                fmt.Fprintf(writer, "replicating volume %d %s from %s to dataNode %s ...\n", volumeInfo.Id, replicaPlacement, sourceNode.dataNode.Id, dst.dataNode.Id)
+                fmt.Fprintf(writer, "replicating volume %d %s from %s to dataNode %s ...\n", replica.info.Id, replicaPlacement, replica.location.dataNode.Id, dst.dataNode.Id)
                 if !takeAction {
                     break
                 }
 
                 err := operation.WithVolumeServerClient(dst.dataNode.Id, commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
-                    _, replicateErr := volumeServerClient.VolumeCopy(ctx, &volume_server_pb.VolumeCopyRequest{
-                        VolumeId:       volumeInfo.Id,
-                        SourceDataNode: sourceNode.dataNode.Id,
+                    _, replicateErr := volumeServerClient.VolumeCopy(context.Background(), &volume_server_pb.VolumeCopyRequest{
+                        VolumeId:       replica.info.Id,
+                        SourceDataNode: replica.location.dataNode.Id,
                     })
-                    return replicateErr
+                    if replicateErr != nil {
+                        return fmt.Errorf("copying from %s => %s : %v", replica.location.dataNode.Id, dst.dataNode.Id, replicateErr)
+                    }
+                    return nil
                 })
 
                 if err != nil {
@@ -126,54 +203,152 @@ func (c *commandVolumeFixReplication) Do(args []string, commandEnv *CommandEnv,
                 }
 
                 // adjust free volume count
-                dst.dataNode.FreeVolumeCount--
-                keepDataNodesSorted(allLocations)
+                dst.dataNode.DiskInfos[replica.info.DiskType].FreeVolumeCount--
                 break
             }
         }
-        if !foundNewLocation {
-            fmt.Fprintf(writer, "failed to place volume %d replica as %s, existing:%+v\n", volumeInfo.Id, replicaPlacement, locations)
+
+        if !foundNewLocation && !hasSkippedCollection {
+            fmt.Fprintf(writer, "failed to place volume %d replica as %s, existing:%+v\n", replica.info.Id, replicaPlacement, len(replicas))
         }
 
     }
-
     return nil
 }
 
-func keepDataNodesSorted(dataNodes []location) {
+func keepDataNodesSorted(dataNodes []location, diskType types.DiskType) {
+    fn := capacityByFreeVolumeCount(diskType)
     sort.Slice(dataNodes, func(i, j int) bool {
-        return dataNodes[i].dataNode.FreeVolumeCount > dataNodes[j].dataNode.FreeVolumeCount
+        return fn(dataNodes[i].dataNode) > fn(dataNodes[j].dataNode)
     })
 }
 
-func satisfyReplicaPlacement(replicaPlacement *super_block.ReplicaPlacement, existingLocations []location, possibleLocation location) bool {
+/*
+  if on an existing data node {
+    return false
+  }
+  if different from existing dcs {
+    if lack on different dcs {
+      return true
+    }else{
+      return false
+    }
+  }
+  if not on primary dc {
+    return false
+  }
+  if different from existing racks {
+    if lack on different racks {
+      return true
+    }else{
+      return false
+    }
+  }
+  if not on primary rack {
+    return false
+  }
+  if lacks on same rack {
+    return true
+  } else {
+    return false
+  }
+*/
+func satisfyReplicaPlacement(replicaPlacement *super_block.ReplicaPlacement, replicas []*VolumeReplica, possibleLocation location) bool {
+
+    existingDataCenters, _, existingDataNodes := countReplicas(replicas)
+
+    if _, found := existingDataNodes[possibleLocation.String()]; found {
+        // avoid duplicated volume on the same data node
+        return false
+    }
 
-    existingDataCenters := make(map[string]bool)
-    existingRacks := make(map[string]bool)
-    existingDataNodes := make(map[string]bool)
-    for _, loc := range existingLocations {
-        existingDataCenters[loc.DataCenter()] = true
-        existingRacks[loc.Rack()] = true
-        existingDataNodes[loc.String()] = true
+    primaryDataCenters, _ := findTopKeys(existingDataCenters)
+
+    // ensure data center count is within limit
+    if _, found := existingDataCenters[possibleLocation.DataCenter()]; !found {
+        // different from existing dcs
+        if len(existingDataCenters) < replicaPlacement.DiffDataCenterCount+1 {
+            // lack on different dcs
+            return true
+        } else {
+            // adding this would go over the different dcs limit
+            return false
+        }
+    }
+    // now this is same as one of the existing data center
+    if !isAmong(possibleLocation.DataCenter(), primaryDataCenters) {
+        // not on one of the primary dcs
+        return false
     }
 
-    if replicaPlacement.DiffDataCenterCount >= len(existingDataCenters) {
-        // check dc, good if different from any existing data centers
-        _, found := existingDataCenters[possibleLocation.DataCenter()]
-        return !found
-    } else if replicaPlacement.DiffRackCount >= len(existingRacks) {
-        // check rack, good if different from any existing racks
-        _, found := existingRacks[possibleLocation.Rack()]
-        return !found
-    } else if replicaPlacement.SameRackCount >= len(existingDataNodes) {
-        // check data node, good if different from any existing data nodes
-        _, found := existingDataNodes[possibleLocation.String()]
-        return !found
+    // now this is one of the primary dcs
+    primaryDcRacks := make(map[string]int)
+    for _, replica := range replicas {
+        if replica.location.DataCenter() != possibleLocation.DataCenter() {
+            continue
+        }
+        primaryDcRacks[replica.location.Rack()] += 1
+    }
+    primaryRacks, _ := findTopKeys(primaryDcRacks)
+    sameRackCount := primaryDcRacks[possibleLocation.Rack()]
+
+    // ensure rack count is within limit
+    if _, found := primaryDcRacks[possibleLocation.Rack()]; !found {
+        // different from existing racks
+        if len(primaryDcRacks) < replicaPlacement.DiffRackCount+1 {
+            // lack on different racks
+            return true
+        } else {
+            // adding this would go over the different racks limit
+            return false
+        }
     }
+    // now this is same as one of the existing racks
+    if !isAmong(possibleLocation.Rack(), primaryRacks) {
+        // not on the primary rack
+        return false
+    }
+
+    // now this is on the primary rack
+
+    // different from existing data nodes
+    if sameRackCount < replicaPlacement.SameRackCount+1 {
+        // lack on same rack
+        return true
+    } else {
+        // adding this would go over the same data node limit
+        return false
+    }
+
+}
+
+func findTopKeys(m map[string]int) (topKeys []string, max int) {
+    for k, c := range m {
+        if max < c {
+            topKeys = topKeys[:0]
+            topKeys = append(topKeys, k)
+            max = c
+        } else if max == c {
+            topKeys = append(topKeys, k)
+        }
+    }
+    return
+}
 
+func isAmong(key string, keys []string) bool {
+    for _, k := range keys {
+        if k == key {
+            return true
+        }
+    }
     return false
 }
 
+type VolumeReplica struct {
+    location *location
+    info     *master_pb.VolumeInformationMessage
+}
+
 type location struct {
     dc       string
     rack     string
@@ -199,3 +374,45 @@ func (l location) Rack() string {
 func (l location) DataCenter() string {
     return l.dc
 }
+
+func pickOneReplicaToCopyFrom(replicas []*VolumeReplica) *VolumeReplica {
+    mostRecent := replicas[0]
+    for _, replica := range replicas {
+        if replica.info.ModifiedAtSecond > mostRecent.info.ModifiedAtSecond {
+            mostRecent = replica
+        }
+    }
+    return mostRecent
+}
+
+func countReplicas(replicas []*VolumeReplica) (diffDc, diffRack, diffNode map[string]int) {
+    diffDc = make(map[string]int)
+    diffRack = make(map[string]int)
+    diffNode = make(map[string]int)
+    for _, replica := range replicas {
+        diffDc[replica.location.DataCenter()] += 1
+        diffRack[replica.location.Rack()] += 1
+        diffNode[replica.location.String()] += 1
+    }
+    return
+}
+
+func pickOneReplicaToDelete(replicas []*VolumeReplica, replicaPlacement *super_block.ReplicaPlacement) *VolumeReplica {
+
+    sort.Slice(replicas, func(i, j int) bool {
+        a, b := replicas[i], replicas[j]
+        if a.info.CompactRevision != b.info.CompactRevision {
+            return a.info.CompactRevision < b.info.CompactRevision
+        }
+        if a.info.ModifiedAtSecond != b.info.ModifiedAtSecond {
+            return a.info.ModifiedAtSecond < b.info.ModifiedAtSecond
+        }
+        if a.info.Size != b.info.Size {
+            return a.info.Size < b.info.Size
+        }
+        return false
+    })
+
+    return replicas[0]
+
+}
```
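Both fixOverReplicatedVolumes and fixUnderReplicatedVolumes gate their work on the new -collectionPattern flag by calling Go's standard path/filepath.Match against each replica's collection name. A minimal standalone sketch of that matching behavior (the collection names below are made up for illustration):

```go
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// Same matching the command uses: '*' and '?' wildcards;
	// Match only returns an error for a malformed pattern.
	pattern := "important*"
	for _, collection := range []string{"important_docs", "important", "archive"} {
		matched, err := filepath.Match(pattern, collection)
		if err != nil {
			panic(err)
		}
		fmt.Printf("%s vs %s -> %v\n", pattern, collection, matched)
	}
	// Prints true, true, false. In the command itself an empty pattern
	// skips the check entirely, i.e. it acts as "no filter".
}
```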

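For over-replicated volumes, the change purges whichever replica sorts first by compact revision, then modification time, then size (pickOneReplicaToDelete). A small self-contained sketch of that ordering, using a simplified stand-in struct and invented server names rather than the real VolumeReplica and master_pb types:

```go
package main

import (
	"fmt"
	"sort"
)

// replicaInfo is a simplified stand-in for the fields the diff compares;
// the real code sorts []*VolumeReplica built from master_pb volume metadata.
type replicaInfo struct {
	Server           string
	CompactRevision  uint32
	ModifiedAtSecond int64
	Size             uint64
}

func main() {
	replicas := []replicaInfo{
		{"volA:8080", 3, 1619400000, 900},
		{"volB:8080", 2, 1619500000, 950}, // lowest compact revision -> sorts first
		{"volC:8080", 3, 1619300000, 800},
	}

	// Same ordering as pickOneReplicaToDelete in the diff:
	// CompactRevision, then ModifiedAtSecond, then Size.
	sort.Slice(replicas, func(i, j int) bool {
		a, b := replicas[i], replicas[j]
		if a.CompactRevision != b.CompactRevision {
			return a.CompactRevision < b.CompactRevision
		}
		if a.ModifiedAtSecond != b.ModifiedAtSecond {
			return a.ModifiedAtSecond < b.ModifiedAtSecond
		}
		if a.Size != b.Size {
			return a.Size < b.Size
		}
		return false
	})

	fmt.Println("replica picked for deletion:", replicas[0].Server) // volB:8080
}
```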