aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorchrislu <chrislu@roblox.com>2021-12-05 16:56:25 -0800
committerchrislu <chrislu@roblox.com>2021-12-05 16:56:25 -0800
commite6c026db65eacbfd7fb10fc95ca3b5452c3efebc (patch)
treec69ef50827b19160f800c1cdbaabc104e68fe7c8
parent53e2dee1775d971ebc217902a7560c60eb44c1ea (diff)
downloadseaweedfs-e6c026db65eacbfd7fb10fc95ca3b5452c3efebc.tar.xz
seaweedfs-e6c026db65eacbfd7fb10fc95ca3b5452c3efebc.zip
volume.fix.replication: fix misplaced volumes
fix https://github.com/chrislusf/seaweedfs/issues/2416
-rw-r--r--weed/shell/command_volume_fix_replication.go60
-rw-r--r--weed/shell/command_volume_fix_replication_test.go138
2 files changed, 194 insertions, 4 deletions
diff --git a/weed/shell/command_volume_fix_replication.go b/weed/shell/command_volume_fix_replication.go
index c003eea91..4a4ed5f9f 100644
--- a/weed/shell/command_volume_fix_replication.go
+++ b/weed/shell/command_volume_fix_replication.go
@@ -89,7 +89,7 @@ func (c *commandVolumeFixReplication) Do(args []string, commandEnv *CommandEnv,
}
// find all under replicated volumes
- var underReplicatedVolumeIds, overReplicatedVolumeIds []uint32
+ var underReplicatedVolumeIds, overReplicatedVolumeIds, misplacedVolumeIds []uint32
for vid, replicas := range volumeReplicas {
replica := replicas[0]
replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(replica.info.ReplicaPlacement))
@@ -98,11 +98,20 @@ func (c *commandVolumeFixReplication) Do(args []string, commandEnv *CommandEnv,
} else if replicaPlacement.GetCopyCount() < len(replicas) {
overReplicatedVolumeIds = append(overReplicatedVolumeIds, vid)
fmt.Fprintf(writer, "volume %d replication %s, but over replicated %+d\n", replica.info.Id, replicaPlacement, len(replicas))
+ } else if isMisplaced(replicas, replicaPlacement) {
+ misplacedVolumeIds = append(misplacedVolumeIds, vid)
+ fmt.Fprintf(writer, "volume %d replication %s is not well placed %+v\n", replica.info.Id, replicaPlacement, replicas)
}
}
if len(overReplicatedVolumeIds) > 0 {
- if err := c.fixOverReplicatedVolumes(commandEnv, writer, takeAction, overReplicatedVolumeIds, volumeReplicas, allLocations); err != nil {
+ if err := c.deleteOneVolume(commandEnv, writer, takeAction, overReplicatedVolumeIds, volumeReplicas, allLocations, pickOneReplicaToDelete); err != nil {
+ return err
+ }
+ }
+
+ if len(misplacedVolumeIds) > 0 {
+ if err := c.deleteOneVolume(commandEnv, writer, takeAction, misplacedVolumeIds, volumeReplicas, allLocations, pickOneMisplacedVolume); err != nil {
return err
}
}
@@ -171,12 +180,14 @@ func collectVolumeReplicaLocations(topologyInfo *master_pb.TopologyInfo) (map[ui
return volumeReplicas, allLocations
}
-func (c *commandVolumeFixReplication) fixOverReplicatedVolumes(commandEnv *CommandEnv, writer io.Writer, takeAction bool, overReplicatedVolumeIds []uint32, volumeReplicas map[uint32][]*VolumeReplica, allLocations []location) error {
+type SelectOneVolumeFunc func(replicas []*VolumeReplica, replicaPlacement *super_block.ReplicaPlacement) *VolumeReplica
+
+func (c *commandVolumeFixReplication) deleteOneVolume(commandEnv *CommandEnv, writer io.Writer, takeAction bool, overReplicatedVolumeIds []uint32, volumeReplicas map[uint32][]*VolumeReplica, allLocations []location, selectOneVolumeFn SelectOneVolumeFunc) error {
for _, vid := range overReplicatedVolumeIds {
replicas := volumeReplicas[vid]
replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(replicas[0].info.ReplicaPlacement))
- replica := pickOneReplicaToDelete(replicas, replicaPlacement)
+ replica := selectOneVolumeFn(replicas, replicaPlacement)
// check collection name pattern
if *c.collectionPattern != "" {
@@ -495,3 +506,44 @@ func pickOneReplicaToDelete(replicas []*VolumeReplica, replicaPlacement *super_b
return replicas[0]
}
+
+// check and fix misplaced volumes
+
+func isMisplaced(replicas []*VolumeReplica, replicaPlacement *super_block.ReplicaPlacement) bool {
+
+ for i := 0; i < len(replicas); i++ {
+ others := otherThan(replicas, i)
+ if satisfyReplicaPlacement(replicaPlacement, others, *replicas[i].location) {
+ return false
+ }
+ }
+
+ return true
+
+}
+
+func otherThan(replicas []*VolumeReplica, index int) (others []*VolumeReplica) {
+ for i := 0; i < len(replicas); i++ {
+ if index != i {
+ others = append(others, replicas[i])
+ }
+ }
+ return
+}
+
+func pickOneMisplacedVolume(replicas []*VolumeReplica, replicaPlacement *super_block.ReplicaPlacement) (toDelete *VolumeReplica) {
+
+ var deletionCandidates []*VolumeReplica
+ for i := 0; i < len(replicas); i++ {
+ others := otherThan(replicas, i)
+ if !isMisplaced(others, replicaPlacement) {
+ deletionCandidates = append(deletionCandidates, replicas[i])
+ }
+ }
+ if len(deletionCandidates) > 0 {
+ return pickOneReplicaToDelete(deletionCandidates, replicaPlacement)
+ }
+
+ return pickOneReplicaToDelete(replicas, replicaPlacement)
+
+}
diff --git a/weed/shell/command_volume_fix_replication_test.go b/weed/shell/command_volume_fix_replication_test.go
index 4d9cd8188..5212fd2ed 100644
--- a/weed/shell/command_volume_fix_replication_test.go
+++ b/weed/shell/command_volume_fix_replication_test.go
@@ -294,3 +294,141 @@ func runTests(tests []testcase, t *testing.T) {
}
}
}
+
+func TestMisplacedChecking(t *testing.T) {
+
+ var tests = []testcase{
+ {
+ name: "test 001",
+ replication: "001",
+ replicas: []*VolumeReplica{
+ {
+ location: &location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn1"}},
+ },
+ {
+ location: &location{"dc1", "r2", &master_pb.DataNodeInfo{Id: "dn2"}},
+ },
+ },
+ expected: true,
+ },
+ {
+ name: "test 010",
+ replication: "010",
+ replicas: []*VolumeReplica{
+ {
+ location: &location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn1"}},
+ },
+ {
+ location: &location{"dc1", "r2", &master_pb.DataNodeInfo{Id: "dn2"}},
+ },
+ },
+ expected: false,
+ },
+ {
+ name: "test 011",
+ replication: "011",
+ replicas: []*VolumeReplica{
+ {
+ location: &location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn1"}},
+ },
+ {
+ location: &location{"dc1", "r2", &master_pb.DataNodeInfo{Id: "dn2"}},
+ },
+ },
+ expected: false,
+ },
+ {
+ name: "test 110",
+ replication: "110",
+ replicas: []*VolumeReplica{
+ {
+ location: &location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn1"}},
+ },
+ {
+ location: &location{"dc1", "r2", &master_pb.DataNodeInfo{Id: "dn2"}},
+ },
+ },
+ expected: false,
+ },
+ {
+ name: "test 100",
+ replication: "100",
+ replicas: []*VolumeReplica{
+ {
+ location: &location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn1"}},
+ },
+ {
+ location: &location{"dc1", "r2", &master_pb.DataNodeInfo{Id: "dn2"}},
+ },
+ },
+ expected: true,
+ },
+ }
+
+ for _, tt := range tests {
+ replicaPlacement, _ := super_block.NewReplicaPlacementFromString(tt.replication)
+ println("replication:", tt.replication, "expected", tt.expected, "name:", tt.name)
+ if isMisplaced(tt.replicas, replicaPlacement) != tt.expected {
+ t.Errorf("%s: expect %v %v %+v",
+ tt.name, tt.expected, tt.replication, tt.replicas)
+ }
+ }
+
+}
+
+func TestPickingMisplacedVolumeToDelete(t *testing.T) {
+
+ var tests = []testcase{
+ {
+ name: "test 001",
+ replication: "001",
+ replicas: []*VolumeReplica{
+ {
+ location: &location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn1"}},
+ info: &master_pb.VolumeInformationMessage{
+ Size: 100,
+ },
+ },
+ {
+ location: &location{"dc1", "r2", &master_pb.DataNodeInfo{Id: "dn2"}},
+ info: &master_pb.VolumeInformationMessage{
+ Size: 99,
+ },
+ },
+ },
+ possibleLocation: location{"dc1", "r2", &master_pb.DataNodeInfo{Id: "dn2"}},
+ },
+ {
+ name: "test 100",
+ replication: "100",
+ replicas: []*VolumeReplica{
+ {
+ location: &location{"dc1", "r1", &master_pb.DataNodeInfo{Id: "dn1"}},
+ info: &master_pb.VolumeInformationMessage{
+ Size: 100,
+ },
+ },
+ {
+ location: &location{"dc1", "r2", &master_pb.DataNodeInfo{Id: "dn2"}},
+ info: &master_pb.VolumeInformationMessage{
+ Size: 99,
+ },
+ },
+ },
+ possibleLocation: location{"dc1", "r2", &master_pb.DataNodeInfo{Id: "dn2"}},
+ },
+ }
+
+ for _, tt := range tests {
+ replicaPlacement, _ := super_block.NewReplicaPlacementFromString(tt.replication)
+ println("replication:", tt.replication, "name:", tt.name)
+ if x := pickOneMisplacedVolume(tt.replicas, replicaPlacement); x.location.dataNode.Id != tt.possibleLocation.dataNode.Id {
+ t.Errorf("%s: picked %+v for replication %v",
+ tt.name, x.location.dataNode.Id, tt.replication)
+ } else {
+ t.Logf("%s: picked %+v %v",
+ tt.name, x.location.dataNode.Id, tt.replication)
+ }
+ }
+
+}