path: root/weed/topology
author    bingoohuang <bingoo.huang@gmail.com>    2021-04-26 17:19:35 +0800
committer bingoohuang <bingoo.huang@gmail.com>    2021-04-26 17:19:35 +0800
commit    d861cbd81b75b6684c971ac00e33685e6575b833 (patch)
tree      301805fef4aa5d0096bfb1510536f7a009b661e7 /weed/topology
parent    70da715d8d917527291b35fb069fac077d17b868 (diff)
parent    4ee58922eff61a5a4ca29c0b4829b097a498549e (diff)
Merge branch 'master' of https://github.com/bingoohuang/seaweedfs
Diffstat (limited to 'weed/topology')
-rw-r--r--  weed/topology/allocate_volume.go           1
-rw-r--r--  weed/topology/collection.go               28
-rw-r--r--  weed/topology/data_center.go              15
-rw-r--r--  weed/topology/data_node.go               232
-rw-r--r--  weed/topology/data_node_ec.go            129
-rw-r--r--  weed/topology/disk.go                    270
-rw-r--r--  weed/topology/disk_ec.go                  84
-rw-r--r--  weed/topology/node.go                    205
-rw-r--r--  weed/topology/rack.go                     20
-rw-r--r--  weed/topology/store_replicate.go          84
-rw-r--r--  weed/topology/topology.go                 79
-rw-r--r--  weed/topology/topology_ec.go               3
-rw-r--r--  weed/topology/topology_event_handling.go  23
-rw-r--r--  weed/topology/topology_map.go             16
-rw-r--r--  weed/topology/topology_test.go            68
-rw-r--r--  weed/topology/topology_vacuum.go          62
-rw-r--r--  weed/topology/volume_growth.go            68
-rw-r--r--  weed/topology/volume_growth_test.go      219
-rw-r--r--  weed/topology/volume_layout.go           181
-rw-r--r--  weed/topology/volume_layout_test.go      116
-rw-r--r--  weed/topology/volume_location_list.go     13
21 files changed, 1455 insertions, 461 deletions
diff --git a/weed/topology/allocate_volume.go b/weed/topology/allocate_volume.go
index e5dc48652..39c24ab04 100644
--- a/weed/topology/allocate_volume.go
+++ b/weed/topology/allocate_volume.go
@@ -24,6 +24,7 @@ func AllocateVolume(dn *DataNode, grpcDialOption grpc.DialOption, vid needle.Vol
Ttl: option.Ttl.String(),
Preallocate: option.Prealloacte,
MemoryMapMaxSizeMb: option.MemoryMapMaxSizeMb,
+ DiskType: string(option.DiskType),
})
return deleteErr
})
diff --git a/weed/topology/collection.go b/weed/topology/collection.go
index 7a611d904..a14b68851 100644
--- a/weed/topology/collection.go
+++ b/weed/topology/collection.go
@@ -2,6 +2,7 @@ package topology
import (
"fmt"
+ "github.com/chrislusf/seaweedfs/weed/storage/types"
"github.com/chrislusf/seaweedfs/weed/storage/needle"
"github.com/chrislusf/seaweedfs/weed/storage/super_block"
@@ -11,11 +12,16 @@ import (
type Collection struct {
Name string
volumeSizeLimit uint64
+ replicationAsMin bool
storageType2VolumeLayout *util.ConcurrentReadMap
}
-func NewCollection(name string, volumeSizeLimit uint64) *Collection {
- c := &Collection{Name: name, volumeSizeLimit: volumeSizeLimit}
+func NewCollection(name string, volumeSizeLimit uint64, replicationAsMin bool) *Collection {
+ c := &Collection{
+ Name: name,
+ volumeSizeLimit: volumeSizeLimit,
+ replicationAsMin: replicationAsMin,
+ }
c.storageType2VolumeLayout = util.NewConcurrentReadMap()
return c
}
@@ -24,17 +30,31 @@ func (c *Collection) String() string {
return fmt.Sprintf("Name:%s, volumeSizeLimit:%d, storageType2VolumeLayout:%v", c.Name, c.volumeSizeLimit, c.storageType2VolumeLayout)
}
-func (c *Collection) GetOrCreateVolumeLayout(rp *super_block.ReplicaPlacement, ttl *needle.TTL) *VolumeLayout {
+func (c *Collection) GetOrCreateVolumeLayout(rp *super_block.ReplicaPlacement, ttl *needle.TTL, diskType types.DiskType) *VolumeLayout {
keyString := rp.String()
if ttl != nil {
keyString += ttl.String()
}
+ if diskType != types.HardDriveType {
+ keyString += string(diskType)
+ }
vl := c.storageType2VolumeLayout.Get(keyString, func() interface{} {
- return NewVolumeLayout(rp, ttl, c.volumeSizeLimit)
+ return NewVolumeLayout(rp, ttl, diskType, c.volumeSizeLimit, c.replicationAsMin)
})
return vl.(*VolumeLayout)
}
+func (c *Collection) DeleteVolumeLayout(rp *super_block.ReplicaPlacement, ttl *needle.TTL, diskType types.DiskType) {
+ keyString := rp.String()
+ if ttl != nil {
+ keyString += ttl.String()
+ }
+ if diskType != types.HardDriveType {
+ keyString += string(diskType)
+ }
+ c.storageType2VolumeLayout.Delete(keyString)
+}
+
func (c *Collection) Lookup(vid needle.VolumeId) []*DataNode {
for _, vl := range c.storageType2VolumeLayout.Items() {
if vl != nil {
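
In collection.go above, both GetOrCreateVolumeLayout and the new DeleteVolumeLayout derive the same map key, and the disk type is appended only when it differs from the default hard-drive type, so layouts created before this change keep their keys. A minimal sketch of that key construction follows; DiskType, HardDriveType and buildLayoutKey here are local stand-ins rather than the patch's own identifiers:

package main

import "fmt"

// Local stand-ins for the types package; the assumption is that the default
// hard-drive type contributes nothing to the key, which is why pre-existing
// HDD layouts keep their old keys.
type DiskType string

const HardDriveType DiskType = ""

// buildLayoutKey mirrors the key construction shared by GetOrCreateVolumeLayout
// and DeleteVolumeLayout: replica placement, then TTL if set, then disk type
// unless it is the default hard drive.
func buildLayoutKey(rp, ttl string, diskType DiskType) string {
	key := rp
	if ttl != "" {
		key += ttl
	}
	if diskType != HardDriveType {
		key += string(diskType)
	}
	return key
}

func main() {
	fmt.Println(buildLayoutKey("010", "3d", "ssd")) // 0103dssd
	fmt.Println(buildLayoutKey("010", "", ""))      // 010 (HDD layouts keep their old key)
}
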
diff --git a/weed/topology/data_center.go b/weed/topology/data_center.go
index dc3accb71..60d91ba6d 100644
--- a/weed/topology/data_center.go
+++ b/weed/topology/data_center.go
@@ -1,6 +1,8 @@
package topology
-import "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
+import (
+ "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
+)
type DataCenter struct {
NodeImpl
@@ -10,6 +12,7 @@ func NewDataCenter(id string) *DataCenter {
dc := &DataCenter{}
dc.id = NodeId(id)
dc.nodeType = "DataCenter"
+ dc.diskUsages = newDiskUsages()
dc.children = make(map[NodeId]Node)
dc.NodeImpl.value = dc
return dc
@@ -30,8 +33,6 @@ func (dc *DataCenter) GetOrCreateRack(rackName string) *Rack {
func (dc *DataCenter) ToMap() interface{} {
m := make(map[string]interface{})
m["Id"] = dc.Id()
- m["Max"] = dc.GetMaxVolumeCount()
- m["Free"] = dc.FreeSpace()
var racks []interface{}
for _, c := range dc.Children() {
rack := c.(*Rack)
@@ -43,12 +44,8 @@ func (dc *DataCenter) ToMap() interface{} {
func (dc *DataCenter) ToDataCenterInfo() *master_pb.DataCenterInfo {
m := &master_pb.DataCenterInfo{
- Id: string(dc.Id()),
- VolumeCount: uint64(dc.GetVolumeCount()),
- MaxVolumeCount: uint64(dc.GetMaxVolumeCount()),
- FreeVolumeCount: uint64(dc.FreeSpace()),
- ActiveVolumeCount: uint64(dc.GetActiveVolumeCount()),
- RemoteVolumeCount: uint64(dc.GetRemoteVolumeCount()),
+ Id: string(dc.Id()),
+ DiskInfos: dc.diskUsages.ToDiskInfo(),
}
for _, c := range dc.Children() {
rack := c.(*Rack)
diff --git a/weed/topology/data_node.go b/weed/topology/data_node.go
index 617341e54..69f739dd5 100644
--- a/weed/topology/data_node.go
+++ b/weed/topology/data_node.go
@@ -2,12 +2,11 @@ package topology
import (
"fmt"
- "strconv"
- "sync"
-
"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
- "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
"github.com/chrislusf/seaweedfs/weed/storage/needle"
+ "github.com/chrislusf/seaweedfs/weed/storage/types"
+ "github.com/chrislusf/seaweedfs/weed/util"
+ "strconv"
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/storage"
@@ -15,122 +14,161 @@ import (
type DataNode struct {
NodeImpl
- volumes map[needle.VolumeId]storage.VolumeInfo
- Ip string
- Port int
- PublicUrl string
- LastSeen int64 // unix time in seconds
- ecShards map[needle.VolumeId]*erasure_coding.EcVolumeInfo
- ecShardsLock sync.RWMutex
+ Ip string
+ Port int
+ PublicUrl string
+ LastSeen int64 // unix time in seconds
}
func NewDataNode(id string) *DataNode {
- s := &DataNode{}
- s.id = NodeId(id)
- s.nodeType = "DataNode"
- s.volumes = make(map[needle.VolumeId]storage.VolumeInfo)
- s.ecShards = make(map[needle.VolumeId]*erasure_coding.EcVolumeInfo)
- s.NodeImpl.value = s
- return s
+ dn := &DataNode{}
+ dn.id = NodeId(id)
+ dn.nodeType = "DataNode"
+ dn.diskUsages = newDiskUsages()
+ dn.children = make(map[NodeId]Node)
+ dn.NodeImpl.value = dn
+ return dn
}
func (dn *DataNode) String() string {
dn.RLock()
defer dn.RUnlock()
- return fmt.Sprintf("Node:%s, volumes:%v, Ip:%s, Port:%d, PublicUrl:%s", dn.NodeImpl.String(), dn.volumes, dn.Ip, dn.Port, dn.PublicUrl)
+ return fmt.Sprintf("Node:%s, Ip:%s, Port:%d, PublicUrl:%s", dn.NodeImpl.String(), dn.Ip, dn.Port, dn.PublicUrl)
}
-func (dn *DataNode) AddOrUpdateVolume(v storage.VolumeInfo) (isNew bool) {
+func (dn *DataNode) AddOrUpdateVolume(v storage.VolumeInfo) (isNew, isChangedRO bool) {
dn.Lock()
defer dn.Unlock()
- if oldV, ok := dn.volumes[v.Id]; !ok {
- dn.volumes[v.Id] = v
- dn.UpAdjustVolumeCountDelta(1)
- if v.IsRemote() {
- dn.UpAdjustRemoteVolumeCountDelta(1)
- }
- if !v.ReadOnly {
- dn.UpAdjustActiveVolumeCountDelta(1)
- }
- dn.UpAdjustMaxVolumeId(v.Id)
- isNew = true
- } else {
- if oldV.IsRemote() != v.IsRemote() {
- if v.IsRemote() {
- dn.UpAdjustRemoteVolumeCountDelta(1)
- }
- if oldV.IsRemote() {
- dn.UpAdjustRemoteVolumeCountDelta(-1)
- }
- }
- dn.volumes[v.Id] = v
+ return dn.doAddOrUpdateVolume(v)
+}
+
+func (dn *DataNode) getOrCreateDisk(diskType string) *Disk {
+ c, found := dn.children[NodeId(diskType)]
+ if !found {
+ c = NewDisk(diskType)
+ dn.doLinkChildNode(c)
}
- return
+ disk := c.(*Disk)
+ return disk
+}
+
+func (dn *DataNode) doAddOrUpdateVolume(v storage.VolumeInfo) (isNew, isChangedRO bool) {
+ disk := dn.getOrCreateDisk(v.DiskType)
+ return disk.AddOrUpdateVolume(v)
}
-func (dn *DataNode) UpdateVolumes(actualVolumes []storage.VolumeInfo) (newVolumes, deletedVolumes []storage.VolumeInfo) {
+// UpdateVolumes detects new/deleted/changed volumes on a volume server
+// used in master to notify master clients of these changes.
+func (dn *DataNode) UpdateVolumes(actualVolumes []storage.VolumeInfo) (newVolumes, deletedVolumes, changeRO []storage.VolumeInfo) {
+
actualVolumeMap := make(map[needle.VolumeId]storage.VolumeInfo)
for _, v := range actualVolumes {
actualVolumeMap[v.Id] = v
}
+
dn.Lock()
- for vid, v := range dn.volumes {
+ defer dn.Unlock()
+
+ existingVolumes := dn.getVolumes()
+
+ for _, v := range existingVolumes {
+ vid := v.Id
if _, ok := actualVolumeMap[vid]; !ok {
glog.V(0).Infoln("Deleting volume id:", vid)
- delete(dn.volumes, vid)
+ disk := dn.getOrCreateDisk(v.DiskType)
+ delete(disk.volumes, vid)
deletedVolumes = append(deletedVolumes, v)
- dn.UpAdjustVolumeCountDelta(-1)
+
+ deltaDiskUsages := newDiskUsages()
+ deltaDiskUsage := deltaDiskUsages.getOrCreateDisk(types.ToDiskType(v.DiskType))
+ deltaDiskUsage.volumeCount = -1
if v.IsRemote() {
- dn.UpAdjustRemoteVolumeCountDelta(-1)
+ deltaDiskUsage.remoteVolumeCount = -1
}
if !v.ReadOnly {
- dn.UpAdjustActiveVolumeCountDelta(-1)
+ deltaDiskUsage.activeVolumeCount = -1
}
+ disk.UpAdjustDiskUsageDelta(deltaDiskUsages)
}
}
- dn.Unlock()
for _, v := range actualVolumes {
- isNew := dn.AddOrUpdateVolume(v)
+ isNew, isChangedRO := dn.doAddOrUpdateVolume(v)
if isNew {
newVolumes = append(newVolumes, v)
}
+ if isChangedRO {
+ changeRO = append(changeRO, v)
+ }
}
return
}
-func (dn *DataNode) DeltaUpdateVolumes(newlVolumes, deletedVolumes []storage.VolumeInfo) {
+func (dn *DataNode) DeltaUpdateVolumes(newVolumes, deletedVolumes []storage.VolumeInfo) {
dn.Lock()
+ defer dn.Unlock()
+
for _, v := range deletedVolumes {
- delete(dn.volumes, v.Id)
- dn.UpAdjustVolumeCountDelta(-1)
+ disk := dn.getOrCreateDisk(v.DiskType)
+ delete(disk.volumes, v.Id)
+
+ deltaDiskUsages := newDiskUsages()
+ deltaDiskUsage := deltaDiskUsages.getOrCreateDisk(types.ToDiskType(v.DiskType))
+ deltaDiskUsage.volumeCount = -1
if v.IsRemote() {
- dn.UpAdjustRemoteVolumeCountDelta(-1)
+ deltaDiskUsage.remoteVolumeCount = -1
}
if !v.ReadOnly {
- dn.UpAdjustActiveVolumeCountDelta(-1)
+ deltaDiskUsage.activeVolumeCount = -1
}
+ disk.UpAdjustDiskUsageDelta(deltaDiskUsages)
}
- dn.Unlock()
- for _, v := range newlVolumes {
- dn.AddOrUpdateVolume(v)
+ for _, v := range newVolumes {
+ dn.doAddOrUpdateVolume(v)
}
return
}
+func (dn *DataNode) AdjustMaxVolumeCounts(maxVolumeCounts map[string]uint32) {
+ deltaDiskUsages := newDiskUsages()
+ for diskType, maxVolumeCount := range maxVolumeCounts {
+ if maxVolumeCount == 0 {
+ // the volume server may have set the max to zero
+ continue
+ }
+ dt := types.ToDiskType(diskType)
+ currentDiskUsage := dn.diskUsages.getOrCreateDisk(dt)
+ if currentDiskUsage.maxVolumeCount == int64(maxVolumeCount) {
+ continue
+ }
+ disk := dn.getOrCreateDisk(dt.String())
+ deltaDiskUsage := deltaDiskUsages.getOrCreateDisk(dt)
+ deltaDiskUsage.maxVolumeCount = int64(maxVolumeCount) - currentDiskUsage.maxVolumeCount
+ disk.UpAdjustDiskUsageDelta(deltaDiskUsages)
+ }
+}
+
func (dn *DataNode) GetVolumes() (ret []storage.VolumeInfo) {
dn.RLock()
- for _, v := range dn.volumes {
- ret = append(ret, v)
+ for _, c := range dn.children {
+ disk := c.(*Disk)
+ ret = append(ret, disk.GetVolumes()...)
}
dn.RUnlock()
return ret
}
-func (dn *DataNode) GetVolumesById(id needle.VolumeId) (storage.VolumeInfo, error) {
+func (dn *DataNode) GetVolumesById(id needle.VolumeId) (vInfo storage.VolumeInfo, err error) {
dn.RLock()
defer dn.RUnlock()
- vInfo, ok := dn.volumes[id]
- if ok {
+ found := false
+ for _, c := range dn.children {
+ disk := c.(*Disk)
+ vInfo, found = disk.volumes[id]
+ if found {
+ break
+ }
+ }
+ if found {
return vInfo, nil
} else {
return storage.VolumeInfo{}, fmt.Errorf("volumeInfo not found")
@@ -138,7 +176,10 @@ func (dn *DataNode) GetVolumesById(id needle.VolumeId) (storage.VolumeInfo, erro
}
func (dn *DataNode) GetDataCenter() *DataCenter {
- return dn.Parent().Parent().(*NodeImpl).value.(*DataCenter)
+ rack := dn.Parent()
+ dcNode := rack.Parent()
+ dcValue := dcNode.GetValue()
+ return dcValue.(*DataCenter)
}
func (dn *DataNode) GetRack() *Rack {
@@ -165,28 +206,61 @@ func (dn *DataNode) Url() string {
func (dn *DataNode) ToMap() interface{} {
ret := make(map[string]interface{})
ret["Url"] = dn.Url()
- ret["Volumes"] = dn.GetVolumeCount()
- ret["EcShards"] = dn.GetEcShardCount()
- ret["Max"] = dn.GetMaxVolumeCount()
- ret["Free"] = dn.FreeSpace()
ret["PublicUrl"] = dn.PublicUrl
+
+ // aggregated volume info
+ var volumeCount, ecShardCount, maxVolumeCount int64
+ var volumeIds string
+ for _, diskUsage := range dn.diskUsages.usages {
+ volumeCount += diskUsage.volumeCount
+ ecShardCount += diskUsage.ecShardCount
+ maxVolumeCount += diskUsage.maxVolumeCount
+ }
+
+ for _, disk := range dn.Children() {
+ d := disk.(*Disk)
+ volumeIds += " " + d.GetVolumeIds()
+ }
+
+ ret["Volumes"] = volumeCount
+ ret["EcShards"] = ecShardCount
+ ret["Max"] = maxVolumeCount
+ ret["VolumeIds"] = volumeIds
+
return ret
}
func (dn *DataNode) ToDataNodeInfo() *master_pb.DataNodeInfo {
m := &master_pb.DataNodeInfo{
- Id: string(dn.Id()),
- VolumeCount: uint64(dn.GetVolumeCount()),
- MaxVolumeCount: uint64(dn.GetMaxVolumeCount()),
- FreeVolumeCount: uint64(dn.FreeSpace()),
- ActiveVolumeCount: uint64(dn.GetActiveVolumeCount()),
- RemoteVolumeCount: uint64(dn.GetRemoteVolumeCount()),
- }
- for _, v := range dn.GetVolumes() {
- m.VolumeInfos = append(m.VolumeInfos, v.ToVolumeInformationMessage())
+ Id: string(dn.Id()),
+ DiskInfos: make(map[string]*master_pb.DiskInfo),
}
- for _, ecv := range dn.GetEcShards() {
- m.EcShardInfos = append(m.EcShardInfos, ecv.ToVolumeEcShardInformationMessage())
+ for _, c := range dn.Children() {
+ disk := c.(*Disk)
+ m.DiskInfos[string(disk.Id())] = disk.ToDiskInfo()
}
return m
}
+
+// GetVolumeIds returns the human readable volume ids limited to count of max 100.
+func (dn *DataNode) GetVolumeIds() string {
+ dn.RLock()
+ defer dn.RUnlock()
+ existingVolumes := dn.getVolumes()
+ ids := make([]int, 0, len(existingVolumes))
+
+ for k := range existingVolumes {
+ ids = append(ids, int(k))
+ }
+
+ return util.HumanReadableIntsMax(100, ids...)
+}
+
+func (dn *DataNode) getVolumes() []storage.VolumeInfo {
+ var existingVolumes []storage.VolumeInfo
+ for _, c := range dn.children {
+ disk := c.(*Disk)
+ existingVolumes = append(existingVolumes, disk.GetVolumes()...)
+ }
+ return existingVolumes
+}
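
The data_node.go changes above follow one recurring pattern: every add, delete or capacity adjustment is expressed as a signed DiskUsages delta built at the disk level and pushed upward with UpAdjustDiskUsageDelta, so data node, rack, data center and topology totals stay consistent without the old per-counter UpAdjust methods. A self-contained sketch of that upward propagation, with the tree and the counters reduced to illustrative stand-ins:

package main

import "fmt"

// Two counters stand in for the full DiskUsageCounts.
type DiskUsageCounts struct{ volumeCount, activeVolumeCount int64 }

func (a *DiskUsageCounts) add(b *DiskUsageCounts) {
	a.volumeCount += b.volumeCount
	a.activeVolumeCount += b.activeVolumeCount
}

// node mimics NodeImpl: each level keeps its own usage and forwards deltas upward.
type node struct {
	name   string
	usage  DiskUsageCounts
	parent *node
}

func (n *node) UpAdjustDelta(delta *DiskUsageCounts) {
	n.usage.add(delta)
	if n.parent != nil {
		n.parent.UpAdjustDelta(delta)
	}
}

func main() {
	topo := &node{name: "topo"}
	dc := &node{name: "dc1", parent: topo}
	rack := &node{name: "rack1", parent: dc}
	dn := &node{name: "dn1", parent: rack}
	disk := &node{name: "hdd", parent: dn}

	// a deleted writable volume contributes a -1/-1 delta at the disk level
	disk.UpAdjustDelta(&DiskUsageCounts{volumeCount: -1, activeVolumeCount: -1})
	fmt.Println(topo.usage.volumeCount, rack.usage.activeVolumeCount) // -1 -1
}
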
diff --git a/weed/topology/data_node_ec.go b/weed/topology/data_node_ec.go
index 75c8784fe..330b16b24 100644
--- a/weed/topology/data_node_ec.go
+++ b/weed/topology/data_node_ec.go
@@ -3,12 +3,14 @@ package topology
import (
"github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
"github.com/chrislusf/seaweedfs/weed/storage/needle"
+ "github.com/chrislusf/seaweedfs/weed/storage/types"
)
func (dn *DataNode) GetEcShards() (ret []*erasure_coding.EcVolumeInfo) {
dn.RLock()
- for _, ecVolumeInfo := range dn.ecShards {
- ret = append(ret, ecVolumeInfo)
+ for _, c := range dn.children {
+ disk := c.(*Disk)
+ ret = append(ret, disk.GetEcShards()...)
}
dn.RUnlock()
return ret
@@ -21,10 +23,17 @@ func (dn *DataNode) UpdateEcShards(actualShards []*erasure_coding.EcVolumeInfo)
actualEcShardMap[ecShards.VolumeId] = ecShards
}
- // found out the newShards and deletedShards
+ existingEcShards := dn.GetEcShards()
+
+ // find out the newShards and deletedShards
var newShardCount, deletedShardCount int
- dn.ecShardsLock.RLock()
- for vid, ecShards := range dn.ecShards {
+ for _, ecShards := range existingEcShards {
+
+ disk := dn.getOrCreateDisk(ecShards.DiskType)
+ deltaDiskUsages := newDiskUsages()
+ deltaDiskUsage := deltaDiskUsages.getOrCreateDisk(types.ToDiskType(ecShards.DiskType))
+
+ vid := ecShards.VolumeId
if actualEcShards, ok := actualEcShardMap[vid]; !ok {
// dn registered ec shards not found in the new set of ec shards
deletedShards = append(deletedShards, ecShards)
@@ -42,26 +51,60 @@ func (dn *DataNode) UpdateEcShards(actualShards []*erasure_coding.EcVolumeInfo)
deletedShardCount += d.ShardIdCount()
}
}
+
+ deltaDiskUsage.ecShardCount = int64(newShardCount - deletedShardCount)
+ disk.UpAdjustDiskUsageDelta(deltaDiskUsages)
+
}
+
for _, ecShards := range actualShards {
- if _, found := dn.ecShards[ecShards.VolumeId]; !found {
- newShards = append(newShards, ecShards)
- newShardCount += ecShards.ShardIdCount()
+ if dn.hasEcShards(ecShards.VolumeId) {
+ continue
}
+
+ newShards = append(newShards, ecShards)
+
+ disk := dn.getOrCreateDisk(ecShards.DiskType)
+ deltaDiskUsages := newDiskUsages()
+ deltaDiskUsage := deltaDiskUsages.getOrCreateDisk(types.ToDiskType(ecShards.DiskType))
+ deltaDiskUsage.ecShardCount = int64(ecShards.ShardIdCount())
+ disk.UpAdjustDiskUsageDelta(deltaDiskUsages)
}
- dn.ecShardsLock.RUnlock()
if len(newShards) > 0 || len(deletedShards) > 0 {
// if changed, set to the new ec shard map
- dn.ecShardsLock.Lock()
- dn.ecShards = actualEcShardMap
- dn.UpAdjustEcShardCountDelta(int64(newShardCount - deletedShardCount))
- dn.ecShardsLock.Unlock()
+ dn.doUpdateEcShards(actualShards)
}
return
}
+func (dn *DataNode) hasEcShards(volumeId needle.VolumeId) (found bool) {
+ dn.RLock()
+ defer dn.RUnlock()
+ for _, c := range dn.children {
+ disk := c.(*Disk)
+ _, found = disk.ecShards[volumeId]
+ if found {
+ return
+ }
+ }
+ return
+}
+
+func (dn *DataNode) doUpdateEcShards(actualShards []*erasure_coding.EcVolumeInfo) {
+ dn.Lock()
+ for _, c := range dn.children {
+ disk := c.(*Disk)
+ disk.ecShards = make(map[needle.VolumeId]*erasure_coding.EcVolumeInfo)
+ }
+ for _, shard := range actualShards {
+ disk := dn.getOrCreateDisk(shard.DiskType)
+ disk.ecShards[shard.VolumeId] = shard
+ }
+ dn.Unlock()
+}
+
func (dn *DataNode) DeltaUpdateEcShards(newShards, deletedShards []*erasure_coding.EcVolumeInfo) {
for _, newShard := range newShards {
@@ -75,61 +118,25 @@ func (dn *DataNode) DeltaUpdateEcShards(newShards, deletedShards []*erasure_codi
}
func (dn *DataNode) AddOrUpdateEcShard(s *erasure_coding.EcVolumeInfo) {
- dn.ecShardsLock.Lock()
- defer dn.ecShardsLock.Unlock()
-
- delta := 0
- if existing, ok := dn.ecShards[s.VolumeId]; !ok {
- dn.ecShards[s.VolumeId] = s
- delta = s.ShardBits.ShardIdCount()
- } else {
- oldCount := existing.ShardBits.ShardIdCount()
- existing.ShardBits = existing.ShardBits.Plus(s.ShardBits)
- delta = existing.ShardBits.ShardIdCount() - oldCount
- }
-
- dn.UpAdjustEcShardCountDelta(int64(delta))
-
+ disk := dn.getOrCreateDisk(s.DiskType)
+ disk.AddOrUpdateEcShard(s)
}
func (dn *DataNode) DeleteEcShard(s *erasure_coding.EcVolumeInfo) {
- dn.ecShardsLock.Lock()
- defer dn.ecShardsLock.Unlock()
-
- if existing, ok := dn.ecShards[s.VolumeId]; ok {
- oldCount := existing.ShardBits.ShardIdCount()
- existing.ShardBits = existing.ShardBits.Minus(s.ShardBits)
- delta := existing.ShardBits.ShardIdCount() - oldCount
- dn.UpAdjustEcShardCountDelta(int64(delta))
- if existing.ShardBits.ShardIdCount() == 0 {
- delete(dn.ecShards, s.VolumeId)
- }
- }
-
+ disk := dn.getOrCreateDisk(s.DiskType)
+ disk.DeleteEcShard(s)
}
-func (dn *DataNode) HasVolumesById(id needle.VolumeId) (hasVolumeId bool) {
+func (dn *DataNode) HasVolumesById(volumeId needle.VolumeId) (hasVolumeId bool) {
- // check whether normal volumes has this volume id
dn.RLock()
- _, ok := dn.volumes[id]
- if ok {
- hasVolumeId = true
- }
- dn.RUnlock()
-
- if hasVolumeId {
- return
- }
-
- // check whether ec shards has this volume id
- dn.ecShardsLock.RLock()
- _, ok = dn.ecShards[id]
- if ok {
- hasVolumeId = true
+ defer dn.RUnlock()
+ for _, c := range dn.children {
+ disk := c.(*Disk)
+ if disk.HasVolumesById(volumeId) {
+ return true
+ }
}
- dn.ecShardsLock.RUnlock()
-
- return
+ return false
}
diff --git a/weed/topology/disk.go b/weed/topology/disk.go
new file mode 100644
index 000000000..a085f8dff
--- /dev/null
+++ b/weed/topology/disk.go
@@ -0,0 +1,270 @@
+package topology
+
+import (
+ "fmt"
+ "github.com/chrislusf/seaweedfs/weed/storage/types"
+ "github.com/chrislusf/seaweedfs/weed/util"
+ "sync"
+
+ "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
+ "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
+ "github.com/chrislusf/seaweedfs/weed/storage/needle"
+
+ "github.com/chrislusf/seaweedfs/weed/storage"
+)
+
+type Disk struct {
+ NodeImpl
+ volumes map[needle.VolumeId]storage.VolumeInfo
+ ecShards map[needle.VolumeId]*erasure_coding.EcVolumeInfo
+ ecShardsLock sync.RWMutex
+}
+
+func NewDisk(diskType string) *Disk {
+ s := &Disk{}
+ s.id = NodeId(diskType)
+ s.nodeType = "Disk"
+ s.diskUsages = newDiskUsages()
+ s.volumes = make(map[needle.VolumeId]storage.VolumeInfo, 2)
+ s.ecShards = make(map[needle.VolumeId]*erasure_coding.EcVolumeInfo, 2)
+ s.NodeImpl.value = s
+ return s
+}
+
+type DiskUsages struct {
+ sync.RWMutex
+ usages map[types.DiskType]*DiskUsageCounts
+}
+
+func newDiskUsages() *DiskUsages {
+ return &DiskUsages{
+ usages: make(map[types.DiskType]*DiskUsageCounts),
+ }
+}
+
+func (d *DiskUsages) negative() *DiskUsages {
+ d.RLock()
+ defer d.RUnlock()
+ t := newDiskUsages()
+ for diskType, b := range d.usages {
+ a := t.getOrCreateDisk(diskType)
+ a.volumeCount = -b.volumeCount
+ a.remoteVolumeCount = -b.remoteVolumeCount
+ a.activeVolumeCount = -b.activeVolumeCount
+ a.ecShardCount = -b.ecShardCount
+ a.maxVolumeCount = -b.maxVolumeCount
+
+ }
+ return t
+}
+
+func (d *DiskUsages) ToDiskInfo() map[string]*master_pb.DiskInfo {
+ ret := make(map[string]*master_pb.DiskInfo)
+ for diskType, diskUsageCounts := range d.usages {
+ m := &master_pb.DiskInfo{
+ VolumeCount: uint64(diskUsageCounts.volumeCount),
+ MaxVolumeCount: uint64(diskUsageCounts.maxVolumeCount),
+ FreeVolumeCount: uint64(diskUsageCounts.maxVolumeCount - diskUsageCounts.volumeCount),
+ ActiveVolumeCount: uint64(diskUsageCounts.activeVolumeCount),
+ RemoteVolumeCount: uint64(diskUsageCounts.remoteVolumeCount),
+ }
+ ret[string(diskType)] = m
+ }
+ return ret
+}
+
+func (d *DiskUsages) FreeSpace() (freeSpace int64) {
+ d.RLock()
+ defer d.RUnlock()
+ for _, diskUsage := range d.usages {
+ freeSpace += diskUsage.FreeSpace()
+ }
+ return
+}
+
+func (d *DiskUsages) GetMaxVolumeCount() (maxVolumeCount int64) {
+ d.RLock()
+ defer d.RUnlock()
+ for _, diskUsage := range d.usages {
+ maxVolumeCount += diskUsage.maxVolumeCount
+ }
+ return
+}
+
+type DiskUsageCounts struct {
+ volumeCount int64
+ remoteVolumeCount int64
+ activeVolumeCount int64
+ ecShardCount int64
+ maxVolumeCount int64
+}
+
+func (a *DiskUsageCounts) addDiskUsageCounts(b *DiskUsageCounts) {
+ a.volumeCount += b.volumeCount
+ a.remoteVolumeCount += b.remoteVolumeCount
+ a.activeVolumeCount += b.activeVolumeCount
+ a.ecShardCount += b.ecShardCount
+ a.maxVolumeCount += b.maxVolumeCount
+}
+
+func (a *DiskUsageCounts) FreeSpace() int64 {
+ freeVolumeSlotCount := a.maxVolumeCount + a.remoteVolumeCount - a.volumeCount
+ if a.ecShardCount > 0 {
+ freeVolumeSlotCount = freeVolumeSlotCount - a.ecShardCount/erasure_coding.DataShardsCount - 1
+ }
+ return freeVolumeSlotCount
+}
+
+func (a *DiskUsageCounts) minus(b *DiskUsageCounts) *DiskUsageCounts {
+ return &DiskUsageCounts{
+ volumeCount: a.volumeCount - b.volumeCount,
+ remoteVolumeCount: a.remoteVolumeCount - b.remoteVolumeCount,
+ activeVolumeCount: a.activeVolumeCount - b.activeVolumeCount,
+ ecShardCount: a.ecShardCount - b.ecShardCount,
+ maxVolumeCount: a.maxVolumeCount - b.maxVolumeCount,
+ }
+}
+
+func (du *DiskUsages) getOrCreateDisk(diskType types.DiskType) *DiskUsageCounts {
+ du.Lock()
+ defer du.Unlock()
+ t, found := du.usages[diskType]
+ if found {
+ return t
+ }
+ t = &DiskUsageCounts{}
+ du.usages[diskType] = t
+ return t
+}
+
+func (d *Disk) String() string {
+ d.RLock()
+ defer d.RUnlock()
+ return fmt.Sprintf("Disk:%s, volumes:%v, ecShards:%v", d.NodeImpl.String(), d.volumes, d.ecShards)
+}
+
+func (d *Disk) AddOrUpdateVolume(v storage.VolumeInfo) (isNew, isChangedRO bool) {
+ d.Lock()
+ defer d.Unlock()
+ return d.doAddOrUpdateVolume(v)
+}
+
+func (d *Disk) doAddOrUpdateVolume(v storage.VolumeInfo) (isNew, isChangedRO bool) {
+ deltaDiskUsages := newDiskUsages()
+ deltaDiskUsage := deltaDiskUsages.getOrCreateDisk(types.ToDiskType(v.DiskType))
+ if oldV, ok := d.volumes[v.Id]; !ok {
+ d.volumes[v.Id] = v
+ deltaDiskUsage.volumeCount = 1
+ if v.IsRemote() {
+ deltaDiskUsage.remoteVolumeCount = 1
+ }
+ if !v.ReadOnly {
+ deltaDiskUsage.activeVolumeCount = 1
+ }
+ d.UpAdjustMaxVolumeId(v.Id)
+ d.UpAdjustDiskUsageDelta(deltaDiskUsages)
+ isNew = true
+ } else {
+ if oldV.IsRemote() != v.IsRemote() {
+ if v.IsRemote() {
+ deltaDiskUsage.remoteVolumeCount = 1
+ }
+ if oldV.IsRemote() {
+ deltaDiskUsage.remoteVolumeCount = -1
+ }
+ d.UpAdjustDiskUsageDelta(deltaDiskUsages)
+ }
+ isChangedRO = d.volumes[v.Id].ReadOnly != v.ReadOnly
+ d.volumes[v.Id] = v
+ }
+ return
+}
+
+func (d *Disk) GetVolumes() (ret []storage.VolumeInfo) {
+ d.RLock()
+ for _, v := range d.volumes {
+ ret = append(ret, v)
+ }
+ d.RUnlock()
+ return ret
+}
+
+func (d *Disk) GetVolumesById(id needle.VolumeId) (storage.VolumeInfo, error) {
+ d.RLock()
+ defer d.RUnlock()
+ vInfo, ok := d.volumes[id]
+ if ok {
+ return vInfo, nil
+ } else {
+ return storage.VolumeInfo{}, fmt.Errorf("volumeInfo not found")
+ }
+}
+
+func (d *Disk) GetDataCenter() *DataCenter {
+ dn := d.Parent()
+ rack := dn.Parent()
+ dcNode := rack.Parent()
+ dcValue := dcNode.GetValue()
+ return dcValue.(*DataCenter)
+}
+
+func (d *Disk) GetRack() *Rack {
+ return d.Parent().Parent().(*NodeImpl).value.(*Rack)
+}
+
+func (d *Disk) GetTopology() *Topology {
+ p := d.Parent()
+ for p.Parent() != nil {
+ p = p.Parent()
+ }
+ t := p.(*Topology)
+ return t
+}
+
+func (d *Disk) ToMap() interface{} {
+ ret := make(map[string]interface{})
+ diskUsage := d.diskUsages.getOrCreateDisk(types.ToDiskType(string(d.Id())))
+ ret["Volumes"] = diskUsage.volumeCount
+ ret["VolumeIds"] = d.GetVolumeIds()
+ ret["EcShards"] = diskUsage.ecShardCount
+ ret["Max"] = diskUsage.maxVolumeCount
+ ret["Free"] = d.FreeSpace()
+ return ret
+}
+
+func (d *Disk) FreeSpace() int64 {
+ t := d.diskUsages.getOrCreateDisk(types.ToDiskType(string(d.Id())))
+ return t.FreeSpace()
+}
+
+func (d *Disk) ToDiskInfo() *master_pb.DiskInfo {
+ diskUsage := d.diskUsages.getOrCreateDisk(types.ToDiskType(string(d.Id())))
+ m := &master_pb.DiskInfo{
+ Type: string(d.Id()),
+ VolumeCount: uint64(diskUsage.volumeCount),
+ MaxVolumeCount: uint64(diskUsage.maxVolumeCount),
+ FreeVolumeCount: uint64(diskUsage.maxVolumeCount - diskUsage.volumeCount),
+ ActiveVolumeCount: uint64(diskUsage.activeVolumeCount),
+ RemoteVolumeCount: uint64(diskUsage.remoteVolumeCount),
+ }
+ for _, v := range d.GetVolumes() {
+ m.VolumeInfos = append(m.VolumeInfos, v.ToVolumeInformationMessage())
+ }
+ for _, ecv := range d.GetEcShards() {
+ m.EcShardInfos = append(m.EcShardInfos, ecv.ToVolumeEcShardInformationMessage())
+ }
+ return m
+}
+
+// GetVolumeIds returns the human readable volume ids limited to count of max 100.
+func (d *Disk) GetVolumeIds() string {
+ d.RLock()
+ defer d.RUnlock()
+ ids := make([]int, 0, len(d.volumes))
+
+ for k := range d.volumes {
+ ids = append(ids, int(k))
+ }
+
+ return util.HumanReadableIntsMax(100, ids...)
+}
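
The FreeSpace method introduced in disk.go treats remote volumes as not occupying local slots and charges locally held EC shards roughly one volume slot per full set of data shards, plus one slot of headroom. A standalone sketch of that arithmetic; the data-shard count of 10 is an assumption about erasure_coding.DataShardsCount, not something stated in this diff:

package main

import "fmt"

const dataShardsCount = 10 // assumed value of erasure_coding.DataShardsCount

// freeSpace mirrors DiskUsageCounts.FreeSpace in the patch.
func freeSpace(maxVolumes, volumes, remoteVolumes, ecShards int64) int64 {
	free := maxVolumes + remoteVolumes - volumes
	if ecShards > 0 {
		free = free - ecShards/dataShardsCount - 1
	}
	return free
}

func main() {
	// 100 slots, 60 volumes of which 5 are remote, 25 EC shards held locally
	fmt.Println(freeSpace(100, 60, 5, 25)) // 100+5-60-2-1 = 42
}
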
diff --git a/weed/topology/disk_ec.go b/weed/topology/disk_ec.go
new file mode 100644
index 000000000..74a06b47f
--- /dev/null
+++ b/weed/topology/disk_ec.go
@@ -0,0 +1,84 @@
+package topology
+
+import (
+ "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
+ "github.com/chrislusf/seaweedfs/weed/storage/needle"
+ "github.com/chrislusf/seaweedfs/weed/storage/types"
+)
+
+func (d *Disk) GetEcShards() (ret []*erasure_coding.EcVolumeInfo) {
+ d.RLock()
+ for _, ecVolumeInfo := range d.ecShards {
+ ret = append(ret, ecVolumeInfo)
+ }
+ d.RUnlock()
+ return ret
+}
+
+func (d *Disk) AddOrUpdateEcShard(s *erasure_coding.EcVolumeInfo) {
+ d.ecShardsLock.Lock()
+ defer d.ecShardsLock.Unlock()
+
+ delta := 0
+ if existing, ok := d.ecShards[s.VolumeId]; !ok {
+ d.ecShards[s.VolumeId] = s
+ delta = s.ShardBits.ShardIdCount()
+ } else {
+ oldCount := existing.ShardBits.ShardIdCount()
+ existing.ShardBits = existing.ShardBits.Plus(s.ShardBits)
+ delta = existing.ShardBits.ShardIdCount() - oldCount
+ }
+
+ deltaDiskUsages := newDiskUsages()
+ deltaDiskUsage := deltaDiskUsages.getOrCreateDisk(types.ToDiskType(string(d.Id())))
+ deltaDiskUsage.ecShardCount = int64(delta)
+ d.UpAdjustDiskUsageDelta(deltaDiskUsages)
+
+}
+
+func (d *Disk) DeleteEcShard(s *erasure_coding.EcVolumeInfo) {
+ d.ecShardsLock.Lock()
+ defer d.ecShardsLock.Unlock()
+
+ if existing, ok := d.ecShards[s.VolumeId]; ok {
+ oldCount := existing.ShardBits.ShardIdCount()
+ existing.ShardBits = existing.ShardBits.Minus(s.ShardBits)
+ delta := existing.ShardBits.ShardIdCount() - oldCount
+
+ deltaDiskUsages := newDiskUsages()
+ deltaDiskUsage := deltaDiskUsages.getOrCreateDisk(types.ToDiskType(string(d.Id())))
+ deltaDiskUsage.ecShardCount = int64(delta)
+ d.UpAdjustDiskUsageDelta(deltaDiskUsages)
+
+ if existing.ShardBits.ShardIdCount() == 0 {
+ delete(d.ecShards, s.VolumeId)
+ }
+ }
+
+}
+
+func (d *Disk) HasVolumesById(id needle.VolumeId) (hasVolumeId bool) {
+
+ // check whether normal volumes has this volume id
+ d.RLock()
+ _, ok := d.volumes[id]
+ if ok {
+ hasVolumeId = true
+ }
+ d.RUnlock()
+
+ if hasVolumeId {
+ return
+ }
+
+ // check whether ec shards has this volume id
+ d.ecShardsLock.RLock()
+ _, ok = d.ecShards[id]
+ if ok {
+ hasVolumeId = true
+ }
+ d.ecShardsLock.RUnlock()
+
+ return
+
+}
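
The EC bookkeeping in disk_ec.go derives its ecShardCount deltas from shard-bit counts taken before and after merging or removing shard sets. The sketch below models ShardBits as a plain bitmask; treating Plus as a union, Minus as clearing bits and ShardIdCount as a popcount is an assumption about the erasure_coding package that matches how the deltas are computed above:

package main

import (
	"fmt"
	"math/bits"
)

// ShardBits is sketched as a bitmask: bit i set means shard i is present on this disk.
type ShardBits uint32

func (b ShardBits) Plus(o ShardBits) ShardBits  { return b | o }
func (b ShardBits) Minus(o ShardBits) ShardBits { return b &^ o }
func (b ShardBits) ShardIdCount() int           { return bits.OnesCount32(uint32(b)) }

func main() {
	existing := ShardBits(0b0000_1111) // shards 0-3 already on this disk
	incoming := ShardBits(0b0011_1100) // heartbeat reports shards 2-5

	merged := existing.Plus(incoming)
	delta := merged.ShardIdCount() - existing.ShardIdCount()
	fmt.Println(merged.ShardIdCount(), delta) // 6 2 -> ecShardCount grows by 2

	afterDelete := merged.Minus(ShardBits(0b0000_0011)) // shards 0-1 removed
	fmt.Println(afterDelete.ShardIdCount())             // 4
}
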
diff --git a/weed/topology/node.go b/weed/topology/node.go
index 572a89d4d..95d63972e 100644
--- a/weed/topology/node.go
+++ b/weed/topology/node.go
@@ -2,34 +2,25 @@ package topology
import (
"errors"
- "math/rand"
- "strings"
- "sync"
- "sync/atomic"
-
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
"github.com/chrislusf/seaweedfs/weed/storage/needle"
+ "github.com/chrislusf/seaweedfs/weed/storage/types"
+ "math/rand"
+ "strings"
+ "sync"
)
type NodeId string
type Node interface {
Id() NodeId
String() string
- FreeSpace() int64
- ReserveOneVolume(r int64) (*DataNode, error)
- UpAdjustMaxVolumeCountDelta(maxVolumeCountDelta int64)
- UpAdjustVolumeCountDelta(volumeCountDelta int64)
- UpAdjustRemoteVolumeCountDelta(remoteVolumeCountDelta int64)
- UpAdjustEcShardCountDelta(ecShardCountDelta int64)
- UpAdjustActiveVolumeCountDelta(activeVolumeCountDelta int64)
+ AvailableSpaceFor(option *VolumeGrowOption) int64
+ ReserveOneVolume(r int64, option *VolumeGrowOption) (*DataNode, error)
+ UpAdjustDiskUsageDelta(deltaDiskUsages *DiskUsages)
UpAdjustMaxVolumeId(vid needle.VolumeId)
+ GetDiskUsages() *DiskUsages
- GetVolumeCount() int64
- GetEcShardCount() int64
- GetActiveVolumeCount() int64
- GetRemoteVolumeCount() int64
- GetMaxVolumeCount() int64
GetMaxVolumeId() needle.VolumeId
SetParent(Node)
LinkChildNode(node Node)
@@ -45,73 +36,81 @@ type Node interface {
GetValue() interface{} //get reference to the topology,dc,rack,datanode
}
type NodeImpl struct {
- volumeCount int64
- remoteVolumeCount int64
- activeVolumeCount int64
- ecShardCount int64
- maxVolumeCount int64
- id NodeId
- parent Node
- sync.RWMutex // lock children
- children map[NodeId]Node
- maxVolumeId needle.VolumeId
+ diskUsages *DiskUsages
+ id NodeId
+ parent Node
+ sync.RWMutex // lock children
+ children map[NodeId]Node
+ maxVolumeId needle.VolumeId
//for rack, data center, topology
nodeType string
value interface{}
}
+func (n *NodeImpl) GetDiskUsages() *DiskUsages {
+ return n.diskUsages
+}
+
// the first node must satisfy filterFirstNodeFn(), the rest nodes must have one free slot
-func (n *NodeImpl) RandomlyPickNodes(numberOfNodes int, filterFirstNodeFn func(dn Node) error) (firstNode Node, restNodes []Node, err error) {
- candidates := make([]Node, 0, len(n.children))
+func (n *NodeImpl) PickNodesByWeight(numberOfNodes int, option *VolumeGrowOption, filterFirstNodeFn func(dn Node) error) (firstNode Node, restNodes []Node, err error) {
+ var totalWeights int64
var errs []string
n.RLock()
+ candidates := make([]Node, 0, len(n.children))
+ candidatesWeights := make([]int64, 0, len(n.children))
+ //pick nodes which has enough free volumes as candidates, and use free volumes number as node weight.
for _, node := range n.children {
- if err := filterFirstNodeFn(node); err == nil {
- candidates = append(candidates, node)
- } else {
- errs = append(errs, string(node.Id())+":"+err.Error())
+ if node.AvailableSpaceFor(option) <= 0 {
+ continue
}
+ totalWeights += node.AvailableSpaceFor(option)
+ candidates = append(candidates, node)
+ candidatesWeights = append(candidatesWeights, node.AvailableSpaceFor(option))
}
n.RUnlock()
- if len(candidates) == 0 {
- return nil, nil, errors.New("No matching data node found! \n" + strings.Join(errs, "\n"))
+ if len(candidates) < numberOfNodes {
+ glog.V(0).Infoln(n.Id(), "failed to pick", numberOfNodes, "from ", len(candidates), "node candidates")
+ return nil, nil, errors.New("No enough data node found!")
}
- firstNode = candidates[rand.Intn(len(candidates))]
- glog.V(2).Infoln(n.Id(), "picked main node:", firstNode.Id())
- restNodes = make([]Node, numberOfNodes-1)
- candidates = candidates[:0]
- n.RLock()
- for _, node := range n.children {
- if node.Id() == firstNode.Id() {
- continue
- }
- if node.FreeSpace() <= 0 {
- continue
+ //pick nodes randomly by weights, the node picked earlier has higher final weights
+ sortedCandidates := make([]Node, 0, len(candidates))
+ for i := 0; i < len(candidates); i++ {
+ weightsInterval := rand.Int63n(totalWeights)
+ lastWeights := int64(0)
+ for k, weights := range candidatesWeights {
+ if (weightsInterval >= lastWeights) && (weightsInterval < lastWeights+weights) {
+ sortedCandidates = append(sortedCandidates, candidates[k])
+ candidatesWeights[k] = 0
+ totalWeights -= weights
+ break
+ }
+ lastWeights += weights
}
- glog.V(2).Infoln("select rest node candidate:", node.Id())
- candidates = append(candidates, node)
}
- n.RUnlock()
- glog.V(2).Infoln(n.Id(), "picking", numberOfNodes-1, "from rest", len(candidates), "node candidates")
- ret := len(restNodes) == 0
- for k, node := range candidates {
- if k < len(restNodes) {
- restNodes[k] = node
- if k == len(restNodes)-1 {
- ret = true
+
+ restNodes = make([]Node, 0, numberOfNodes-1)
+ ret := false
+ n.RLock()
+ for k, node := range sortedCandidates {
+ if err := filterFirstNodeFn(node); err == nil {
+ firstNode = node
+ if k >= numberOfNodes-1 {
+ restNodes = sortedCandidates[:numberOfNodes-1]
+ } else {
+ restNodes = append(restNodes, sortedCandidates[:k]...)
+ restNodes = append(restNodes, sortedCandidates[k+1:numberOfNodes]...)
}
+ ret = true
+ break
} else {
- r := rand.Intn(k + 1)
- if r < len(restNodes) {
- restNodes[r] = node
- }
+ errs = append(errs, string(node.Id())+":"+err.Error())
}
}
+ n.RUnlock()
if !ret {
- glog.V(2).Infoln(n.Id(), "failed to pick", numberOfNodes-1, "from rest", len(candidates), "node candidates")
- err = errors.New("No enough data node found!")
+ return nil, nil, errors.New("No matching data node found! \n" + strings.Join(errs, "\n"))
}
return
}
@@ -134,10 +133,14 @@ func (n *NodeImpl) String() string {
func (n *NodeImpl) Id() NodeId {
return n.id
}
-func (n *NodeImpl) FreeSpace() int64 {
- freeVolumeSlotCount := n.maxVolumeCount + n.remoteVolumeCount - n.volumeCount
- if n.ecShardCount > 0 {
- freeVolumeSlotCount = freeVolumeSlotCount - n.ecShardCount/erasure_coding.DataShardsCount - 1
+func (n *NodeImpl) getOrCreateDisk(diskType types.DiskType) *DiskUsageCounts {
+ return n.diskUsages.getOrCreateDisk(diskType)
+}
+func (n *NodeImpl) AvailableSpaceFor(option *VolumeGrowOption) int64 {
+ t := n.getOrCreateDisk(option.DiskType)
+ freeVolumeSlotCount := t.maxVolumeCount + t.remoteVolumeCount - t.volumeCount
+ if t.ecShardCount > 0 {
+ freeVolumeSlotCount = freeVolumeSlotCount - t.ecShardCount/erasure_coding.DataShardsCount - 1
}
return freeVolumeSlotCount
}
@@ -158,11 +161,11 @@ func (n *NodeImpl) Parent() Node {
func (n *NodeImpl) GetValue() interface{} {
return n.value
}
-func (n *NodeImpl) ReserveOneVolume(r int64) (assignedNode *DataNode, err error) {
+func (n *NodeImpl) ReserveOneVolume(r int64, option *VolumeGrowOption) (assignedNode *DataNode, err error) {
n.RLock()
defer n.RUnlock()
for _, node := range n.children {
- freeSpace := node.FreeSpace()
+ freeSpace := node.AvailableSpaceFor(option)
// fmt.Println("r =", r, ", node =", node, ", freeSpace =", freeSpace)
if freeSpace <= 0 {
continue
@@ -170,11 +173,11 @@ func (n *NodeImpl) ReserveOneVolume(r int64) (assignedNode *DataNode, err error)
if r >= freeSpace {
r -= freeSpace
} else {
- if node.IsDataNode() && node.FreeSpace() > 0 {
+ if node.IsDataNode() && node.AvailableSpaceFor(option) > 0 {
// fmt.Println("vid =", vid, " assigned to node =", node, ", freeSpace =", node.FreeSpace())
return node.(*DataNode), nil
}
- assignedNode, err = node.ReserveOneVolume(r)
+ assignedNode, err = node.ReserveOneVolume(r, option)
if err == nil {
return
}
@@ -183,34 +186,13 @@ func (n *NodeImpl) ReserveOneVolume(r int64) (assignedNode *DataNode, err error)
return nil, errors.New("No free volume slot found!")
}
-func (n *NodeImpl) UpAdjustMaxVolumeCountDelta(maxVolumeCountDelta int64) { //can be negative
- atomic.AddInt64(&n.maxVolumeCount, maxVolumeCountDelta)
- if n.parent != nil {
- n.parent.UpAdjustMaxVolumeCountDelta(maxVolumeCountDelta)
- }
-}
-func (n *NodeImpl) UpAdjustVolumeCountDelta(volumeCountDelta int64) { //can be negative
- atomic.AddInt64(&n.volumeCount, volumeCountDelta)
- if n.parent != nil {
- n.parent.UpAdjustVolumeCountDelta(volumeCountDelta)
+func (n *NodeImpl) UpAdjustDiskUsageDelta(deltaDiskUsages *DiskUsages) { //can be negative
+ for diskType, diskUsage := range deltaDiskUsages.usages {
+ existingDisk := n.getOrCreateDisk(diskType)
+ existingDisk.addDiskUsageCounts(diskUsage)
}
-}
-func (n *NodeImpl) UpAdjustRemoteVolumeCountDelta(remoteVolumeCountDelta int64) { //can be negative
- atomic.AddInt64(&n.remoteVolumeCount, remoteVolumeCountDelta)
if n.parent != nil {
- n.parent.UpAdjustRemoteVolumeCountDelta(remoteVolumeCountDelta)
- }
-}
-func (n *NodeImpl) UpAdjustEcShardCountDelta(ecShardCountDelta int64) { //can be negative
- atomic.AddInt64(&n.ecShardCount, ecShardCountDelta)
- if n.parent != nil {
- n.parent.UpAdjustEcShardCountDelta(ecShardCountDelta)
- }
-}
-func (n *NodeImpl) UpAdjustActiveVolumeCountDelta(activeVolumeCountDelta int64) { //can be negative
- atomic.AddInt64(&n.activeVolumeCount, activeVolumeCountDelta)
- if n.parent != nil {
- n.parent.UpAdjustActiveVolumeCountDelta(activeVolumeCountDelta)
+ n.parent.UpAdjustDiskUsageDelta(deltaDiskUsages)
}
}
func (n *NodeImpl) UpAdjustMaxVolumeId(vid needle.VolumeId) { //can be negative
@@ -224,33 +206,18 @@ func (n *NodeImpl) UpAdjustMaxVolumeId(vid needle.VolumeId) { //can be negative
func (n *NodeImpl) GetMaxVolumeId() needle.VolumeId {
return n.maxVolumeId
}
-func (n *NodeImpl) GetVolumeCount() int64 {
- return n.volumeCount
-}
-func (n *NodeImpl) GetEcShardCount() int64 {
- return n.ecShardCount
-}
-func (n *NodeImpl) GetRemoteVolumeCount() int64 {
- return n.remoteVolumeCount
-}
-func (n *NodeImpl) GetActiveVolumeCount() int64 {
- return n.activeVolumeCount
-}
-func (n *NodeImpl) GetMaxVolumeCount() int64 {
- return n.maxVolumeCount
-}
func (n *NodeImpl) LinkChildNode(node Node) {
n.Lock()
defer n.Unlock()
+ n.doLinkChildNode(node)
+}
+
+func (n *NodeImpl) doLinkChildNode(node Node) {
if n.children[node.Id()] == nil {
n.children[node.Id()] = node
- n.UpAdjustMaxVolumeCountDelta(node.GetMaxVolumeCount())
+ n.UpAdjustDiskUsageDelta(node.GetDiskUsages())
n.UpAdjustMaxVolumeId(node.GetMaxVolumeId())
- n.UpAdjustVolumeCountDelta(node.GetVolumeCount())
- n.UpAdjustRemoteVolumeCountDelta(node.GetRemoteVolumeCount())
- n.UpAdjustEcShardCountDelta(node.GetEcShardCount())
- n.UpAdjustActiveVolumeCountDelta(node.GetActiveVolumeCount())
node.SetParent(n)
glog.V(0).Infoln(n, "adds child", node.Id())
}
@@ -263,11 +230,7 @@ func (n *NodeImpl) UnlinkChildNode(nodeId NodeId) {
if node != nil {
node.SetParent(nil)
delete(n.children, node.Id())
- n.UpAdjustVolumeCountDelta(-node.GetVolumeCount())
- n.UpAdjustRemoteVolumeCountDelta(-node.GetRemoteVolumeCount())
- n.UpAdjustEcShardCountDelta(-node.GetEcShardCount())
- n.UpAdjustActiveVolumeCountDelta(-node.GetActiveVolumeCount())
- n.UpAdjustMaxVolumeCountDelta(-node.GetMaxVolumeCount())
+ n.UpAdjustDiskUsageDelta(node.GetDiskUsages().negative())
glog.V(0).Infoln(n, "removes", node.Id())
}
}
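
The rewritten picker in node.go no longer chooses a uniform random first node: candidates are drawn without replacement with probability proportional to their available space, so nodes with more free volume slots tend to be ordered first, and filterFirstNodeFn is then applied down that order. A compact sketch of the weighted draw itself, using strings and plain int64 weights in place of Node values:

package main

import (
	"fmt"
	"math/rand"
)

// weightedShuffle orders candidates by repeated weighted draws without replacement,
// the same scheme PickNodesByWeight uses with free volume slots as weights.
// Weights are expected to be positive, mirroring the earlier filter that drops
// nodes with no available space.
func weightedShuffle(names []string, weights []int64) []string {
	var total int64
	for _, w := range weights {
		total += w
	}
	ordered := make([]string, 0, len(names))
	for range names {
		r := rand.Int63n(total)
		var acc int64
		for i, w := range weights {
			if r >= acc && r < acc+w {
				ordered = append(ordered, names[i])
				total -= w
				weights[i] = 0 // picked nodes no longer take part in later draws
				break
			}
			acc += w
		}
	}
	return ordered
}

func main() {
	// dn1 has the most free slots, so it is most likely to be ordered first
	fmt.Println(weightedShuffle([]string{"dn1", "dn2", "dn3"}, []int64{50, 30, 20}))
}
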
diff --git a/weed/topology/rack.go b/weed/topology/rack.go
index 1921c0c05..8eb2a717c 100644
--- a/weed/topology/rack.go
+++ b/weed/topology/rack.go
@@ -2,6 +2,7 @@ package topology
import (
"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
+ "github.com/chrislusf/seaweedfs/weed/storage/types"
"strconv"
"time"
)
@@ -14,6 +15,7 @@ func NewRack(id string) *Rack {
r := &Rack{}
r.id = NodeId(id)
r.nodeType = "Rack"
+ r.diskUsages = newDiskUsages()
r.children = make(map[NodeId]Node)
r.NodeImpl.value = r
return r
@@ -28,7 +30,7 @@ func (r *Rack) FindDataNode(ip string, port int) *DataNode {
}
return nil
}
-func (r *Rack) GetOrCreateDataNode(ip string, port int, publicUrl string, maxVolumeCount int64) *DataNode {
+func (r *Rack) GetOrCreateDataNode(ip string, port int, publicUrl string, maxVolumeCounts map[string]uint32) *DataNode {
for _, c := range r.Children() {
dn := c.(*DataNode)
if dn.MatchLocation(ip, port) {
@@ -40,17 +42,19 @@ func (r *Rack) GetOrCreateDataNode(ip string, port int, publicUrl string, maxVol
dn.Ip = ip
dn.Port = port
dn.PublicUrl = publicUrl
- dn.maxVolumeCount = maxVolumeCount
dn.LastSeen = time.Now().Unix()
r.LinkChildNode(dn)
+ for diskType, maxVolumeCount := range maxVolumeCounts {
+ disk := NewDisk(diskType)
+ disk.diskUsages.getOrCreateDisk(types.ToDiskType(diskType)).maxVolumeCount = int64(maxVolumeCount)
+ dn.LinkChildNode(disk)
+ }
return dn
}
func (r *Rack) ToMap() interface{} {
m := make(map[string]interface{})
m["Id"] = r.Id()
- m["Max"] = r.GetMaxVolumeCount()
- m["Free"] = r.FreeSpace()
var dns []interface{}
for _, c := range r.Children() {
dn := c.(*DataNode)
@@ -62,12 +66,8 @@ func (r *Rack) ToMap() interface{} {
func (r *Rack) ToRackInfo() *master_pb.RackInfo {
m := &master_pb.RackInfo{
- Id: string(r.Id()),
- VolumeCount: uint64(r.GetVolumeCount()),
- MaxVolumeCount: uint64(r.GetMaxVolumeCount()),
- FreeVolumeCount: uint64(r.FreeSpace()),
- ActiveVolumeCount: uint64(r.GetActiveVolumeCount()),
- RemoteVolumeCount: uint64(r.GetRemoteVolumeCount()),
+ Id: string(r.Id()),
+ DiskInfos: r.diskUsages.ToDiskInfo(),
}
for _, c := range r.Children() {
dn := c.(*DataNode)
diff --git a/weed/topology/store_replicate.go b/weed/topology/store_replicate.go
index b195b48ed..ea0a8c968 100644
--- a/weed/topology/store_replicate.go
+++ b/weed/topology/store_replicate.go
@@ -1,7 +1,6 @@
package topology
import (
- "bytes"
"encoding/json"
"errors"
"fmt"
@@ -15,30 +14,39 @@ import (
"github.com/chrislusf/seaweedfs/weed/security"
"github.com/chrislusf/seaweedfs/weed/storage"
"github.com/chrislusf/seaweedfs/weed/storage/needle"
+ "github.com/chrislusf/seaweedfs/weed/storage/types"
"github.com/chrislusf/seaweedfs/weed/util"
)
-func ReplicatedWrite(masterNode string, s *storage.Store,
- volumeId needle.VolumeId, n *needle.Needle,
- r *http.Request) (size uint32, isUnchanged bool, err error) {
+func ReplicatedWrite(masterFn operation.GetMasterFn, s *storage.Store, volumeId needle.VolumeId, n *needle.Needle, r *http.Request) (isUnchanged bool, err error) {
//check JWT
jwt := security.GetJwt(r)
+ // check whether this is a replicated write request
var remoteLocations []operation.Location
if r.FormValue("type") != "replicate" {
- remoteLocations, err = getWritableRemoteReplications(s, volumeId, masterNode)
+ // this is the initial request
+ remoteLocations, err = getWritableRemoteReplications(s, volumeId, masterFn)
if err != nil {
glog.V(0).Infoln(err)
return
}
}
- size, isUnchanged, err = s.WriteVolumeNeedle(volumeId, n)
- if err != nil {
- err = fmt.Errorf("failed to write to local disk: %v", err)
- glog.V(0).Infoln(err)
- return
+ // read fsync value
+ fsync := false
+ if r.FormValue("fsync") == "true" {
+ fsync = true
+ }
+
+ if s.GetVolume(volumeId) != nil {
+ isUnchanged, err = s.WriteVolumeNeedle(volumeId, n, fsync)
+ if err != nil {
+ err = fmt.Errorf("failed to write to local disk: %v", err)
+ glog.V(0).Infoln(err)
+ return
+ }
}
if len(remoteLocations) > 0 { //send to other replica locations
@@ -72,12 +80,11 @@ func ReplicatedWrite(masterNode string, s *storage.Store,
}
}
- _, err := operation.Upload(u.String(),
- string(n.Name), bytes.NewReader(n.Data), n.IsGzipped(), string(n.Mime),
- pairMap, jwt)
+ // volume server do not know about encryption
+ // TODO optimize here to compress data only once
+ _, err := operation.UploadData(u.String(), string(n.Name), false, n.Data, n.IsCompressed(), string(n.Mime), pairMap, jwt)
return err
}); err != nil {
- size = 0
err = fmt.Errorf("failed to write to replicas for volume %d: %v", volumeId, err)
glog.V(0).Infoln(err)
}
@@ -85,16 +92,16 @@ func ReplicatedWrite(masterNode string, s *storage.Store,
return
}
-func ReplicatedDelete(masterNode string, store *storage.Store,
+func ReplicatedDelete(masterFn operation.GetMasterFn, store *storage.Store,
volumeId needle.VolumeId, n *needle.Needle,
- r *http.Request) (size uint32, err error) {
+ r *http.Request) (size types.Size, err error) {
//check JWT
jwt := security.GetJwt(r)
var remoteLocations []operation.Location
if r.FormValue("type") != "replicate" {
- remoteLocations, err = getWritableRemoteReplications(store, volumeId, masterNode)
+ remoteLocations, err = getWritableRemoteReplications(store, volumeId, masterFn)
if err != nil {
glog.V(0).Infoln(err)
return
@@ -154,25 +161,34 @@ func distributedOperation(locations []operation.Location, store *storage.Store,
return ret.Error()
}
-func getWritableRemoteReplications(s *storage.Store, volumeId needle.VolumeId, masterNode string) (
+func getWritableRemoteReplications(s *storage.Store, volumeId needle.VolumeId, masterFn operation.GetMasterFn) (
remoteLocations []operation.Location, err error) {
- copyCount := s.GetVolume(volumeId).ReplicaPlacement.GetCopyCount()
- if copyCount > 1 {
- if lookupResult, lookupErr := operation.Lookup(masterNode, volumeId.String()); lookupErr == nil {
- if len(lookupResult.Locations) < copyCount {
- err = fmt.Errorf("replicating opetations [%d] is less than volume's replication copy count [%d]",
- len(lookupResult.Locations), copyCount)
- return
- }
- selfUrl := s.Ip + ":" + strconv.Itoa(s.Port)
- for _, location := range lookupResult.Locations {
- if location.Url != selfUrl {
- remoteLocations = append(remoteLocations, location)
- }
+
+ v := s.GetVolume(volumeId)
+ if v != nil && v.ReplicaPlacement.GetCopyCount() == 1 {
+ return
+ }
+
+ // not on local store, or has replications
+ lookupResult, lookupErr := operation.Lookup(masterFn, volumeId.String())
+ if lookupErr == nil {
+ selfUrl := s.Ip + ":" + strconv.Itoa(s.Port)
+ for _, location := range lookupResult.Locations {
+ if location.Url != selfUrl {
+ remoteLocations = append(remoteLocations, location)
}
- } else {
- err = fmt.Errorf("failed to lookup for %d: %v", volumeId, lookupErr)
- return
+ }
+ } else {
+ err = fmt.Errorf("failed to lookup for %d: %v", volumeId, lookupErr)
+ return
+ }
+
+ if v != nil {
+ // has one local and has remote replications
+ copyCount := v.ReplicaPlacement.GetCopyCount()
+ if len(lookupResult.Locations) < copyCount {
+ err = fmt.Errorf("replicating opetations [%d] is less than volume %d replication copy count [%d]",
+ len(lookupResult.Locations), volumeId, copyCount)
}
}
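
In store_replicate.go, ReplicatedWrite distinguishes the client-facing request from forwarded copies: only a request without type=replicate looks up the other replica locations and fans the write out, and each forward is tagged so the receiving server writes locally without fanning out again. A small sketch of the self-exclusion and the forward marker, with made-up addresses for illustration:

package main

import (
	"fmt"
	"net/url"
)

// pickRemoteReplicas mirrors the core of getWritableRemoteReplications: take the
// locations the master reported for the volume and drop this server's own url,
// so the initial write fans out only to the other replicas.
func pickRemoteReplicas(selfUrl string, locations []string) (remotes []string) {
	for _, loc := range locations {
		if loc != selfUrl {
			remotes = append(remotes, loc)
		}
	}
	return remotes
}

func main() {
	locations := []string{"10.0.0.1:8080", "10.0.0.2:8080", "10.0.0.3:8080"}
	fmt.Println(pickRemoteReplicas("10.0.0.2:8080", locations))

	// forwarded writes carry type=replicate so the receiver writes locally
	// and does not fan out a second time
	q := url.Values{}
	q.Set("type", "replicate")
	fmt.Println("forward with query:", q.Encode())
}
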
diff --git a/weed/topology/topology.go b/weed/topology/topology.go
index e6cb44727..08ebd24fd 100644
--- a/weed/topology/topology.go
+++ b/weed/topology/topology.go
@@ -3,8 +3,10 @@ package topology
import (
"errors"
"fmt"
+ "github.com/chrislusf/seaweedfs/weed/storage/types"
"math/rand"
"sync"
+ "time"
"github.com/chrislusf/raft"
@@ -27,7 +29,8 @@ type Topology struct {
pulse int64
- volumeSizeLimit uint64
+ volumeSizeLimit uint64
+ replicationAsMin bool
Sequence sequence.Sequencer
@@ -38,16 +41,18 @@ type Topology struct {
RaftServer raft.Server
}
-func NewTopology(id string, seq sequence.Sequencer, volumeSizeLimit uint64, pulse int) *Topology {
+func NewTopology(id string, seq sequence.Sequencer, volumeSizeLimit uint64, pulse int, replicationAsMin bool) *Topology {
t := &Topology{}
t.id = NodeId(id)
t.nodeType = "Topology"
t.NodeImpl.value = t
+ t.diskUsages = newDiskUsages()
t.children = make(map[NodeId]Node)
t.collectionMap = util.NewConcurrentReadMap()
t.ecShardMap = make(map[needle.VolumeId]*EcShardLocations)
t.pulse = int64(pulse)
t.volumeSizeLimit = volumeSizeLimit
+ t.replicationAsMin = replicationAsMin
t.Sequence = seq
@@ -60,29 +65,32 @@ func NewTopology(id string, seq sequence.Sequencer, volumeSizeLimit uint64, puls
func (t *Topology) IsLeader() bool {
if t.RaftServer != nil {
- return t.RaftServer.State() == raft.Leader
+ if t.RaftServer.State() == raft.Leader {
+ return true
+ }
}
return false
}
func (t *Topology) Leader() (string, error) {
l := ""
- if t.RaftServer != nil {
- l = t.RaftServer.Leader()
- } else {
- return "", errors.New("Raft Server not ready yet!")
- }
-
- if l == "" {
- // We are a single node cluster, we are the leader
- return t.RaftServer.Name(), errors.New("Raft Server not initialized!")
+ for count := 0; count < 3; count++ {
+ if t.RaftServer != nil {
+ l = t.RaftServer.Leader()
+ } else {
+ return "", errors.New("Raft Server not ready yet!")
+ }
+ if l != "" {
+ break
+ } else {
+ time.Sleep(time.Duration(5+count) * time.Second)
+ }
}
-
return l, nil
}
func (t *Topology) Lookup(collection string, vid needle.VolumeId) (dataNodes []*DataNode) {
- //maybe an issue if lots of collections?
+ // maybe an issue if lots of collections?
if collection == "" {
for _, c := range t.collectionMap.Items() {
if list := c.(*Collection).Lookup(vid); list != nil {
@@ -115,12 +123,12 @@ func (t *Topology) NextVolumeId() (needle.VolumeId, error) {
}
func (t *Topology) HasWritableVolume(option *VolumeGrowOption) bool {
- vl := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl)
+ vl := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl, option.DiskType)
return vl.GetActiveVolumeCount(option) > 0
}
func (t *Topology) PickForWrite(count uint64, option *VolumeGrowOption) (string, uint64, *DataNode, error) {
- vid, count, datanodes, err := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl).PickForWrite(count, option)
+ vid, count, datanodes, err := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl, option.DiskType).PickForWrite(count, option)
if err != nil {
return "", 0, nil, fmt.Errorf("failed to find writable volumes for collection:%s replication:%s ttl:%s error: %v", option.Collection, option.ReplicaPlacement.String(), option.Ttl.String(), err)
}
@@ -131,10 +139,10 @@ func (t *Topology) PickForWrite(count uint64, option *VolumeGrowOption) (string,
return needle.NewFileId(*vid, fileId, rand.Uint32()).String(), count, datanodes.Head(), nil
}
-func (t *Topology) GetVolumeLayout(collectionName string, rp *super_block.ReplicaPlacement, ttl *needle.TTL) *VolumeLayout {
+func (t *Topology) GetVolumeLayout(collectionName string, rp *super_block.ReplicaPlacement, ttl *needle.TTL, diskType types.DiskType) *VolumeLayout {
return t.collectionMap.Get(collectionName, func() interface{} {
- return NewCollection(collectionName, t.volumeSizeLimit)
- }).(*Collection).GetOrCreateVolumeLayout(rp, ttl)
+ return NewCollection(collectionName, t.volumeSizeLimit, t.replicationAsMin)
+ }).(*Collection).GetOrCreateVolumeLayout(rp, ttl, diskType)
}
func (t *Topology) ListCollections(includeNormalVolumes, includeEcVolumes bool) (ret []string) {
@@ -152,7 +160,7 @@ func (t *Topology) ListCollections(includeNormalVolumes, includeEcVolumes bool)
t.ecShardMapLock.RUnlock()
}
- for k, _ := range mapOfCollections {
+ for k := range mapOfCollections {
ret = append(ret, k)
}
return ret
@@ -170,15 +178,30 @@ func (t *Topology) DeleteCollection(collectionName string) {
t.collectionMap.Delete(collectionName)
}
+func (t *Topology) DeleteLayout(collectionName string, rp *super_block.ReplicaPlacement, ttl *needle.TTL, diskType types.DiskType) {
+ collection, found := t.FindCollection(collectionName)
+ if !found {
+ return
+ }
+ collection.DeleteVolumeLayout(rp, ttl, diskType)
+ if len(collection.storageType2VolumeLayout.Items()) == 0 {
+ t.DeleteCollection(collectionName)
+ }
+}
+
func (t *Topology) RegisterVolumeLayout(v storage.VolumeInfo, dn *DataNode) {
- t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl).RegisterVolume(&v, dn)
+ diskType := types.ToDiskType(v.DiskType)
+ vl := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl, diskType)
+ vl.RegisterVolume(&v, dn)
+ vl.EnsureCorrectWritables(&v)
}
func (t *Topology) UnRegisterVolumeLayout(v storage.VolumeInfo, dn *DataNode) {
- glog.Infof("removing volume info:%+v", v)
- volumeLayout := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl)
+ glog.Infof("removing volume info: %+v", v)
+ diskType := types.ToDiskType(v.DiskType)
+ volumeLayout := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl, diskType)
volumeLayout.UnRegisterVolume(&v, dn)
if volumeLayout.isEmpty() {
- t.DeleteCollection(v.Collection)
+ t.DeleteLayout(v.Collection, v.ReplicaPlacement, v.Ttl, diskType)
}
}
@@ -205,13 +228,19 @@ func (t *Topology) SyncDataNodeRegistration(volumes []*master_pb.VolumeInformati
}
}
// find out the delta volumes
- newVolumes, deletedVolumes = dn.UpdateVolumes(volumeInfos)
+ var changedVolumes []storage.VolumeInfo
+ newVolumes, deletedVolumes, changedVolumes = dn.UpdateVolumes(volumeInfos)
for _, v := range newVolumes {
t.RegisterVolumeLayout(v, dn)
}
for _, v := range deletedVolumes {
t.UnRegisterVolumeLayout(v, dn)
}
+ for _, v := range changedVolumes {
+ diskType := types.ToDiskType(v.DiskType)
+ vl := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl, diskType)
+ vl.EnsureCorrectWritables(&v)
+ }
return
}
diff --git a/weed/topology/topology_ec.go b/weed/topology/topology_ec.go
index 93b39bb5d..022eeb578 100644
--- a/weed/topology/topology_ec.go
+++ b/weed/topology/topology_ec.go
@@ -18,6 +18,7 @@ func (t *Topology) SyncDataNodeEcShards(shardInfos []*master_pb.VolumeEcShardInf
for _, shardInfo := range shardInfos {
shards = append(shards,
erasure_coding.NewEcVolumeInfo(
+ shardInfo.DiskType,
shardInfo.Collection,
needle.VolumeId(shardInfo.Id),
erasure_coding.ShardBits(shardInfo.EcIndexBits)))
@@ -39,6 +40,7 @@ func (t *Topology) IncrementalSyncDataNodeEcShards(newEcShards, deletedEcShards
for _, shardInfo := range newEcShards {
newShards = append(newShards,
erasure_coding.NewEcVolumeInfo(
+ shardInfo.DiskType,
shardInfo.Collection,
needle.VolumeId(shardInfo.Id),
erasure_coding.ShardBits(shardInfo.EcIndexBits)))
@@ -46,6 +48,7 @@ func (t *Topology) IncrementalSyncDataNodeEcShards(newEcShards, deletedEcShards
for _, shardInfo := range deletedEcShards {
deletedShards = append(deletedShards,
erasure_coding.NewEcVolumeInfo(
+ shardInfo.DiskType,
shardInfo.Collection,
needle.VolumeId(shardInfo.Id),
erasure_coding.ShardBits(shardInfo.EcIndexBits)))
diff --git a/weed/topology/topology_event_handling.go b/weed/topology/topology_event_handling.go
index 068bd401e..543dacf29 100644
--- a/weed/topology/topology_event_handling.go
+++ b/weed/topology/topology_event_handling.go
@@ -1,6 +1,7 @@
package topology
import (
+ "github.com/chrislusf/seaweedfs/weed/storage/types"
"google.golang.org/grpc"
"math/rand"
"time"
@@ -37,7 +38,8 @@ func (t *Topology) StartRefreshWritableVolumes(grpcDialOption grpc.DialOption, g
}()
}
func (t *Topology) SetVolumeCapacityFull(volumeInfo storage.VolumeInfo) bool {
- vl := t.GetVolumeLayout(volumeInfo.Collection, volumeInfo.ReplicaPlacement, volumeInfo.Ttl)
+ diskType := types.ToDiskType(volumeInfo.DiskType)
+ vl := t.GetVolumeLayout(volumeInfo.Collection, volumeInfo.ReplicaPlacement, volumeInfo.Ttl, diskType)
if !vl.SetVolumeCapacityFull(volumeInfo.Id) {
return false
}
@@ -47,7 +49,13 @@ func (t *Topology) SetVolumeCapacityFull(volumeInfo storage.VolumeInfo) bool {
for _, dn := range vl.vid2location[volumeInfo.Id].list {
if !volumeInfo.ReadOnly {
- dn.UpAdjustActiveVolumeCountDelta(-1)
+
+ disk := dn.getOrCreateDisk(volumeInfo.DiskType)
+ deltaDiskUsages := newDiskUsages()
+ deltaDiskUsage := deltaDiskUsages.getOrCreateDisk(types.ToDiskType(volumeInfo.DiskType))
+ deltaDiskUsage.activeVolumeCount = -1
+ disk.UpAdjustDiskUsageDelta(deltaDiskUsages)
+
}
}
return true
@@ -55,13 +63,14 @@ func (t *Topology) SetVolumeCapacityFull(volumeInfo storage.VolumeInfo) bool {
func (t *Topology) UnRegisterDataNode(dn *DataNode) {
for _, v := range dn.GetVolumes() {
glog.V(0).Infoln("Removing Volume", v.Id, "from the dead volume server", dn.Id())
- vl := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl)
+ diskType := types.ToDiskType(v.DiskType)
+ vl := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl, diskType)
vl.SetVolumeUnavailable(dn, v.Id)
}
- dn.UpAdjustVolumeCountDelta(-dn.GetVolumeCount())
- dn.UpAdjustRemoteVolumeCountDelta(-dn.GetRemoteVolumeCount())
- dn.UpAdjustActiveVolumeCountDelta(-dn.GetActiveVolumeCount())
- dn.UpAdjustMaxVolumeCountDelta(-dn.GetMaxVolumeCount())
+
+ negativeUsages := dn.GetDiskUsages().negative()
+ dn.UpAdjustDiskUsageDelta(negativeUsages)
+
if dn.Parent() != nil {
dn.Parent().UnlinkChildNode(dn.Id())
}
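The UnRegisterDataNode rewrite above folds four separate counter rollbacks into one: take the dead node's accumulated per-disk usages, negate every counter, and push that single delta up the tree. A rough sketch of the idea, with illustrative field names rather than the real diskUsages layout:

package main

import "fmt"

// diskUsageCounts stands in for the per-disk-type counters a node accumulates.
type diskUsageCounts struct {
	volumeCount       int64
	activeVolumeCount int64
	remoteVolumeCount int64
	maxVolumeCount    int64
}

// negative builds a delta that, applied upward, cancels this node's
// contribution to every ancestor's totals in a single pass.
func (u diskUsageCounts) negative() diskUsageCounts {
	return diskUsageCounts{
		volumeCount:       -u.volumeCount,
		activeVolumeCount: -u.activeVolumeCount,
		remoteVolumeCount: -u.remoteVolumeCount,
		maxVolumeCount:    -u.maxVolumeCount,
	}
}

func main() {
	used := diskUsageCounts{volumeCount: 14, activeVolumeCount: 12, maxVolumeCount: 25}
	fmt.Printf("%+v\n", used.negative())
	// {volumeCount:-14 activeVolumeCount:-12 remoteVolumeCount:0 maxVolumeCount:-25}
}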
diff --git a/weed/topology/topology_map.go b/weed/topology/topology_map.go
index 73c55d77d..0fedb6221 100644
--- a/weed/topology/topology_map.go
+++ b/weed/topology/topology_map.go
@@ -4,8 +4,8 @@ import "github.com/chrislusf/seaweedfs/weed/pb/master_pb"
func (t *Topology) ToMap() interface{} {
m := make(map[string]interface{})
- m["Max"] = t.GetMaxVolumeCount()
- m["Free"] = t.FreeSpace()
+ m["Max"] = t.diskUsages.GetMaxVolumeCount()
+ m["Free"] = t.diskUsages.FreeSpace()
var dcs []interface{}
for _, c := range t.Children() {
dc := c.(*DataCenter)
@@ -29,8 +29,8 @@ func (t *Topology) ToMap() interface{} {
func (t *Topology) ToVolumeMap() interface{} {
m := make(map[string]interface{})
- m["Max"] = t.GetMaxVolumeCount()
- m["Free"] = t.FreeSpace()
+ m["Max"] = t.diskUsages.GetMaxVolumeCount()
+ m["Free"] = t.diskUsages.FreeSpace()
dcs := make(map[NodeId]interface{})
for _, c := range t.Children() {
dc := c.(*DataCenter)
@@ -80,12 +80,8 @@ func (t *Topology) ToVolumeLocations() (volumeLocations []*master_pb.VolumeLocat
func (t *Topology) ToTopologyInfo() *master_pb.TopologyInfo {
m := &master_pb.TopologyInfo{
- Id: string(t.Id()),
- VolumeCount: uint64(t.GetVolumeCount()),
- MaxVolumeCount: uint64(t.GetMaxVolumeCount()),
- FreeVolumeCount: uint64(t.FreeSpace()),
- ActiveVolumeCount: uint64(t.GetActiveVolumeCount()),
- RemoteVolumeCount: uint64(t.GetRemoteVolumeCount()),
+ Id: string(t.Id()),
+ DiskInfos: t.diskUsages.ToDiskInfo(),
}
for _, c := range t.Children() {
dc := c.(*DataCenter)
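ToMap and ToTopologyInfo now pull their totals from t.diskUsages instead of per-node counters. The aggregation itself is defined elsewhere in this change set; as a hedged guess at its shape, "Max" would sum maxVolumeCount across disk types and "Free" the remaining slots:

package sketch

// usage and maxAndFree are an assumed shape for diskUsages.GetMaxVolumeCount()
// and diskUsages.FreeSpace(); names and fields are illustrative only.
type usage struct {
	maxVolumeCount    int64
	volumeCount       int64
	remoteVolumeCount int64
}

func maxAndFree(perDisk map[string]usage) (max, free int64) {
	for _, u := range perDisk {
		max += u.maxVolumeCount
		free += u.maxVolumeCount - u.volumeCount - u.remoteVolumeCount
	}
	return
}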
diff --git a/weed/topology/topology_test.go b/weed/topology/topology_test.go
index e7676ccf7..ecfe9d8d1 100644
--- a/weed/topology/topology_test.go
+++ b/weed/topology/topology_test.go
@@ -6,6 +6,7 @@ import (
"github.com/chrislusf/seaweedfs/weed/storage"
"github.com/chrislusf/seaweedfs/weed/storage/needle"
"github.com/chrislusf/seaweedfs/weed/storage/super_block"
+ "github.com/chrislusf/seaweedfs/weed/storage/types"
"testing"
)
@@ -13,21 +14,24 @@ import (
func TestRemoveDataCenter(t *testing.T) {
topo := setup(topologyLayout)
topo.UnlinkChildNode(NodeId("dc2"))
- if topo.GetActiveVolumeCount() != 15 {
+ if topo.diskUsages.usages[types.HardDriveType].activeVolumeCount != 15 {
t.Fail()
}
topo.UnlinkChildNode(NodeId("dc3"))
- if topo.GetActiveVolumeCount() != 12 {
+ if topo.diskUsages.usages[types.HardDriveType].activeVolumeCount != 12 {
t.Fail()
}
}
func TestHandlingVolumeServerHeartbeat(t *testing.T) {
- topo := NewTopology("weedfs", sequence.NewMemorySequencer(), 32*1024, 5)
+ topo := NewTopology("weedfs", sequence.NewMemorySequencer(), 32*1024, 5, false)
dc := topo.GetOrCreateDataCenter("dc1")
rack := dc.GetOrCreateRack("rack1")
- dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, "127.0.0.1", 25)
+ maxVolumeCounts := make(map[string]uint32)
+ maxVolumeCounts[""] = 25
+ maxVolumeCounts["ssd"] = 12
+ dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, "127.0.0.1", maxVolumeCounts)
{
volumeCount := 7
@@ -48,10 +52,30 @@ func TestHandlingVolumeServerHeartbeat(t *testing.T) {
volumeMessages = append(volumeMessages, volumeMessage)
}
+ for k := 1; k <= volumeCount; k++ {
+ volumeMessage := &master_pb.VolumeInformationMessage{
+ Id: uint32(volumeCount + k),
+ Size: uint64(25432),
+ Collection: "",
+ FileCount: uint64(2343),
+ DeleteCount: uint64(345),
+ DeletedByteCount: 34524,
+ ReadOnly: false,
+ ReplicaPlacement: uint32(0),
+ Version: uint32(needle.CurrentVersion),
+ Ttl: 0,
+ DiskType: "ssd",
+ }
+ volumeMessages = append(volumeMessages, volumeMessage)
+ }
+
topo.SyncDataNodeRegistration(volumeMessages, dn)
- assert(t, "activeVolumeCount1", int(topo.activeVolumeCount), volumeCount)
- assert(t, "volumeCount", int(topo.volumeCount), volumeCount)
+ usageCounts := topo.diskUsages.usages[types.HardDriveType]
+
+ assert(t, "activeVolumeCount1", int(usageCounts.activeVolumeCount), volumeCount)
+ assert(t, "volumeCount", int(usageCounts.volumeCount), volumeCount)
+ assert(t, "ssdVolumeCount", int(topo.diskUsages.usages[types.SsdType].volumeCount), volumeCount)
}
{
@@ -78,8 +102,10 @@ func TestHandlingVolumeServerHeartbeat(t *testing.T) {
//layout := topo.GetVolumeLayout("", rp, needle.EMPTY_TTL)
//assert(t, "writables", len(layout.writables), volumeCount)
- assert(t, "activeVolumeCount1", int(topo.activeVolumeCount), volumeCount)
- assert(t, "volumeCount", int(topo.volumeCount), volumeCount)
+ usageCounts := topo.diskUsages.usages[types.HardDriveType]
+
+ assert(t, "activeVolumeCount1", int(usageCounts.activeVolumeCount), volumeCount)
+ assert(t, "volumeCount", int(usageCounts.volumeCount), volumeCount)
}
{
@@ -96,26 +122,28 @@ func TestHandlingVolumeServerHeartbeat(t *testing.T) {
nil,
dn)
rp, _ := super_block.NewReplicaPlacementFromString("000")
- layout := topo.GetVolumeLayout("", rp, needle.EMPTY_TTL)
+ layout := topo.GetVolumeLayout("", rp, needle.EMPTY_TTL, types.HardDriveType)
assert(t, "writables after repeated add", len(layout.writables), volumeCount)
- assert(t, "activeVolumeCount1", int(topo.activeVolumeCount), volumeCount)
- assert(t, "volumeCount", int(topo.volumeCount), volumeCount)
+ usageCounts := topo.diskUsages.usages[types.HardDriveType]
+
+ assert(t, "activeVolumeCount1", int(usageCounts.activeVolumeCount), volumeCount)
+ assert(t, "volumeCount", int(usageCounts.volumeCount), volumeCount)
topo.IncrementalSyncDataNodeRegistration(
nil,
[]*master_pb.VolumeShortInformationMessage{newVolumeShortMessage},
dn)
assert(t, "writables after deletion", len(layout.writables), volumeCount-1)
- assert(t, "activeVolumeCount1", int(topo.activeVolumeCount), volumeCount-1)
- assert(t, "volumeCount", int(topo.volumeCount), volumeCount-1)
+ assert(t, "activeVolumeCount1", int(usageCounts.activeVolumeCount), volumeCount-1)
+ assert(t, "volumeCount", int(usageCounts.volumeCount), volumeCount-1)
topo.IncrementalSyncDataNodeRegistration(
[]*master_pb.VolumeShortInformationMessage{newVolumeShortMessage},
nil,
dn)
- for vid, _ := range layout.vid2location {
+ for vid := range layout.vid2location {
println("after add volume id", vid)
}
for _, vid := range layout.writables {
@@ -128,7 +156,9 @@ func TestHandlingVolumeServerHeartbeat(t *testing.T) {
topo.UnRegisterDataNode(dn)
- assert(t, "activeVolumeCount2", int(topo.activeVolumeCount), 0)
+ usageCounts := topo.diskUsages.usages[types.HardDriveType]
+
+ assert(t, "activeVolumeCount2", int(usageCounts.activeVolumeCount), 0)
}
@@ -140,16 +170,20 @@ func assert(t *testing.T, message string, actual, expected int) {
func TestAddRemoveVolume(t *testing.T) {
- topo := NewTopology("weedfs", sequence.NewMemorySequencer(), 32*1024, 5)
+ topo := NewTopology("weedfs", sequence.NewMemorySequencer(), 32*1024, 5, false)
dc := topo.GetOrCreateDataCenter("dc1")
rack := dc.GetOrCreateRack("rack1")
- dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, "127.0.0.1", 25)
+ maxVolumeCounts := make(map[string]uint32)
+ maxVolumeCounts[""] = 25
+ maxVolumeCounts["ssd"] = 12
+ dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, "127.0.0.1", maxVolumeCounts)
v := storage.VolumeInfo{
Id: needle.VolumeId(1),
Size: 100,
Collection: "xcollection",
+ DiskType: "ssd",
FileCount: 123,
DeleteCount: 23,
DeletedByteCount: 45,
diff --git a/weed/topology/topology_vacuum.go b/weed/topology/topology_vacuum.go
index ca626e973..9feb55b73 100644
--- a/weed/topology/topology_vacuum.go
+++ b/weed/topology/topology_vacuum.go
@@ -5,15 +5,16 @@ import (
"sync/atomic"
"time"
- "github.com/chrislusf/seaweedfs/weed/storage/needle"
"google.golang.org/grpc"
+ "github.com/chrislusf/seaweedfs/weed/storage/needle"
+
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/operation"
"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
)
-func batchVacuumVolumeCheck(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId,
+func (t *Topology) batchVacuumVolumeCheck(grpcDialOption grpc.DialOption, vid needle.VolumeId,
locationlist *VolumeLocationList, garbageThreshold float64) (*VolumeLocationList, bool) {
ch := make(chan int, locationlist.Length())
errCount := int32(0)
@@ -41,19 +42,23 @@ func batchVacuumVolumeCheck(grpcDialOption grpc.DialOption, vl *VolumeLayout, vi
}(index, dn.Url(), vid)
}
vacuumLocationList := NewVolumeLocationList()
+
+ waitTimeout := time.NewTimer(time.Minute * time.Duration(t.volumeSizeLimit/1024/1024/1000+1))
+ defer waitTimeout.Stop()
+
for range locationlist.list {
select {
case index := <-ch:
if index != -1 {
vacuumLocationList.list = append(vacuumLocationList.list, locationlist.list[index])
}
- case <-time.After(30 * time.Minute):
+ case <-waitTimeout.C:
return vacuumLocationList, false
}
}
return vacuumLocationList, errCount == 0 && len(vacuumLocationList.list) > 0
}
-func batchVacuumVolumeCompact(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId,
+func (t *Topology) batchVacuumVolumeCompact(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId,
locationlist *VolumeLocationList, preallocate int64) bool {
vl.accessLock.Lock()
vl.removeFromWritable(vid)
@@ -65,7 +70,8 @@ func batchVacuumVolumeCompact(grpcDialOption grpc.DialOption, vl *VolumeLayout,
glog.V(0).Infoln(index, "Start vacuuming", vid, "on", url)
err := operation.WithVolumeServerClient(url, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
_, err := volumeServerClient.VacuumVolumeCompact(context.Background(), &volume_server_pb.VacuumVolumeCompactRequest{
- VolumeId: uint32(vid),
+ VolumeId: uint32(vid),
+ Preallocate: preallocate,
})
return err
})
@@ -79,24 +85,32 @@ func batchVacuumVolumeCompact(grpcDialOption grpc.DialOption, vl *VolumeLayout,
}(index, dn.Url(), vid)
}
isVacuumSuccess := true
+
+ waitTimeout := time.NewTimer(3 * time.Minute * time.Duration(t.volumeSizeLimit/1024/1024/1000+1))
+ defer waitTimeout.Stop()
+
for range locationlist.list {
select {
case canCommit := <-ch:
isVacuumSuccess = isVacuumSuccess && canCommit
- case <-time.After(30 * time.Minute):
+ case <-waitTimeout.C:
return false
}
}
return isVacuumSuccess
}
-func batchVacuumVolumeCommit(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId, locationlist *VolumeLocationList) bool {
+func (t *Topology) batchVacuumVolumeCommit(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId, locationlist *VolumeLocationList) bool {
isCommitSuccess := true
+ isReadOnly := false
for _, dn := range locationlist.list {
glog.V(0).Infoln("Start Committing vacuum", vid, "on", dn.Url())
err := operation.WithVolumeServerClient(dn.Url(), grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
- _, err := volumeServerClient.VacuumVolumeCommit(context.Background(), &volume_server_pb.VacuumVolumeCommitRequest{
+ resp, err := volumeServerClient.VacuumVolumeCommit(context.Background(), &volume_server_pb.VacuumVolumeCommitRequest{
VolumeId: uint32(vid),
})
+ if resp != nil && resp.IsReadOnly {
+ isReadOnly = true
+ }
return err
})
if err != nil {
@@ -105,13 +119,15 @@ func batchVacuumVolumeCommit(grpcDialOption grpc.DialOption, vl *VolumeLayout, v
} else {
glog.V(0).Infof("Complete Committing vacuum %d on %s", vid, dn.Url())
}
- if isCommitSuccess {
- vl.SetVolumeAvailable(dn, vid)
+ }
+ if isCommitSuccess {
+ for _, dn := range locationlist.list {
+ vl.SetVolumeAvailable(dn, vid, isReadOnly)
}
}
return isCommitSuccess
}
-func batchVacuumVolumeCleanup(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId, locationlist *VolumeLocationList) {
+func (t *Topology) batchVacuumVolumeCleanup(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId, locationlist *VolumeLocationList) {
for _, dn := range locationlist.list {
glog.V(0).Infoln("Start cleaning up", vid, "on", dn.Url())
err := operation.WithVolumeServerClient(dn.Url(), grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
@@ -128,12 +144,12 @@ func batchVacuumVolumeCleanup(grpcDialOption grpc.DialOption, vl *VolumeLayout,
}
}
-func (t *Topology) Vacuum(grpcDialOption grpc.DialOption, garbageThreshold float64, preallocate int64) int {
+func (t *Topology) Vacuum(grpcDialOption grpc.DialOption, garbageThreshold float64, preallocate int64) {
// if there is vacuum going on, return immediately
swapped := atomic.CompareAndSwapInt64(&t.vacuumLockCounter, 0, 1)
if !swapped {
- return 0
+ return
}
defer atomic.StoreInt64(&t.vacuumLockCounter, 0)
@@ -145,39 +161,37 @@ func (t *Topology) Vacuum(grpcDialOption grpc.DialOption, garbageThreshold float
for _, vl := range c.storageType2VolumeLayout.Items() {
if vl != nil {
volumeLayout := vl.(*VolumeLayout)
- vacuumOneVolumeLayout(grpcDialOption, volumeLayout, c, garbageThreshold, preallocate)
+ t.vacuumOneVolumeLayout(grpcDialOption, volumeLayout, c, garbageThreshold, preallocate)
}
}
}
- return 0
}
-func vacuumOneVolumeLayout(grpcDialOption grpc.DialOption, volumeLayout *VolumeLayout, c *Collection, garbageThreshold float64, preallocate int64) {
+func (t *Topology) vacuumOneVolumeLayout(grpcDialOption grpc.DialOption, volumeLayout *VolumeLayout, c *Collection, garbageThreshold float64, preallocate int64) {
volumeLayout.accessLock.RLock()
tmpMap := make(map[needle.VolumeId]*VolumeLocationList)
for vid, locationList := range volumeLayout.vid2location {
- tmpMap[vid] = locationList
+ tmpMap[vid] = locationList.Copy()
}
volumeLayout.accessLock.RUnlock()
for vid, locationList := range tmpMap {
volumeLayout.accessLock.RLock()
- isReadOnly, hasValue := volumeLayout.readonlyVolumes[vid]
+ isReadOnly := volumeLayout.readonlyVolumes.IsTrue(vid)
volumeLayout.accessLock.RUnlock()
- if hasValue && isReadOnly {
+ if isReadOnly {
continue
}
glog.V(2).Infof("check vacuum on collection:%s volume:%d", c.Name, vid)
- if vacuumLocationList, needVacuum := batchVacuumVolumeCheck(
- grpcDialOption, volumeLayout, vid, locationList, garbageThreshold); needVacuum {
- if batchVacuumVolumeCompact(grpcDialOption, volumeLayout, vid, vacuumLocationList, preallocate) {
- batchVacuumVolumeCommit(grpcDialOption, volumeLayout, vid, vacuumLocationList)
+ if vacuumLocationList, needVacuum := t.batchVacuumVolumeCheck(grpcDialOption, vid, locationList, garbageThreshold); needVacuum {
+ if t.batchVacuumVolumeCompact(grpcDialOption, volumeLayout, vid, vacuumLocationList, preallocate) {
+ t.batchVacuumVolumeCommit(grpcDialOption, volumeLayout, vid, vacuumLocationList)
} else {
- batchVacuumVolumeCleanup(grpcDialOption, volumeLayout, vid, vacuumLocationList)
+ t.batchVacuumVolumeCleanup(grpcDialOption, volumeLayout, vid, vacuumLocationList)
}
}
}
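The fixed 30-minute waits in the vacuum rounds are replaced by timers that scale with the configured volume size limit. A quick check of the new formula, assuming volumeSizeLimit is stored in bytes: a 30 GiB limit yields a 31-minute window per check round and a 93-minute window per compaction round.

package main

import (
	"fmt"
	"time"
)

func main() {
	volumeSizeLimit := uint64(30) * 1024 * 1024 * 1024 // assumed 30 GiB limit, in bytes
	check := time.Minute * time.Duration(volumeSizeLimit/1024/1024/1000+1)
	compact := 3 * time.Minute * time.Duration(volumeSizeLimit/1024/1024/1000+1)
	fmt.Println(check, compact) // 31m0s 1h33m0s
}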
diff --git a/weed/topology/volume_growth.go b/weed/topology/volume_growth.go
index 80fbc86cd..8941a049b 100644
--- a/weed/topology/volume_growth.go
+++ b/weed/topology/volume_growth.go
@@ -1,12 +1,15 @@
package topology
import (
+ "encoding/json"
"fmt"
+ "github.com/chrislusf/seaweedfs/weed/storage/types"
"math/rand"
"sync"
"github.com/chrislusf/seaweedfs/weed/storage/needle"
"github.com/chrislusf/seaweedfs/weed/storage/super_block"
+ "github.com/chrislusf/seaweedfs/weed/util"
"google.golang.org/grpc"
@@ -23,14 +26,15 @@ This package is created to resolve these replica placement issues:
*/
type VolumeGrowOption struct {
- Collection string
- ReplicaPlacement *super_block.ReplicaPlacement
- Ttl *needle.TTL
- Prealloacte int64
- DataCenter string
- Rack string
- DataNode string
- MemoryMapMaxSizeMb uint32
+ Collection string `json:"collection,omitempty"`
+ ReplicaPlacement *super_block.ReplicaPlacement `json:"replication,omitempty"`
+ Ttl *needle.TTL `json:"ttl,omitempty"`
+ DiskType types.DiskType `json:"disk,omitempty"`
+ Prealloacte int64 `json:"prealloacte,omitempty"`
+ DataCenter string `json:"dataCenter,omitempty"`
+ Rack string `json:"rack,omitempty"`
+ DataNode string `json:"dataNode,omitempty"`
+ MemoryMapMaxSizeMb uint32 `json:"memoryMapMaxSizeMb,omitempty"`
}
type VolumeGrowth struct {
@@ -38,7 +42,8 @@ type VolumeGrowth struct {
}
func (o *VolumeGrowOption) String() string {
- return fmt.Sprintf("Collection:%s, ReplicaPlacement:%v, Ttl:%v, DataCenter:%s, Rack:%s, DataNode:%s", o.Collection, o.ReplicaPlacement, o.Ttl, o.DataCenter, o.Rack, o.DataNode)
+ blob, _ := json.Marshal(o)
+ return string(blob)
}
func NewDefaultVolumeGrowth() *VolumeGrowth {
@@ -48,15 +53,20 @@ func NewDefaultVolumeGrowth() *VolumeGrowth {
// one replication type may need rp.GetCopyCount() actual volumes
// given copyCount, how many logical volumes to create
func (vg *VolumeGrowth) findVolumeCount(copyCount int) (count int) {
+ v := util.GetViper()
+ v.SetDefault("master.volume_growth.copy_1", 7)
+ v.SetDefault("master.volume_growth.copy_2", 6)
+ v.SetDefault("master.volume_growth.copy_3", 3)
+ v.SetDefault("master.volume_growth.copy_other", 1)
switch copyCount {
case 1:
- count = 7
+ count = v.GetInt("master.volume_growth.copy_1")
case 2:
- count = 6
+ count = v.GetInt("master.volume_growth.copy_2")
case 3:
- count = 3
+ count = v.GetInt("master.volume_growth.copy_3")
default:
- count = 1
+ count = v.GetInt("master.volume_growth.copy_other")
}
return
}
@@ -79,6 +89,7 @@ func (vg *VolumeGrowth) GrowByCountAndType(grpcDialOption grpc.DialOption, targe
if c, e := vg.findAndGrow(grpcDialOption, topo, option); e == nil {
counter += c
} else {
+ glog.V(0).Infof("create %d volume, created %d: %v", targetCount, counter, e)
return counter, e
}
}
@@ -106,21 +117,21 @@ func (vg *VolumeGrowth) findAndGrow(grpcDialOption grpc.DialOption, topo *Topolo
func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *VolumeGrowOption) (servers []*DataNode, err error) {
//find main datacenter and other data centers
rp := option.ReplicaPlacement
- mainDataCenter, otherDataCenters, dc_err := topo.RandomlyPickNodes(rp.DiffDataCenterCount+1, func(node Node) error {
+ mainDataCenter, otherDataCenters, dc_err := topo.PickNodesByWeight(rp.DiffDataCenterCount+1, option, func(node Node) error {
if option.DataCenter != "" && node.IsDataCenter() && node.Id() != NodeId(option.DataCenter) {
return fmt.Errorf("Not matching preferred data center:%s", option.DataCenter)
}
if len(node.Children()) < rp.DiffRackCount+1 {
return fmt.Errorf("Only has %d racks, not enough for %d.", len(node.Children()), rp.DiffRackCount+1)
}
- if node.FreeSpace() < int64(rp.DiffRackCount+rp.SameRackCount+1) {
- return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), rp.DiffRackCount+rp.SameRackCount+1)
+ if node.AvailableSpaceFor(option) < int64(rp.DiffRackCount+rp.SameRackCount+1) {
+ return fmt.Errorf("Free:%d < Expected:%d", node.AvailableSpaceFor(option), rp.DiffRackCount+rp.SameRackCount+1)
}
possibleRacksCount := 0
for _, rack := range node.Children() {
possibleDataNodesCount := 0
for _, n := range rack.Children() {
- if n.FreeSpace() >= 1 {
+ if n.AvailableSpaceFor(option) >= 1 {
possibleDataNodesCount++
}
}
@@ -138,12 +149,12 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum
}
//find main rack and other racks
- mainRack, otherRacks, rackErr := mainDataCenter.(*DataCenter).RandomlyPickNodes(rp.DiffRackCount+1, func(node Node) error {
+ mainRack, otherRacks, rackErr := mainDataCenter.(*DataCenter).PickNodesByWeight(rp.DiffRackCount+1, option, func(node Node) error {
if option.Rack != "" && node.IsRack() && node.Id() != NodeId(option.Rack) {
return fmt.Errorf("Not matching preferred rack:%s", option.Rack)
}
- if node.FreeSpace() < int64(rp.SameRackCount+1) {
- return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), rp.SameRackCount+1)
+ if node.AvailableSpaceFor(option) < int64(rp.SameRackCount+1) {
+ return fmt.Errorf("Free:%d < Expected:%d", node.AvailableSpaceFor(option), rp.SameRackCount+1)
}
if len(node.Children()) < rp.SameRackCount+1 {
// a bit faster way to test free racks
@@ -151,7 +162,7 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum
}
possibleDataNodesCount := 0
for _, n := range node.Children() {
- if n.FreeSpace() >= 1 {
+ if n.AvailableSpaceFor(option) >= 1 {
possibleDataNodesCount++
}
}
@@ -165,12 +176,12 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum
}
//find main rack and other racks
- mainServer, otherServers, serverErr := mainRack.(*Rack).RandomlyPickNodes(rp.SameRackCount+1, func(node Node) error {
+ mainServer, otherServers, serverErr := mainRack.(*Rack).PickNodesByWeight(rp.SameRackCount+1, option, func(node Node) error {
if option.DataNode != "" && node.IsDataNode() && node.Id() != NodeId(option.DataNode) {
return fmt.Errorf("Not matching preferred data node:%s", option.DataNode)
}
- if node.FreeSpace() < 1 {
- return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), 1)
+ if node.AvailableSpaceFor(option) < 1 {
+ return fmt.Errorf("Free:%d < Expected:%d", node.AvailableSpaceFor(option), 1)
}
return nil
})
@@ -183,16 +194,16 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum
servers = append(servers, server.(*DataNode))
}
for _, rack := range otherRacks {
- r := rand.Int63n(rack.FreeSpace())
- if server, e := rack.ReserveOneVolume(r); e == nil {
+ r := rand.Int63n(rack.AvailableSpaceFor(option))
+ if server, e := rack.ReserveOneVolume(r, option); e == nil {
servers = append(servers, server)
} else {
return servers, e
}
}
for _, datacenter := range otherDataCenters {
- r := rand.Int63n(datacenter.FreeSpace())
- if server, e := datacenter.ReserveOneVolume(r); e == nil {
+ r := rand.Int63n(datacenter.AvailableSpaceFor(option))
+ if server, e := datacenter.ReserveOneVolume(r, option); e == nil {
servers = append(servers, server)
} else {
return servers, e
@@ -211,6 +222,7 @@ func (vg *VolumeGrowth) grow(grpcDialOption grpc.DialOption, topo *Topology, vid
ReplicaPlacement: option.ReplicaPlacement,
Ttl: option.Ttl,
Version: needle.CurrentVersion,
+ DiskType: string(option.DiskType),
}
server.AddOrUpdateVolume(vi)
topo.RegisterVolumeLayout(vi, server)
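findVolumeCount keeps its old defaults but now reads them through viper, so the master.volume_growth.copy_* keys shown above become tunable. Restated without the viper plumbing:

package sketch

// defaultGrowCount repeats the copyCount-to-volume-count defaults from the
// diff above, with the configuration lookups dropped for brevity.
func defaultGrowCount(copyCount int) int {
	switch copyCount {
	case 1:
		return 7
	case 2:
		return 6
	case 3:
		return 3
	default:
		return 1
	}
}

For example, replication "011" has a copy count of 3, so one automatic grow round would typically create 3 logical volumes, i.e. 9 physical replicas placed under the data-center/rack/node checks above.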
diff --git a/weed/topology/volume_growth_test.go b/weed/topology/volume_growth_test.go
index e3c5cc580..ab30cd43f 100644
--- a/weed/topology/volume_growth_test.go
+++ b/weed/topology/volume_growth_test.go
@@ -81,7 +81,7 @@ func setup(topologyLayout string) *Topology {
fmt.Println("data:", data)
//need to connect all nodes first before server adding volumes
- topo := NewTopology("weedfs", sequence.NewMemorySequencer(), 32*1024, 5)
+ topo := NewTopology("weedfs", sequence.NewMemorySequencer(), 32*1024, 5, false)
mTopology := data.(map[string]interface{})
for dcKey, dcValue := range mTopology {
dc := NewDataCenter(dcKey)
@@ -103,7 +103,13 @@ func setup(topologyLayout string) *Topology {
Version: needle.CurrentVersion}
server.AddOrUpdateVolume(vi)
}
- server.UpAdjustMaxVolumeCountDelta(int64(serverMap["limit"].(float64)))
+
+ disk := server.getOrCreateDisk("")
+ deltaDiskUsages := newDiskUsages()
+ deltaDiskUsage := deltaDiskUsages.getOrCreateDisk("")
+ deltaDiskUsage.maxVolumeCount = int64(serverMap["limit"].(float64))
+ disk.UpAdjustDiskUsageDelta(deltaDiskUsages)
+
}
}
}
@@ -131,3 +137,212 @@ func TestFindEmptySlotsForOneVolume(t *testing.T) {
fmt.Println("assigned node :", server.Id())
}
}
+
+var topologyLayout2 = `
+{
+ "dc1":{
+ "rack1":{
+ "server111":{
+ "volumes":[
+ {"id":1, "size":12312},
+ {"id":2, "size":12312},
+ {"id":3, "size":12312}
+ ],
+ "limit":300
+ },
+ "server112":{
+ "volumes":[
+ {"id":4, "size":12312},
+ {"id":5, "size":12312},
+ {"id":6, "size":12312}
+ ],
+ "limit":300
+ },
+ "server113":{
+ "volumes":[],
+ "limit":300
+ },
+ "server114":{
+ "volumes":[],
+ "limit":300
+ },
+ "server115":{
+ "volumes":[],
+ "limit":300
+ },
+ "server116":{
+ "volumes":[],
+ "limit":300
+ }
+ },
+ "rack2":{
+ "server121":{
+ "volumes":[
+ {"id":4, "size":12312},
+ {"id":5, "size":12312},
+ {"id":6, "size":12312}
+ ],
+ "limit":300
+ },
+ "server122":{
+ "volumes":[],
+ "limit":300
+ },
+ "server123":{
+ "volumes":[
+ {"id":2, "size":12312},
+ {"id":3, "size":12312},
+ {"id":4, "size":12312}
+ ],
+ "limit":300
+ },
+ "server124":{
+ "volumes":[],
+ "limit":300
+ },
+ "server125":{
+ "volumes":[],
+ "limit":300
+ },
+ "server126":{
+ "volumes":[],
+ "limit":300
+ }
+ },
+ "rack3":{
+ "server131":{
+ "volumes":[],
+ "limit":300
+ },
+ "server132":{
+ "volumes":[],
+ "limit":300
+ },
+ "server133":{
+ "volumes":[],
+ "limit":300
+ },
+ "server134":{
+ "volumes":[],
+ "limit":300
+ },
+ "server135":{
+ "volumes":[],
+ "limit":300
+ },
+ "server136":{
+ "volumes":[],
+ "limit":300
+ }
+ }
+ }
+}
+`
+
+func TestReplication011(t *testing.T) {
+ topo := setup(topologyLayout2)
+ vg := NewDefaultVolumeGrowth()
+ rp, _ := super_block.NewReplicaPlacementFromString("011")
+ volumeGrowOption := &VolumeGrowOption{
+ Collection: "MAIL",
+ ReplicaPlacement: rp,
+ DataCenter: "dc1",
+ Rack: "",
+ DataNode: "",
+ }
+ servers, err := vg.findEmptySlotsForOneVolume(topo, volumeGrowOption)
+ if err != nil {
+ fmt.Println("finding empty slots error :", err)
+ t.Fail()
+ }
+ for _, server := range servers {
+ fmt.Println("assigned node :", server.Id())
+ }
+}
+
+var topologyLayout3 = `
+{
+ "dc1":{
+ "rack1":{
+ "server111":{
+ "volumes":[],
+ "limit":2000
+ }
+ }
+ },
+ "dc2":{
+ "rack2":{
+ "server222":{
+ "volumes":[],
+ "limit":2000
+ }
+ }
+ },
+ "dc3":{
+ "rack3":{
+ "server333":{
+ "volumes":[],
+ "limit":1000
+ }
+ }
+ },
+ "dc4":{
+ "rack4":{
+ "server444":{
+ "volumes":[],
+ "limit":1000
+ }
+ }
+ },
+ "dc5":{
+ "rack5":{
+ "server555":{
+ "volumes":[],
+ "limit":500
+ }
+ }
+ },
+ "dc6":{
+ "rack6":{
+ "server666":{
+ "volumes":[],
+ "limit":500
+ }
+ }
+ }
+}
+`
+
+func TestFindEmptySlotsForOneVolumeScheduleByWeight(t *testing.T) {
+ topo := setup(topologyLayout3)
+ vg := NewDefaultVolumeGrowth()
+ rp, _ := super_block.NewReplicaPlacementFromString("100")
+ volumeGrowOption := &VolumeGrowOption{
+ Collection: "Weight",
+ ReplicaPlacement: rp,
+ DataCenter: "",
+ Rack: "",
+ DataNode: "",
+ }
+
+ distribution := map[NodeId]int{}
+ // assign 1000 volumes
+ for i := 0; i < 1000; i++ {
+ servers, err := vg.findEmptySlotsForOneVolume(topo, volumeGrowOption)
+ if err != nil {
+ fmt.Println("finding empty slots error :", err)
+ t.Fail()
+ }
+ for _, server := range servers {
+ // fmt.Println("assigned node :", server.Id())
+ if _, ok := distribution[server.id]; !ok {
+ distribution[server.id] = 0
+ }
+ distribution[server.id] += 1
+ }
+ }
+
+ for k, v := range distribution {
+ fmt.Printf("%s : %d\n", k, v)
+ }
+}
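The new TestFindEmptySlotsForOneVolumeScheduleByWeight exercises PickNodesByWeight: with replication "100" (two replicas in different data centers), 1000 volumes produce 2000 placements, and if picks track each server's free slots the counts should land roughly in proportion to the limits. A back-of-the-envelope expectation (it ignores the different-data-center constraint, so it is only approximate):

package main

import "fmt"

func main() {
	limits := map[string]int{
		"server111": 2000, "server222": 2000,
		"server333": 1000, "server444": 1000,
		"server555": 500, "server666": 500,
	}
	total := 0
	for _, l := range limits {
		total += l
	}
	placements := 1000 * 2 // volumes x replicas
	for name, l := range limits {
		// roughly 571 / 285 / 142 for limits 2000 / 1000 / 500
		fmt.Printf("%s ~ %d\n", name, placements*l/total)
	}
}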
diff --git a/weed/topology/volume_layout.go b/weed/topology/volume_layout.go
index 7633b28be..c7e171248 100644
--- a/weed/topology/volume_layout.go
+++ b/weed/topology/volume_layout.go
@@ -3,6 +3,7 @@ package topology
import (
"errors"
"fmt"
+ "github.com/chrislusf/seaweedfs/weed/storage/types"
"math/rand"
"sync"
"time"
@@ -13,15 +14,103 @@ import (
"github.com/chrislusf/seaweedfs/weed/storage/super_block"
)
+type copyState int
+
+const (
+ noCopies copyState = 0 + iota
+ insufficientCopies
+ enoughCopies
+)
+
+type volumeState string
+
+const (
+ readOnlyState volumeState = "ReadOnly"
+ oversizedState = "Oversized"
+)
+
+type stateIndicator func(copyState) bool
+
+func ExistCopies() stateIndicator {
+ return func(state copyState) bool { return state != noCopies }
+}
+
+func NoCopies() stateIndicator {
+ return func(state copyState) bool { return state == noCopies }
+}
+
+type volumesBinaryState struct {
+ rp *super_block.ReplicaPlacement
+ name volumeState // the name for volume state (eg. "Readonly", "Oversized")
+ indicator stateIndicator // indicate whether the volumes should be marked as `name`
+ copyMap map[needle.VolumeId]*VolumeLocationList
+}
+
+func NewVolumesBinaryState(name volumeState, rp *super_block.ReplicaPlacement, indicator stateIndicator) *volumesBinaryState {
+ return &volumesBinaryState{
+ rp: rp,
+ name: name,
+ indicator: indicator,
+ copyMap: make(map[needle.VolumeId]*VolumeLocationList),
+ }
+}
+
+func (v *volumesBinaryState) Dump() (res []uint32) {
+ for vid, list := range v.copyMap {
+ if v.indicator(v.copyState(list)) {
+ res = append(res, uint32(vid))
+ }
+ }
+ return
+}
+
+func (v *volumesBinaryState) IsTrue(vid needle.VolumeId) bool {
+ list, _ := v.copyMap[vid]
+ return v.indicator(v.copyState(list))
+}
+
+func (v *volumesBinaryState) Add(vid needle.VolumeId, dn *DataNode) {
+ list, _ := v.copyMap[vid]
+ if list != nil {
+ list.Set(dn)
+ return
+ }
+ list = NewVolumeLocationList()
+ list.Set(dn)
+ v.copyMap[vid] = list
+}
+
+func (v *volumesBinaryState) Remove(vid needle.VolumeId, dn *DataNode) {
+ list, _ := v.copyMap[vid]
+ if list != nil {
+ list.Remove(dn)
+ if list.Length() == 0 {
+ delete(v.copyMap, vid)
+ }
+ }
+}
+
+func (v *volumesBinaryState) copyState(list *VolumeLocationList) copyState {
+ if list == nil {
+ return noCopies
+ }
+ if list.Length() < v.rp.GetCopyCount() {
+ return insufficientCopies
+ }
+ return enoughCopies
+}
+
// mapping from volume to its locations, inverted from server to volume
type VolumeLayout struct {
rp *super_block.ReplicaPlacement
ttl *needle.TTL
+ diskType types.DiskType
vid2location map[needle.VolumeId]*VolumeLocationList
- writables []needle.VolumeId // transient array of writable volume id
- readonlyVolumes map[needle.VolumeId]bool // transient set of readonly volumes
- oversizedVolumes map[needle.VolumeId]bool // set of oversized volumes
+ writables []needle.VolumeId // transient array of writable volume id
+ readonlyVolumes *volumesBinaryState // readonly volumes
+ oversizedVolumes *volumesBinaryState // oversized volumes
volumeSizeLimit uint64
+ replicationAsMin bool
accessLock sync.RWMutex
}
@@ -31,19 +120,23 @@ type VolumeLayoutStats struct {
FileCount uint64
}
-func NewVolumeLayout(rp *super_block.ReplicaPlacement, ttl *needle.TTL, volumeSizeLimit uint64) *VolumeLayout {
+func NewVolumeLayout(rp *super_block.ReplicaPlacement, ttl *needle.TTL, diskType types.DiskType, volumeSizeLimit uint64, replicationAsMin bool) *VolumeLayout {
return &VolumeLayout{
rp: rp,
ttl: ttl,
+ diskType: diskType,
vid2location: make(map[needle.VolumeId]*VolumeLocationList),
writables: *new([]needle.VolumeId),
- readonlyVolumes: make(map[needle.VolumeId]bool),
- oversizedVolumes: make(map[needle.VolumeId]bool),
+ readonlyVolumes: NewVolumesBinaryState(readOnlyState, rp, ExistCopies()),
+ oversizedVolumes: NewVolumesBinaryState(oversizedState, rp, ExistCopies()),
volumeSizeLimit: volumeSizeLimit,
+ replicationAsMin: replicationAsMin,
}
}
func (vl *VolumeLayout) String() string {
+ vl.accessLock.RLock()
+ defer vl.accessLock.RUnlock()
return fmt.Sprintf("rp:%v, ttl:%v, vid2location:%v, writables:%v, volumeSizeLimit:%v", vl.rp, vl.ttl, vl.vid2location, vl.writables, vl.volumeSizeLimit)
}
@@ -51,6 +144,8 @@ func (vl *VolumeLayout) RegisterVolume(v *storage.VolumeInfo, dn *DataNode) {
vl.accessLock.Lock()
defer vl.accessLock.Unlock()
+ defer vl.rememberOversizedVolume(v, dn)
+
if _, ok := vl.vid2location[v.Id]; !ok {
vl.vid2location[v.Id] = NewVolumeLocationList()
}
@@ -61,27 +156,26 @@ func (vl *VolumeLayout) RegisterVolume(v *storage.VolumeInfo, dn *DataNode) {
if vInfo.ReadOnly {
glog.V(1).Infof("vid %d removed from writable", v.Id)
vl.removeFromWritable(v.Id)
- vl.readonlyVolumes[v.Id] = true
+ vl.readonlyVolumes.Add(v.Id, dn)
return
} else {
- delete(vl.readonlyVolumes, v.Id)
+ vl.readonlyVolumes.Remove(v.Id, dn)
}
} else {
glog.V(1).Infof("vid %d removed from writable", v.Id)
vl.removeFromWritable(v.Id)
- delete(vl.readonlyVolumes, v.Id)
+ vl.readonlyVolumes.Remove(v.Id, dn)
return
}
}
- vl.rememberOversizedVolume(v)
- vl.ensureCorrectWritables(v)
-
}
-func (vl *VolumeLayout) rememberOversizedVolume(v *storage.VolumeInfo) {
+func (vl *VolumeLayout) rememberOversizedVolume(v *storage.VolumeInfo, dn *DataNode) {
if vl.isOversized(v) {
- vl.oversizedVolumes[v.Id] = true
+ vl.oversizedVolumes.Add(v.Id, dn)
+ } else {
+ vl.oversizedVolumes.Remove(v.Id, dn)
}
}
@@ -97,7 +191,9 @@ func (vl *VolumeLayout) UnRegisterVolume(v *storage.VolumeInfo, dn *DataNode) {
if location.Remove(dn) {
- vl.ensureCorrectWritables(v)
+ vl.readonlyVolumes.Remove(v.Id, dn)
+ vl.oversizedVolumes.Remove(v.Id, dn)
+ vl.ensureCorrectWritables(v.Id)
if location.Length() == 0 {
delete(vl.vid2location, v.Id)
@@ -106,23 +202,32 @@ func (vl *VolumeLayout) UnRegisterVolume(v *storage.VolumeInfo, dn *DataNode) {
}
}
-func (vl *VolumeLayout) ensureCorrectWritables(v *storage.VolumeInfo) {
- if vl.vid2location[v.Id].Length() == vl.rp.GetCopyCount() && vl.isWritable(v) {
- if _, ok := vl.oversizedVolumes[v.Id]; !ok {
- vl.addToWritable(v.Id)
+func (vl *VolumeLayout) EnsureCorrectWritables(v *storage.VolumeInfo) {
+ vl.accessLock.Lock()
+ defer vl.accessLock.Unlock()
+
+ vl.ensureCorrectWritables(v.Id)
+}
+
+func (vl *VolumeLayout) ensureCorrectWritables(vid needle.VolumeId) {
+ if vl.enoughCopies(vid) && vl.isAllWritable(vid) {
+ if !vl.oversizedVolumes.IsTrue(vid) {
+ vl.setVolumeWritable(vid)
}
} else {
- vl.removeFromWritable(v.Id)
+ vl.removeFromWritable(vid)
}
}
-func (vl *VolumeLayout) addToWritable(vid needle.VolumeId) {
- for _, id := range vl.writables {
- if vid == id {
- return
+func (vl *VolumeLayout) isAllWritable(vid needle.VolumeId) bool {
+ for _, dn := range vl.vid2location[vid].list {
+ if v, getError := dn.GetVolumesById(vid); getError == nil {
+ if v.ReadOnly {
+ return false
+ }
}
}
- vl.writables = append(vl.writables, vid)
+ return true
}
func (vl *VolumeLayout) isOversized(v *storage.VolumeInfo) bool {
@@ -258,6 +363,8 @@ func (vl *VolumeLayout) SetVolumeUnavailable(dn *DataNode, vid needle.VolumeId)
if location, ok := vl.vid2location[vid]; ok {
if location.Remove(dn) {
+ vl.readonlyVolumes.Remove(vid, dn)
+ vl.oversizedVolumes.Remove(vid, dn)
if location.Length() < vl.rp.GetCopyCount() {
glog.V(0).Infoln("Volume", vid, "has", location.Length(), "replica, less than required", vl.rp.GetCopyCount())
return vl.removeFromWritable(vid)
@@ -266,17 +373,33 @@ func (vl *VolumeLayout) SetVolumeUnavailable(dn *DataNode, vid needle.VolumeId)
}
return false
}
-func (vl *VolumeLayout) SetVolumeAvailable(dn *DataNode, vid needle.VolumeId) bool {
+func (vl *VolumeLayout) SetVolumeAvailable(dn *DataNode, vid needle.VolumeId, isReadOnly bool) bool {
vl.accessLock.Lock()
defer vl.accessLock.Unlock()
+ vInfo, err := dn.GetVolumesById(vid)
+ if err != nil {
+ return false
+ }
+
vl.vid2location[vid].Set(dn)
- if vl.vid2location[vid].Length() == vl.rp.GetCopyCount() {
+
+ if vInfo.ReadOnly || isReadOnly {
+ return false
+ }
+
+ if vl.enoughCopies(vid) {
return vl.setVolumeWritable(vid)
}
return false
}
+func (vl *VolumeLayout) enoughCopies(vid needle.VolumeId) bool {
+ locations := vl.vid2location[vid].Length()
+ desired := vl.rp.GetCopyCount()
+ return locations == desired || (vl.replicationAsMin && locations > desired)
+}
+
func (vl *VolumeLayout) SetVolumeCapacityFull(vid needle.VolumeId) bool {
vl.accessLock.Lock()
defer vl.accessLock.Unlock()
@@ -306,10 +429,10 @@ func (vl *VolumeLayout) Stats() *VolumeLayoutStats {
size, fileCount := vll.Stats(vid, freshThreshold)
ret.FileCount += uint64(fileCount)
ret.UsedSize += size
- if vl.readonlyVolumes[vid] {
+ if vl.readonlyVolumes.IsTrue(vid) {
ret.TotalSize += size
} else {
- ret.TotalSize += vl.volumeSizeLimit
+ ret.TotalSize += vl.volumeSizeLimit * uint64(vll.Length())
}
}
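The new volumesBinaryState replaces the plain readonly/oversized sets with per-volume location lists, so each volume's state is judged against the replica placement. A compact restatement of the two decisions involved, copyState and the relaxed enoughCopies check that honors replicationAsMin:

package sketch

// copyStateOf mirrors volumesBinaryState.copyState, with the location-list
// length and the placement's copy count passed in directly.
func copyStateOf(replicas, copyCount int) string {
	switch {
	case replicas == 0:
		return "noCopies"
	case replicas < copyCount:
		return "insufficientCopies"
	default:
		return "enoughCopies"
	}
}

// enoughCopies mirrors VolumeLayout.enoughCopies: an exact match is required
// unless replicationAsMin also accepts extra replicas.
func enoughCopies(locations, desired int, replicationAsMin bool) bool {
	return locations == desired || (replicationAsMin && locations > desired)
}

With replica placement "002" (copy count 3), a volume tracked on two nodes is in insufficientCopies, so ExistCopies() marks it true while NoCopies() does not; that is the distinction exercised by the new volume_layout_test.go below.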
diff --git a/weed/topology/volume_layout_test.go b/weed/topology/volume_layout_test.go
new file mode 100644
index 000000000..e148d6107
--- /dev/null
+++ b/weed/topology/volume_layout_test.go
@@ -0,0 +1,116 @@
+package topology
+
+import (
+ "testing"
+
+ "github.com/chrislusf/seaweedfs/weed/storage/needle"
+ "github.com/chrislusf/seaweedfs/weed/storage/super_block"
+)
+
+func TestVolumesBinaryState(t *testing.T) {
+ vids := []needle.VolumeId{
+ needle.VolumeId(1),
+ needle.VolumeId(2),
+ needle.VolumeId(3),
+ needle.VolumeId(4),
+ needle.VolumeId(5),
+ }
+
+ dns := []*DataNode{
+ &DataNode{
+ Ip: "127.0.0.1",
+ Port: 8081,
+ },
+ &DataNode{
+ Ip: "127.0.0.1",
+ Port: 8082,
+ },
+ &DataNode{
+ Ip: "127.0.0.1",
+ Port: 8083,
+ },
+ }
+
+ rp, _ := super_block.NewReplicaPlacementFromString("002")
+
+ state_exist := NewVolumesBinaryState(readOnlyState, rp, ExistCopies())
+ state_exist.Add(vids[0], dns[0])
+ state_exist.Add(vids[0], dns[1])
+ state_exist.Add(vids[1], dns[2])
+ state_exist.Add(vids[2], dns[1])
+ state_exist.Add(vids[4], dns[1])
+ state_exist.Add(vids[4], dns[2])
+
+ state_no := NewVolumesBinaryState(readOnlyState, rp, NoCopies())
+ state_no.Add(vids[0], dns[0])
+ state_no.Add(vids[0], dns[1])
+ state_no.Add(vids[3], dns[1])
+
+ tests := []struct {
+ name string
+ state *volumesBinaryState
+ expectResult []bool
+ update func()
+ expectResultAfterUpdate []bool
+ }{
+ {
+ name: "mark true when exist copies",
+ state: state_exist,
+ expectResult: []bool{true, true, true, false, true},
+ update: func() {
+ state_exist.Remove(vids[0], dns[2])
+ state_exist.Remove(vids[1], dns[2])
+ state_exist.Remove(vids[3], dns[2])
+ state_exist.Remove(vids[4], dns[1])
+ state_exist.Remove(vids[4], dns[2])
+ },
+ expectResultAfterUpdate: []bool{true, false, true, false, false},
+ },
+ {
+ name: "mark true when inexist copies",
+ state: state_no,
+ expectResult: []bool{false, true, true, false, true},
+ update: func() {
+ state_no.Remove(vids[0], dns[2])
+ state_no.Remove(vids[1], dns[2])
+ state_no.Add(vids[2], dns[1])
+ state_no.Remove(vids[3], dns[1])
+ state_no.Remove(vids[4], dns[2])
+ },
+ expectResultAfterUpdate: []bool{false, true, false, true, true},
+ },
+ }
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ var result []bool
+ for index, _ := range vids {
+ result = append(result, test.state.IsTrue(vids[index]))
+ }
+ if len(result) != len(test.expectResult) {
+ t.Fatalf("len(result) != len(expectResult), got %d, expected %d\n",
+ len(result), len(test.expectResult))
+ }
+ for index, val := range result {
+ if val != test.expectResult[index] {
+ t.Fatalf("result not matched, index %d, got %v, expect %v\n",
+ index, val, test.expectResult[index])
+ }
+ }
+ test.update()
+ var updateResult []bool
+ for index, _ := range vids {
+ updateResult = append(updateResult, test.state.IsTrue(vids[index]))
+ }
+ if len(updateResult) != len(test.expectResultAfterUpdate) {
+ t.Fatalf("len(updateResult) != len(expectResultAfterUpdate), got %d, expected %d\n",
+ len(updateResult), len(test.expectResultAfterUpdate))
+ }
+ for index, val := range updateResult {
+ if val != test.expectResultAfterUpdate[index] {
+ t.Fatalf("update result not matched, index %d, got %v, expect %v\n",
+ index, val, test.expectResultAfterUpdate[index])
+ }
+ }
+ })
+ }
+}
diff --git a/weed/topology/volume_location_list.go b/weed/topology/volume_location_list.go
index 8905c54b5..548c4cd25 100644
--- a/weed/topology/volume_location_list.go
+++ b/weed/topology/volume_location_list.go
@@ -18,12 +18,23 @@ func (dnll *VolumeLocationList) String() string {
return fmt.Sprintf("%v", dnll.list)
}
+func (dnll *VolumeLocationList) Copy() *VolumeLocationList {
+ list := make([]*DataNode, len(dnll.list))
+ copy(list, dnll.list)
+ return &VolumeLocationList{
+ list: list,
+ }
+}
+
func (dnll *VolumeLocationList) Head() *DataNode {
//mark first node as master volume
return dnll.list[0]
}
func (dnll *VolumeLocationList) Length() int {
+ if dnll == nil {
+ return 0
+ }
return len(dnll.list)
}
@@ -71,7 +82,7 @@ func (dnll *VolumeLocationList) Stats(vid needle.VolumeId, freshThreshHold int64
if dnl.LastSeen < freshThreshHold {
vinfo, err := dnl.GetVolumesById(vid)
if err == nil {
- return vinfo.Size - vinfo.DeletedByteCount, vinfo.FileCount - vinfo.DeleteCount
+ return (vinfo.Size - vinfo.DeletedByteCount) * uint64(len(dnll.list)), vinfo.FileCount - vinfo.DeleteCount
}
}
}
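The Stats change accounts for every replica rather than a single copy: if the sampled replica reports 100 MiB with 10 MiB deleted and the volume lives on three nodes, the layout now counts (100 - 10) MiB * 3 = 270 MiB of used space, matching the way TotalSize in volume_layout.go is now scaled by the replica count.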