aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Lu <chris.lu@gmail.com>2012-09-29 16:07:24 -0700
committerChris Lu <chris.lu@gmail.com>2012-09-29 16:07:24 -0700
commit2fe43718994a034e655ec2684b3dcf5ab094eeb2 (patch)
tree603f3f8f3529df945271b3b1ee14f56bff90ed79
parent3a6c37aa6cc8ef77454c34175be75263a9111069 (diff)
downloadseaweedfs-2fe43718994a034e655ec2684b3dcf5ab094eeb2.tar.xz
seaweedfs-2fe43718994a034e655ec2684b3dcf5ab094eeb2.zip
Change to a more memory-efficient map, implemented as several lists of
<key,offset,size>
-rw-r--r--weed-fs/src/pkg/storage/compact_map.go158
-rw-r--r--weed-fs/src/pkg/storage/compact_map_perf_test.go43
-rw-r--r--weed-fs/src/pkg/storage/compact_map_test.go65
-rw-r--r--weed-fs/src/pkg/storage/needle_map.go36
-rw-r--r--weed-fs/src/pkg/storage/sample.idxbin0 -> 27140560 bytes
5 files changed, 280 insertions, 22 deletions
diff --git a/weed-fs/src/pkg/storage/compact_map.go b/weed-fs/src/pkg/storage/compact_map.go
new file mode 100644
index 000000000..4e83a76c8
--- /dev/null
+++ b/weed-fs/src/pkg/storage/compact_map.go
@@ -0,0 +1,158 @@
+package storage
+
+import ()
+
+// Key is the needle key used to look up an index entry.
+type Key uint64
+
+// batch is the number of slots preallocated in each CompactSection's sorted
+// values slice.
+const batch = 100000
+
+// NeedleValue is one <key, offset, size> index entry.
+type NeedleValue struct {
+	Key Key
+	// Since offsets are aligned to 8 bytes, the addressable range is 4G*8=32G.
+	Offset uint32 "Volume offset"
+	Size   uint32 "Size of the data portion"
+}
+
+// CompactSection holds the entries of one key range. Keys that arrive in
+// increasing order are appended to values, which therefore stays sorted and
+// can be binary searched; keys that arrive out of order, or after values is
+// full, are kept in overflow.
+type CompactSection struct {
+	values     []NeedleValue
+	overflow   map[Key]*NeedleValue
+	start, end Key // start: lower bound given at construction; end: largest key passed to Set
+	counter    int // number of populated slots at the front of values
+}
+
+func NewCompactSection(start Key) CompactSection {
+ return CompactSection{
+ values: make([]NeedleValue, batch),
+ overflow: make(map[Key]*NeedleValue),
+ start: start,
+ }
+}
+// Set stores offset and size for key, overwriting any entry already present.
+// A new key is appended to the sorted values slice when it is larger than the
+// last appended key and a free slot remains; otherwise it goes to overflow.
+func (cs *CompactSection) Set(key Key, offset uint32, size uint32) {
+	if cs.end < key {
+		cs.end = key
+	}
+	if idx := cs.binarySearchValues(key); idx >= 0 {
+		// Key already lives in the sorted slice: update it in place.
+		entry := &cs.values[idx]
+		entry.Offset, entry.Size = offset, size
+		return
+	}
+	// The slice is full, or appending would break its ascending order.
+	if cs.counter >= batch || (cs.counter > 0 && cs.values[cs.counter-1].Key > key) {
+		cs.overflow[key] = &NeedleValue{Key: key, Offset: offset, Size: size}
+		return
+	}
+	slot := &cs.values[cs.counter]
+	slot.Key, slot.Offset, slot.Size = key, offset, size
+	cs.counter++
+}
+// Delete removes key from the section. An overflow entry is dropped from the
+// map; an entry in the sorted values slice keeps its slot and only has its
+// Size reset to 0, so Get still reports it as present.
+func (cs *CompactSection) Delete(key Key) {
+	delete(cs.overflow, key)
+	if idx := cs.binarySearchValues(key); idx >= 0 {
+		cs.values[idx].Size = 0
+	}
+}
+// Get returns the entry stored for key and whether one was found. The
+// overflow map is consulted first, then the sorted values slice. The returned
+// pointer refers to the section's own storage.
+func (cs *CompactSection) Get(key Key) (*NeedleValue, bool) {
+	if v, found := cs.overflow[key]; found {
+		return v, true
+	}
+	idx := cs.binarySearchValues(key)
+	if idx < 0 {
+		return nil, false
+	}
+	return &cs.values[idx], true
+}
+// binarySearchValues looks key up among the populated slots of the sorted
+// values slice. It returns the slot index when found, -2 when key is larger
+// than the last populated key, and -1 when key is otherwise absent.
+func (cs *CompactSection) binarySearchValues(key Key) int {
+	lo, hi := 0, cs.counter-1
+	if hi >= 0 && cs.values[hi].Key < key {
+		return -2
+	}
+	for lo <= hi {
+		mid := (lo + hi) / 2
+		switch k := cs.values[mid].Key; {
+		case k < key:
+			lo = mid + 1
+		case key < k:
+			hi = mid - 1
+		default:
+			return mid
+		}
+	}
+	return -1
+}
+
+// CompactMap maps a Key to its NeedleValue using a list of CompactSections.
+// It assumes keys are mostly inserted in increasing order.
+type CompactMap struct {
+	list []CompactSection
+}
+
+// NewCompactMap returns an empty CompactMap; sections are created on demand
+// by Set.
+func NewCompactMap() CompactMap {
+	var cm CompactMap
+	return cm
+}
+
+// Set stores offset and size for key, creating a section when none covers it.
+//
+// A key smaller than the start of every existing section is routed to the
+// first section, whose start is lowered to cover it (the entry ends up in that
+// section's overflow map). Appending a fresh section for such a key would
+// leave cm.list unsorted by start, which binarySearchCompactSection relies on,
+// and would make previously stored keys unreachable.
+func (cm *CompactMap) Set(key Key, offset uint32, size uint32) {
+	x := cm.binarySearchCompactSection(key)
+	if x < 0 {
+		if len(cm.list) > 0 && key < cm.list[0].start {
+			// Key precedes all sections: extend the first one downwards.
+			cm.list[0].start = key
+			x = 0
+		} else {
+			// Empty map, or the last section is full and ends before key.
+			cm.list = append(cm.list, NewCompactSection(key))
+			x = len(cm.list) - 1
+		}
+	}
+	cm.list[x].Set(key, offset, size)
+}
+// Delete removes key from the section that covers it; it is a no-op when no
+// section does.
+func (cm *CompactMap) Delete(key Key) {
+	if x := cm.binarySearchCompactSection(key); x >= 0 {
+		cm.list[x].Delete(key)
+	}
+}
+// Get returns the entry stored for key and whether one was found, delegating
+// to the section that covers key.
+func (cm *CompactMap) Get(key Key) (*NeedleValue, bool) {
+	if x := cm.binarySearchCompactSection(key); x >= 0 {
+		return cm.list[x].Get(key)
+	}
+	return nil, false
+}
+// binarySearchCompactSection returns the index of the section responsible for
+// key, or a negative code when there is none:
+//
+//	-5  the map has no sections
+//	-4  key is at or past the last section's start, but that section is
+//	    full and key lies beyond its end
+//	-3  the search over the earlier sections found no covering section
+//
+// The search assumes cm.list is ordered by ascending start.
+func (cm *CompactMap) binarySearchCompactSection(key Key) int {
+	last := len(cm.list) - 1
+	if last < 0 {
+		return -5
+	}
+	// Fast path for the common case of increasing keys: check the last section.
+	if tail := &cm.list[last]; tail.start <= key {
+		if tail.counter < batch || key <= tail.end {
+			return last
+		}
+		return -4
+	}
+	for lo, hi := 0, last; lo <= hi; {
+		mid := (lo + hi) / 2
+		switch {
+		case key < cm.list[mid].start:
+			hi = mid - 1
+		case cm.list[mid+1].start <= key:
+			// Key belongs to a later section.
+			lo = mid + 1
+		default:
+			return mid
+		}
+	}
+	return -3
+}
+
+// Peek is a debugging aid: it prints up to the first 100 value slots of the
+// first section, followed by that section's overflow entries whose key is
+// below 100. It does nothing when the map has no sections.
+func (cm *CompactMap) Peek() {
+	if len(cm.list) == 0 {
+		return
+	}
+	first := &cm.list[0]
+	// Stop after 100 slots instead of walking the whole preallocated slice.
+	for i := 0; i < len(first.values) && i < 100; i++ {
+		v := &first.values[i]
+		println("[", v.Key, v.Offset, v.Size, "]")
+	}
+	for k, v := range first.overflow {
+		if k < 100 {
+			println("o[", v.Key, v.Offset, v.Size, "]")
+		}
+	}
+}
diff --git a/weed-fs/src/pkg/storage/compact_map_perf_test.go b/weed-fs/src/pkg/storage/compact_map_perf_test.go
new file mode 100644
index 000000000..2e2227279
--- /dev/null
+++ b/weed-fs/src/pkg/storage/compact_map_perf_test.go
@@ -0,0 +1,43 @@
+package storage
+
+import (
+ "testing"
+ "log"
+ "os"
+ "pkg/util"
+)
+
+// TestMemoryUsage loads sample.idx into a CompactMap so its memory footprint
+// can be observed. The index file is only read, so it is opened read-only and
+// closed when the test finishes; a failure to open it fails this test rather
+// than terminating the whole test binary.
+func TestMemoryUsage(t *testing.T) {
+	indexFile, ie := os.Open("sample.idx")
+	if ie != nil {
+		t.Fatal(ie)
+	}
+	defer indexFile.Close()
+
+	LoadNewNeedleMap(indexFile)
+}
+
+// LoadNewNeedleMap reads 16-byte index records (8-byte key, 4-byte offset,
+// 4-byte size) from file and replays them into a new CompactMap. A record
+// with a non-zero offset sets the key; a record with a zero offset deletes
+// it, matching LoadNeedleMap. Reading stops at the first read error or at end
+// of file.
+func LoadNewNeedleMap(file *os.File) CompactMap {
+	m := NewCompactMap()
+	bytes := make([]byte, 16*1024)
+	count, e := file.Read(bytes)
+	if count > 0 {
+		// Stat is only used for the log line; skip the line if it fails
+		// rather than dereferencing a nil FileInfo.
+		if fstat, err := file.Stat(); err == nil {
+			log.Println("Loading index file", fstat.Name(), "size", fstat.Size())
+		}
+	}
+	for count > 0 && e == nil {
+		// Only decode complete records; a trailing partial record would
+		// otherwise be padded with stale bytes from an earlier read.
+		for i := 0; i+16 <= count; i += 16 {
+			key := util.BytesToUint64(bytes[i : i+8])
+			offset := util.BytesToUint32(bytes[i+8 : i+12])
+			size := util.BytesToUint32(bytes[i+12 : i+16])
+			if offset > 0 {
+				m.Set(Key(key), offset, size)
+			} else {
+				m.Delete(Key(key))
+			}
+		}
+
+		count, e = file.Read(bytes)
+	}
+	return m
+}
diff --git a/weed-fs/src/pkg/storage/compact_map_test.go b/weed-fs/src/pkg/storage/compact_map_test.go
new file mode 100644
index 000000000..6c3bb6e96
--- /dev/null
+++ b/weed-fs/src/pkg/storage/compact_map_test.go
@@ -0,0 +1,65 @@
+package storage
+
+import (
+ "testing"
+)
+
+// TestXYZ exercises CompactMap with three passes over the key space: it sets
+// every even key below 100*batch, deletes every multiple of 37, then re-sets
+// every multiple of 3 below 10*batch with new values, and finally verifies
+// every key. Lookups check ok before touching the returned pointer so that a
+// missing key is reported as a test failure instead of a nil dereference.
+func TestXYZ(t *testing.T) {
+	m := NewCompactMap()
+	for i := uint32(0); i < 100*batch; i += 2 {
+		m.Set(Key(i), i, i)
+	}
+
+	for i := uint32(0); i < 100*batch; i += 37 {
+		m.Delete(Key(i))
+	}
+
+	for i := uint32(0); i < 10*batch; i += 3 {
+		m.Set(Key(i), i+11, i+5)
+	}
+
+	// Range touched by all three passes.
+	for i := uint32(0); i < 10*batch; i++ {
+		v, ok := m.Get(Key(i))
+		switch {
+		case i%3 == 0:
+			// Re-set after the delete pass, so it must be present.
+			if !ok {
+				t.Fatal("key", i, "missing!")
+			}
+			if v.Size != i+5 {
+				t.Fatal("key", i, "size", v.Size)
+			}
+		case i%37 == 0:
+			if ok && v.Size > 0 {
+				t.Fatal("key", i, "should have been deleted needle value", v)
+			}
+		case i%2 == 0:
+			if !ok {
+				t.Fatal("key", i, "missing")
+			}
+			if v.Size != i {
+				t.Fatal("key", i, "size", v.Size)
+			}
+		}
+	}
+
+	// Range touched only by the initial set and the delete pass.
+	for i := uint32(10 * batch); i < 100*batch; i++ {
+		v, ok := m.Get(Key(i))
+		switch {
+		case i%37 == 0:
+			if ok && v.Size > 0 {
+				t.Fatal("key", i, "should have been deleted needle value", v)
+			}
+		case i%2 == 0:
+			if !ok {
+				t.Fatal("key", i, "missing")
+			}
+			if v.Size != i {
+				t.Fatal("key", i, "size", v.Size)
+			}
+		}
+	}
+
+}
diff --git a/weed-fs/src/pkg/storage/needle_map.go b/weed-fs/src/pkg/storage/needle_map.go
index 4e6406d6a..53a640052 100644
--- a/weed-fs/src/pkg/storage/needle_map.go
+++ b/weed-fs/src/pkg/storage/needle_map.go
@@ -6,20 +6,15 @@ import (
"pkg/util"
)
-type NeedleValue struct {
- Offset uint32 "Volume offset" //since aligned to 8 bytes, range is 4G*8=32G
- Size uint32 "Size of the data portion"
-}
-
type NeedleMap struct {
indexFile *os.File
- m map[uint64]NeedleValue //mapping needle key(uint64) to NeedleValue
+ m CompactMap
bytes []byte
}
func NewNeedleMap(file *os.File) *NeedleMap {
nm := &NeedleMap{
- m: make(map[uint64]NeedleValue),
+ m: NewCompactMap(),
bytes: make([]byte, 16),
indexFile: file,
}
@@ -43,31 +38,31 @@ func LoadNeedleMap(file *os.File) *NeedleMap {
key := util.BytesToUint64(bytes[i : i+8])
offset := util.BytesToUint32(bytes[i+8 : i+12])
size := util.BytesToUint32(bytes[i+12 : i+16])
- if offset>0 {
- nm.m[key] = NeedleValue{util.Offset: offset, Size: size}
- }else{
- delete(nm.m, key)
- }
- }
-
+ if offset > 0 {
+ nm.m.Set(Key(key), offset, size)
+ } else {
+ nm.m.Delete(Key(key))
+ }
+ }
+
count, e = nm.indexFile.Read(bytes)
}
return nm
-}
+}
func (nm *NeedleMap) Put(key uint64, offset uint32, size uint32) (int, error) {
- nm.m[key] = NeedleValue{Offset: offset, Size: size}
+ nm.m.Set(Key(key), offset, size)
util.Uint64toBytes(nm.bytes[0:8], key)
util.Uint32toBytes(nm.bytes[8:12], offset)
util.Uint32toBytes(nm.bytes[12:16], size)
return nm.indexFile.Write(nm.bytes)
}
-func (nm *NeedleMap) Get(key uint64) (element NeedleValue, ok bool) {
- element, ok = nm.m[key]
+func (nm *NeedleMap) Get(key uint64) (element *NeedleValue, ok bool) {
+ element, ok = nm.m.Get(Key(key))
return
}
func (nm *NeedleMap) Delete(key uint64) {
- delete(nm.m, key)
+ nm.m.Delete(Key(key))
util.Uint64toBytes(nm.bytes[0:8], key)
util.Uint32toBytes(nm.bytes[8:12], 0)
util.Uint32toBytes(nm.bytes[12:16], 0)
@@ -76,6 +71,3 @@ func (nm *NeedleMap) Delete(key uint64) {
func (nm *NeedleMap) Close() {
nm.indexFile.Close()
}
-func (nm *NeedleMap) Length() int{
- return len(nm.m)
-}
diff --git a/weed-fs/src/pkg/storage/sample.idx b/weed-fs/src/pkg/storage/sample.idx
new file mode 100644
index 000000000..44918b41d
--- /dev/null
+++ b/weed-fs/src/pkg/storage/sample.idx
Binary files differ