about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Chris Lu <chrislusf@users.noreply.github.com> 2022-07-20 09:06:06 -0700
committer: GitHub <noreply@github.com> 2022-07-20 09:06:06 -0700
commit: 9ec0d1caaa1d7e7c9d19951aa0689eb47da8b8fa (patch)
tree: 0aa52231e120c1b4033a0f64cd8fab742c7142ac
parent: efec31de8df321b01c0e20085d0a10e430af2ed6 (diff)
parent: ac694f0c8f2b5456bd175e1288023f0bdb5b7a14 (diff)
download: seaweedfs-9ec0d1caaa1d7e7c9d19951aa0689eb47da8b8fa.tar.xz
download: seaweedfs-9ec0d1caaa1d7e7c9d19951aa0689eb47da8b8fa.zip
Merge pull request #3337 from guol-fnst/loading_volume
-rw-r--r-- unmaintained/diff_volume_servers/diff_volume_servers.go | 9
-rw-r--r-- unmaintained/see_idx/see_idx.go | 5
-rw-r--r-- weed/storage/erasure_coding/ec_encoder.go | 2
-rw-r--r-- weed/storage/idx/walk.go | 4
-rw-r--r-- weed/storage/needle_map/memdb.go | 2
-rw-r--r-- weed/storage/needle_map_leveldb.go | 89
-rw-r--r-- weed/storage/needle_map_memory.go | 2
7 files changed, 93 insertions, 20 deletions
diff --git a/unmaintained/diff_volume_servers/diff_volume_servers.go b/unmaintained/diff_volume_servers/diff_volume_servers.go
index 0188d18d4..815eeae54 100644
--- a/unmaintained/diff_volume_servers/diff_volume_servers.go
+++ b/unmaintained/diff_volume_servers/diff_volume_servers.go
@@ -6,6 +6,10 @@ import (
"errors"
"flag"
"fmt"
+ "io"
+ "math"
+ "os"
+
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/operation"
"github.com/chrislusf/seaweedfs/weed/pb"
@@ -16,9 +20,6 @@ import (
"github.com/chrislusf/seaweedfs/weed/storage/types"
"github.com/chrislusf/seaweedfs/weed/util"
"google.golang.org/grpc"
- "io"
- "math"
- "os"
)
var (
@@ -155,7 +156,7 @@ func getVolumeFiles(v uint32, addr pb.ServerAddress) (map[types.NeedleId]needleS
var maxOffset int64
files := map[types.NeedleId]needleState{}
- err = idx.WalkIndexFile(idxFile, func(key types.NeedleId, offset types.Offset, size types.Size) error {
+ err = idx.WalkIndexFile(idxFile, 0, func(key types.NeedleId, offset types.Offset, size types.Size) error {
if offset.IsZero() || size.IsDeleted() {
files[key] = needleState{
state: stateDeleted,
diff --git a/unmaintained/see_idx/see_idx.go b/unmaintained/see_idx/see_idx.go
index 22c659351..616263b1c 100644
--- a/unmaintained/see_idx/see_idx.go
+++ b/unmaintained/see_idx/see_idx.go
@@ -3,11 +3,12 @@ package main
import (
"flag"
"fmt"
- "github.com/chrislusf/seaweedfs/weed/util"
"os"
"path"
"strconv"
+ "github.com/chrislusf/seaweedfs/weed/util"
+
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/storage/idx"
"github.com/chrislusf/seaweedfs/weed/storage/types"
@@ -36,7 +37,7 @@ func main() {
}
defer indexFile.Close()
- idx.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size types.Size) error {
+ idx.WalkIndexFile(indexFile, 0, func(key types.NeedleId, offset types.Offset, size types.Size) error {
fmt.Printf("key:%v offset:%v size:%v(%v)\n", key, offset, size, util.BytesToHumanReadable(uint64(size)))
return nil
})
diff --git a/weed/storage/erasure_coding/ec_encoder.go b/weed/storage/erasure_coding/ec_encoder.go
index 157149865..ea331ca39 100644
--- a/weed/storage/erasure_coding/ec_encoder.go
+++ b/weed/storage/erasure_coding/ec_encoder.go
@@ -294,7 +294,7 @@ func readNeedleMap(baseFileName string) (*needle_map.MemDb, error) {
defer indexFile.Close()
cm := needle_map.NewMemDb()
- err = idx.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size types.Size) error {
+ err = idx.WalkIndexFile(indexFile, 0, func(key types.NeedleId, offset types.Offset, size types.Size) error {
if !offset.IsZero() && size != types.TombstoneFileSize {
cm.Set(key, offset, size)
} else {
diff --git a/weed/storage/idx/walk.go b/weed/storage/idx/walk.go
index 5215d3c4f..70d3855ea 100644
--- a/weed/storage/idx/walk.go
+++ b/weed/storage/idx/walk.go
@@ -9,8 +9,8 @@ import (
// walks through the index file, calls fn function with each key, offset, size
// stops with the error returned by the fn function
-func WalkIndexFile(r io.ReaderAt, fn func(key types.NeedleId, offset types.Offset, size types.Size) error) error {
- var readerOffset int64
+func WalkIndexFile(r io.ReaderAt, startFrom uint64, fn func(key types.NeedleId, offset types.Offset, size types.Size) error) error {
+ readerOffset := int64(startFrom * types.NeedleMapEntrySize)
bytes := make([]byte, types.NeedleMapEntrySize*RowsToRead)
count, e := r.ReadAt(bytes, readerOffset)
if count == 0 && e == io.EOF {
diff --git a/weed/storage/needle_map/memdb.go b/weed/storage/needle_map/memdb.go
index ba1fd3d1e..a362a85ae 100644
--- a/weed/storage/needle_map/memdb.go
+++ b/weed/storage/needle_map/memdb.go
@@ -111,7 +111,7 @@ func (cm *MemDb) LoadFromIdx(idxName string) (ret error) {
func (cm *MemDb) LoadFromReaderAt(readerAt io.ReaderAt) (ret error) {
- return idx.WalkIndexFile(readerAt, func(key NeedleId, offset Offset, size Size) error {
+ return idx.WalkIndexFile(readerAt, 0, func(key NeedleId, offset Offset, size Size) error {
if offset.IsZero() || size.IsDeleted() {
return cm.Delete(key)
}
diff --git a/weed/storage/needle_map_leveldb.go b/weed/storage/needle_map_leveldb.go
index 31c86d124..a1934b8f1 100644
--- a/weed/storage/needle_map_leveldb.go
+++ b/weed/storage/needle_map_leveldb.go
@@ -9,6 +9,8 @@ import (
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/chrislusf/seaweedfs/weed/storage/idx"
+ "github.com/chrislusf/seaweedfs/weed/storage/types"
+ "github.com/chrislusf/seaweedfs/weed/util"
"github.com/syndtr/goleveldb/leveldb"
@@ -17,10 +19,16 @@ import (
. "github.com/chrislusf/seaweedfs/weed/storage/types"
)
+//mark it every watermarkBatchSize operations
+const watermarkBatchSize = 10000
+
+var watermarkKey = []byte("idx_entry_watermark")
+
type LevelDbNeedleMap struct {
baseNeedleMapper
- dbFileName string
- db *leveldb.DB
+ dbFileName string
+ db *leveldb.DB
+ recordCount uint64
}
func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File, opts *opt.Options) (m *LevelDbNeedleMap, err error) {
@@ -46,7 +54,14 @@ func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File, opts *opt.Option
return
}
}
- glog.V(1).Infof("Loading %s...", indexFile.Name())
+ glog.V(0).Infof("Loading %s... , watermark: %d", dbFileName, getWatermark(m.db))
+ m.recordCount = uint64(m.indexFileOffset / types.NeedleMapEntrySize)
+ watermark := (m.recordCount / watermarkBatchSize) * watermarkBatchSize
+ err = setWatermark(m.db, watermark)
+ if err != nil {
+ glog.Fatalf("set watermark for %s error: %s\n", dbFileName, err)
+ return
+ }
mm, indexLoadError := newNeedleMapMetricFromIndexFile(indexFile)
if indexLoadError != nil {
return nil, indexLoadError
@@ -78,9 +93,20 @@ func generateLevelDbFile(dbFileName string, indexFile *os.File) error {
return err
}
defer db.Close()
- return idx.WalkIndexFile(indexFile, func(key NeedleId, offset Offset, size Size) error {
+
+ watermark := getWatermark(db)
+ if stat, err := indexFile.Stat(); err != nil {
+ glog.Fatalf("stat file %s: %v", indexFile.Name(), err)
+ return err
+ } else {
+ if watermark*types.NeedleMapEntrySize > uint64(stat.Size()) {
+ glog.Warningf("wrong watermark %d for filesize %d", watermark, stat.Size())
+ }
+ glog.V(0).Infof("generateLevelDbFile %s, watermark %d, num of entries:%d", dbFileName, watermark, (uint64(stat.Size())-watermark*types.NeedleMapEntrySize)/types.NeedleMapEntrySize)
+ }
+ return idx.WalkIndexFile(indexFile, watermark, func(key NeedleId, offset Offset, size Size) error {
if !offset.IsZero() && size.IsValid() {
- levelDbWrite(db, key, offset, size)
+ levelDbWrite(db, key, offset, size, false, 0)
} else {
levelDbDelete(db, key)
}
@@ -102,6 +128,7 @@ func (m *LevelDbNeedleMap) Get(key NeedleId) (element *needle_map.NeedleValue, o
func (m *LevelDbNeedleMap) Put(key NeedleId, offset Offset, size Size) error {
var oldSize Size
+ var watermark uint64
if oldNeedle, ok := m.Get(key); ok {
oldSize = oldNeedle.Size
}
@@ -110,16 +137,54 @@ func (m *LevelDbNeedleMap) Put(key NeedleId, offset Offset, size Size) error {
if err := m.appendToIndexFile(key, offset, size); err != nil {
return fmt.Errorf("cannot write to indexfile %s: %v", m.indexFile.Name(), err)
}
- return levelDbWrite(m.db, key, offset, size)
+ m.recordCount++
+ if m.recordCount%watermarkBatchSize != 0 {
+ watermark = 0
+ } else {
+ watermark = (m.recordCount / watermarkBatchSize) * watermarkBatchSize
+ glog.V(1).Infof("put cnt:%d for %s,watermark: %d", m.recordCount, m.dbFileName, watermark)
+ }
+ return levelDbWrite(m.db, key, offset, size, watermark == 0, watermark)
+}
+
+// getWatermark reads the index-entry position stored in db under watermarkKey.
+//
+// It returns 0 when no usable watermark is available (key absent, read error,
+// or a stored value that is not exactly 8 bytes); a caller that passes the
+// result to idx.WalkIndexFile then walks the index file from its first entry.
+func getWatermark(db *leveldb.DB) uint64 {
+	data, err := db.Get(watermarkKey, nil)
+	if err == leveldb.ErrNotFound {
+		// A database that has never stored a watermark is an expected state,
+		// not a fault, so it is reported at verbose level only.
+		glog.V(1).Infof("no watermark found in db, starting from 0")
+		return 0
+	}
+	if err != nil || len(data) != 8 {
+		// Genuine read failure or a malformed value: warn and fall back to 0.
+		glog.Warningf("get watermark from db error: %v, %d", err, len(data))
+		return 0
+	}
+	return util.BytesToUint64(data)
+}
-func levelDbWrite(db *leveldb.DB, key NeedleId, offset Offset, size Size) error {
+func setWatermark(db *leveldb.DB, watermark uint64) error {
+ glog.V(1).Infof("set watermark %d", watermark)
+ var wmBytes = make([]byte, 8)
+ util.Uint64toBytes(wmBytes, watermark)
+ if err := db.Put(watermarkKey, wmBytes, nil); err != nil {
+ return fmt.Errorf("failed to setWatermark: %v", err)
+ }
+ return nil
+}
+
+func levelDbWrite(db *leveldb.DB, key NeedleId, offset Offset, size Size, updateWatermark bool, watermark uint64) error {
bytes := needle_map.ToBytes(key, offset, size)
if err := db.Put(bytes[0:NeedleIdSize], bytes[NeedleIdSize:NeedleIdSize+OffsetSize+SizeSize], nil); err != nil {
return fmt.Errorf("failed to write leveldb: %v", err)
}
+ // set watermark
+ if updateWatermark {
+ return setWatermark(db, watermark)
+ }
return nil
}
func levelDbDelete(db *leveldb.DB, key NeedleId) error {
@@ -129,6 +194,7 @@ func levelDbDelete(db *leveldb.DB, key NeedleId) error {
}
func (m *LevelDbNeedleMap) Delete(key NeedleId, offset Offset) error {
+ var watermark uint64
oldNeedle, found := m.Get(key)
if !found || oldNeedle.Size.IsDeleted() {
return nil
@@ -139,8 +205,13 @@ func (m *LevelDbNeedleMap) Delete(key NeedleId, offset Offset) error {
if err := m.appendToIndexFile(key, offset, TombstoneFileSize); err != nil {
return err
}
-
- return levelDbWrite(m.db, key, oldNeedle.Offset, -oldNeedle.Size)
+ m.recordCount++
+ if m.recordCount%watermarkBatchSize != 0 {
+ watermark = 0
+ } else {
+ watermark = (m.recordCount / watermarkBatchSize) * watermarkBatchSize
+ }
+ return levelDbWrite(m.db, key, oldNeedle.Offset, -oldNeedle.Size, watermark == 0, watermark)
}
func (m *LevelDbNeedleMap) Close() {
diff --git a/weed/storage/needle_map_memory.go b/weed/storage/needle_map_memory.go
index 1b58708c6..4c7909dbd 100644
--- a/weed/storage/needle_map_memory.go
+++ b/weed/storage/needle_map_memory.go
@@ -33,7 +33,7 @@ func LoadCompactNeedleMap(file *os.File) (*NeedleMap, error) {
}
func doLoading(file *os.File, nm *NeedleMap) (*NeedleMap, error) {
- e := idx.WalkIndexFile(file, func(key NeedleId, offset Offset, size Size) error {
+ e := idx.WalkIndexFile(file, 0, func(key NeedleId, offset Offset, size Size) error {
nm.MaybeSetMaxFileKey(key)
if !offset.IsZero() && size.IsValid() {
nm.FileCounter++