aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Lu <chris.lu@gmail.com>2020-01-22 23:00:04 -0800
committerChris Lu <chris.lu@gmail.com>2020-01-22 23:00:04 -0800
commitc2e589f202b84b5beb98f1f5c243cc38b58f232b (patch)
tree8488688a1812620705e59bee378b0530e1654cb9
parent6a5c0370995653621fa8b576ea149e91875938d6 (diff)
downloadseaweedfs-c2e589f202b84b5beb98f1f5c243cc38b58f232b.tar.xz
seaweedfs-c2e589f202b84b5beb98f1f5c243cc38b58f232b.zip
mount: better combine connected intervals when writing to volume servers
-rw-r--r--weed/filesys/dirty_page.go161
-rw-r--r--weed/filesys/dirty_page_interval.go190
-rw-r--r--weed/filesys/file.go6
-rw-r--r--weed/filesys/filehandle.go15
4 files changed, 259 insertions, 113 deletions
diff --git a/weed/filesys/dirty_page.go b/weed/filesys/dirty_page.go
index 3418dc1c9..f1532a6a0 100644
--- a/weed/filesys/dirty_page.go
+++ b/weed/filesys/dirty_page.go
@@ -4,8 +4,8 @@ import (
"bytes"
"context"
"fmt"
+ "io"
"sync"
- "sync/atomic"
"time"
"github.com/chrislusf/seaweedfs/weed/glog"
@@ -15,28 +15,19 @@ import (
)
type ContinuousDirtyPages struct {
- hasData bool
- Offset int64
- Size int64
- Data []byte
- f *File
- lock sync.Mutex
+ intervals *ContinuousIntervals
+ f *File
+ lock sync.Mutex
}
func newDirtyPages(file *File) *ContinuousDirtyPages {
return &ContinuousDirtyPages{
- Data: nil,
- f: file,
+ intervals: &ContinuousIntervals{},
+ f: file,
}
}
func (pages *ContinuousDirtyPages) releaseResource() {
- if pages.Data != nil {
- pages.f.wfs.bufPool.Put(pages.Data)
- pages.Data = nil
- atomic.AddInt32(&counter, -1)
- glog.V(3).Infof("%s/%s releasing resource %d", pages.f.dir.Path, pages.f.Name, counter)
- }
}
var counter = int32(0)
@@ -46,84 +37,49 @@ func (pages *ContinuousDirtyPages) AddPage(ctx context.Context, offset int64, da
pages.lock.Lock()
defer pages.lock.Unlock()
- var chunk *filer_pb.FileChunk
+ glog.V(3).Infof("%s AddPage [%d,%d)", pages.f.fullpath(), offset, offset+int64(len(data)))
if len(data) > int(pages.f.wfs.option.ChunkSizeLimit) {
// this is more than what buffer can hold.
return pages.flushAndSave(ctx, offset, data)
}
- if pages.Data == nil {
- pages.Data = pages.f.wfs.bufPool.Get().([]byte)
- atomic.AddInt32(&counter, 1)
- glog.V(3).Infof("%s/%s acquire resource %d", pages.f.dir.Path, pages.f.Name, counter)
- }
-
- if offset < pages.Offset || offset >= pages.Offset+int64(len(pages.Data)) ||
- pages.Offset+int64(len(pages.Data)) < offset+int64(len(data)) {
- // if the data is out of range,
- // or buffer is full if adding new data,
- // flush current buffer and add new data
-
- glog.V(4).Infof("offset=%d, size=%d, existing pages offset=%d, pages size=%d, data=%d", offset, len(data), pages.Offset, pages.Size, len(pages.Data))
-
- if chunk, err = pages.saveExistingPagesToStorage(ctx); err == nil {
- if chunk != nil {
- glog.V(4).Infof("%s/%s add save [%d,%d)", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size))
- chunks = append(chunks, chunk)
- }
- } else {
- glog.V(0).Infof("%s/%s add save [%d,%d): %v", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size), err)
- return
- }
- pages.Offset = offset
- glog.V(4).Infof("copy data0: offset=%d, size=%d, existing pages offset=%d, pages size=%d, data=%d", offset, len(data), pages.Offset, pages.Size, len(pages.Data))
- copy(pages.Data, data)
- pages.Size = int64(len(data))
+ hasOverlap := pages.intervals.AddInterval(data, offset)
+ if hasOverlap {
+ chunks, err = pages.saveExistingPagesToStorage(ctx)
+ pages.intervals.AddInterval(data, offset)
return
}
- if offset != pages.Offset+pages.Size {
- // when this happens, debug shows the data overlapping with existing data is empty
- // the data is not just append
- if offset == pages.Offset && int(pages.Size) < len(data) {
- glog.V(4).Infof("copy data1: offset=%d, size=%d, existing pages offset=%d, pages size=%d, data=%d", offset, len(data), pages.Offset, pages.Size, len(pages.Data))
- copy(pages.Data[pages.Size:], data[pages.Size:])
- } else {
- if pages.Size != 0 {
- glog.V(1).Infof("%s/%s add page: pages [%d, %d) write [%d, %d)", pages.f.dir.Path, pages.f.Name, pages.Offset, pages.Offset+pages.Size, offset, offset+int64(len(data)))
- }
- return pages.flushAndSave(ctx, offset, data)
+ var chunk *filer_pb.FileChunk
+ var hasSavedData bool
+
+ if pages.intervals.TotalSize() > pages.f.wfs.option.ChunkSizeLimit {
+ chunk, hasSavedData, err = pages.saveExistingLargestPageToStorage(ctx)
+ if hasSavedData {
+ chunks = append(chunks, chunk)
}
- } else {
- glog.V(4).Infof("copy data2: offset=%d, size=%d, existing pages offset=%d, pages size=%d, data=%d", offset, len(data), pages.Offset, pages.Size, len(pages.Data))
- copy(pages.Data[offset-pages.Offset:], data)
}
- pages.Size = max(pages.Size, offset+int64(len(data))-pages.Offset)
-
return
}
func (pages *ContinuousDirtyPages) flushAndSave(ctx context.Context, offset int64, data []byte) (chunks []*filer_pb.FileChunk, err error) {
var chunk *filer_pb.FileChunk
+ var newChunks []*filer_pb.FileChunk
// flush existing
- if chunk, err = pages.saveExistingPagesToStorage(ctx); err == nil {
- if chunk != nil {
- glog.V(4).Infof("%s/%s flush existing [%d,%d) to %s", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.FileId)
- chunks = append(chunks, chunk)
+ if newChunks, err = pages.saveExistingPagesToStorage(ctx); err == nil {
+ if newChunks != nil {
+ chunks = append(chunks, newChunks...)
}
} else {
- glog.V(0).Infof("%s/%s failed to flush1 [%d,%d): %v", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size), err)
return
}
- pages.Size = 0
- pages.Offset = 0
// flush the new page
- if chunk, err = pages.saveToStorage(ctx, data, offset); err == nil {
+ if chunk, err = pages.saveToStorage(ctx, bytes.NewReader(data), offset, int64(len(data))); err == nil {
if chunk != nil {
glog.V(4).Infof("%s/%s flush big request [%d,%d) to %s", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.FileId)
chunks = append(chunks, chunk)
@@ -136,37 +92,55 @@ func (pages *ContinuousDirtyPages) flushAndSave(ctx context.Context, offset int6
return
}
-func (pages *ContinuousDirtyPages) FlushToStorage(ctx context.Context) (chunk *filer_pb.FileChunk, err error) {
+func (pages *ContinuousDirtyPages) FlushToStorage(ctx context.Context) (chunks []*filer_pb.FileChunk, err error) {
pages.lock.Lock()
defer pages.lock.Unlock()
- if pages.Size == 0 {
- return nil, nil
- }
+ return pages.saveExistingPagesToStorage(ctx)
+}
- if chunk, err = pages.saveExistingPagesToStorage(ctx); err == nil {
- pages.Size = 0
- pages.Offset = 0
- if chunk != nil {
- glog.V(4).Infof("%s/%s flush [%d,%d)", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size))
+func (pages *ContinuousDirtyPages) saveExistingPagesToStorage(ctx context.Context) (chunks []*filer_pb.FileChunk, err error) {
+
+ var hasSavedData bool
+ var chunk *filer_pb.FileChunk
+
+ for {
+
+ chunk, hasSavedData, err = pages.saveExistingLargestPageToStorage(ctx)
+ if !hasSavedData {
+ return chunks, err
+ }
+
+ if err == nil {
+ chunks = append(chunks, chunk)
+ } else {
+ return
}
}
- return
+
}
-func (pages *ContinuousDirtyPages) saveExistingPagesToStorage(ctx context.Context) (*filer_pb.FileChunk, error) {
+func (pages *ContinuousDirtyPages) saveExistingLargestPageToStorage(ctx context.Context) (chunk *filer_pb.FileChunk, hasSavedData bool, err error) {
- if pages.Size == 0 {
- return nil, nil
+ maxList := pages.intervals.RemoveLargestIntervalLinkedList()
+ if maxList == nil {
+ return nil, false, nil
}
- glog.V(0).Infof("%s/%s saveExistingPagesToStorage [%d,%d): Data len=%d", pages.f.dir.Path, pages.f.Name, pages.Offset, pages.Size, len(pages.Data))
+ chunk, err = pages.saveToStorage(ctx, maxList.ToReader(), maxList.Offset(), maxList.Size())
+ if err == nil {
+ hasSavedData = true
+ glog.V(3).Infof("%s saveToStorage [%d,%d) %s", pages.f.fullpath(), maxList.Offset(), maxList.Offset()+maxList.Size(), chunk.FileId)
+ } else {
+ glog.V(0).Infof("%s saveToStorage [%d,%d): %v", pages.f.fullpath(), maxList.Offset(), maxList.Offset()+maxList.Size(), err)
+ return
+ }
- return pages.saveToStorage(ctx, pages.Data[:pages.Size], pages.Offset)
+ return
}
-func (pages *ContinuousDirtyPages) saveToStorage(ctx context.Context, buf []byte, offset int64) (*filer_pb.FileChunk, error) {
+func (pages *ContinuousDirtyPages) saveToStorage(ctx context.Context, reader io.Reader, offset int64, size int64) (*filer_pb.FileChunk, error) {
var fileId, host string
var auth security.EncodedJwt
@@ -195,8 +169,7 @@ func (pages *ContinuousDirtyPages) saveToStorage(ctx context.Context, buf []byte
}
fileUrl := fmt.Sprintf("http://%s/%s", host, fileId)
- bufReader := bytes.NewReader(buf)
- uploadResult, err := operation.Upload(fileUrl, pages.f.Name, bufReader, false, "", nil, auth)
+ uploadResult, err := operation.Upload(fileUrl, pages.f.Name, reader, false, "", nil, auth)
if err != nil {
glog.V(0).Infof("upload data %v to %s: %v", pages.f.Name, fileUrl, err)
return nil, fmt.Errorf("upload data: %v", err)
@@ -209,7 +182,7 @@ func (pages *ContinuousDirtyPages) saveToStorage(ctx context.Context, buf []byte
return &filer_pb.FileChunk{
FileId: fileId,
Offset: offset,
- Size: uint64(len(buf)),
+ Size: uint64(size),
Mtime: time.Now().UnixNano(),
ETag: uploadResult.ETag,
}, nil
@@ -229,23 +202,11 @@ func min(x, y int64) int64 {
return y
}
-func (pages *ContinuousDirtyPages) ReadDirtyData(ctx context.Context, data []byte, startOffset int64) (offset int64, size int, err error) {
- bufSize := int64(len(data))
+func (pages *ContinuousDirtyPages) ReadDirtyData(ctx context.Context, data []byte, startOffset int64) (offset int64, size int) {
pages.lock.Lock()
defer pages.lock.Unlock()
- if startOffset+bufSize < pages.Offset {
- return
- }
- if startOffset >= pages.Offset+pages.Size {
- return
- }
-
- offset = max(pages.Offset, startOffset)
- stopOffset := min(pages.Offset+pages.Size, startOffset+bufSize)
- size = int(stopOffset - offset)
- copy(data[offset-startOffset:], pages.Data[offset-pages.Offset:stopOffset-pages.Offset])
+ return pages.intervals.ReadData(data, startOffset)
- return
}
diff --git a/weed/filesys/dirty_page_interval.go b/weed/filesys/dirty_page_interval.go
new file mode 100644
index 000000000..5c55268c7
--- /dev/null
+++ b/weed/filesys/dirty_page_interval.go
@@ -0,0 +1,190 @@
+package filesys
+
+import (
+ "bytes"
+ "io"
+ "math"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+type IntervalNode struct { // IntervalNode is one element of a singly linked list of byte ranges.
+ Data []byte // bytes held by this node
+ Offset int64 // start of this node's range; AddInterval takes it from the offset it is given
+ Size int64 // length of the range; AddInterval initialises it to len(Data)
+ Next *IntervalNode // following node in the list; nil on the last node
+}
+
+type IntervalLinkedList struct { // IntervalLinkedList is a chain of IntervalNodes tracked by its first and last node.
+ Head *IntervalNode // first node; Offset() reads Head.Offset
+ Tail *IntervalNode // last node; Size() is measured up to Tail.Offset+Tail.Size
+}
+
+type ContinuousIntervals struct { // ContinuousIntervals holds a set of interval linked lists.
+ lists []*IntervalLinkedList // grown by AddInterval; shrunk by removeList and RemoveLargestIntervalLinkedList
+}
+
+// Offset reports the offset of the list's first node.
+func (list *IntervalLinkedList) Offset() int64 {
+	first := list.Head
+	return first.Offset
+}
+// Size reports the span covered by the list, measured from the start of the
+// head node to the end of the tail node.
+func (list *IntervalLinkedList) Size() int64 {
+	end := list.Tail.Offset + list.Tail.Size
+	return end - list.Head.Offset
+}
+// addNodeToTail links node after the current tail and makes it the new tail.
+func (list *IntervalLinkedList) addNodeToTail(node *IntervalNode) {
+	oldTail := list.Tail
+	oldTail.Next = node
+	list.Tail = node
+}
+// addNodeToHead links node in front of the current head and makes it the new head.
+func (list *IntervalLinkedList) addNodeToHead(node *IntervalNode) {
+	oldHead := list.Head
+	node.Next = oldHead
+	list.Head = node
+}
+
+// ReadData copies into buf the parts of the list's nodes that fall inside
+// [start, stop). buf[0] corresponds to offset start, so a node's bytes land
+// at buf[nodeStart-start:]. A list without a head copies nothing.
+func (list *IntervalLinkedList) ReadData(buf []byte, start, stop int64) {
+	for t := list.Head; t != nil; t = t.Next {
+		nodeStart, nodeStop := max(start, t.Offset), min(stop, t.Offset+t.Size)
+		if nodeStart >= nodeStop {
+			// this node lies outside the requested range
+			continue
+		}
+		// Per-copy tracing is kept at a high verbosity level; at V(0) it
+		// would be written for every node touched by every read.
+		glog.V(4).Infof("copying start=%d stop=%d t=[%d,%d) t.data=%d => bufSize=%d nodeStart=%d, nodeStop=%d",
+			start, stop, t.Offset, t.Offset+t.Size, len(t.Data),
+			len(buf), nodeStart, nodeStop)
+		copy(buf[nodeStart-start:], t.Data[nodeStart-t.Offset:nodeStop-t.Offset])
+	}
+}
+
+// TotalSize adds up Size() over every list currently held.
+func (c *ContinuousIntervals) TotalSize() (total int64) {
+	for i := 0; i < len(c.lists); i++ {
+		total += c.lists[i].Size()
+	}
+	return total
+}
+
+// AddInterval records data as a dirty range starting at offset.
+//
+// The new range is appended to a list that ends exactly at offset, or to a
+// list whose tail node it partially overlaps (the overlapping prefix of data
+// is dropped and the bytes already held are kept). If some list starts
+// exactly where the new range ends, the two are connected. A range that
+// touches nothing starts a new list. Empty data is ignored.
+//
+// hasOverlap is true, and nothing is stored, when the range starts inside an
+// existing list but cannot be appended to its tail: it either starts before
+// the tail node or lies entirely within it. The caller is expected to flush
+// the existing lists and add the range again.
+func (c *ContinuousIntervals) AddInterval(data []byte, offset int64) (hasOverlap bool) {
+	if len(data) == 0 {
+		// an empty write carries no dirty bytes; storing it would only
+		// create a zero-length node
+		return false
+	}
+
+	interval := &IntervalNode{Data: data, Offset: offset, Size: int64(len(data))}
+
+	var prevList, nextList *IntervalLinkedList
+
+	// look for a list that begins right where the new range ends
+	for _, list := range c.lists {
+		if list.Head.Offset == interval.Offset+interval.Size {
+			nextList = list
+			break
+		}
+	}
+
+	// look for a list the new range can be appended to
+	for _, list := range c.lists {
+		listStop := list.Head.Offset + list.Size()
+		if listStop == offset {
+			list.addNodeToTail(interval)
+			prevList = list
+			break
+		}
+		if list.Head.Offset <= offset && offset < listStop {
+			dataStartIndex := listStop - offset
+			if list.Tail.Offset > offset || dataStartIndex >= interval.Size {
+				// The range starts before the tail node, or it is fully
+				// covered by the tail node. In the second case slicing
+				// data at dataStartIndex would run past its end.
+				glog.V(4).Infof("overlapped! interval is [%d,%d) dataSize=%d", interval.Offset, interval.Offset+interval.Size, len(interval.Data))
+				return true
+			}
+			// keep the bytes already held by the tail, append only the rest
+			interval.Data = interval.Data[dataStartIndex:]
+			interval.Size -= dataStartIndex
+			interval.Offset = offset + dataStartIndex
+			list.addNodeToTail(interval)
+			prevList = list
+			break
+		}
+	}
+
+	switch {
+	case prevList != nil && nextList != nil:
+		// interval is already prevList's tail; chain nextList behind it
+		prevList.Tail.Next = nextList.Head
+		prevList.Tail = nextList.Tail
+		c.removeList(nextList)
+	case nextList != nil:
+		// add to head was not done when checking
+		nextList.addNodeToHead(interval)
+	case prevList == nil:
+		// touches no existing list
+		c.lists = append(c.lists, &IntervalLinkedList{
+			Head: interval,
+			Tail: interval,
+		})
+	}
+
+	return false
+}
+
+// RemoveLargestIntervalLinkedList detaches and returns the list with the
+// greatest Size(); when sizes tie, the list later in c.lists is chosen.
+// It returns nil when no list has a positive size.
+func (c *ContinuousIntervals) RemoveLargestIntervalLinkedList() *IntervalLinkedList {
+	largest, at := int64(0), -1
+	for i := range c.lists {
+		if sz := c.lists[i].Size(); sz >= largest {
+			largest, at = sz, i
+		}
+	}
+	if largest <= 0 {
+		return nil
+	}
+
+	picked := c.lists[at]
+	c.lists = append(c.lists[:at], c.lists[at+1:]...)
+	return picked
+}
+
+// removeList drops target from c.lists. The list is matched by pointer
+// identity rather than by head offset, so another list that happens to start
+// at the same offset is never removed in its place. It does nothing when
+// target is not present.
+func (c *ContinuousIntervals) removeList(target *IntervalLinkedList) {
+	for k, list := range c.lists {
+		if list == target {
+			c.lists = append(c.lists[:k], c.lists[k+1:]...)
+			return
+		}
+	}
+}
+
+// ReadData copies the dirty bytes overlapping
+// [startOffset, startOffset+len(data)) into data and reports the range they
+// span.
+//
+// offset is the smallest offset copied and size reaches to the largest stop
+// copied, so when several lists contribute, the reported range can include
+// gaps that no list covers. It returns 0, 0 when no list overlaps the request.
+func (c *ContinuousIntervals) ReadData(data []byte, startOffset int64) (offset int64, size int) {
+	var minOffset int64 = math.MaxInt64
+	var maxStop int64
+	readStop := startOffset + int64(len(data))
+	for _, list := range c.lists {
+		start := max(startOffset, list.Offset())
+		stop := min(readStop, list.Offset()+list.Size())
+		// Strictly less: a list that merely touches the request boundary
+		// contributes no bytes and must not widen the reported range.
+		if start < stop {
+			list.ReadData(data[start-startOffset:], start, stop)
+			minOffset = min(minOffset, start)
+			maxStop = max(maxStop, stop)
+		}
+	}
+
+	if minOffset == math.MaxInt64 {
+		return 0, 0
+	}
+
+	offset = minOffset
+	size = int(maxStop - offset)
+	return
+}
+
+// ToReader returns a reader that yields the Data of every node in list
+// order, head first. A list without a head yields an empty reader.
+func (list *IntervalLinkedList) ToReader() io.Reader {
+	var readers []io.Reader
+	for t := list.Head; t != nil; t = t.Next {
+		readers = append(readers, bytes.NewReader(t.Data))
+	}
+	return io.MultiReader(readers...)
+}
diff --git a/weed/filesys/file.go b/weed/filesys/file.go
index b1d53507b..5a823f516 100644
--- a/weed/filesys/file.go
+++ b/weed/filesys/file.go
@@ -230,12 +230,6 @@ func (file *File) maybeLoadEntry(ctx context.Context) error {
return nil
}
-func (file *File) addChunk(chunk *filer_pb.FileChunk) {
- if chunk != nil {
- file.addChunks([]*filer_pb.FileChunk{chunk})
- }
-}
-
func (file *File) addChunks(chunks []*filer_pb.FileChunk) {
sort.Slice(chunks, func(i, j int) bool {
diff --git a/weed/filesys/filehandle.go b/weed/filesys/filehandle.go
index 981de7ea2..c3f06ae8a 100644
--- a/weed/filesys/filehandle.go
+++ b/weed/filesys/filehandle.go
@@ -55,8 +55,8 @@ func (fh *FileHandle) Read(ctx context.Context, req *fuse.ReadRequest, resp *fus
totalRead, err := fh.readFromChunks(ctx, buff, req.Offset)
if err == nil {
- dirtyOffset, dirtySize, dirtyReadErr := fh.readFromDirtyPages(ctx, buff, req.Offset)
- if dirtyReadErr == nil && totalRead+req.Offset < dirtyOffset+int64(dirtySize) {
+ dirtyOffset, dirtySize := fh.readFromDirtyPages(ctx, buff, req.Offset)
+ if totalRead+req.Offset < dirtyOffset+int64(dirtySize) {
totalRead = dirtyOffset + int64(dirtySize) - req.Offset
}
}
@@ -70,7 +70,7 @@ func (fh *FileHandle) Read(ctx context.Context, req *fuse.ReadRequest, resp *fus
return err
}
-func (fh *FileHandle) readFromDirtyPages(ctx context.Context, buff []byte, startOffset int64) (offset int64, size int, err error) {
+func (fh *FileHandle) readFromDirtyPages(ctx context.Context, buff []byte, startOffset int64) (offset int64, size int) {
return fh.dirtyPages.ReadDirtyData(ctx, buff, startOffset)
}
@@ -102,8 +102,6 @@ func (fh *FileHandle) Write(ctx context.Context, req *fuse.WriteRequest, resp *f
// write the request to volume servers
- glog.V(4).Infof("%+v/%v write fh %d: [%d,%d)", fh.f.dir.Path, fh.f.Name, fh.handle, req.Offset, req.Offset+int64(len(req.Data)))
-
chunks, err := fh.dirtyPages.AddPage(ctx, req.Offset, req.Data)
if err != nil {
glog.Errorf("%+v/%v write fh %d: [%d,%d): %v", fh.f.dir.Path, fh.f.Name, fh.handle, req.Offset, req.Offset+int64(len(req.Data)), err)
@@ -152,13 +150,16 @@ func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error {
// send the data to the OS
glog.V(4).Infof("%s fh %d flush %v", fh.f.fullpath(), fh.handle, req)
- chunk, err := fh.dirtyPages.FlushToStorage(ctx)
+ chunks, err := fh.dirtyPages.FlushToStorage(ctx)
if err != nil {
glog.Errorf("flush %s/%s: %v", fh.f.dir.Path, fh.f.Name, err)
return fmt.Errorf("flush %s/%s: %v", fh.f.dir.Path, fh.f.Name, err)
}
- fh.f.addChunk(chunk)
+ fh.f.addChunks(chunks)
+ if len(chunks) > 0 {
+ fh.dirtyMetadata = true
+ }
if !fh.dirtyMetadata {
return nil