about summary refs log tree commit diff
diff options
context:
space:
mode:
author Chris Lu <chris.lu@gmail.com> 2020-01-26 13:01:11 -0800
committer Chris Lu <chris.lu@gmail.com> 2020-01-26 13:01:11 -0800
commit 08e4b56a8abaaadb0701979e4bd857fb8c50c776 (patch)
tree 3a00ef26c2fbf288383974cda6bd1400e0c77159
parent 19a05ad1746f7489a8e4981c01a4b8d2f5573654 (diff)
download seaweedfs-08e4b56a8abaaadb0701979e4bd857fb8c50c776.tar.xz
seaweedfs-08e4b56a8abaaadb0701979e4bd857fb8c50c776.zip
mount: able to handle large git clone
-rw-r--r-- weed/filesys/dirty_page.go 7
-rw-r--r-- weed/filesys/dirty_page_interval.go 34
-rw-r--r-- weed/filesys/dirty_page_interval_test.go 40
3 files changed, 60 insertions, 21 deletions
diff --git a/weed/filesys/dirty_page.go b/weed/filesys/dirty_page.go
index f1532a6a0..7a41e371e 100644
--- a/weed/filesys/dirty_page.go
+++ b/weed/filesys/dirty_page.go
@@ -44,12 +44,7 @@ func (pages *ContinuousDirtyPages) AddPage(ctx context.Context, offset int64, da
return pages.flushAndSave(ctx, offset, data)
}
- hasOverlap := pages.intervals.AddInterval(data, offset)
- if hasOverlap {
- chunks, err = pages.saveExistingPagesToStorage(ctx)
- pages.intervals.AddInterval(data, offset)
- return
- }
+ pages.intervals.AddInterval(data, offset)
var chunk *filer_pb.FileChunk
var hasSavedData bool
diff --git a/weed/filesys/dirty_page_interval.go b/weed/filesys/dirty_page_interval.go
index c64196cdf..77fab75ef 100644
--- a/weed/filesys/dirty_page_interval.go
+++ b/weed/filesys/dirty_page_interval.go
@@ -65,7 +65,10 @@ func (c *ContinuousIntervals) TotalSize() (total int64) {
return
}
-func (c *ContinuousIntervals) AddInterval(data []byte, offset int64) (hasOverlap bool) {
+func (c *ContinuousIntervals) AddInterval(data []byte, offset int64) {
+
+ // TODO AddInterval needs to handle all possible out of order writes
+
interval := &IntervalNode{Data: data, Offset: offset, Size: int64(len(data))}
var prevList, nextList *IntervalLinkedList
@@ -75,6 +78,10 @@ func (c *ContinuousIntervals) AddInterval(data []byte, offset int64) (hasOverlap
nextList = list
break
}
+ if list.Head.Offset < interval.Offset+interval.Size && interval.Offset+interval.Size <= list.Head.Offset+list.Size() {
+ glog.V(0).Infof("unexpected [%d,%d) overlaps [%d,%d)", interval.Offset, interval.Offset+interval.Size, list.Head.Offset, list.Head.Offset+list.Size())
+ break
+ }
}
for _, list := range c.lists {
@@ -84,20 +91,17 @@ func (c *ContinuousIntervals) AddInterval(data []byte, offset int64) (hasOverlap
break
}
if list.Head.Offset <= offset && offset < list.Head.Offset+list.Size() {
- if list.Tail.Offset <= offset {
- dataStartIndex := list.Tail.Offset + list.Tail.Size - offset
- glog.V(4).Infof("overlap data new [0,%d) same=%v", dataStartIndex, bytes.Compare(interval.Data[0:dataStartIndex], list.Tail.Data[len(list.Tail.Data)-int(dataStartIndex):]))
- interval.Data = interval.Data[dataStartIndex:]
- interval.Size -= dataStartIndex
- interval.Offset = offset + dataStartIndex
- glog.V(4).Infof("overlapping append as [%d,%d) dataSize=%d", interval.Offset, interval.Offset+interval.Size, len(interval.Data))
- list.addNodeToTail(interval)
- prevList = list
- break
- }
- glog.V(4).Infof("overlapped! interval is [%d,%d) dataSize=%d", interval.Offset, interval.Offset+interval.Size, len(interval.Data))
- hasOverlap = true
- return
+
+ // the new interval overwrites the old tail
+ dataStartIndex := list.Tail.Offset + list.Tail.Size - offset
+ glog.V(4).Infof("overlap data new [0,%d) same=%v", dataStartIndex, bytes.Compare(interval.Data[0:dataStartIndex], list.Tail.Data[len(list.Tail.Data)-int(dataStartIndex):]))
+ list.Tail.Data = list.Tail.Data[:len(list.Tail.Data)-int(dataStartIndex)]
+ list.Tail.Size -= dataStartIndex
+ glog.V(4).Infof("overlapping append as [%d,%d) dataSize=%d", interval.Offset, interval.Offset+interval.Size, len(interval.Data))
+
+ list.addNodeToTail(interval)
+ prevList = list
+ break
}
}
diff --git a/weed/filesys/dirty_page_interval_test.go b/weed/filesys/dirty_page_interval_test.go
new file mode 100644
index 000000000..4f62f90c9
--- /dev/null
+++ b/weed/filesys/dirty_page_interval_test.go
@@ -0,0 +1,40 @@
+package filesys
+
+import (
+ "bytes"
+ "testing"
+)
+
+func TestContinuousIntervals_AddInterval(t *testing.T) {
+
+ c := &ContinuousIntervals{}
+
+ // 25, 25, 25
+ c.AddInterval(getBytes(25, 3), 0)
+ // _, _, 23, 23, 23, 23
+ c.AddInterval(getBytes(23, 4), 2)
+
+ expectedData(t, c, 0, 25, 25, 23, 23, 23, 23)
+}
+
+func expectedData(t *testing.T, c *ContinuousIntervals, offset int, data ...byte) {
+ start, stop := int64(offset), int64(offset+len(data))
+ for _, list := range c.lists {
+ nodeStart, nodeStop := max(start, list.Head.Offset), min(stop, list.Head.Offset+list.Size())
+ if nodeStart < nodeStop {
+ buf := make([]byte, nodeStop-nodeStart)
+ list.ReadData(buf, nodeStart, nodeStop)
+ if bytes.Compare(buf, data[nodeStart-start:nodeStop-start]) != 0 {
+ t.Errorf("expected %v actual %v", data[nodeStart-start:nodeStop-start], buf)
+ }
+ }
+ }
+}
+
+func getBytes(content byte, length int) []byte {
+ data := make([]byte, length)
+ for i := 0; i < length; i++ {
+ data[i] = content
+ }
+ return data
+}