aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorchrislu <chris.lu@gmail.com>2022-02-07 03:46:28 -0800
committerchrislu <chris.lu@gmail.com>2022-02-07 03:46:28 -0800
commit9405eaefdbd8ee94dac9bd12e324e81afaa474f5 (patch)
tree5f5f90356071ca8024ee91e3d8c3a9674cbc511d
parent433fde4b186a8244aabeb8659bb901a89b56e213 (diff)
downloadseaweedfs-9405eaefdbd8ee94dac9bd12e324e81afaa474f5.tar.xz
seaweedfs-9405eaefdbd8ee94dac9bd12e324e81afaa474f5.zip
filer.sync: fix replicating partially updated file
Run two servers with volumes and filers: `server -dir=Server1alpha -master.port=11000 -filer -filer.port=11001 -volume.port=11002` and `server -dir=Server1sigma -master.port=11006 -filer -filer.port=11007 -volume.port=11008`. Run active-passive filer.sync: `filer.sync -a localhost:11007 -b localhost:11001 -isActivePassive`. Upload a file to port 11007: `curl -F file=@/Desktop/9.xml "http://localhost:11007/testFacebook/"`. If we now request the file from both servers, everything is correct, even if we append data to the file and upload it again: the output of `curl "http://localhost:11007/testFacebook/9.xml"` EQUALS the output of `curl "http://localhost:11001/testFacebook/9.xml"`. However, if we change already existing data in the file (for example, we change the first line in the file, reducing its length), then the file on the second server becomes invalid and is no longer equivalent to the first file. [Screenshot 2022-02-07 at 14:21:11] This problem occurs on line 202 of the filer_sink.go file. In particular, it is caused by incorrect matching of chunk names in the DoMinusChunks function. The names in deletedChunks do not match the chunks in existingEntry.Chunks, because the former come from another server and have different addressing (names) than the chunks on the server where the file is being overwritten. As a result, the deleted chunks are not actually deleted on the server to which the file is replicated.
-rw-r--r--weed/filer/filechunks.go15
-rw-r--r--weed/replication/sink/filersink/filer_sink.go2
2 files changed, 16 insertions, 1 deletions
diff --git a/weed/filer/filechunks.go b/weed/filer/filechunks.go
index be18d45ac..d18d06f2c 100644
--- a/weed/filer/filechunks.go
+++ b/weed/filer/filechunks.go
@@ -101,6 +101,21 @@ func DoMinusChunks(as, bs []*filer_pb.FileChunk) (delta []*filer_pb.FileChunk) {
return
}
+// DoMinusChunksBySourceFileId returns the chunks of as whose source file id
+// is not the file id of any chunk in bs, preserving the order of as.
+//
+// Chunks of as are compared by GetSourceFileId, chunks of bs by
+// GetFileIdString. The result is nil when no chunk of as is kept.
+func DoMinusChunksBySourceFileId(as, bs []*filer_pb.FileChunk) (delta []*filer_pb.FileChunk) {
+	// Collect the file ids of bs once so each chunk of as needs a single lookup.
+	excluded := make(map[string]bool)
+	for _, b := range bs {
+		excluded[b.GetFileIdString()] = true
+	}
+
+	for _, a := range as {
+		if excluded[a.GetSourceFileId()] {
+			// The source of this chunk is listed in bs: drop it from the result.
+			continue
+		}
+		delta = append(delta, a)
+	}
+	return delta
+}
+
type ChunkView struct {
FileId string
Offset int64
diff --git a/weed/replication/sink/filersink/filer_sink.go b/weed/replication/sink/filersink/filer_sink.go
index c48ab2368..345c7f13b 100644
--- a/weed/replication/sink/filersink/filer_sink.go
+++ b/weed/replication/sink/filersink/filer_sink.go
@@ -199,7 +199,7 @@ func (fs *FilerSink) UpdateEntry(key string, oldEntry *filer_pb.Entry, newParent
// delete the chunks that are deleted from the source
if deleteIncludeChunks {
// remove the deleted chunks. Actual data deletion happens in filer UpdateEntry FindUnusedFileChunks
- existingEntry.Chunks = filer.DoMinusChunks(existingEntry.Chunks, deletedChunks)
+ existingEntry.Chunks = filer.DoMinusChunksBySourceFileId(existingEntry.Chunks, deletedChunks)
}
// replicate the chunks that are new in the source