diff options
Diffstat (limited to 'weed/storage')
| -rw-r--r-- | weed/storage/erasure_coding/ec_encoder.go | 128 | ||||
| -rw-r--r-- | weed/storage/erasure_coding/ec_test.go | 106 |
2 files changed, 123 insertions, 111 deletions
diff --git a/weed/storage/erasure_coding/ec_encoder.go b/weed/storage/erasure_coding/ec_encoder.go index 4b5205dee..f4aa17e9d 100644 --- a/weed/storage/erasure_coding/ec_encoder.go +++ b/weed/storage/erasure_coding/ec_encoder.go @@ -6,6 +6,9 @@ import ( "os" "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/needle_map" + "github.com/chrislusf/seaweedfs/weed/storage/types" "github.com/klauspost/reedsolomon" ) @@ -16,11 +19,66 @@ const ( ErasureCodingSmallBlockSize = 1024 * 1024 // 1MB ) +// WriteSortedEcxFile generates .ecx file from existing .idx file +// all keys are sorted in ascending order +func WriteSortedEcxFile(baseFileName string) (e error) { + + cm, err := readCompactMap(baseFileName) + if err != nil { + return fmt.Errorf("readCompactMap: %v", err) + } + + ecxFile, err := os.OpenFile(baseFileName+".ecx", os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return fmt.Errorf("failed to open dat file: %v", err) + } + defer ecxFile.Close() + + err = cm.AscendingVisit(func(value needle_map.NeedleValue) error { + bytes := value.ToBytes() + _, writeErr := ecxFile.Write(bytes) + return writeErr + }) + + if err != nil { + return fmt.Errorf("failed to open dat file: %v", err) + } + + return nil +} + +// WriteEcFiles generates .ec01 ~ .ec14 files +func WriteEcFiles(baseFileName string) error { + return generateEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize) +} + +func ToExt(ecIndex int) string { + return fmt.Sprintf(".ec%02d", ecIndex) +} + +func generateEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64) error { + file, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0) + if err != nil { + return fmt.Errorf("failed to open dat file: %v", err) + } + defer file.Close() + + fi, err := file.Stat() + if err != nil { + return fmt.Errorf("failed to stat dat file: %v", err) + } + err = encodeDatFile(fi.Size(), err, baseFileName, bufferSize, largeBlockSize, file, smallBlockSize) + if err != nil { + return fmt.Errorf("encodeDatFile: %v", err) + } + return nil +} + func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error { bufferSize := int64(len(buffers[0])) - batchCount := blockSize/bufferSize - if blockSize%bufferSize!=0 { + batchCount := blockSize / bufferSize + if blockSize%bufferSize != 0 { glog.Fatalf("unexpected block size %d buffer size %d", blockSize, bufferSize) } @@ -34,10 +92,10 @@ func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize i return nil } -func openEcFiles(baseFileName string, forRead bool) (files []*os.File, err error){ - for i := 0; i< DataShardsCount+ParityShardsCount; i++{ - fname := fmt.Sprintf("%s.ec%02d", baseFileName, i+1) - openOption := os.O_TRUNC|os.O_CREATE|os.O_WRONLY +func openEcFiles(baseFileName string, forRead bool) (files []*os.File, err error) { + for i := 0; i < DataShardsCount+ParityShardsCount; i++ { + fname := baseFileName + ToExt(i+1) + openOption := os.O_TRUNC | os.O_CREATE | os.O_WRONLY if forRead { openOption = os.O_RDONLY } @@ -50,15 +108,14 @@ func openEcFiles(baseFileName string, forRead bool) (files []*os.File, err error return } -func closeEcFiles(files []*os.File){ - for _, f := range files{ +func closeEcFiles(files []*os.File) { + for _, f := range files { if f != nil { f.Close() } } } - func encodeDataOneBatch(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error { // read data into buffers @@ -90,3 +147,56 @@ func encodeDataOneBatch(file *os.File, enc reedsolomon.Encoder, startOffset, blo return nil } + +func encodeDatFile(remainingSize int64, err error, baseFileName string, bufferSize int, largeBlockSize int64, file *os.File, smallBlockSize int64) error { + var processedSize int64 + enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount) + if err != nil { + return fmt.Errorf("failed to create encoder: %v", err) + } + buffers := make([][]byte, DataShardsCount+ParityShardsCount) + outputs, err := openEcFiles(baseFileName, false) + defer closeEcFiles(outputs) + if err != nil { + return fmt.Errorf("failed to open dat file: %v", err) + } + for i, _ := range buffers { + buffers[i] = make([]byte, bufferSize) + } + for remainingSize > largeBlockSize*DataShardsCount { + err = encodeData(file, enc, processedSize, largeBlockSize, buffers, outputs) + if err != nil { + return fmt.Errorf("failed to encode large chunk data: %v", err) + } + remainingSize -= largeBlockSize * DataShardsCount + processedSize += largeBlockSize * DataShardsCount + } + for remainingSize > 0 { + encodeData(file, enc, processedSize, smallBlockSize, buffers, outputs) + if err != nil { + return fmt.Errorf("failed to encode small chunk data: %v", err) + } + remainingSize -= smallBlockSize * DataShardsCount + processedSize += smallBlockSize * DataShardsCount + } + return nil +} + +func readCompactMap(baseFileName string) (*needle_map.CompactMap, error) { + indexFile, err := os.OpenFile(baseFileName+".idx", os.O_RDONLY, 0644) + if err != nil { + return nil, fmt.Errorf("cannot read Volume Index %s.idx: %v", baseFileName, err) + } + defer indexFile.Close() + + cm := needle_map.NewCompactMap() + err = storage.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error { + if !offset.IsZero() && size != types.TombstoneFileSize { + cm.Set(key, offset, size) + } else { + cm.Delete(key) + } + return nil + }) + return cm, err +} diff --git a/weed/storage/erasure_coding/ec_test.go b/weed/storage/erasure_coding/ec_test.go index d631471e9..625f4e9a6 100644 --- a/weed/storage/erasure_coding/ec_test.go +++ b/weed/storage/erasure_coding/ec_test.go @@ -7,7 +7,6 @@ import ( "os" "testing" - "github.com/chrislusf/seaweedfs/weed/storage" "github.com/chrislusf/seaweedfs/weed/storage/needle_map" "github.com/chrislusf/seaweedfs/weed/storage/types" "github.com/klauspost/reedsolomon" @@ -27,98 +26,20 @@ func TestEncodingDecoding(t *testing.T) { t.Logf("generateEcFiles: %v", err) } - err = writeSortedEcxFiles(baseFileName) + err = WriteSortedEcxFile(baseFileName) if err != nil { - t.Logf("writeSortedEcxFiles: %v", err) + t.Logf("WriteSortedEcxFile: %v", err) } err = validateFiles(baseFileName) if err != nil { - t.Logf("writeSortedEcxFiles: %v", err) + t.Logf("WriteSortedEcxFile: %v", err) } removeGeneratedFiles(baseFileName) } -func generateEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64) error { - file, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0) - if err != nil { - return fmt.Errorf("failed to open dat file: %v", err) - } - defer file.Close() - - fi, err := file.Stat() - if err != nil { - return fmt.Errorf("failed to stat dat file: %v", err) - } - err = encodeDatFile(fi.Size(), err, baseFileName, bufferSize, largeBlockSize, file, smallBlockSize) - if err != nil { - return fmt.Errorf("encodeDatFile: %v", err) - } - return nil -} - -func encodeDatFile(remainingSize int64, err error, baseFileName string, bufferSize int, largeBlockSize int64, file *os.File, smallBlockSize int64) error { - var processedSize int64 - enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount) - if err != nil { - return fmt.Errorf("failed to create encoder: %v", err) - } - buffers := make([][]byte, DataShardsCount+ParityShardsCount) - outputs, err := openEcFiles(baseFileName, false) - defer closeEcFiles(outputs) - if err != nil { - return fmt.Errorf("failed to open dat file: %v", err) - } - for i, _ := range buffers { - buffers[i] = make([]byte, bufferSize) - } - for remainingSize > largeBlockSize*DataShardsCount { - err = encodeData(file, enc, processedSize, largeBlockSize, buffers, outputs) - if err != nil { - return fmt.Errorf("failed to encode large chunk data: %v", err) - } - remainingSize -= largeBlockSize * DataShardsCount - processedSize += largeBlockSize * DataShardsCount - } - for remainingSize > 0 { - encodeData(file, enc, processedSize, smallBlockSize, buffers, outputs) - if err != nil { - return fmt.Errorf("failed to encode small chunk data: %v", err) - } - remainingSize -= smallBlockSize * DataShardsCount - processedSize += smallBlockSize * DataShardsCount - } - return nil -} - -func writeSortedEcxFiles(baseFileName string) (e error) { - - cm, err := readCompactMap(baseFileName) - if err != nil { - return fmt.Errorf("readCompactMap: %v", err) - } - - ecxFile, err := os.OpenFile(baseFileName+".ecx", os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644) - if err != nil { - return fmt.Errorf("failed to open dat file: %v", err) - } - defer ecxFile.Close() - - err = cm.AscendingVisit(func(value needle_map.NeedleValue) error { - bytes := value.ToBytes() - _, writeErr := ecxFile.Write(bytes) - return writeErr - }) - - if err != nil { - return fmt.Errorf("failed to open dat file: %v", err) - } - - return nil -} - func validateFiles(baseFileName string) error { cm, err := readCompactMap(baseFileName) if err != nil { @@ -148,25 +69,6 @@ func validateFiles(baseFileName string) error { return nil } -func readCompactMap(baseFileName string) (*needle_map.CompactMap, error) { - indexFile, err := os.OpenFile(baseFileName+".idx", os.O_RDONLY, 0644) - if err != nil { - return nil, fmt.Errorf("cannot read Volume Index %s.idx: %v", baseFileName, err) - } - defer indexFile.Close() - - cm := needle_map.NewCompactMap() - err = storage.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error { - if !offset.IsZero() && size != types.TombstoneFileSize { - cm.Set(key, offset, size) - } else { - cm.Delete(key) - } - return nil - }) - return cm, err -} - func assertSame(datFile *os.File, datSize int64, ecFiles []*os.File, offset types.Offset, size uint32) error { data, err := readDatFile(datFile, offset, size) @@ -288,7 +190,7 @@ func removeGeneratedFiles(baseFileName string) { fname := fmt.Sprintf("%s.ec%02d", baseFileName, i+1) os.Remove(fname) } - os.Remove(baseFileName+".ecx") + os.Remove(baseFileName + ".ecx") } func TestLocateData(t *testing.T) { |
