45 files changed, 1844 insertions, 150 deletions
@@ -122,6 +122,7 @@ SeaweedFS can work as a distributed [Key-Large-Value store][KeyLargeValueStore]. * Support ETag, Accept-Range, Last-Modified, etc. * Support in-memory/leveldb/readonly mode tuning for memory/performance balance. * Support rebalancing the writable and readonly volumes. +* [Customizable Multiple Storage Tiers][TieredStorage]: Customizable storage disk types to balance performance and cost. * [Transparent cloud integration][CloudTier]: unlimited capacity via tiered cloud storage for warm data. * [Erasure Coding for warm storage][ErasureCoding] Rack-Aware 10.4 erasure coding reduces storage cost and increases availability. @@ -152,6 +153,7 @@ SeaweedFS can work as a distributed [Key-Large-Value store][KeyLargeValueStore]. [Hadoop]: https://github.com/chrislusf/seaweedfs/wiki/Hadoop-Compatible-File-System [WebDAV]: https://github.com/chrislusf/seaweedfs/wiki/WebDAV [ErasureCoding]: https://github.com/chrislusf/seaweedfs/wiki/Erasure-coding-for-warm-storage +[TieredStorage]: https://github.com/chrislusf/seaweedfs/wiki/Tiered-Storage [CloudTier]: https://github.com/chrislusf/seaweedfs/wiki/Cloud-Tier [FilerDataEncryption]: https://github.com/chrislusf/seaweedfs/wiki/Filer-Data-Encryption [FilerTTL]: https://github.com/chrislusf/seaweedfs/wiki/Filer-Stores diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 5a858d993..a5a240575 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -60,9 +60,9 @@ case "$1" in 'cronjob') MASTER=${WEED_MASTER-localhost:9333} FIX_REPLICATION_CRON_SCHEDULE=${CRON_SCHEDULE-*/7 * * * * *} - echo "$FIX_REPLICATION_CRON_SCHEDULE" 'echo "volume.fix.replication" | weed shell -master='$MASTER > /crontab + echo "$FIX_REPLICATION_CRON_SCHEDULE" 'echo "lock; volume.fix.replication; unlock" | weed shell -master='$MASTER > /crontab BALANCING_CRON_SCHEDULE=${CRON_SCHEDULE-25 * * * * *} - echo "$BALANCING_CRON_SCHEDULE" 'echo "volume.balance -c ALL -force" | weed shell -master='$MASTER >> /crontab + echo "$BALANCING_CRON_SCHEDULE" 'echo "lock; volume.balance -collection ALL_COLLECTIONS -force; unlock" | weed shell -master='$MASTER >> /crontab echo "Running Crontab:" cat /crontab exec supercronic /crontab @@ -48,7 +48,7 @@ require ( github.com/klauspost/crc32 v1.2.0 github.com/klauspost/reedsolomon v1.9.2 github.com/kurin/blazer v0.5.3 - github.com/lib/pq v1.2.0 + github.com/lib/pq v1.10.0 github.com/lunixbochs/vtclean v1.0.0 // indirect github.com/magiconair/properties v1.8.1 // indirect github.com/mattn/go-colorable v0.1.2 // indirect @@ -496,6 +496,8 @@ github.com/kurin/blazer v0.5.3/go.mod h1:4FCXMUWo9DllR2Do4TtBd377ezyAJ51vB5uTBjt github.com/lib/pq v1.1.1/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.2.0 h1:LXpIM/LZ5xGFhOpXAQUIMM1HdyqzVYM13zNdjCEEcA0= github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= +github.com/lib/pq v1.10.0 h1:Zx5DJFEYQXio93kgXnQ09fXNiUKsqv4OUEu2UtGcB1E= +github.com/lib/pq v1.10.0/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM= github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4= github.com/lunixbochs/vtclean v1.0.0 h1:xu2sLAri4lGiovBDQKxl5mrXyESr3gUr5m5SM5+LVb8= diff --git a/k8s/seaweedfs/Chart.yaml b/k8s/seaweedfs/Chart.yaml index 5be2e0f9d..c19a964d3 100644 --- a/k8s/seaweedfs/Chart.yaml +++ b/k8s/seaweedfs/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v1 
description: SeaweedFS name: seaweedfs -appVersion: "2.29" -version: 2.29 +appVersion: "2.31" +version: 2.31 diff --git a/k8s/seaweedfs/values.yaml b/k8s/seaweedfs/values.yaml index 0d8459bc1..a5813b8b3 100644 --- a/k8s/seaweedfs/values.yaml +++ b/k8s/seaweedfs/values.yaml @@ -4,7 +4,7 @@ global: registry: "" repository: "" imageName: chrislusf/seaweedfs - # imageTag: "2.29" - started using {.Chart.appVersion} + # imageTag: "2.31" - started using {.Chart.appVersion} imagePullPolicy: IfNotPresent imagePullSecrets: imagepullsecret restartPolicy: Always diff --git a/note/SeaweedFS_Architecture.png b/note/SeaweedFS_Architecture.png Binary files differnew file mode 100644 index 000000000..de27a5f53 --- /dev/null +++ b/note/SeaweedFS_Architecture.png diff --git a/weed/command/benchmark.go b/weed/command/benchmark.go index c1bc80c42..af0793c70 100644 --- a/weed/command/benchmark.go +++ b/weed/command/benchmark.go @@ -41,6 +41,7 @@ type BenchmarkOptions struct { grpcDialOption grpc.DialOption masterClient *wdclient.MasterClient fsync *bool + useTcp *bool } var ( @@ -67,6 +68,7 @@ func init() { b.cpuprofile = cmdBenchmark.Flag.String("cpuprofile", "", "cpu profile output file") b.maxCpu = cmdBenchmark.Flag.Int("maxCpu", 0, "maximum number of CPUs. 0 means all available CPUs") b.fsync = cmdBenchmark.Flag.Bool("fsync", false, "flush data to disk after write") + b.useTcp = cmdBenchmark.Flag.Bool("useTcp", false, "send data via tcp") sharedBytes = make([]byte, 1024) } @@ -223,6 +225,8 @@ func writeFiles(idChan chan int, fileIdLineChan chan string, s *stat) { random := rand.New(rand.NewSource(time.Now().UnixNano())) + volumeTcpClient := wdclient.NewVolumeTcpClient() + for id := range idChan { start := time.Now() fileSize := int64(*b.fileSize + random.Intn(64)) @@ -243,7 +247,15 @@ func writeFiles(idChan chan int, fileIdLineChan chan string, s *stat) { if !isSecure && assignResult.Auth != "" { isSecure = true } - if _, err := fp.Upload(0, b.masterClient.GetMaster, false, assignResult.Auth, b.grpcDialOption); err == nil { + if *b.useTcp { + if uploadByTcp(volumeTcpClient, fp) { + fileIdLineChan <- fp.Fid + s.completed++ + s.transferred += fileSize + } else { + s.failed++ + } + } else if _, err := fp.Upload(0, b.masterClient.GetMaster, false, assignResult.Auth, b.grpcDialOption); err == nil { if random.Intn(100) < *b.deletePercentage { s.total++ delayedDeleteChan <- &delayedFile{time.Now().Add(time.Second), fp} @@ -329,6 +341,17 @@ func writeFileIds(fileName string, fileIdLineChan chan string, finishChan chan b } } +func uploadByTcp(volumeTcpClient *wdclient.VolumeTcpClient, fp *operation.FilePart) bool { + + err := volumeTcpClient.PutFileChunk(fp.Server, fp.Fid, uint32(fp.FileSize), fp.Reader) + if err != nil { + glog.Errorf("upload chunk err: %v", err) + return false + } + + return true +} + func readFileIds(fileName string, fileIdLineChan chan string) { file, err := os.Open(fileName) // For read access. if err != nil { diff --git a/weed/command/server.go b/weed/command/server.go index 64321b4d7..a39802412 100644 --- a/weed/command/server.go +++ b/weed/command/server.go @@ -111,6 +111,7 @@ func init() { serverOptions.v.preStopSeconds = cmdServer.Flag.Int("volume.preStopSeconds", 10, "number of seconds between stop send heartbeats and stop volume server") serverOptions.v.pprof = cmdServer.Flag.Bool("volume.pprof", false, "enable pprof http handlers. 
precludes --memprofile and --cpuprofile") serverOptions.v.idxFolder = cmdServer.Flag.String("volume.dir.idx", "", "directory to store .idx files") + serverOptions.v.enableTcp = cmdServer.Flag.Bool("volume.tcp", false, "<exprimental> enable tcp port") s3Options.port = cmdServer.Flag.Int("s3.port", 8333, "s3 server http listen port") s3Options.domainName = cmdServer.Flag.String("s3.domainName", "", "suffix of the host name in comma separated list, {bucket}.{domainName}") diff --git a/weed/command/volume.go b/weed/command/volume.go index cf162a732..f49ece9dc 100644 --- a/weed/command/volume.go +++ b/weed/command/volume.go @@ -62,6 +62,7 @@ type VolumeServerOptions struct { preStopSeconds *int metricsHttpPort *int // pulseSeconds *int + enableTcp *bool } func init() { @@ -88,6 +89,7 @@ func init() { v.pprof = cmdVolume.Flag.Bool("pprof", false, "enable pprof http handlers. precludes --memprofile and --cpuprofile") v.metricsHttpPort = cmdVolume.Flag.Int("metricsPort", 0, "Prometheus metrics listen port") v.idxFolder = cmdVolume.Flag.String("dir.idx", "", "directory to store .idx files") + v.enableTcp = cmdVolume.Flag.Bool("tcp", false, "<exprimental> enable tcp port") } var cmdVolume = &Command{ @@ -252,7 +254,9 @@ func (v VolumeServerOptions) startVolumeServer(volumeFolders, maxVolumeCounts, v } // starting tcp server - go v.startTcpService(volumeServer) + if *v.enableTcp { + go v.startTcpService(volumeServer) + } // starting the cluster http server clusterHttpServer := v.startClusterHttpService(volumeMux) diff --git a/weed/filer/filer_on_meta_event.go b/weed/filer/filer_on_meta_event.go index 295a5039e..b29324b61 100644 --- a/weed/filer/filer_on_meta_event.go +++ b/weed/filer/filer_on_meta_event.go @@ -11,6 +11,10 @@ import ( // onMetadataChangeEvent is triggered after filer processed change events from local or remote filers func (f *Filer) onMetadataChangeEvent(event *filer_pb.SubscribeMetadataResponse) { + f.maybeReloadFilerConfiguration(event) +} + +func (f *Filer) maybeReloadFilerConfiguration(event *filer_pb.SubscribeMetadataResponse) { if DirectoryEtcSeaweedFS != event.Directory { if DirectoryEtcSeaweedFS != event.EventNotification.NewParentPath { return @@ -26,7 +30,6 @@ func (f *Filer) onMetadataChangeEvent(event *filer_pb.SubscribeMetadataResponse) if entry.Name == FilerConfName { f.reloadFilerConfiguration(entry) } - } func (f *Filer) readEntry(chunks []*filer_pb.FileChunk) ([]byte, error) { diff --git a/weed/filer/meta_aggregator.go b/weed/filer/meta_aggregator.go index 9437e9992..5c368a57e 100644 --- a/weed/filer/meta_aggregator.go +++ b/weed/filer/meta_aggregator.go @@ -69,6 +69,7 @@ func (ma *MetaAggregator) subscribeToOneFiler(f *Filer, self string, peer string peerSignature, err = ma.readFilerStoreSignature(peer) } + // when filer store is not shared by multiple filers if peerSignature != f.Signature { if prevTsNs, err := ma.readOffset(f, peer, peerSignature); err == nil { lastTsNs = prevTsNs diff --git a/weed/filesys/dir_link.go b/weed/filesys/dir_link.go index 29391faeb..606e52fcb 100644 --- a/weed/filesys/dir_link.go +++ b/weed/filesys/dir_link.go @@ -35,15 +35,20 @@ func (dir *Dir) Link(ctx context.Context, req *fuse.LinkRequest, old fs.Node) (f return nil, err } + oldEntry := oldFile.getEntry() + if oldEntry == nil { + return nil, fuse.EIO + } + // update old file to hardlink mode - if len(oldFile.entry.HardLinkId) == 0 { - oldFile.entry.HardLinkId = append(util.RandomBytes(16), HARD_LINK_MARKER) - oldFile.entry.HardLinkCounter = 1 + if len(oldEntry.HardLinkId) == 0 { + 
oldEntry.HardLinkId = append(util.RandomBytes(16), HARD_LINK_MARKER) + oldEntry.HardLinkCounter = 1 } - oldFile.entry.HardLinkCounter++ + oldEntry.HardLinkCounter++ updateOldEntryRequest := &filer_pb.UpdateEntryRequest{ Directory: oldFile.dir.FullPath(), - Entry: oldFile.entry, + Entry: oldEntry, Signatures: []int32{dir.wfs.signature}, } @@ -53,11 +58,11 @@ func (dir *Dir) Link(ctx context.Context, req *fuse.LinkRequest, old fs.Node) (f Entry: &filer_pb.Entry{ Name: req.NewName, IsDirectory: false, - Attributes: oldFile.entry.Attributes, - Chunks: oldFile.entry.Chunks, - Extended: oldFile.entry.Extended, - HardLinkId: oldFile.entry.HardLinkId, - HardLinkCounter: oldFile.entry.HardLinkCounter, + Attributes: oldEntry.Attributes, + Chunks: oldEntry.Chunks, + Extended: oldEntry.Extended, + HardLinkId: oldEntry.HardLinkId, + HardLinkCounter: oldEntry.HardLinkCounter, }, Signatures: []int32{dir.wfs.signature}, } diff --git a/weed/filesys/dirty_page.go b/weed/filesys/dirty_page.go index f05a3a56a..8888cff96 100644 --- a/weed/filesys/dirty_page.go +++ b/weed/filesys/dirty_page.go @@ -30,7 +30,7 @@ func newDirtyPages(file *File) *ContinuousDirtyPages { func (pages *ContinuousDirtyPages) AddPage(offset int64, data []byte) { - glog.V(4).Infof("%s AddPage [%d,%d) of %d bytes", pages.f.fullpath(), offset, offset+int64(len(data)), pages.f.entry.Attributes.FileSize) + glog.V(4).Infof("%s AddPage [%d,%d)", pages.f.fullpath(), offset, offset+int64(len(data))) if len(data) > int(pages.f.wfs.option.ChunkSizeLimit) { // this is more than what buffer can hold. @@ -69,7 +69,12 @@ func (pages *ContinuousDirtyPages) saveExistingLargestPageToStorage() (hasSavedD return false } - fileSize := int64(pages.f.entry.Attributes.FileSize) + entry := pages.f.getEntry() + if entry == nil { + return false + } + + fileSize := int64(entry.Attributes.FileSize) chunkSize := min(maxList.Size(), fileSize-maxList.Offset()) if chunkSize == 0 { diff --git a/weed/filesys/file.go b/weed/filesys/file.go index bd722b31f..5931dd2ff 100644 --- a/weed/filesys/file.go +++ b/weed/filesys/file.go @@ -5,6 +5,7 @@ import ( "io" "os" "sort" + "sync" "time" "github.com/seaweedfs/fuse" @@ -33,6 +34,7 @@ type File struct { dir *Dir wfs *WFS entry *filer_pb.Entry + entryLock sync.RWMutex entryViewCache []filer.VisibleInterval isOpen int reader io.ReaderAt @@ -47,7 +49,7 @@ func (file *File) Attr(ctx context.Context, attr *fuse.Attr) (err error) { glog.V(4).Infof("file Attr %s, open:%v existing:%v", file.fullpath(), file.isOpen, attr) - entry := file.entry + entry := file.getEntry() if file.isOpen <= 0 || entry == nil { if entry, err = file.maybeLoadEntry(ctx); err != nil { return err @@ -258,7 +260,7 @@ func (file *File) Forget() { } func (file *File) maybeLoadEntry(ctx context.Context) (entry *filer_pb.Entry, err error) { - entry = file.entry + entry = file.getEntry() if file.isOpen > 0 { return entry, nil } @@ -299,8 +301,13 @@ func (file *File) addChunks(chunks []*filer_pb.FileChunk) { } } + entry := file.getEntry() + if entry == nil { + return + } + // pick out-of-order chunks from existing chunks - for _, chunk := range file.entry.Chunks { + for _, chunk := range entry.Chunks { if lessThan(earliestChunk, chunk) { chunks = append(chunks, chunk) } @@ -318,18 +325,22 @@ func (file *File) addChunks(chunks []*filer_pb.FileChunk) { file.reader = nil - glog.V(4).Infof("%s existing %d chunks adds %d more", file.fullpath(), len(file.entry.Chunks), len(chunks)) + glog.V(4).Infof("%s existing %d chunks adds %d more", file.fullpath(), len(entry.Chunks), 
len(chunks)) - file.entry.Chunks = append(file.entry.Chunks, newChunks...) + entry.Chunks = append(entry.Chunks, newChunks...) } func (file *File) setEntry(entry *filer_pb.Entry) { + file.entryLock.Lock() + defer file.entryLock.Unlock() file.entry = entry file.entryViewCache, _ = filer.NonOverlappingVisibleIntervals(file.wfs.LookupFn(), entry.Chunks) file.reader = nil } func (file *File) clearEntry() { + file.entryLock.Lock() + defer file.entryLock.Unlock() file.entry = nil file.entryViewCache = nil file.reader = nil @@ -359,3 +370,9 @@ func (file *File) saveEntry(entry *filer_pb.Entry) error { return nil }) } + +func (file *File) getEntry() *filer_pb.Entry { + file.entryLock.RLock() + defer file.entryLock.RUnlock() + return file.entry +} diff --git a/weed/filesys/filehandle.go b/weed/filesys/filehandle.go index fb073c9cd..adec1cd70 100644 --- a/weed/filesys/filehandle.go +++ b/weed/filesys/filehandle.go @@ -40,8 +40,9 @@ func newFileHandle(file *File, uid, gid uint32) *FileHandle { Uid: uid, Gid: gid, } - if fh.f.entry != nil { - fh.f.entry.Attributes.FileSize = filer.FileSize(fh.f.entry) + entry := fh.f.getEntry() + if entry != nil { + entry.Attributes.FileSize = filer.FileSize(entry) } return fh @@ -104,22 +105,27 @@ func (fh *FileHandle) readFromDirtyPages(buff []byte, startOffset int64) (maxSto func (fh *FileHandle) readFromChunks(buff []byte, offset int64) (int64, error) { - fileSize := int64(filer.FileSize(fh.f.entry)) + entry := fh.f.getEntry() + if entry == nil { + return 0, io.EOF + } + + fileSize := int64(filer.FileSize(entry)) if fileSize == 0 { glog.V(1).Infof("empty fh %v", fh.f.fullpath()) return 0, io.EOF } - if offset+int64(len(buff)) <= int64(len(fh.f.entry.Content)) { - totalRead := copy(buff, fh.f.entry.Content[offset:]) + if offset+int64(len(buff)) <= int64(len(entry.Content)) { + totalRead := copy(buff, entry.Content[offset:]) glog.V(4).Infof("file handle read cached %s [%d,%d] %d", fh.f.fullpath(), offset, offset+int64(totalRead), totalRead) return int64(totalRead), nil } var chunkResolveErr error if fh.f.entryViewCache == nil { - fh.f.entryViewCache, chunkResolveErr = filer.NonOverlappingVisibleIntervals(fh.f.wfs.LookupFn(), fh.f.entry.Chunks) + fh.f.entryViewCache, chunkResolveErr = filer.NonOverlappingVisibleIntervals(fh.f.wfs.LookupFn(), entry.Chunks) if chunkResolveErr != nil { return 0, fmt.Errorf("fail to resolve chunk manifest: %v", chunkResolveErr) } @@ -158,8 +164,13 @@ func (fh *FileHandle) Write(ctx context.Context, req *fuse.WriteRequest, resp *f copy(data, req.Data) } - fh.f.entry.Content = nil - fh.f.entry.Attributes.FileSize = uint64(max(req.Offset+int64(len(data)), int64(fh.f.entry.Attributes.FileSize))) + entry := fh.f.getEntry() + if entry == nil { + return fuse.EIO + } + + entry.Content = nil + entry.Attributes.FileSize = uint64(max(req.Offset+int64(len(data)), int64(entry.Attributes.FileSize))) glog.V(4).Infof("%v write [%d,%d) %d", fh.f.fullpath(), req.Offset, req.Offset+int64(len(req.Data)), len(req.Data)) fh.dirtyPages.AddPage(req.Offset, data) @@ -242,35 +253,40 @@ func (fh *FileHandle) doFlush(ctx context.Context, header fuse.Header) error { err := fh.f.wfs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { - if fh.f.entry.Attributes != nil { - fh.f.entry.Attributes.Mime = fh.contentType - if fh.f.entry.Attributes.Uid == 0 { - fh.f.entry.Attributes.Uid = header.Uid + entry := fh.f.getEntry() + if entry == nil { + return nil + } + + if entry.Attributes != nil { + entry.Attributes.Mime = fh.contentType + if entry.Attributes.Uid 
== 0 { + entry.Attributes.Uid = header.Uid } - if fh.f.entry.Attributes.Gid == 0 { - fh.f.entry.Attributes.Gid = header.Gid + if entry.Attributes.Gid == 0 { + entry.Attributes.Gid = header.Gid } - if fh.f.entry.Attributes.Crtime == 0 { - fh.f.entry.Attributes.Crtime = time.Now().Unix() + if entry.Attributes.Crtime == 0 { + entry.Attributes.Crtime = time.Now().Unix() } - fh.f.entry.Attributes.Mtime = time.Now().Unix() - fh.f.entry.Attributes.FileMode = uint32(os.FileMode(fh.f.entry.Attributes.FileMode) &^ fh.f.wfs.option.Umask) - fh.f.entry.Attributes.Collection = fh.dirtyPages.collection - fh.f.entry.Attributes.Replication = fh.dirtyPages.replication + entry.Attributes.Mtime = time.Now().Unix() + entry.Attributes.FileMode = uint32(os.FileMode(entry.Attributes.FileMode) &^ fh.f.wfs.option.Umask) + entry.Attributes.Collection = fh.dirtyPages.collection + entry.Attributes.Replication = fh.dirtyPages.replication } request := &filer_pb.CreateEntryRequest{ Directory: fh.f.dir.FullPath(), - Entry: fh.f.entry, + Entry: entry, Signatures: []int32{fh.f.wfs.signature}, } - glog.V(4).Infof("%s set chunks: %v", fh.f.fullpath(), len(fh.f.entry.Chunks)) - for i, chunk := range fh.f.entry.Chunks { + glog.V(4).Infof("%s set chunks: %v", fh.f.fullpath(), len(entry.Chunks)) + for i, chunk := range entry.Chunks { glog.V(4).Infof("%s chunks %d: %v [%d,%d)", fh.f.fullpath(), i, chunk.GetFileIdString(), chunk.Offset, chunk.Offset+int64(chunk.Size)) } - manifestChunks, nonManifestChunks := filer.SeparateManifestChunks(fh.f.entry.Chunks) + manifestChunks, nonManifestChunks := filer.SeparateManifestChunks(entry.Chunks) chunks, _ := filer.CompactFileChunks(fh.f.wfs.LookupFn(), nonManifestChunks) chunks, manifestErr := filer.MaybeManifestize(fh.f.wfs.saveDataAsChunk(fh.f.fullpath()), chunks) @@ -278,7 +294,7 @@ func (fh *FileHandle) doFlush(ctx context.Context, header fuse.Header) error { // not good, but should be ok glog.V(0).Infof("MaybeManifestize: %v", manifestErr) } - fh.f.entry.Chunks = append(chunks, manifestChunks...) + entry.Chunks = append(chunks, manifestChunks...) 
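
Editor's note: the weed/filesys hunks around this point all follow the same refactor — direct reads of `file.entry` are replaced by a lock-guarded `getEntry()` accessor plus a nil check before use. Below is a minimal, self-contained sketch of that pattern; `Entry` and `File` here are simplified stand-ins for illustration, not the real `filer_pb.Entry` or `filesys.File` types.

```go
package main

import (
	"fmt"
	"sync"
)

// Entry is a simplified stand-in for filer_pb.Entry.
type Entry struct {
	Name     string
	FileSize uint64
}

// File mirrors the pattern added in weed/filesys/file.go: the entry pointer
// is only touched through accessors that hold entryLock.
type File struct {
	entryLock sync.RWMutex
	entry     *Entry
}

func (f *File) getEntry() *Entry {
	f.entryLock.RLock()
	defer f.entryLock.RUnlock()
	return f.entry
}

func (f *File) setEntry(e *Entry) {
	f.entryLock.Lock()
	defer f.entryLock.Unlock()
	f.entry = e
}

func (f *File) clearEntry() {
	f.entryLock.Lock()
	defer f.entryLock.Unlock()
	f.entry = nil
}

// readSize shows the caller-side convention used in these hunks: take one
// snapshot via getEntry(), then nil-check it before dereferencing, instead
// of reading f.entry directly at several points.
func readSize(f *File) (uint64, bool) {
	entry := f.getEntry()
	if entry == nil {
		return 0, false // entry may have been cleared concurrently
	}
	return entry.FileSize, true
}

func main() {
	f := &File{}
	f.setEntry(&Entry{Name: "a.txt", FileSize: 42})
	if size, ok := readSize(f); ok {
		fmt.Println("size:", size)
	}
	f.clearEntry()
	_, ok := readSize(f)
	fmt.Println("entry present:", ok)
}
```
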
fh.f.wfs.mapPbIdFromLocalToFiler(request.Entry) defer fh.f.wfs.mapPbIdFromFilerToLocal(request.Entry) diff --git a/weed/filesys/fscache.go b/weed/filesys/fscache.go index fdec8253c..6b1012090 100644 --- a/weed/filesys/fscache.go +++ b/weed/filesys/fscache.go @@ -124,8 +124,9 @@ func (c *FsCache) Move(oldPath util.FullPath, newPath util.FullPath) *FsNode { } if f, ok := src.node.(*File); ok { f.Name = target.name - if f.entry != nil { - f.entry.Name = f.Name + entry := f.getEntry() + if entry != nil { + entry.Name = f.Name } } parent.disconnectChild(target) diff --git a/weed/operation/upload_content.go b/weed/operation/upload_content.go index 70428bb07..9957a04cd 100644 --- a/weed/operation/upload_content.go +++ b/weed/operation/upload_content.go @@ -31,6 +31,7 @@ type UploadResult struct { Mime string `json:"mime,omitempty"` Gzip uint32 `json:"gzip,omitempty"` ContentMd5 string `json:"contentMd5,omitempty"` + RetryCount int `json:"-"` } func (uploadResult *UploadResult) ToPbFileChunk(fileId string, offset int64) *filer_pb.FileChunk { @@ -96,6 +97,7 @@ func retriedUploadData(uploadUrl string, filename string, cipher bool, data []by for i := 0; i < 3; i++ { uploadResult, err = doUploadData(uploadUrl, filename, cipher, data, isInputCompressed, mtype, pairMap, jwt) if err == nil { + uploadResult.RetryCount = i return } else { glog.Warningf("uploading to %s: %v", uploadUrl, err) diff --git a/weed/pb/grpc_client_server.go b/weed/pb/grpc_client_server.go index c7a47a91c..9efcd9bdc 100644 --- a/weed/pb/grpc_client_server.go +++ b/weed/pb/grpc_client_server.go @@ -109,15 +109,19 @@ func WithCachedGrpcClient(fn func(*grpc.ClientConn) error, address string, opts } func ParseServerToGrpcAddress(server string) (serverGrpcAddress string, err error) { + return ParseServerAddress(server, 10000) +} + +func ParseServerAddress(server string, deltaPort int) (newServerAddress string, err error) { host, port, parseErr := hostAndPort(server) if parseErr != nil { return "", fmt.Errorf("server port parse error: %v", parseErr) } - grpcPort := int(port) + 10000 + newPort := int(port) + deltaPort - return fmt.Sprintf("%s:%d", host, grpcPort), nil + return fmt.Sprintf("%s:%d", host, newPort), nil } func hostAndPort(address string) (host string, port uint64, err error) { diff --git a/weed/pb/volume_info.go b/weed/pb/volume_info.go index c4f733f5c..cae9e018f 100644 --- a/weed/pb/volume_info.go +++ b/weed/pb/volume_info.go @@ -15,40 +15,49 @@ import ( ) // MaybeLoadVolumeInfo load the file data as *volume_server_pb.VolumeInfo, the returned volumeInfo will not be nil -func MaybeLoadVolumeInfo(fileName string) (*volume_server_pb.VolumeInfo, bool, error) { +func MaybeLoadVolumeInfo(fileName string) (volumeInfo *volume_server_pb.VolumeInfo, hasRemoteFile bool, hasVolumeInfoFile bool, err error) { - volumeInfo := &volume_server_pb.VolumeInfo{} + volumeInfo = &volume_server_pb.VolumeInfo{} glog.V(1).Infof("maybeLoadVolumeInfo checks %s", fileName) if exists, canRead, _, _, _ := util.CheckFile(fileName); !exists || !canRead { if !exists { - return volumeInfo, false, nil + return } + hasVolumeInfoFile = true if !canRead { glog.Warningf("can not read %s", fileName) - return volumeInfo, false, fmt.Errorf("can not read %s", fileName) + err = fmt.Errorf("can not read %s", fileName) + return } - return volumeInfo, false, nil + return } + hasVolumeInfoFile = true + glog.V(1).Infof("maybeLoadVolumeInfo reads %s", fileName) tierData, readErr := ioutil.ReadFile(fileName) if readErr != nil { glog.Warningf("fail to read %s : %v", fileName, 
readErr) - return volumeInfo, false, fmt.Errorf("fail to read %s : %v", fileName, readErr) + err = fmt.Errorf("fail to read %s : %v", fileName, readErr) + return + } glog.V(1).Infof("maybeLoadVolumeInfo Unmarshal volume info %v", fileName) - if err := jsonpb.Unmarshal(bytes.NewReader(tierData), volumeInfo); err != nil { + if err = jsonpb.Unmarshal(bytes.NewReader(tierData), volumeInfo); err != nil { glog.Warningf("unmarshal error: %v", err) - return volumeInfo, false, fmt.Errorf("unmarshal error: %v", err) + err = fmt.Errorf("unmarshal error: %v", err) + return } if len(volumeInfo.GetFiles()) == 0 { - return volumeInfo, false, nil + return } - return volumeInfo, true, nil + hasRemoteFile = true + + return } func SaveVolumeInfo(fileName string, volumeInfo *volume_server_pb.VolumeInfo) error { diff --git a/weed/s3api/s3api_object_handlers.go b/weed/s3api/s3api_object_handlers.go index 55199d4eb..19d85c495 100644 --- a/weed/s3api/s3api_object_handlers.go +++ b/weed/s3api/s3api_object_handlers.go @@ -185,7 +185,7 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h for _, object := range deleteObjects.Objects { lastSeparator := strings.LastIndex(object.ObjectName, "/") - parentDirectoryPath, entryName, isDeleteData, isRecursive := "", object.ObjectName, true, true + parentDirectoryPath, entryName, isDeleteData, isRecursive := "", object.ObjectName, true, false if lastSeparator > 0 && lastSeparator+1 < len(object.ObjectName) { entryName = object.ObjectName[lastSeparator+1:] parentDirectoryPath = "/" + object.ObjectName[:lastSeparator] diff --git a/weed/server/filer_server_handlers_write_autochunk.go b/weed/server/filer_server_handlers_write_autochunk.go index d3ce7e605..318399281 100644 --- a/weed/server/filer_server_handlers_write_autochunk.go +++ b/weed/server/filer_server_handlers_write_autochunk.go @@ -38,10 +38,10 @@ func (fs *FilerServer) autoChunk(ctx context.Context, w http.ResponseWriter, r * chunkSize := 1024 * 1024 * maxMB - stats.FilerRequestCounter.WithLabelValues("postAutoChunk").Inc() + stats.FilerRequestCounter.WithLabelValues("chunk").Inc() start := time.Now() defer func() { - stats.FilerRequestHistogram.WithLabelValues("postAutoChunk").Observe(time.Since(start).Seconds()) + stats.FilerRequestHistogram.WithLabelValues("chunk").Observe(time.Since(start).Seconds()) }() var reply *FilerPostResult @@ -302,13 +302,16 @@ func (fs *FilerServer) uploadReaderToChunks(w http.ResponseWriter, r *http.Reque func (fs *FilerServer) doUpload(urlLocation string, w http.ResponseWriter, r *http.Request, limitedReader io.Reader, fileName string, contentType string, pairMap map[string]string, auth security.EncodedJwt) (*operation.UploadResult, error, []byte) { - stats.FilerRequestCounter.WithLabelValues("postAutoChunkUpload").Inc() + stats.FilerRequestCounter.WithLabelValues("chunkUpload").Inc() start := time.Now() defer func() { - stats.FilerRequestHistogram.WithLabelValues("postAutoChunkUpload").Observe(time.Since(start).Seconds()) + stats.FilerRequestHistogram.WithLabelValues("chunkUpload").Observe(time.Since(start).Seconds()) }() uploadResult, err, data := operation.Upload(urlLocation, fileName, fs.option.Cipher, limitedReader, false, contentType, pairMap, auth) + if uploadResult != nil && uploadResult.RetryCount > 0 { + stats.FilerRequestCounter.WithLabelValues("chunkUploadRetry").Add(float64(uploadResult.RetryCount)) + } return uploadResult, err, data } diff --git a/weed/server/volume_server_tcp_handlers_write.go b/weed/server/volume_server_tcp_handlers_write.go 
index 7c2f1a77f..a009611da 100644 --- a/weed/server/volume_server_tcp_handlers_write.go +++ b/weed/server/volume_server_tcp_handlers_write.go @@ -6,6 +6,7 @@ import ( "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/storage/needle" "github.com/chrislusf/seaweedfs/weed/util" + "io" "net" "strings" ) @@ -21,9 +22,12 @@ func (vs *VolumeServer) HandleTcpConnection(c net.Conn) { for { cmd, err := bufReader.ReadString('\n') if err != nil { - glog.Errorf("read command from %s: %v", c.RemoteAddr().String(), err) + if err != io.EOF { + glog.Errorf("read command from %s: %v", c.RemoteAddr().String(), err) + } return } + cmd = cmd[:len(cmd)-1] switch cmd[0] { case '+': fileId := cmd[1:] diff --git a/weed/storage/erasure_coding/ec_volume.go b/weed/storage/erasure_coding/ec_volume.go index 85d6a5fc8..171db92a4 100644 --- a/weed/storage/erasure_coding/ec_volume.go +++ b/weed/storage/erasure_coding/ec_volume.go @@ -63,7 +63,7 @@ func NewEcVolume(diskType types.DiskType, dir string, dirIdx string, collection // read volume info ev.Version = needle.Version3 - if volumeInfo, found, _ := pb.MaybeLoadVolumeInfo(dataBaseFileName + ".vif"); found { + if volumeInfo, _, found, _ := pb.MaybeLoadVolumeInfo(dataBaseFileName + ".vif"); found { ev.Version = needle.Version(volumeInfo.Version) } else { pb.SaveVolumeInfo(dataBaseFileName+".vif", &volume_server_pb.VolumeInfo{Version: uint32(ev.Version)}) diff --git a/weed/storage/needle/crc.go b/weed/storage/needle/crc.go index e1bac829a..22456faa2 100644 --- a/weed/storage/needle/crc.go +++ b/weed/storage/needle/crc.go @@ -2,7 +2,6 @@ package needle import ( "fmt" - "hash" "io" "github.com/klauspost/crc32" @@ -35,21 +34,21 @@ func (n *Needle) Etag() string { func NewCRCwriter(w io.Writer) *CRCwriter { return &CRCwriter{ - h: crc32.New(table), + crc: CRC(0), w: w, } } type CRCwriter struct { - h hash.Hash32 - w io.Writer + crc CRC + w io.Writer } func (c *CRCwriter) Write(p []byte) (n int, err error) { n, err = c.w.Write(p) // with each write ... - c.h.Write(p) // ... 
update the hash + c.crc = c.crc.Update(p) return } -func (c *CRCwriter) Sum() uint32 { return c.h.Sum32() } // final hash +func (c *CRCwriter) Sum() uint32 { return c.crc.Value() } // final hash diff --git a/weed/storage/needle_map_leveldb.go b/weed/storage/needle_map_leveldb.go index 9716e9729..31c86d124 100644 --- a/weed/storage/needle_map_leveldb.go +++ b/weed/storage/needle_map_leveldb.go @@ -152,8 +152,10 @@ func (m *LevelDbNeedleMap) Close() { glog.Warningf("close index file %s failed: %v", indexFileName, err) } - if err := m.db.Close(); err != nil { - glog.Warningf("close levelDB failed: %v", err) + if m.db != nil { + if err := m.db.Close(); err != nil { + glog.Warningf("close levelDB failed: %v", err) + } } } diff --git a/weed/storage/needle_map_sorted_file.go b/weed/storage/needle_map_sorted_file.go index 3449ff9dc..662b90531 100644 --- a/weed/storage/needle_map_sorted_file.go +++ b/weed/storage/needle_map_sorted_file.go @@ -94,8 +94,12 @@ func (m *SortedFileNeedleMap) Delete(key NeedleId, offset Offset) error { } func (m *SortedFileNeedleMap) Close() { - m.indexFile.Close() - m.dbFile.Close() + if m.indexFile != nil { + m.indexFile.Close() + } + if m.dbFile != nil { + m.dbFile.Close() + } } func (m *SortedFileNeedleMap) Destroy() error { diff --git a/weed/storage/store.go b/weed/storage/store.go index 47829666a..fb33a708c 100644 --- a/weed/storage/store.go +++ b/weed/storage/store.go @@ -220,20 +220,30 @@ func (s *Store) CollectHeartbeat() *master_pb.Heartbeat { if maxFileKey < curMaxFileKey { maxFileKey = curMaxFileKey } + deleteVolume := false if !v.expired(volumeMessage.Size, s.GetVolumeSizeLimit()) { volumeMessages = append(volumeMessages, volumeMessage) } else { if v.expiredLongEnough(MAX_TTL_VOLUME_REMOVAL_DELAY) { deleteVids = append(deleteVids, v.Id) + deleteVolume = true } else { glog.V(0).Infof("volume %d is expired", v.Id) } if v.lastIoError != nil { deleteVids = append(deleteVids, v.Id) + deleteVolume = true glog.Warningf("volume %d has IO error: %v", v.Id, v.lastIoError) } } - collectionVolumeSize[v.Collection] += volumeMessage.Size + + if _, exist := collectionVolumeSize[v.Collection]; !exist { + collectionVolumeSize[v.Collection] = 0 + } + if !deleteVolume { + collectionVolumeSize[v.Collection] += volumeMessage.Size + } + if _, exist := collectionVolumeReadOnlyCount[v.Collection]; !exist { collectionVolumeReadOnlyCount[v.Collection] = map[string]uint8{ "IsReadOnly": 0, @@ -242,7 +252,7 @@ func (s *Store) CollectHeartbeat() *master_pb.Heartbeat { "isDiskSpaceLow": 0, } } - if v.IsReadOnly() { + if !deleteVolume && v.IsReadOnly() { collectionVolumeReadOnlyCount[v.Collection]["IsReadOnly"] += 1 if v.noWriteOrDelete { collectionVolumeReadOnlyCount[v.Collection]["noWriteOrDelete"] += 1 @@ -267,7 +277,7 @@ func (s *Store) CollectHeartbeat() *master_pb.Heartbeat { glog.V(0).Infof("volume %d is deleted", vid) } } else { - glog.V(0).Infof("delete volume %d: %v", vid, err) + glog.Warningf("delete volume %d: %v", vid, err) } } location.volumesLock.Unlock() @@ -446,7 +456,7 @@ func (s *Store) ConfigureVolume(i needle.VolumeId, replication string) error { // load, modify, save baseFileName := strings.TrimSuffix(fileInfo.Name(), filepath.Ext(fileInfo.Name())) vifFile := filepath.Join(location.Directory, baseFileName+".vif") - volumeInfo, _, err := pb.MaybeLoadVolumeInfo(vifFile) + volumeInfo, _, _, err := pb.MaybeLoadVolumeInfo(vifFile) if err != nil { return fmt.Errorf("volume %d fail to load vif", i) } diff --git a/weed/storage/volume_read_write.go 
b/weed/storage/volume_read_write.go index 07376bc88..1853e458a 100644 --- a/weed/storage/volume_read_write.go +++ b/weed/storage/volume_read_write.go @@ -104,47 +104,8 @@ func (v *Volume) syncWrite(n *needle.Needle) (offset uint64, size Size, isUnchan err = fmt.Errorf("volume size limit %d exceeded! current size is %d", MaxPossibleVolumeSize, v.nm.ContentSize()) return } - if v.isFileUnchanged(n) { - size = Size(n.DataSize) - isUnchanged = true - return - } - - // check whether existing needle cookie matches - nv, ok := v.nm.Get(n.Id) - if ok { - existingNeedle, _, _, existingNeedleReadErr := needle.ReadNeedleHeader(v.DataBackend, v.Version(), nv.Offset.ToActualOffset()) - if existingNeedleReadErr != nil { - err = fmt.Errorf("reading existing needle: %v", existingNeedleReadErr) - return - } - if existingNeedle.Cookie != n.Cookie { - glog.V(0).Infof("write cookie mismatch: existing %x, new %x", existingNeedle.Cookie, n.Cookie) - err = fmt.Errorf("mismatching cookie %x", n.Cookie) - return - } - } - - // append to dat file - n.AppendAtNs = uint64(time.Now().UnixNano()) - offset, size, _, err = n.Append(v.DataBackend, v.Version()) - v.checkReadWriteError(err) - if err != nil { - return - } - - v.lastAppendAtNs = n.AppendAtNs - // add to needle map - if !ok || uint64(nv.Offset.ToActualOffset()) < offset { - if err = v.nm.Put(n.Id, ToOffset(int64(offset)), n.Size); err != nil { - glog.V(4).Infof("failed to save in needle map %d: %v", n.Id, err) - } - } - if v.lastModifiedTsSeconds < n.LastModified { - v.lastModifiedTsSeconds = n.LastModified - } - return + return v.doWriteRequest(n) } func (v *Volume) writeNeedle2(n *needle.Needle, fsync bool) (offset uint64, size Size, isUnchanged bool, err error) { @@ -223,24 +184,7 @@ func (v *Volume) syncDelete(n *needle.Needle) (Size, error) { return 0, err } - nv, ok := v.nm.Get(n.Id) - // fmt.Println("key", n.Id, "volume offset", nv.Offset, "data_size", n.Size, "cached size", nv.Size) - if ok && nv.Size.IsValid() { - size := nv.Size - n.Data = nil - n.AppendAtNs = uint64(time.Now().UnixNano()) - offset, _, _, err := n.Append(v.DataBackend, v.Version()) - v.checkReadWriteError(err) - if err != nil { - return size, err - } - v.lastAppendAtNs = n.AppendAtNs - if err = v.nm.Delete(n.Id, ToOffset(int64(offset))); err != nil { - return size, err - } - return size, err - } - return 0, nil + return v.doDeleteRequest(n) } func (v *Volume) deleteNeedle2(n *needle.Needle) (Size, error) { diff --git a/weed/storage/volume_stream_write.go b/weed/storage/volume_stream_write.go index f619de30b..955875aa2 100644 --- a/weed/storage/volume_stream_write.go +++ b/weed/storage/volume_stream_write.go @@ -48,8 +48,6 @@ func (v *Volume) StreamWrite(n *needle.Needle, data io.Reader, dataSize uint32) // data checksum util.Uint32toBytes(header[0:needle.NeedleChecksumSize], crcWriter.Sum()) - df.Write(header[0:needle.NeedleChecksumSize]) - // write timestamp, padding n.AppendAtNs = uint64(time.Now().UnixNano()) util.Uint64toBytes(header[needle.NeedleChecksumSize:needle.NeedleChecksumSize+TimestampSize], n.AppendAtNs) diff --git a/weed/storage/volume_tier.go b/weed/storage/volume_tier.go index da93221b2..23160906b 100644 --- a/weed/storage/volume_tier.go +++ b/weed/storage/volume_tier.go @@ -16,7 +16,7 @@ func (v *Volume) GetVolumeInfo() *volume_server_pb.VolumeInfo { func (v *Volume) maybeLoadVolumeInfo() (found bool) { var err error - v.volumeInfo, v.hasRemoteFile, err = pb.MaybeLoadVolumeInfo(v.FileName(".vif")) + v.volumeInfo, v.hasRemoteFile, found, err = 
pb.MaybeLoadVolumeInfo(v.FileName(".vif")) if v.volumeInfo.Version == 0 { v.volumeInfo.Version = uint32(needle.CurrentVersion) @@ -29,10 +29,10 @@ func (v *Volume) maybeLoadVolumeInfo() (found bool) { if err != nil { glog.Warningf("load volume %d.vif file: %v", v.Id, err) - return false + return } - return true + return } diff --git a/weed/util/constants.go b/weed/util/constants.go index 2a7e57c5d..1cd2f160a 100644 --- a/weed/util/constants.go +++ b/weed/util/constants.go @@ -5,7 +5,7 @@ import ( ) var ( - VERSION = fmt.Sprintf("%s %d.%02d", sizeLimit, 2, 29) + VERSION = fmt.Sprintf("%s %d.%02d", sizeLimit, 2, 31) COMMIT = "" ) diff --git a/weed/wdclient/net2/base_connection_pool.go b/weed/wdclient/net2/base_connection_pool.go new file mode 100644 index 000000000..5cc037d0f --- /dev/null +++ b/weed/wdclient/net2/base_connection_pool.go @@ -0,0 +1,159 @@ +package net2 + +import ( + "net" + "strings" + "time" + + rp "github.com/chrislusf/seaweedfs/weed/wdclient/resource_pool" +) + +const defaultDialTimeout = 1 * time.Second + +func defaultDialFunc(network string, address string) (net.Conn, error) { + return net.DialTimeout(network, address, defaultDialTimeout) +} + +func parseResourceLocation(resourceLocation string) ( + network string, + address string) { + + idx := strings.Index(resourceLocation, " ") + if idx >= 0 { + return resourceLocation[:idx], resourceLocation[idx+1:] + } + + return "", resourceLocation +} + +// A thin wrapper around the underlying resource pool. +type connectionPoolImpl struct { + options ConnectionOptions + + pool rp.ResourcePool +} + +// This returns a connection pool where all connections are connected +// to the same (network, address) +func newBaseConnectionPool( + options ConnectionOptions, + createPool func(rp.Options) rp.ResourcePool) ConnectionPool { + + dial := options.Dial + if dial == nil { + dial = defaultDialFunc + } + + openFunc := func(loc string) (interface{}, error) { + network, address := parseResourceLocation(loc) + return dial(network, address) + } + + closeFunc := func(handle interface{}) error { + return handle.(net.Conn).Close() + } + + poolOptions := rp.Options{ + MaxActiveHandles: options.MaxActiveConnections, + MaxIdleHandles: options.MaxIdleConnections, + MaxIdleTime: options.MaxIdleTime, + OpenMaxConcurrency: options.DialMaxConcurrency, + Open: openFunc, + Close: closeFunc, + NowFunc: options.NowFunc, + } + + return &connectionPoolImpl{ + options: options, + pool: createPool(poolOptions), + } +} + +// This returns a connection pool where all connections are connected +// to the same (network, address) +func NewSimpleConnectionPool(options ConnectionOptions) ConnectionPool { + return newBaseConnectionPool(options, rp.NewSimpleResourcePool) +} + +// This returns a connection pool that manages multiple (network, address) +// entries. The connections to each (network, address) entry acts +// independently. For example ("tcp", "localhost:11211") could act as memcache +// shard 0 and ("tcp", "localhost:11212") could act as memcache shard 1. +func NewMultiConnectionPool(options ConnectionOptions) ConnectionPool { + return newBaseConnectionPool( + options, + func(poolOptions rp.Options) rp.ResourcePool { + return rp.NewMultiResourcePool(poolOptions, nil) + }) +} + +// See ConnectionPool for documentation. +func (p *connectionPoolImpl) NumActive() int32 { + return p.pool.NumActive() +} + +// See ConnectionPool for documentation. 
+func (p *connectionPoolImpl) ActiveHighWaterMark() int32 { + return p.pool.ActiveHighWaterMark() +} + +// This returns the number of alive idle connections. This method is not part +// of ConnectionPool's API. It is used only for testing. +func (p *connectionPoolImpl) NumIdle() int { + return p.pool.NumIdle() +} + +// BaseConnectionPool can only register a single (network, address) entry. +// Register should be call before any Get calls. +func (p *connectionPoolImpl) Register(network string, address string) error { + return p.pool.Register(network + " " + address) +} + +// BaseConnectionPool has nothing to do on Unregister. +func (p *connectionPoolImpl) Unregister(network string, address string) error { + return nil +} + +func (p *connectionPoolImpl) ListRegistered() []NetworkAddress { + result := make([]NetworkAddress, 0, 1) + for _, location := range p.pool.ListRegistered() { + network, address := parseResourceLocation(location) + + result = append( + result, + NetworkAddress{ + Network: network, + Address: address, + }) + } + return result +} + +// This gets an active connection from the connection pool. Note that network +// and address arguments are ignored (The connections with point to the +// network/address provided by the first Register call). +func (p *connectionPoolImpl) Get( + network string, + address string) (ManagedConn, error) { + + handle, err := p.pool.Get(network + " " + address) + if err != nil { + return nil, err + } + return NewManagedConn(network, address, handle, p, p.options), nil +} + +// See ConnectionPool for documentation. +func (p *connectionPoolImpl) Release(conn ManagedConn) error { + return conn.ReleaseConnection() +} + +// See ConnectionPool for documentation. +func (p *connectionPoolImpl) Discard(conn ManagedConn) error { + return conn.DiscardConnection() +} + +// See ConnectionPool for documentation. +func (p *connectionPoolImpl) EnterLameDuckMode() { + p.pool.EnterLameDuckMode() +} diff --git a/weed/wdclient/net2/connection_pool.go b/weed/wdclient/net2/connection_pool.go new file mode 100644 index 000000000..5b8d4d232 --- /dev/null +++ b/weed/wdclient/net2/connection_pool.go @@ -0,0 +1,97 @@ +package net2 + +import ( + "net" + "time" +) + +type ConnectionOptions struct { + // The maximum number of connections that can be active per host at any + // given time (A non-positive value indicates the number of connections + // is unbounded). + MaxActiveConnections int32 + + // The maximum number of idle connections per host that are kept alive by + // the connection pool. + MaxIdleConnections uint32 + + // The maximum amount of time an idle connection can alive (if specified). + MaxIdleTime *time.Duration + + // This limits the number of concurrent Dial calls (there's no limit when + // DialMaxConcurrency is non-positive). + DialMaxConcurrency int + + // Dial specifies the dial function for creating network connections. + // If Dial is nil, net.DialTimeout is used, with timeout set to 1 second. + Dial func(network string, address string) (net.Conn, error) + + // This specifies the now time function. When the function is non-nil, the + // connection pool will use the specified function instead of time.Now to + // generate the current time. + NowFunc func() time.Time + + // This specifies the timeout for any Read() operation. + // Note that setting this to 0 (i.e. not setting it) will make + // read operations block indefinitely. + ReadTimeout time.Duration + + // This specifies the timeout for any Write() operation. + // Note that setting this to 0 (i.e. 
not setting it) will make + // write operations block indefinitely. + WriteTimeout time.Duration +} + +func (o ConnectionOptions) getCurrentTime() time.Time { + if o.NowFunc == nil { + return time.Now() + } else { + return o.NowFunc() + } +} + +// A generic interface for managed connection pool. All connection pool +// implementations must be threadsafe. +type ConnectionPool interface { + // This returns the number of active connections that are on loan. + NumActive() int32 + + // This returns the highest number of active connections for the entire + // lifetime of the pool. + ActiveHighWaterMark() int32 + + // This returns the number of idle connections that are in the pool. + NumIdle() int + + // This associates (network, address) to the connection pool; afterwhich, + // the user can get connections to (network, address). + Register(network string, address string) error + + // This dissociate (network, address) from the connection pool; + // afterwhich, the user can no longer get connections to + // (network, address). + Unregister(network string, address string) error + + // This returns the list of registered (network, address) entries. + ListRegistered() []NetworkAddress + + // This gets an active connection from the connection pool. The connection + // will remain active until one of the following is called: + // 1. conn.ReleaseConnection() + // 2. conn.DiscardConnection() + // 3. pool.Release(conn) + // 4. pool.Discard(conn) + Get(network string, address string) (ManagedConn, error) + + // This releases an active connection back to the connection pool. + Release(conn ManagedConn) error + + // This discards an active connection from the connection pool. + Discard(conn ManagedConn) error + + // Enter the connection pool into lame duck mode. The connection pool + // will no longer return connections, and all idle connections are closed + // immediately (including active connections that are released back to the + // pool afterward). + EnterLameDuckMode() +} diff --git a/weed/wdclient/net2/doc.go b/weed/wdclient/net2/doc.go new file mode 100644 index 000000000..f4d6552e4 --- /dev/null +++ b/weed/wdclient/net2/doc.go @@ -0,0 +1,6 @@ +// net2 is a collection of functions meant to supplement the capabilities +// provided by the standard "net" package. +package net2 + +// copied from https://github.com/dropbox/godropbox/tree/master/net2 +// removed other dependencies
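
Editor's note: the new net2 package above defines `ConnectionOptions`, the `ConnectionPool` interface, and the pooled `ManagedConn`. A hedged usage sketch follows; the address, option values, and write payload are illustrative only, and the actual consumer in this changeset (wdclient's volume TCP client) is not shown in this excerpt.

```go
package main

import (
	"fmt"
	"time"

	"github.com/chrislusf/seaweedfs/weed/wdclient/net2"
)

func main() {
	// Option values are arbitrary examples, not defaults from the PR.
	maxIdleTime := 10 * time.Minute
	pool := net2.NewMultiConnectionPool(net2.ConnectionOptions{
		MaxActiveConnections: 16,
		MaxIdleConnections:   4,
		MaxIdleTime:          &maxIdleTime,
		ReadTimeout:          5 * time.Second,
		WriteTimeout:         5 * time.Second,
	})

	// Each (network, address) pair must be registered before Get is called.
	if err := pool.Register("tcp", "localhost:8080"); err != nil {
		fmt.Println("register:", err)
		return
	}

	conn, err := pool.Get("tcp", "localhost:8080")
	if err != nil {
		fmt.Println("get:", err)
		return
	}

	// ManagedConn is a net.Conn; read/write deadlines are applied by the
	// pool from ReadTimeout/WriteTimeout, so SetDeadline is disabled.
	// The payload here is a placeholder, not the volume server TCP protocol.
	if _, err := conn.Write([]byte("ping\n")); err != nil {
		// A broken connection should be discarded so it is not reused.
		_ = conn.DiscardConnection()
		fmt.Println("write:", err)
		return
	}

	// Healthy connections go back to the pool for reuse.
	_ = conn.ReleaseConnection()
}
```
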
\ No newline at end of file diff --git a/weed/wdclient/net2/ip.go b/weed/wdclient/net2/ip.go new file mode 100644 index 000000000..60e46342f --- /dev/null +++ b/weed/wdclient/net2/ip.go @@ -0,0 +1,177 @@ +package net2 + +import ( + "fmt" + "log" + "net" + "os" + "strings" + "sync" +) + +var myHostname string +var myHostnameOnce sync.Once + +// Like os.Hostname but caches first successful result, making it cheap to call it +// over and over. +// It will also crash whole process if fetching Hostname fails! +func MyHostname() string { + myHostnameOnce.Do(func() { + var err error + myHostname, err = os.Hostname() + if err != nil { + log.Fatal(err) + } + }) + return myHostname +} + +var myIp4 *net.IPAddr +var myIp4Once sync.Once + +// Resolves `MyHostname()` to an Ip4 address. Caches first successful result, making it +// cheap to call it over and over. +// It will also crash whole process if resolving the IP fails! +func MyIp4() *net.IPAddr { + myIp4Once.Do(func() { + var err error + myIp4, err = net.ResolveIPAddr("ip4", MyHostname()) + if err != nil { + log.Fatal(err) + } + }) + return myIp4 +} + +var myIp6 *net.IPAddr +var myIp6Once sync.Once + +// Resolves `MyHostname()` to an Ip6 address. Caches first successful result, making it +// cheap to call it over and over. +// It will also crash whole process if resolving the IP fails! +func MyIp6() *net.IPAddr { + myIp6Once.Do(func() { + var err error + myIp6, err = net.ResolveIPAddr("ip6", MyHostname()) + if err != nil { + log.Fatal(err) + } + }) + return myIp6 +} + +// This returns the list of local ip addresses which other hosts can connect +// to (NOTE: Loopback ip is ignored). +// Also resolves Hostname to an address and adds it to the list too, so +// IPs from /etc/hosts can work too. +func GetLocalIPs() ([]*net.IP, error) { + hostname, err := os.Hostname() + if err != nil { + return nil, fmt.Errorf("Failed to lookup hostname: %v", err) + } + // Resolves IP Address from Hostname, this way overrides in /etc/hosts + // can work too for IP resolution. + ipInfo, err := net.ResolveIPAddr("ip4", hostname) + if err != nil { + return nil, fmt.Errorf("Failed to resolve ip: %v", err) + } + ips := []*net.IP{&ipInfo.IP} + + // TODO(zviad): Is rest of the code really necessary? + addrs, err := net.InterfaceAddrs() + if err != nil { + return nil, fmt.Errorf( "Failed to get interface addresses: %v", err) + } + for _, addr := range addrs { + ipnet, ok := addr.(*net.IPNet) + if !ok { + continue + } + + if ipnet.IP.IsLoopback() { + continue + } + + ips = append(ips, &ipnet.IP) + } + return ips, nil +} + +var localhostIPNets []*net.IPNet + +func init() { + for _, mask := range []string{"127.0.0.1/8", "::1/128"} { + _, ipnet, err := net.ParseCIDR(mask) + if err != nil { + panic(err) + } + localhostIPNets = append(localhostIPNets, ipnet) + } +} + +func IsLocalhostIp(ipStr string) bool { + ip := net.ParseIP(ipStr) + if ip == nil { + return false + } + for _, ipnet := range localhostIPNets { + if ipnet.Contains(ip) { + return true + } + } + return false +} + +// Given a host string, return true if the host is an ip (v4/v6) localhost. +func IsLocalhost(host string) bool { + return IsLocalhostIp(host) || + host == "localhost" || + host == "ip6-localhost" || + host == "ipv6-localhost" +} + +// Resolves hostnames in addresses to actual IP4 addresses. Skips all invalid addresses +// and all addresses that can't be resolved. +// `addrs` are assumed to be of form: ["<hostname>:<port>", ...] 
+// Returns an error in addition to resolved addresses if not all resolutions succeed. +func ResolveIP4s(addrs []string) ([]string, error) { + resolvedAddrs := make([]string, 0, len(addrs)) + var lastErr error + + for _, server := range addrs { + hostPort := strings.Split(server, ":") + if len(hostPort) != 2 { + lastErr = fmt.Errorf("Skipping invalid address: %s", server) + continue + } + + ip, err := net.ResolveIPAddr("ip4", hostPort[0]) + if err != nil { + lastErr = err + continue + } + resolvedAddrs = append(resolvedAddrs, ip.IP.String()+":"+hostPort[1]) + } + return resolvedAddrs, lastErr +} + +func LookupValidAddrs() (map[string]bool, error) { + hostName, err := os.Hostname() + if err != nil { + return nil, err + } + addrs, err := net.LookupHost(hostName) + if err != nil { + return nil, err + } + validAddrs := make(map[string]bool) + validAddrs[hostName] = true + for _, addr := range addrs { + validAddrs[addr] = true + } + // Special case localhost/127.0.0.1 so that this works on devVMs. It should + // have no affect in production. + validAddrs["127.0.0.1"] = true + validAddrs["localhost"] = true + return validAddrs, nil +} diff --git a/weed/wdclient/net2/managed_connection.go b/weed/wdclient/net2/managed_connection.go new file mode 100644 index 000000000..a886210d1 --- /dev/null +++ b/weed/wdclient/net2/managed_connection.go @@ -0,0 +1,185 @@ +package net2 + +import ( + "fmt" + "net" + "time" + + "errors" + "github.com/chrislusf/seaweedfs/weed/wdclient/resource_pool" +) + +// Dial's arguments. +type NetworkAddress struct { + Network string + Address string +} + +// A connection managed by a connection pool. NOTE: SetDeadline, +// SetReadDeadline and SetWriteDeadline are disabled for managed connections. +// (The deadlines are set by the connection pool). +type ManagedConn interface { + net.Conn + + // This returns the original (network, address) entry used for creating + // the connection. + Key() NetworkAddress + + // This returns the underlying net.Conn implementation. + RawConn() net.Conn + + // This returns the connection pool which owns this connection. + Owner() ConnectionPool + + // This indictes a user is done with the connection and releases the + // connection back to the connection pool. + ReleaseConnection() error + + // This indicates the connection is an invalid state, and that the + // connection should be discarded from the connection pool. + DiscardConnection() error +} + +// A physical implementation of ManagedConn +type managedConnImpl struct { + addr NetworkAddress + handle resource_pool.ManagedHandle + pool ConnectionPool + options ConnectionOptions +} + +// This creates a managed connection wrapper. +func NewManagedConn( + network string, + address string, + handle resource_pool.ManagedHandle, + pool ConnectionPool, + options ConnectionOptions) ManagedConn { + + addr := NetworkAddress{ + Network: network, + Address: address, + } + + return &managedConnImpl{ + addr: addr, + handle: handle, + pool: pool, + options: options, + } +} + +func (c *managedConnImpl) rawConn() (net.Conn, error) { + h, err := c.handle.Handle() + return h.(net.Conn), err +} + +// See ManagedConn for documentation. +func (c *managedConnImpl) RawConn() net.Conn { + h, _ := c.handle.Handle() + return h.(net.Conn) +} + +// See ManagedConn for documentation. +func (c *managedConnImpl) Key() NetworkAddress { + return c.addr +} + +// See ManagedConn for documentation. +func (c *managedConnImpl) Owner() ConnectionPool { + return c.pool +} + +// See ManagedConn for documentation. 
+func (c *managedConnImpl) ReleaseConnection() error { + return c.handle.Release() +} + +// See ManagedConn for documentation. +func (c *managedConnImpl) DiscardConnection() error { + return c.handle.Discard() +} + +// See net.Conn for documentation +func (c *managedConnImpl) Read(b []byte) (n int, err error) { + conn, err := c.rawConn() + if err != nil { + return 0, err + } + + if c.options.ReadTimeout > 0 { + deadline := c.options.getCurrentTime().Add(c.options.ReadTimeout) + _ = conn.SetReadDeadline(deadline) + } + n, err = conn.Read(b) + if err != nil { + var localAddr string + if conn.LocalAddr() != nil { + localAddr = conn.LocalAddr().String() + } else { + localAddr = "(nil)" + } + + var remoteAddr string + if conn.RemoteAddr() != nil { + remoteAddr = conn.RemoteAddr().String() + } else { + remoteAddr = "(nil)" + } + err = fmt.Errorf("Read error from host: %s <-> %s: %v", localAddr, remoteAddr, err) + } + return +} + +// See net.Conn for documentation +func (c *managedConnImpl) Write(b []byte) (n int, err error) { + conn, err := c.rawConn() + if err != nil { + return 0, err + } + + if c.options.WriteTimeout > 0 { + deadline := c.options.getCurrentTime().Add(c.options.WriteTimeout) + _ = conn.SetWriteDeadline(deadline) + } + n, err = conn.Write(b) + if err != nil { + err = fmt.Errorf("Write error: %v", err) + } + return +} + +// See net.Conn for documentation +func (c *managedConnImpl) Close() error { + return c.handle.Discard() +} + +// See net.Conn for documentation +func (c *managedConnImpl) LocalAddr() net.Addr { + conn, _ := c.rawConn() + return conn.LocalAddr() +} + +// See net.Conn for documentation +func (c *managedConnImpl) RemoteAddr() net.Addr { + conn, _ := c.rawConn() + return conn.RemoteAddr() +} + +// SetDeadline is disabled for managed connection (The deadline is set by +// us, with respect to the read/write timeouts specified in ConnectionOptions). +func (c *managedConnImpl) SetDeadline(t time.Time) error { + return errors.New("Cannot set deadline for managed connection") +} + +// SetReadDeadline is disabled for managed connection (The deadline is set by +// us with respect to the read timeout specified in ConnectionOptions). +func (c *managedConnImpl) SetReadDeadline(t time.Time) error { + return errors.New("Cannot set read deadline for managed connection") +} + +// SetWriteDeadline is disabled for managed connection (The deadline is set by +// us with respect to the write timeout specified in ConnectionOptions). +func (c *managedConnImpl) SetWriteDeadline(t time.Time) error { + return errors.New("Cannot set write deadline for managed connection") +} diff --git a/weed/wdclient/net2/port.go b/weed/wdclient/net2/port.go new file mode 100644 index 000000000..f83adba28 --- /dev/null +++ b/weed/wdclient/net2/port.go @@ -0,0 +1,19 @@ +package net2 + +import ( + "net" + "strconv" +) + +// Returns the port information. +func GetPort(addr net.Addr) (int, error) { + _, lport, err := net.SplitHostPort(addr.String()) + if err != nil { + return -1, err + } + lportInt, err := strconv.Atoi(lport) + if err != nil { + return -1, err + } + return lportInt, nil +} diff --git a/weed/wdclient/resource_pool/doc.go b/weed/wdclient/resource_pool/doc.go new file mode 100644 index 000000000..c17b17c6c --- /dev/null +++ b/weed/wdclient/resource_pool/doc.go @@ -0,0 +1,5 @@ +// A generic resource pool for managing resources such as network connections. +package resource_pool + +// copied from https://github.com/dropbox/godropbox/tree/master/resource_pool +// removed other dependencies
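
Editor's note: base_connection_pool.go above builds its connection pools on this resource_pool package, whose files follow. A minimal sketch of using resource_pool directly, assuming TCP connections as the managed handles; the address, limits, and dial timeout are examples for illustration, not values used anywhere in SeaweedFS.

```go
package main

import (
	"fmt"
	"net"
	"time"

	rp "github.com/chrislusf/seaweedfs/weed/wdclient/resource_pool"
)

func main() {
	opts := rp.Options{
		MaxActiveHandles: 8,
		MaxIdleHandles:   2,
		// Open/Close turn an opaque resource location string into a live
		// handle; here the handle is a TCP connection, mirroring how
		// net2/base_connection_pool.go wires the two packages together.
		Open: func(resourceLocation string) (interface{}, error) {
			return net.DialTimeout("tcp", resourceLocation, time.Second)
		},
		Close: func(handle interface{}) error {
			return handle.(net.Conn).Close()
		},
	}

	// Passing nil for createPool makes NewMultiResourcePool fall back to
	// NewSimpleResourcePool for each registered location.
	pool := rp.NewMultiResourcePool(opts, nil)

	if err := pool.Register("localhost:8080"); err != nil {
		fmt.Println("register:", err)
		return
	}

	managed, err := pool.Get("localhost:8080")
	if err != nil {
		fmt.Println("get:", err)
		return
	}

	raw, err := managed.Handle()
	if err != nil {
		_ = managed.Discard()
		fmt.Println("handle:", err)
		return
	}
	conn := raw.(net.Conn)
	fmt.Println("connected to", conn.RemoteAddr())

	// Hand the connection back for reuse; Discard() would close it instead.
	_ = managed.Release()
}
```
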
\ No newline at end of file diff --git a/weed/wdclient/resource_pool/managed_handle.go b/weed/wdclient/resource_pool/managed_handle.go new file mode 100644 index 000000000..e1d82ca7b --- /dev/null +++ b/weed/wdclient/resource_pool/managed_handle.go @@ -0,0 +1,97 @@ +package resource_pool + +import ( + "sync/atomic" + + "errors" +) + +// A resource handle managed by a resource pool. +type ManagedHandle interface { + // This returns the handle's resource location. + ResourceLocation() string + + // This returns the underlying resource handle (or error if the handle + // is no longer active). + Handle() (interface{}, error) + + // This returns the resource pool which owns this handle. + Owner() ResourcePool + + // The releases the underlying resource handle to the caller and marks the + // managed handle as inactive. The caller is responsible for cleaning up + // the released handle. This returns nil if the managed handle no longer + // owns the resource. + ReleaseUnderlyingHandle() interface{} + + // This indictes a user is done with the handle and releases the handle + // back to the resource pool. + Release() error + + // This indicates the handle is an invalid state, and that the + // connection should be discarded from the connection pool. + Discard() error +} + +// A physical implementation of ManagedHandle +type managedHandleImpl struct { + location string + handle interface{} + pool ResourcePool + isActive int32 // atomic bool + options Options +} + +// This creates a managed handle wrapper. +func NewManagedHandle( + resourceLocation string, + handle interface{}, + pool ResourcePool, + options Options) ManagedHandle { + + h := &managedHandleImpl{ + location: resourceLocation, + handle: handle, + pool: pool, + options: options, + } + atomic.StoreInt32(&h.isActive, 1) + + return h +} + +// See ManagedHandle for documentation. +func (c *managedHandleImpl) ResourceLocation() string { + return c.location +} + +// See ManagedHandle for documentation. +func (c *managedHandleImpl) Handle() (interface{}, error) { + if atomic.LoadInt32(&c.isActive) == 0 { + return c.handle, errors.New("Resource handle is no longer valid") + } + return c.handle, nil +} + +// See ManagedHandle for documentation. +func (c *managedHandleImpl) Owner() ResourcePool { + return c.pool +} + +// See ManagedHandle for documentation. +func (c *managedHandleImpl) ReleaseUnderlyingHandle() interface{} { + if atomic.CompareAndSwapInt32(&c.isActive, 1, 0) { + return c.handle + } + return nil +} + +// See ManagedHandle for documentation. +func (c *managedHandleImpl) Release() error { + return c.pool.Release(c) +} + +// See ManagedHandle for documentation. +func (c *managedHandleImpl) Discard() error { + return c.pool.Discard(c) +} diff --git a/weed/wdclient/resource_pool/multi_resource_pool.go b/weed/wdclient/resource_pool/multi_resource_pool.go new file mode 100644 index 000000000..9ac25526d --- /dev/null +++ b/weed/wdclient/resource_pool/multi_resource_pool.go @@ -0,0 +1,200 @@ +package resource_pool + +import ( + "fmt" + "sync" + + "errors" +) + +// A resource pool implementation that manages multiple resource location +// entries. The handles to each resource location entry acts independently. +// For example "tcp localhost:11211" could act as memcache +// shard 0 and "tcp localhost:11212" could act as memcache shard 1. 
+type multiResourcePool struct { + options Options + + createPool func(Options) ResourcePool + + rwMutex sync.RWMutex + isLameDuck bool // guarded by rwMutex + // NOTE: the locationPools is guarded by rwMutex, but the pool entries + // are not. + locationPools map[string]ResourcePool +} + +// This returns a MultiResourcePool, which manages multiple +// resource location entries. The handles to each resource location +// entry acts independently. +// +// When createPool is nil, NewSimpleResourcePool is used as default. +func NewMultiResourcePool( + options Options, + createPool func(Options) ResourcePool) ResourcePool { + + if createPool == nil { + createPool = NewSimpleResourcePool + } + + return &multiResourcePool{ + options: options, + createPool: createPool, + rwMutex: sync.RWMutex{}, + isLameDuck: false, + locationPools: make(map[string]ResourcePool), + } +} + +// See ResourcePool for documentation. +func (p *multiResourcePool) NumActive() int32 { + total := int32(0) + + p.rwMutex.RLock() + defer p.rwMutex.RUnlock() + + for _, pool := range p.locationPools { + total += pool.NumActive() + } + return total +} + +// See ResourcePool for documentation. +func (p *multiResourcePool) ActiveHighWaterMark() int32 { + high := int32(0) + + p.rwMutex.RLock() + defer p.rwMutex.RUnlock() + + for _, pool := range p.locationPools { + val := pool.ActiveHighWaterMark() + if val > high { + high = val + } + } + return high +} + +// See ResourcePool for documentation. +func (p *multiResourcePool) NumIdle() int { + total := 0 + + p.rwMutex.RLock() + defer p.rwMutex.RUnlock() + + for _, pool := range p.locationPools { + total += pool.NumIdle() + } + return total +} + +// See ResourcePool for documentation. +func (p *multiResourcePool) Register(resourceLocation string) error { + if resourceLocation == "" { + return errors.New("Registering invalid resource location") + } + + p.rwMutex.Lock() + defer p.rwMutex.Unlock() + + if p.isLameDuck { + return fmt.Errorf( + "Cannot register %s to lame duck resource pool", + resourceLocation) + } + + if _, inMap := p.locationPools[resourceLocation]; inMap { + return nil + } + + pool := p.createPool(p.options) + if err := pool.Register(resourceLocation); err != nil { + return err + } + + p.locationPools[resourceLocation] = pool + return nil +} + +// See ResourcePool for documentation. +func (p *multiResourcePool) Unregister(resourceLocation string) error { + p.rwMutex.Lock() + defer p.rwMutex.Unlock() + + if pool, inMap := p.locationPools[resourceLocation]; inMap { + _ = pool.Unregister("") + pool.EnterLameDuckMode() + delete(p.locationPools, resourceLocation) + } + return nil +} + +func (p *multiResourcePool) ListRegistered() []string { + p.rwMutex.RLock() + defer p.rwMutex.RUnlock() + + result := make([]string, 0, len(p.locationPools)) + for key, _ := range p.locationPools { + result = append(result, key) + } + + return result +} + +// See ResourcePool for documentation. +func (p *multiResourcePool) Get( + resourceLocation string) (ManagedHandle, error) { + + pool := p.getPool(resourceLocation) + if pool == nil { + return nil, fmt.Errorf( + "%s is not registered in the resource pool", + resourceLocation) + } + return pool.Get(resourceLocation) +} + +// See ResourcePool for documentation. 
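
Release and Discard below hand a finished handle back to the sub-pool that owns it. From the caller's side, registering two shard locations and working with one of them might look like the following sketch (placeholder addresses and a dummy Open/Close pair; not part of this change):

package main

import (
	"fmt"

	"github.com/chrislusf/seaweedfs/weed/wdclient/resource_pool"
)

func main() {
	opts := resource_pool.Options{
		MaxActiveHandles: 8,
		MaxIdleHandles:   2,
		// Dummy Open/Close; a real pool would dial a connection here.
		Open:  func(location string) (interface{}, error) { return "resource@" + location, nil },
		Close: func(handle interface{}) error { return nil },
	}

	// A nil createPool means each location gets its own SimpleResourcePool.
	pool := resource_pool.NewMultiResourcePool(opts, nil)
	_ = pool.Register("localhost:11211") // shard 0 (placeholder address)
	_ = pool.Register("localhost:11212") // shard 1 (placeholder address)

	h, err := pool.Get("localhost:11212")
	if err != nil {
		panic(err)
	}
	res, _ := h.Handle()
	fmt.Println(res) // resource@localhost:11212

	// Limits and idle lists are tracked per location; releasing this handle
	// only affects shard 1's sub-pool.
	_ = h.Release()
}
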
+func (p *multiResourcePool) Release(handle ManagedHandle) error { + pool := p.getPool(handle.ResourceLocation()) + if pool == nil { + return errors.New( + "Resource pool cannot take control of a handle owned " + + "by another resource pool") + } + + return pool.Release(handle) +} + +// See ResourcePool for documentation. +func (p *multiResourcePool) Discard(handle ManagedHandle) error { + pool := p.getPool(handle.ResourceLocation()) + if pool == nil { + return errors.New( + "Resource pool cannot take control of a handle owned " + + "by another resource pool") + } + + return pool.Discard(handle) +} + +// See ResourcePool for documentation. +func (p *multiResourcePool) EnterLameDuckMode() { + p.rwMutex.Lock() + defer p.rwMutex.Unlock() + + p.isLameDuck = true + + for _, pool := range p.locationPools { + pool.EnterLameDuckMode() + } +} + +func (p *multiResourcePool) getPool(resourceLocation string) ResourcePool { + p.rwMutex.RLock() + defer p.rwMutex.RUnlock() + + if pool, inMap := p.locationPools[resourceLocation]; inMap { + return pool + } + return nil +} diff --git a/weed/wdclient/resource_pool/resource_pool.go b/weed/wdclient/resource_pool/resource_pool.go new file mode 100644 index 000000000..26c433f50 --- /dev/null +++ b/weed/wdclient/resource_pool/resource_pool.go @@ -0,0 +1,96 @@ +package resource_pool + +import ( + "time" +) + +type Options struct { + // The maximum number of active resource handles per resource location. (A + // non-positive value indicates the number of active resource handles is + // unbounded). + MaxActiveHandles int32 + + // The maximum number of idle resource handles per resource location that + // are kept alive by the resource pool. + MaxIdleHandles uint32 + + // The maximum amount of time an idle resource handle can remain alive (if + // specified). + MaxIdleTime *time.Duration + + // This limits the number of concurrent Open calls (there's no limit when + // OpenMaxConcurrency is non-positive). + OpenMaxConcurrency int + + // This function creates a resource handle (e.g., a connection) for a + // resource location. The function must be thread-safe. + Open func(resourceLocation string) ( + handle interface{}, + err error) + + // This function destroys a resource handle and performs the necessary + // cleanup to free up resources. The function must be thread-safe. + Close func(handle interface{}) error + + // This specifies the now time function. When the function is non-nil, the + // resource pool will use the specified function instead of time.Now to + // generate the current time. + NowFunc func() time.Time +} + +func (o Options) getCurrentTime() time.Time { + if o.NowFunc == nil { + return time.Now() + } else { + return o.NowFunc() + } +} + +// A generic interface for managed resource pool. All resource pool +// implementations must be threadsafe. +type ResourcePool interface { + // This returns the number of active resource handles. + NumActive() int32 + + // This returns the highest number of actives handles for the entire + // lifetime of the pool. If the pool contains multiple sub-pools, the + // high water mark is the max of the sub-pools' high water marks. + ActiveHighWaterMark() int32 + + // This returns the number of alive idle handles. NOTE: This is only used + // for testing. + NumIdle() int + + // This associates a resource location to the resource pool; afterwhich, + // the user can get resource handles for the resource location. 
+ Register(resourceLocation string) error + + // This dissociates a resource location from the resource pool; afterwhich, + // the user can no longer get resource handles for the resource location. + // If the given resource location corresponds to a sub-pool, the unregistered + // sub-pool will enter lame duck mode. + Unregister(resourceLocation string) error + + // This returns the list of registered resource location entries. + ListRegistered() []string + + // This gets an active resource handle from the resource pool. The + // handle will remain active until one of the following is called: + // 1. handle.Release() + // 2. handle.Discard() + // 3. pool.Release(handle) + // 4. pool.Discard(handle) + Get(key string) (ManagedHandle, error) + + // This releases an active resource handle back to the resource pool. + Release(handle ManagedHandle) error + + // This discards an active resource from the resource pool. + Discard(handle ManagedHandle) error + + // Enter the resource pool into lame duck mode. The resource pool + // will no longer return resource handles, and all idle resource handles + // are closed immediately (including active resource handles that are + // released back to the pool afterward). + EnterLameDuckMode() +} diff --git a/weed/wdclient/resource_pool/semaphore.go b/weed/wdclient/resource_pool/semaphore.go new file mode 100644 index 000000000..ff35d5bc5 --- /dev/null +++ b/weed/wdclient/resource_pool/semaphore.go @@ -0,0 +1,154 @@ +package resource_pool + +import ( + "fmt" + "sync" + "sync/atomic" + "time" +) + +type Semaphore interface { + // Increment the semaphore counter by one. + Release() + + // Decrement the semaphore counter by one, and block if counter < 0 + Acquire() + + // Decrement the semaphore counter by one, and block if counter < 0 + // Wait for up to the given duration. Returns true if did not timeout + TryAcquire(timeout time.Duration) bool +} + +// A simple counting Semaphore. +type boundedSemaphore struct { + slots chan struct{} +} + +// Create a bounded semaphore. The count parameter must be a positive number. +// NOTE: The bounded semaphore will panic if the user tries to Release +// beyond the specified count. +func NewBoundedSemaphore(count uint) Semaphore { + sem := &boundedSemaphore{ + slots: make(chan struct{}, int(count)), + } + for i := 0; i < cap(sem.slots); i++ { + sem.slots <- struct{}{} + } + return sem +} + +// Acquire returns on successful acquisition. +func (sem *boundedSemaphore) Acquire() { + <-sem.slots +} + +// TryAcquire returns true if it acquires a resource slot within the +// timeout, false otherwise. +func (sem *boundedSemaphore) TryAcquire(timeout time.Duration) bool { + if timeout > 0 { + // Wait until we get a slot or timeout expires. + tm := time.NewTimer(timeout) + defer tm.Stop() + select { + case <-sem.slots: + return true + case <-tm.C: + // Timeout expired. In very rare cases this might happen even if + // there is a slot available, e.g. GC pause after we create the timer + // and select randomly picked this one out of the two available channels. + // We should do one final immediate check below. + } + } + + // Return true if we have a slot available immediately and false otherwise. + select { + case <-sem.slots: + return true + default: + return false + } +} + +// Release the acquired semaphore. You must not release more than you +// have acquired. +func (sem *boundedSemaphore) Release() { + select { + case sem.slots <- struct{}{}: + default: + // slots is buffered. 
If a send blocks, it indicates a programming
+        // error.
+        panic(fmt.Errorf("too many releases for boundedSemaphore"))
+    }
+}
+
+// This returns an unbounded counting semaphore with the specified initial count.
+// The semaphore counter can be arbitrarily large (i.e., Release can be called
+// an unlimited number of times).
+//
+// NOTE: In general, users should prefer the bounded semaphore since it is more
+// efficient than the unbounded semaphore.
+func NewUnboundedSemaphore(initialCount int) Semaphore {
+    res := &unboundedSemaphore{
+        counter: int64(initialCount),
+    }
+    res.cond.L = &res.lock
+    return res
+}
+
+type unboundedSemaphore struct {
+    lock    sync.Mutex
+    cond    sync.Cond
+    counter int64
+}
+
+func (s *unboundedSemaphore) Release() {
+    s.lock.Lock()
+    s.counter += 1
+    if s.counter > 0 {
+        // Not broadcasting here since it's unlikely we can satisfy all waiting
+        // goroutines. Instead, we will Signal again if there is leftover
+        // quota after Acquire, in case of lost wakeups.
+        s.cond.Signal()
+    }
+    s.lock.Unlock()
+}
+
+func (s *unboundedSemaphore) Acquire() {
+    s.lock.Lock()
+    for s.counter < 1 {
+        s.cond.Wait()
+    }
+    s.counter -= 1
+    if s.counter > 0 {
+        s.cond.Signal()
+    }
+    s.lock.Unlock()
+}
+
+func (s *unboundedSemaphore) TryAcquire(timeout time.Duration) bool {
+    done := make(chan bool, 1)
+    // Gate used to communicate between the goroutines and decide the result.
+    // If the main goroutine decides first, we have timed out; otherwise we succeed.
+    decided := new(int32)
+    atomic.StoreInt32(decided, 0)
+    go func() {
+        s.Acquire()
+        if atomic.SwapInt32(decided, 1) == 0 {
+            // Acquire won the race
+            done <- true
+        } else {
+            // The result was already decided and this goroutine did not win; return the quota.
+            s.Release()
+        }
+    }()
+    select {
+    case <-done:
+        return true
+    case <-time.After(timeout):
+        if atomic.SwapInt32(decided, 1) == 1 {
+            // The other goroutine already decided the result
+            return true
+        }
+        return false
+    }
+}
diff --git a/weed/wdclient/resource_pool/simple_resource_pool.go b/weed/wdclient/resource_pool/simple_resource_pool.go
new file mode 100644
index 000000000..b0c539100
--- /dev/null
+++ b/weed/wdclient/resource_pool/simple_resource_pool.go
@@ -0,0 +1,343 @@
+package resource_pool
+
+import (
+    "errors"
+    "fmt"
+    "sync"
+    "sync/atomic"
+    "time"
+)
+
+type idleHandle struct {
+    handle    interface{}
+    keepUntil *time.Time
+}
+
+type TooManyHandles struct {
+    location string
+}
+
+func (t TooManyHandles) Error() string {
+    return fmt.Sprintf("Too many handles to %s", t.location)
+}
+
+type OpenHandleError struct {
+    location string
+    err      error
+}
+
+func (o OpenHandleError) Error() string {
+    return fmt.Sprintf("Failed to open resource handle: %s (%v)", o.location, o.err)
+}
+
+// A resource pool implementation where all handles are associated to the
+// same resource location.
+type simpleResourcePool struct {
+    options Options
+
+    numActive *int32 // atomic counter
+
+    activeHighWaterMark *int32 // atomic / monotonically increasing value
+
+    openTokens Semaphore
+
+    mutex       sync.Mutex
+    location    string        // guarded by mutex
+    idleHandles []*idleHandle // guarded by mutex
+    isLameDuck  bool          // guarded by mutex
+}
+
+// This returns a SimpleResourcePool, where all handles are associated to a
+// single resource location.
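
NewSimpleResourcePool below uses the bounded semaphore to cap concurrent Open calls. Outside the pool, the same primitive works as a general concurrency limiter; a sketch with arbitrary example values (not part of this change):

package main

import (
	"fmt"
	"sync"
	"time"

	"github.com/chrislusf/seaweedfs/weed/wdclient/resource_pool"
)

func main() {
	sem := resource_pool.NewBoundedSemaphore(3) // at most 3 workers at once

	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			if !sem.TryAcquire(100 * time.Millisecond) {
				fmt.Println("worker", id, "timed out waiting for a slot")
				return
			}
			defer sem.Release() // never release more than was acquired

			time.Sleep(10 * time.Millisecond) // simulated work
		}(i)
	}
	wg.Wait()
}
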
+func NewSimpleResourcePool(options Options) ResourcePool { + numActive := new(int32) + atomic.StoreInt32(numActive, 0) + + activeHighWaterMark := new(int32) + atomic.StoreInt32(activeHighWaterMark, 0) + + var tokens Semaphore + if options.OpenMaxConcurrency > 0 { + tokens = NewBoundedSemaphore(uint(options.OpenMaxConcurrency)) + } + + return &simpleResourcePool{ + location: "", + options: options, + numActive: numActive, + activeHighWaterMark: activeHighWaterMark, + openTokens: tokens, + mutex: sync.Mutex{}, + idleHandles: make([]*idleHandle, 0, 0), + isLameDuck: false, + } +} + +// See ResourcePool for documentation. +func (p *simpleResourcePool) NumActive() int32 { + return atomic.LoadInt32(p.numActive) +} + +// See ResourcePool for documentation. +func (p *simpleResourcePool) ActiveHighWaterMark() int32 { + return atomic.LoadInt32(p.activeHighWaterMark) +} + +// See ResourcePool for documentation. +func (p *simpleResourcePool) NumIdle() int { + p.mutex.Lock() + defer p.mutex.Unlock() + return len(p.idleHandles) +} + +// SimpleResourcePool can only register a single (network, address) entry. +// Register should be call before any Get calls. +func (p *simpleResourcePool) Register(resourceLocation string) error { + if resourceLocation == "" { + return errors.New("Invalid resource location") + } + + p.mutex.Lock() + defer p.mutex.Unlock() + + if p.isLameDuck { + return fmt.Errorf( + "cannot register %s to lame duck resource pool", + resourceLocation) + } + + if p.location == "" { + p.location = resourceLocation + return nil + } + return errors.New("SimpleResourcePool can only register one location") +} + +// SimpleResourcePool will enter lame duck mode upon calling Unregister. +func (p *simpleResourcePool) Unregister(resourceLocation string) error { + p.EnterLameDuckMode() + return nil +} + +func (p *simpleResourcePool) ListRegistered() []string { + p.mutex.Lock() + defer p.mutex.Unlock() + + if p.location != "" { + return []string{p.location} + } + return []string{} +} + +func (p *simpleResourcePool) getLocation() (string, error) { + p.mutex.Lock() + defer p.mutex.Unlock() + + if p.location == "" { + return "", fmt.Errorf( + "resource location is not set for SimpleResourcePool") + } + + if p.isLameDuck { + return "", fmt.Errorf( + "lame duck resource pool cannot return handles to %s", + p.location) + } + + return p.location, nil +} + +// This gets an active resource from the resource pool. Note that the +// resourceLocation argument is ignored (The handles are associated to the +// resource location provided by the first Register call). +func (p *simpleResourcePool) Get(unused string) (ManagedHandle, error) { + activeCount := atomic.AddInt32(p.numActive, 1) + if p.options.MaxActiveHandles > 0 && + activeCount > p.options.MaxActiveHandles { + + atomic.AddInt32(p.numActive, -1) + return nil, TooManyHandles{p.location} + } + + highest := atomic.LoadInt32(p.activeHighWaterMark) + for activeCount > highest && + !atomic.CompareAndSwapInt32( + p.activeHighWaterMark, + highest, + activeCount) { + + highest = atomic.LoadInt32(p.activeHighWaterMark) + } + + if h := p.getIdleHandle(); h != nil { + return h, nil + } + + location, err := p.getLocation() + if err != nil { + atomic.AddInt32(p.numActive, -1) + return nil, err + } + + if p.openTokens != nil { + // Current implementation does not wait for tokens to become available. + // If that causes availability hits, we could increase the wait, + // similar to simple_pool.go. 
+ if p.openTokens.TryAcquire(0) { + defer p.openTokens.Release() + } else { + // We could not immediately acquire a token. + // Instead of waiting + atomic.AddInt32(p.numActive, -1) + return nil, OpenHandleError{ + p.location, errors.New("Open Error: reached OpenMaxConcurrency")} + } + } + + handle, err := p.options.Open(location) + if err != nil { + atomic.AddInt32(p.numActive, -1) + return nil, OpenHandleError{p.location, err} + } + + return NewManagedHandle(p.location, handle, p, p.options), nil +} + +// See ResourcePool for documentation. +func (p *simpleResourcePool) Release(handle ManagedHandle) error { + if pool, ok := handle.Owner().(*simpleResourcePool); !ok || pool != p { + return errors.New( + "Resource pool cannot take control of a handle owned " + + "by another resource pool") + } + + h := handle.ReleaseUnderlyingHandle() + if h != nil { + // We can unref either before or after queuing the idle handle. + // The advantage of unref-ing before queuing is that there is + // a higher chance of successful Get when number of active handles + // is close to the limit (but potentially more handle creation). + // The advantage of queuing before unref-ing is that there's a + // higher chance of reusing handle (but potentially more Get failures). + atomic.AddInt32(p.numActive, -1) + p.queueIdleHandles(h) + } + + return nil +} + +// See ResourcePool for documentation. +func (p *simpleResourcePool) Discard(handle ManagedHandle) error { + if pool, ok := handle.Owner().(*simpleResourcePool); !ok || pool != p { + return errors.New( + "Resource pool cannot take control of a handle owned " + + "by another resource pool") + } + + h := handle.ReleaseUnderlyingHandle() + if h != nil { + atomic.AddInt32(p.numActive, -1) + if err := p.options.Close(h); err != nil { + return fmt.Errorf("failed to close resource handle: %v", err) + } + } + return nil +} + +// See ResourcePool for documentation. +func (p *simpleResourcePool) EnterLameDuckMode() { + p.mutex.Lock() + + toClose := p.idleHandles + p.isLameDuck = true + p.idleHandles = []*idleHandle{} + + p.mutex.Unlock() + + p.closeHandles(toClose) +} + +// This returns an idle resource, if there is one. +func (p *simpleResourcePool) getIdleHandle() ManagedHandle { + var toClose []*idleHandle + defer func() { + // NOTE: Must keep the closure around to late bind the toClose slice. + p.closeHandles(toClose) + }() + + now := p.options.getCurrentTime() + + p.mutex.Lock() + defer p.mutex.Unlock() + + var i int + for i = 0; i < len(p.idleHandles); i++ { + idle := p.idleHandles[i] + if idle.keepUntil == nil || now.Before(*idle.keepUntil) { + break + } + } + if i > 0 { + toClose = p.idleHandles[0:i] + } + + if i < len(p.idleHandles) { + idle := p.idleHandles[i] + p.idleHandles = p.idleHandles[i+1:] + return NewManagedHandle(p.location, idle.handle, p, p.options) + } + + if len(p.idleHandles) > 0 { + p.idleHandles = []*idleHandle{} + } + return nil +} + +// This adds an idle resource to the pool. +func (p *simpleResourcePool) queueIdleHandles(handle interface{}) { + var toClose []*idleHandle + defer func() { + // NOTE: Must keep the closure around to late bind the toClose slice. 
+        p.closeHandles(toClose)
+    }()
+
+    now := p.options.getCurrentTime()
+    var keepUntil *time.Time
+    if p.options.MaxIdleTime != nil {
+        // NOTE: Assign to a temp variable first to work around a compiler bug.
+        x := now.Add(*p.options.MaxIdleTime)
+        keepUntil = &x
+    }
+
+    p.mutex.Lock()
+    defer p.mutex.Unlock()
+
+    if p.isLameDuck {
+        toClose = []*idleHandle{
+            {handle: handle},
+        }
+        return
+    }
+
+    p.idleHandles = append(
+        p.idleHandles,
+        &idleHandle{
+            handle:    handle,
+            keepUntil: keepUntil,
+        })
+
+    nIdleHandles := uint32(len(p.idleHandles))
+    if nIdleHandles > p.options.MaxIdleHandles {
+        handlesToClose := nIdleHandles - p.options.MaxIdleHandles
+        toClose = p.idleHandles[0:handlesToClose]
+        p.idleHandles = p.idleHandles[handlesToClose:nIdleHandles]
+    }
+}
+
+// Closes resources; at this point it is assumed that these resources
+// are no longer referenced from the main idleHandles slice.
+func (p *simpleResourcePool) closeHandles(handles []*idleHandle) {
+    for _, handle := range handles {
+        _ = p.options.Close(handle.handle)
+    }
+}
diff --git a/weed/wdclient/volume_tcp_client.go b/weed/wdclient/volume_tcp_client.go
new file mode 100644
index 000000000..afebd71eb
--- /dev/null
+++ b/weed/wdclient/volume_tcp_client.go
@@ -0,0 +1,97 @@
+package wdclient
+
+import (
+    "bufio"
+    "bytes"
+    "fmt"
+    "github.com/chrislusf/seaweedfs/weed/glog"
+    "github.com/chrislusf/seaweedfs/weed/pb"
+    "github.com/chrislusf/seaweedfs/weed/util"
+    "github.com/chrislusf/seaweedfs/weed/wdclient/net2"
+    "io"
+    "net"
+    "time"
+)
+
+// VolumeTcpClient puts/gets/deletes file chunks directly on volume servers, without replication
+type VolumeTcpClient struct {
+    cp net2.ConnectionPool
+}
+
+type VolumeTcpConn struct {
+    net.Conn
+    bufWriter *bufio.Writer
+    bufReader *bufio.Reader
+}
+
+func NewVolumeTcpClient() *VolumeTcpClient {
+    MaxIdleTime := 10 * time.Second
+    return &VolumeTcpClient{
+        cp: net2.NewMultiConnectionPool(net2.ConnectionOptions{
+            MaxActiveConnections: 16,
+            MaxIdleConnections:   1,
+            MaxIdleTime:          &MaxIdleTime,
+            DialMaxConcurrency:   0,
+            Dial: func(network string, address string) (net.Conn, error) {
+                conn, err := net.Dial(network, address)
+                return &VolumeTcpConn{
+                    conn,
+                    bufio.NewWriter(conn),
+                    bufio.NewReader(conn),
+                }, err
+            },
+            NowFunc:      nil,
+            ReadTimeout:  0,
+            WriteTimeout: 0,
+        }),
+    }
+}
+func (c *VolumeTcpClient) PutFileChunk(volumeServerAddress string, fileId string, fileSize uint32, fileReader io.Reader) (err error) {
+
+    tcpAddress, parseErr := pb.ParseServerAddress(volumeServerAddress, 20000)
+    if parseErr != nil {
+        return parseErr
+    }
+
+    c.cp.Register("tcp", tcpAddress)
+    tcpConn, getErr := c.cp.Get("tcp", tcpAddress)
+    if getErr != nil {
+        return fmt.Errorf("get connection to %s: %v", tcpAddress, getErr)
+    }
+    conn := tcpConn.RawConn().(*VolumeTcpConn)
+    defer func() {
+        if err != nil {
+            tcpConn.DiscardConnection()
+        } else {
+            tcpConn.ReleaseConnection()
+        }
+    }()
+
+    buf := []byte("+" + fileId + "\n")
+    _, err = conn.bufWriter.Write(buf)
+    if err != nil {
+        return
+    }
+    util.Uint32toBytes(buf[0:4], fileSize)
+    _, err = conn.bufWriter.Write(buf[0:4])
+    if err != nil {
+        return
+    }
+    _, err = io.Copy(conn.bufWriter, fileReader)
+    if err != nil {
+        return
+    }
+    conn.bufWriter.Write([]byte("!\n"))
+    conn.bufWriter.Flush()
+
+    ret, _, err := conn.bufReader.ReadLine()
+    if err != nil {
+        glog.V(0).Infof("upload by tcp: %v", err)
+        return
+    }
+    if !bytes.HasPrefix(ret, []byte("+OK")) {
+        glog.V(0).Infof("upload by tcp: %v", string(ret))
+    }
+
+    return nil
+}
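
For reference, PutFileChunk frames an upload as a "+<fileId>\n" line, a 4-byte length written with util.Uint32toBytes, the chunk bytes, and a closing "!\n", then expects a reply beginning with "+OK". A caller-side sketch (placeholder address, file id, and payload; not part of this change):

package main

import (
	"bytes"
	"log"

	"github.com/chrislusf/seaweedfs/weed/wdclient"
)

func main() {
	data := []byte("hello world") // placeholder chunk content

	client := wdclient.NewVolumeTcpClient()
	err := client.PutFileChunk(
		"127.0.0.1:8080", // placeholder volume server address; the TCP endpoint is derived via pb.ParseServerAddress
		"3,01637037d6",   // placeholder file id
		uint32(len(data)),
		bytes.NewReader(data),
	)
	if err != nil {
		log.Fatalf("put file chunk: %v", err)
	}
}
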
