aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md2
-rwxr-xr-xdocker/entrypoint.sh4
-rw-r--r--go.mod2
-rw-r--r--go.sum2
-rw-r--r--k8s/seaweedfs/Chart.yaml4
-rw-r--r--k8s/seaweedfs/values.yaml2
-rw-r--r--note/SeaweedFS_Architecture.pngbin0 -> 75867 bytes
-rw-r--r--weed/command/benchmark.go25
-rw-r--r--weed/command/server.go1
-rw-r--r--weed/command/volume.go6
-rw-r--r--weed/filer/filer_on_meta_event.go5
-rw-r--r--weed/filer/meta_aggregator.go1
-rw-r--r--weed/filesys/dir_link.go25
-rw-r--r--weed/filesys/dirty_page.go9
-rw-r--r--weed/filesys/file.go27
-rw-r--r--weed/filesys/filehandle.go66
-rw-r--r--weed/filesys/fscache.go5
-rw-r--r--weed/operation/upload_content.go2
-rw-r--r--weed/pb/grpc_client_server.go8
-rw-r--r--weed/pb/volume_info.go29
-rw-r--r--weed/s3api/s3api_object_handlers.go2
-rw-r--r--weed/server/filer_server_handlers_write_autochunk.go11
-rw-r--r--weed/server/volume_server_tcp_handlers_write.go6
-rw-r--r--weed/storage/erasure_coding/ec_volume.go2
-rw-r--r--weed/storage/needle/crc.go11
-rw-r--r--weed/storage/needle_map_leveldb.go6
-rw-r--r--weed/storage/needle_map_sorted_file.go8
-rw-r--r--weed/storage/store.go18
-rw-r--r--weed/storage/volume_read_write.go60
-rw-r--r--weed/storage/volume_stream_write.go2
-rw-r--r--weed/storage/volume_tier.go6
-rw-r--r--weed/util/constants.go2
-rw-r--r--weed/wdclient/net2/base_connection_pool.go159
-rw-r--r--weed/wdclient/net2/connection_pool.go97
-rw-r--r--weed/wdclient/net2/doc.go6
-rw-r--r--weed/wdclient/net2/ip.go177
-rw-r--r--weed/wdclient/net2/managed_connection.go185
-rw-r--r--weed/wdclient/net2/port.go19
-rw-r--r--weed/wdclient/resource_pool/doc.go5
-rw-r--r--weed/wdclient/resource_pool/managed_handle.go97
-rw-r--r--weed/wdclient/resource_pool/multi_resource_pool.go200
-rw-r--r--weed/wdclient/resource_pool/resource_pool.go96
-rw-r--r--weed/wdclient/resource_pool/semaphore.go154
-rw-r--r--weed/wdclient/resource_pool/simple_resource_pool.go343
-rw-r--r--weed/wdclient/volume_tcp_client.go97
45 files changed, 1844 insertions, 150 deletions
diff --git a/README.md b/README.md
index 4a13f09d9..737338d7a 100644
--- a/README.md
+++ b/README.md
@@ -122,6 +122,7 @@ SeaweedFS can work as a distributed [Key-Large-Value store][KeyLargeValueStore].
* Support ETag, Accept-Range, Last-Modified, etc.
* Support in-memory/leveldb/readonly mode tuning for memory/performance balance.
* Support rebalancing the writable and readonly volumes.
+* [Customizable Multiple Storage Tiers][TieredStorage]: Customizable storage disk types to balance performance and cost.
* [Transparent cloud integration][CloudTier]: unlimited capacity via tiered cloud storage for warm data.
* [Erasure Coding for warm storage][ErasureCoding] Rack-Aware 10.4 erasure coding reduces storage cost and increases availability.
@@ -152,6 +153,7 @@ SeaweedFS can work as a distributed [Key-Large-Value store][KeyLargeValueStore].
[Hadoop]: https://github.com/chrislusf/seaweedfs/wiki/Hadoop-Compatible-File-System
[WebDAV]: https://github.com/chrislusf/seaweedfs/wiki/WebDAV
[ErasureCoding]: https://github.com/chrislusf/seaweedfs/wiki/Erasure-coding-for-warm-storage
+[TieredStorage]: https://github.com/chrislusf/seaweedfs/wiki/Tiered-Storage
[CloudTier]: https://github.com/chrislusf/seaweedfs/wiki/Cloud-Tier
[FilerDataEncryption]: https://github.com/chrislusf/seaweedfs/wiki/Filer-Data-Encryption
[FilerTTL]: https://github.com/chrislusf/seaweedfs/wiki/Filer-Stores
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
index 5a858d993..a5a240575 100755
--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@@ -60,9 +60,9 @@ case "$1" in
'cronjob')
MASTER=${WEED_MASTER-localhost:9333}
FIX_REPLICATION_CRON_SCHEDULE=${CRON_SCHEDULE-*/7 * * * * *}
- echo "$FIX_REPLICATION_CRON_SCHEDULE" 'echo "volume.fix.replication" | weed shell -master='$MASTER > /crontab
+ echo "$FIX_REPLICATION_CRON_SCHEDULE" 'echo "lock; volume.fix.replication; unlock" | weed shell -master='$MASTER > /crontab
BALANCING_CRON_SCHEDULE=${CRON_SCHEDULE-25 * * * * *}
- echo "$BALANCING_CRON_SCHEDULE" 'echo "volume.balance -c ALL -force" | weed shell -master='$MASTER >> /crontab
+ echo "$BALANCING_CRON_SCHEDULE" 'echo "lock; volume.balance -collection ALL_COLLECTIONS -force; unlock" | weed shell -master='$MASTER >> /crontab
echo "Running Crontab:"
cat /crontab
exec supercronic /crontab
diff --git a/go.mod b/go.mod
index a200370fd..3c75a9cb5 100644
--- a/go.mod
+++ b/go.mod
@@ -48,7 +48,7 @@ require (
github.com/klauspost/crc32 v1.2.0
github.com/klauspost/reedsolomon v1.9.2
github.com/kurin/blazer v0.5.3
- github.com/lib/pq v1.2.0
+ github.com/lib/pq v1.10.0
github.com/lunixbochs/vtclean v1.0.0 // indirect
github.com/magiconair/properties v1.8.1 // indirect
github.com/mattn/go-colorable v0.1.2 // indirect
diff --git a/go.sum b/go.sum
index cf4ef4f98..8c7e3ac3f 100644
--- a/go.sum
+++ b/go.sum
@@ -496,6 +496,8 @@ github.com/kurin/blazer v0.5.3/go.mod h1:4FCXMUWo9DllR2Do4TtBd377ezyAJ51vB5uTBjt
github.com/lib/pq v1.1.1/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/lib/pq v1.2.0 h1:LXpIM/LZ5xGFhOpXAQUIMM1HdyqzVYM13zNdjCEEcA0=
github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
+github.com/lib/pq v1.10.0 h1:Zx5DJFEYQXio93kgXnQ09fXNiUKsqv4OUEu2UtGcB1E=
+github.com/lib/pq v1.10.0/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM=
github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4=
github.com/lunixbochs/vtclean v1.0.0 h1:xu2sLAri4lGiovBDQKxl5mrXyESr3gUr5m5SM5+LVb8=
diff --git a/k8s/seaweedfs/Chart.yaml b/k8s/seaweedfs/Chart.yaml
index 5be2e0f9d..c19a964d3 100644
--- a/k8s/seaweedfs/Chart.yaml
+++ b/k8s/seaweedfs/Chart.yaml
@@ -1,5 +1,5 @@
apiVersion: v1
description: SeaweedFS
name: seaweedfs
-appVersion: "2.29"
-version: 2.29
+appVersion: "2.31"
+version: 2.31
diff --git a/k8s/seaweedfs/values.yaml b/k8s/seaweedfs/values.yaml
index 0d8459bc1..a5813b8b3 100644
--- a/k8s/seaweedfs/values.yaml
+++ b/k8s/seaweedfs/values.yaml
@@ -4,7 +4,7 @@ global:
registry: ""
repository: ""
imageName: chrislusf/seaweedfs
- # imageTag: "2.29" - started using {.Chart.appVersion}
+ # imageTag: "2.31" - started using {.Chart.appVersion}
imagePullPolicy: IfNotPresent
imagePullSecrets: imagepullsecret
restartPolicy: Always
diff --git a/note/SeaweedFS_Architecture.png b/note/SeaweedFS_Architecture.png
new file mode 100644
index 000000000..de27a5f53
--- /dev/null
+++ b/note/SeaweedFS_Architecture.png
Binary files differ
diff --git a/weed/command/benchmark.go b/weed/command/benchmark.go
index c1bc80c42..af0793c70 100644
--- a/weed/command/benchmark.go
+++ b/weed/command/benchmark.go
@@ -41,6 +41,7 @@ type BenchmarkOptions struct {
grpcDialOption grpc.DialOption
masterClient *wdclient.MasterClient
fsync *bool
+ useTcp *bool
}
var (
@@ -67,6 +68,7 @@ func init() {
b.cpuprofile = cmdBenchmark.Flag.String("cpuprofile", "", "cpu profile output file")
b.maxCpu = cmdBenchmark.Flag.Int("maxCpu", 0, "maximum number of CPUs. 0 means all available CPUs")
b.fsync = cmdBenchmark.Flag.Bool("fsync", false, "flush data to disk after write")
+ b.useTcp = cmdBenchmark.Flag.Bool("useTcp", false, "send data via tcp")
sharedBytes = make([]byte, 1024)
}
@@ -223,6 +225,8 @@ func writeFiles(idChan chan int, fileIdLineChan chan string, s *stat) {
random := rand.New(rand.NewSource(time.Now().UnixNano()))
+ volumeTcpClient := wdclient.NewVolumeTcpClient()
+
for id := range idChan {
start := time.Now()
fileSize := int64(*b.fileSize + random.Intn(64))
@@ -243,7 +247,15 @@ func writeFiles(idChan chan int, fileIdLineChan chan string, s *stat) {
if !isSecure && assignResult.Auth != "" {
isSecure = true
}
- if _, err := fp.Upload(0, b.masterClient.GetMaster, false, assignResult.Auth, b.grpcDialOption); err == nil {
+ if *b.useTcp {
+ if uploadByTcp(volumeTcpClient, fp) {
+ fileIdLineChan <- fp.Fid
+ s.completed++
+ s.transferred += fileSize
+ } else {
+ s.failed++
+ }
+ } else if _, err := fp.Upload(0, b.masterClient.GetMaster, false, assignResult.Auth, b.grpcDialOption); err == nil {
if random.Intn(100) < *b.deletePercentage {
s.total++
delayedDeleteChan <- &delayedFile{time.Now().Add(time.Second), fp}
@@ -329,6 +341,17 @@ func writeFileIds(fileName string, fileIdLineChan chan string, finishChan chan b
}
}
+func uploadByTcp(volumeTcpClient *wdclient.VolumeTcpClient, fp *operation.FilePart) bool {
+
+ err := volumeTcpClient.PutFileChunk(fp.Server, fp.Fid, uint32(fp.FileSize), fp.Reader)
+ if err != nil {
+ glog.Errorf("upload chunk err: %v", err)
+ return false
+ }
+
+ return true
+}
+
func readFileIds(fileName string, fileIdLineChan chan string) {
file, err := os.Open(fileName) // For read access.
if err != nil {
diff --git a/weed/command/server.go b/weed/command/server.go
index 64321b4d7..a39802412 100644
--- a/weed/command/server.go
+++ b/weed/command/server.go
@@ -111,6 +111,7 @@ func init() {
serverOptions.v.preStopSeconds = cmdServer.Flag.Int("volume.preStopSeconds", 10, "number of seconds between stop send heartbeats and stop volume server")
serverOptions.v.pprof = cmdServer.Flag.Bool("volume.pprof", false, "enable pprof http handlers. precludes --memprofile and --cpuprofile")
serverOptions.v.idxFolder = cmdServer.Flag.String("volume.dir.idx", "", "directory to store .idx files")
+ serverOptions.v.enableTcp = cmdServer.Flag.Bool("volume.tcp", false, "<exprimental> enable tcp port")
s3Options.port = cmdServer.Flag.Int("s3.port", 8333, "s3 server http listen port")
s3Options.domainName = cmdServer.Flag.String("s3.domainName", "", "suffix of the host name in comma separated list, {bucket}.{domainName}")
diff --git a/weed/command/volume.go b/weed/command/volume.go
index cf162a732..f49ece9dc 100644
--- a/weed/command/volume.go
+++ b/weed/command/volume.go
@@ -62,6 +62,7 @@ type VolumeServerOptions struct {
preStopSeconds *int
metricsHttpPort *int
// pulseSeconds *int
+ enableTcp *bool
}
func init() {
@@ -88,6 +89,7 @@ func init() {
v.pprof = cmdVolume.Flag.Bool("pprof", false, "enable pprof http handlers. precludes --memprofile and --cpuprofile")
v.metricsHttpPort = cmdVolume.Flag.Int("metricsPort", 0, "Prometheus metrics listen port")
v.idxFolder = cmdVolume.Flag.String("dir.idx", "", "directory to store .idx files")
+ v.enableTcp = cmdVolume.Flag.Bool("tcp", false, "<exprimental> enable tcp port")
}
var cmdVolume = &Command{
@@ -252,7 +254,9 @@ func (v VolumeServerOptions) startVolumeServer(volumeFolders, maxVolumeCounts, v
}
// starting tcp server
- go v.startTcpService(volumeServer)
+ if *v.enableTcp {
+ go v.startTcpService(volumeServer)
+ }
// starting the cluster http server
clusterHttpServer := v.startClusterHttpService(volumeMux)
diff --git a/weed/filer/filer_on_meta_event.go b/weed/filer/filer_on_meta_event.go
index 295a5039e..b29324b61 100644
--- a/weed/filer/filer_on_meta_event.go
+++ b/weed/filer/filer_on_meta_event.go
@@ -11,6 +11,10 @@ import (
// onMetadataChangeEvent is triggered after filer processed change events from local or remote filers
func (f *Filer) onMetadataChangeEvent(event *filer_pb.SubscribeMetadataResponse) {
+ f.maybeReloadFilerConfiguration(event)
+}
+
+func (f *Filer) maybeReloadFilerConfiguration(event *filer_pb.SubscribeMetadataResponse) {
if DirectoryEtcSeaweedFS != event.Directory {
if DirectoryEtcSeaweedFS != event.EventNotification.NewParentPath {
return
@@ -26,7 +30,6 @@ func (f *Filer) onMetadataChangeEvent(event *filer_pb.SubscribeMetadataResponse)
if entry.Name == FilerConfName {
f.reloadFilerConfiguration(entry)
}
-
}
func (f *Filer) readEntry(chunks []*filer_pb.FileChunk) ([]byte, error) {
diff --git a/weed/filer/meta_aggregator.go b/weed/filer/meta_aggregator.go
index 9437e9992..5c368a57e 100644
--- a/weed/filer/meta_aggregator.go
+++ b/weed/filer/meta_aggregator.go
@@ -69,6 +69,7 @@ func (ma *MetaAggregator) subscribeToOneFiler(f *Filer, self string, peer string
peerSignature, err = ma.readFilerStoreSignature(peer)
}
+ // when filer store is not shared by multiple filers
if peerSignature != f.Signature {
if prevTsNs, err := ma.readOffset(f, peer, peerSignature); err == nil {
lastTsNs = prevTsNs
diff --git a/weed/filesys/dir_link.go b/weed/filesys/dir_link.go
index 29391faeb..606e52fcb 100644
--- a/weed/filesys/dir_link.go
+++ b/weed/filesys/dir_link.go
@@ -35,15 +35,20 @@ func (dir *Dir) Link(ctx context.Context, req *fuse.LinkRequest, old fs.Node) (f
return nil, err
}
+ oldEntry := oldFile.getEntry()
+ if oldEntry == nil {
+ return nil, fuse.EIO
+ }
+
// update old file to hardlink mode
- if len(oldFile.entry.HardLinkId) == 0 {
- oldFile.entry.HardLinkId = append(util.RandomBytes(16), HARD_LINK_MARKER)
- oldFile.entry.HardLinkCounter = 1
+ if len(oldEntry.HardLinkId) == 0 {
+ oldEntry.HardLinkId = append(util.RandomBytes(16), HARD_LINK_MARKER)
+ oldEntry.HardLinkCounter = 1
}
- oldFile.entry.HardLinkCounter++
+ oldEntry.HardLinkCounter++
updateOldEntryRequest := &filer_pb.UpdateEntryRequest{
Directory: oldFile.dir.FullPath(),
- Entry: oldFile.entry,
+ Entry: oldEntry,
Signatures: []int32{dir.wfs.signature},
}
@@ -53,11 +58,11 @@ func (dir *Dir) Link(ctx context.Context, req *fuse.LinkRequest, old fs.Node) (f
Entry: &filer_pb.Entry{
Name: req.NewName,
IsDirectory: false,
- Attributes: oldFile.entry.Attributes,
- Chunks: oldFile.entry.Chunks,
- Extended: oldFile.entry.Extended,
- HardLinkId: oldFile.entry.HardLinkId,
- HardLinkCounter: oldFile.entry.HardLinkCounter,
+ Attributes: oldEntry.Attributes,
+ Chunks: oldEntry.Chunks,
+ Extended: oldEntry.Extended,
+ HardLinkId: oldEntry.HardLinkId,
+ HardLinkCounter: oldEntry.HardLinkCounter,
},
Signatures: []int32{dir.wfs.signature},
}
diff --git a/weed/filesys/dirty_page.go b/weed/filesys/dirty_page.go
index f05a3a56a..8888cff96 100644
--- a/weed/filesys/dirty_page.go
+++ b/weed/filesys/dirty_page.go
@@ -30,7 +30,7 @@ func newDirtyPages(file *File) *ContinuousDirtyPages {
func (pages *ContinuousDirtyPages) AddPage(offset int64, data []byte) {
- glog.V(4).Infof("%s AddPage [%d,%d) of %d bytes", pages.f.fullpath(), offset, offset+int64(len(data)), pages.f.entry.Attributes.FileSize)
+ glog.V(4).Infof("%s AddPage [%d,%d)", pages.f.fullpath(), offset, offset+int64(len(data)))
if len(data) > int(pages.f.wfs.option.ChunkSizeLimit) {
// this is more than what buffer can hold.
@@ -69,7 +69,12 @@ func (pages *ContinuousDirtyPages) saveExistingLargestPageToStorage() (hasSavedD
return false
}
- fileSize := int64(pages.f.entry.Attributes.FileSize)
+ entry := pages.f.getEntry()
+ if entry == nil {
+ return false
+ }
+
+ fileSize := int64(entry.Attributes.FileSize)
chunkSize := min(maxList.Size(), fileSize-maxList.Offset())
if chunkSize == 0 {
diff --git a/weed/filesys/file.go b/weed/filesys/file.go
index bd722b31f..5931dd2ff 100644
--- a/weed/filesys/file.go
+++ b/weed/filesys/file.go
@@ -5,6 +5,7 @@ import (
"io"
"os"
"sort"
+ "sync"
"time"
"github.com/seaweedfs/fuse"
@@ -33,6 +34,7 @@ type File struct {
dir *Dir
wfs *WFS
entry *filer_pb.Entry
+ entryLock sync.RWMutex
entryViewCache []filer.VisibleInterval
isOpen int
reader io.ReaderAt
@@ -47,7 +49,7 @@ func (file *File) Attr(ctx context.Context, attr *fuse.Attr) (err error) {
glog.V(4).Infof("file Attr %s, open:%v existing:%v", file.fullpath(), file.isOpen, attr)
- entry := file.entry
+ entry := file.getEntry()
if file.isOpen <= 0 || entry == nil {
if entry, err = file.maybeLoadEntry(ctx); err != nil {
return err
@@ -258,7 +260,7 @@ func (file *File) Forget() {
}
func (file *File) maybeLoadEntry(ctx context.Context) (entry *filer_pb.Entry, err error) {
- entry = file.entry
+ entry = file.getEntry()
if file.isOpen > 0 {
return entry, nil
}
@@ -299,8 +301,13 @@ func (file *File) addChunks(chunks []*filer_pb.FileChunk) {
}
}
+ entry := file.getEntry()
+ if entry == nil {
+ return
+ }
+
// pick out-of-order chunks from existing chunks
- for _, chunk := range file.entry.Chunks {
+ for _, chunk := range entry.Chunks {
if lessThan(earliestChunk, chunk) {
chunks = append(chunks, chunk)
}
@@ -318,18 +325,22 @@ func (file *File) addChunks(chunks []*filer_pb.FileChunk) {
file.reader = nil
- glog.V(4).Infof("%s existing %d chunks adds %d more", file.fullpath(), len(file.entry.Chunks), len(chunks))
+ glog.V(4).Infof("%s existing %d chunks adds %d more", file.fullpath(), len(entry.Chunks), len(chunks))
- file.entry.Chunks = append(file.entry.Chunks, newChunks...)
+ entry.Chunks = append(entry.Chunks, newChunks...)
}
func (file *File) setEntry(entry *filer_pb.Entry) {
+ file.entryLock.Lock()
+ defer file.entryLock.Unlock()
file.entry = entry
file.entryViewCache, _ = filer.NonOverlappingVisibleIntervals(file.wfs.LookupFn(), entry.Chunks)
file.reader = nil
}
func (file *File) clearEntry() {
+ file.entryLock.Lock()
+ defer file.entryLock.Unlock()
file.entry = nil
file.entryViewCache = nil
file.reader = nil
@@ -359,3 +370,9 @@ func (file *File) saveEntry(entry *filer_pb.Entry) error {
return nil
})
}
+
+func (file *File) getEntry() *filer_pb.Entry {
+ file.entryLock.RLock()
+ defer file.entryLock.RUnlock()
+ return file.entry
+}
diff --git a/weed/filesys/filehandle.go b/weed/filesys/filehandle.go
index fb073c9cd..adec1cd70 100644
--- a/weed/filesys/filehandle.go
+++ b/weed/filesys/filehandle.go
@@ -40,8 +40,9 @@ func newFileHandle(file *File, uid, gid uint32) *FileHandle {
Uid: uid,
Gid: gid,
}
- if fh.f.entry != nil {
- fh.f.entry.Attributes.FileSize = filer.FileSize(fh.f.entry)
+ entry := fh.f.getEntry()
+ if entry != nil {
+ entry.Attributes.FileSize = filer.FileSize(entry)
}
return fh
@@ -104,22 +105,27 @@ func (fh *FileHandle) readFromDirtyPages(buff []byte, startOffset int64) (maxSto
func (fh *FileHandle) readFromChunks(buff []byte, offset int64) (int64, error) {
- fileSize := int64(filer.FileSize(fh.f.entry))
+ entry := fh.f.getEntry()
+ if entry == nil {
+ return 0, io.EOF
+ }
+
+ fileSize := int64(filer.FileSize(entry))
if fileSize == 0 {
glog.V(1).Infof("empty fh %v", fh.f.fullpath())
return 0, io.EOF
}
- if offset+int64(len(buff)) <= int64(len(fh.f.entry.Content)) {
- totalRead := copy(buff, fh.f.entry.Content[offset:])
+ if offset+int64(len(buff)) <= int64(len(entry.Content)) {
+ totalRead := copy(buff, entry.Content[offset:])
glog.V(4).Infof("file handle read cached %s [%d,%d] %d", fh.f.fullpath(), offset, offset+int64(totalRead), totalRead)
return int64(totalRead), nil
}
var chunkResolveErr error
if fh.f.entryViewCache == nil {
- fh.f.entryViewCache, chunkResolveErr = filer.NonOverlappingVisibleIntervals(fh.f.wfs.LookupFn(), fh.f.entry.Chunks)
+ fh.f.entryViewCache, chunkResolveErr = filer.NonOverlappingVisibleIntervals(fh.f.wfs.LookupFn(), entry.Chunks)
if chunkResolveErr != nil {
return 0, fmt.Errorf("fail to resolve chunk manifest: %v", chunkResolveErr)
}
@@ -158,8 +164,13 @@ func (fh *FileHandle) Write(ctx context.Context, req *fuse.WriteRequest, resp *f
copy(data, req.Data)
}
- fh.f.entry.Content = nil
- fh.f.entry.Attributes.FileSize = uint64(max(req.Offset+int64(len(data)), int64(fh.f.entry.Attributes.FileSize)))
+ entry := fh.f.getEntry()
+ if entry == nil {
+ return fuse.EIO
+ }
+
+ entry.Content = nil
+ entry.Attributes.FileSize = uint64(max(req.Offset+int64(len(data)), int64(entry.Attributes.FileSize)))
glog.V(4).Infof("%v write [%d,%d) %d", fh.f.fullpath(), req.Offset, req.Offset+int64(len(req.Data)), len(req.Data))
fh.dirtyPages.AddPage(req.Offset, data)
@@ -242,35 +253,40 @@ func (fh *FileHandle) doFlush(ctx context.Context, header fuse.Header) error {
err := fh.f.wfs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error {
- if fh.f.entry.Attributes != nil {
- fh.f.entry.Attributes.Mime = fh.contentType
- if fh.f.entry.Attributes.Uid == 0 {
- fh.f.entry.Attributes.Uid = header.Uid
+ entry := fh.f.getEntry()
+ if entry == nil {
+ return nil
+ }
+
+ if entry.Attributes != nil {
+ entry.Attributes.Mime = fh.contentType
+ if entry.Attributes.Uid == 0 {
+ entry.Attributes.Uid = header.Uid
}
- if fh.f.entry.Attributes.Gid == 0 {
- fh.f.entry.Attributes.Gid = header.Gid
+ if entry.Attributes.Gid == 0 {
+ entry.Attributes.Gid = header.Gid
}
- if fh.f.entry.Attributes.Crtime == 0 {
- fh.f.entry.Attributes.Crtime = time.Now().Unix()
+ if entry.Attributes.Crtime == 0 {
+ entry.Attributes.Crtime = time.Now().Unix()
}
- fh.f.entry.Attributes.Mtime = time.Now().Unix()
- fh.f.entry.Attributes.FileMode = uint32(os.FileMode(fh.f.entry.Attributes.FileMode) &^ fh.f.wfs.option.Umask)
- fh.f.entry.Attributes.Collection = fh.dirtyPages.collection
- fh.f.entry.Attributes.Replication = fh.dirtyPages.replication
+ entry.Attributes.Mtime = time.Now().Unix()
+ entry.Attributes.FileMode = uint32(os.FileMode(entry.Attributes.FileMode) &^ fh.f.wfs.option.Umask)
+ entry.Attributes.Collection = fh.dirtyPages.collection
+ entry.Attributes.Replication = fh.dirtyPages.replication
}
request := &filer_pb.CreateEntryRequest{
Directory: fh.f.dir.FullPath(),
- Entry: fh.f.entry,
+ Entry: entry,
Signatures: []int32{fh.f.wfs.signature},
}
- glog.V(4).Infof("%s set chunks: %v", fh.f.fullpath(), len(fh.f.entry.Chunks))
- for i, chunk := range fh.f.entry.Chunks {
+ glog.V(4).Infof("%s set chunks: %v", fh.f.fullpath(), len(entry.Chunks))
+ for i, chunk := range entry.Chunks {
glog.V(4).Infof("%s chunks %d: %v [%d,%d)", fh.f.fullpath(), i, chunk.GetFileIdString(), chunk.Offset, chunk.Offset+int64(chunk.Size))
}
- manifestChunks, nonManifestChunks := filer.SeparateManifestChunks(fh.f.entry.Chunks)
+ manifestChunks, nonManifestChunks := filer.SeparateManifestChunks(entry.Chunks)
chunks, _ := filer.CompactFileChunks(fh.f.wfs.LookupFn(), nonManifestChunks)
chunks, manifestErr := filer.MaybeManifestize(fh.f.wfs.saveDataAsChunk(fh.f.fullpath()), chunks)
@@ -278,7 +294,7 @@ func (fh *FileHandle) doFlush(ctx context.Context, header fuse.Header) error {
// not good, but should be ok
glog.V(0).Infof("MaybeManifestize: %v", manifestErr)
}
- fh.f.entry.Chunks = append(chunks, manifestChunks...)
+ entry.Chunks = append(chunks, manifestChunks...)
fh.f.wfs.mapPbIdFromLocalToFiler(request.Entry)
defer fh.f.wfs.mapPbIdFromFilerToLocal(request.Entry)
diff --git a/weed/filesys/fscache.go b/weed/filesys/fscache.go
index fdec8253c..6b1012090 100644
--- a/weed/filesys/fscache.go
+++ b/weed/filesys/fscache.go
@@ -124,8 +124,9 @@ func (c *FsCache) Move(oldPath util.FullPath, newPath util.FullPath) *FsNode {
}
if f, ok := src.node.(*File); ok {
f.Name = target.name
- if f.entry != nil {
- f.entry.Name = f.Name
+ entry := f.getEntry()
+ if entry != nil {
+ entry.Name = f.Name
}
}
parent.disconnectChild(target)
diff --git a/weed/operation/upload_content.go b/weed/operation/upload_content.go
index 70428bb07..9957a04cd 100644
--- a/weed/operation/upload_content.go
+++ b/weed/operation/upload_content.go
@@ -31,6 +31,7 @@ type UploadResult struct {
Mime string `json:"mime,omitempty"`
Gzip uint32 `json:"gzip,omitempty"`
ContentMd5 string `json:"contentMd5,omitempty"`
+ RetryCount int `json:"-"`
}
func (uploadResult *UploadResult) ToPbFileChunk(fileId string, offset int64) *filer_pb.FileChunk {
@@ -96,6 +97,7 @@ func retriedUploadData(uploadUrl string, filename string, cipher bool, data []by
for i := 0; i < 3; i++ {
uploadResult, err = doUploadData(uploadUrl, filename, cipher, data, isInputCompressed, mtype, pairMap, jwt)
if err == nil {
+ uploadResult.RetryCount = i
return
} else {
glog.Warningf("uploading to %s: %v", uploadUrl, err)
diff --git a/weed/pb/grpc_client_server.go b/weed/pb/grpc_client_server.go
index c7a47a91c..9efcd9bdc 100644
--- a/weed/pb/grpc_client_server.go
+++ b/weed/pb/grpc_client_server.go
@@ -109,15 +109,19 @@ func WithCachedGrpcClient(fn func(*grpc.ClientConn) error, address string, opts
}
func ParseServerToGrpcAddress(server string) (serverGrpcAddress string, err error) {
+ return ParseServerAddress(server, 10000)
+}
+
+func ParseServerAddress(server string, deltaPort int) (newServerAddress string, err error) {
host, port, parseErr := hostAndPort(server)
if parseErr != nil {
return "", fmt.Errorf("server port parse error: %v", parseErr)
}
- grpcPort := int(port) + 10000
+ newPort := int(port) + deltaPort
- return fmt.Sprintf("%s:%d", host, grpcPort), nil
+ return fmt.Sprintf("%s:%d", host, newPort), nil
}
func hostAndPort(address string) (host string, port uint64, err error) {
diff --git a/weed/pb/volume_info.go b/weed/pb/volume_info.go
index c4f733f5c..cae9e018f 100644
--- a/weed/pb/volume_info.go
+++ b/weed/pb/volume_info.go
@@ -15,40 +15,49 @@ import (
)
// MaybeLoadVolumeInfo load the file data as *volume_server_pb.VolumeInfo, the returned volumeInfo will not be nil
-func MaybeLoadVolumeInfo(fileName string) (*volume_server_pb.VolumeInfo, bool, error) {
+func MaybeLoadVolumeInfo(fileName string) (volumeInfo *volume_server_pb.VolumeInfo, hasRemoteFile bool, hasVolumeInfoFile bool, err error) {
- volumeInfo := &volume_server_pb.VolumeInfo{}
+ volumeInfo = &volume_server_pb.VolumeInfo{}
glog.V(1).Infof("maybeLoadVolumeInfo checks %s", fileName)
if exists, canRead, _, _, _ := util.CheckFile(fileName); !exists || !canRead {
if !exists {
- return volumeInfo, false, nil
+ return
}
+ hasVolumeInfoFile = true
if !canRead {
glog.Warningf("can not read %s", fileName)
- return volumeInfo, false, fmt.Errorf("can not read %s", fileName)
+ err = fmt.Errorf("can not read %s", fileName)
+ return
}
- return volumeInfo, false, nil
+ return
}
+ hasVolumeInfoFile = true
+
glog.V(1).Infof("maybeLoadVolumeInfo reads %s", fileName)
tierData, readErr := ioutil.ReadFile(fileName)
if readErr != nil {
glog.Warningf("fail to read %s : %v", fileName, readErr)
- return volumeInfo, false, fmt.Errorf("fail to read %s : %v", fileName, readErr)
+ err = fmt.Errorf("fail to read %s : %v", fileName, readErr)
+ return
+
}
glog.V(1).Infof("maybeLoadVolumeInfo Unmarshal volume info %v", fileName)
- if err := jsonpb.Unmarshal(bytes.NewReader(tierData), volumeInfo); err != nil {
+ if err = jsonpb.Unmarshal(bytes.NewReader(tierData), volumeInfo); err != nil {
glog.Warningf("unmarshal error: %v", err)
- return volumeInfo, false, fmt.Errorf("unmarshal error: %v", err)
+ err = fmt.Errorf("unmarshal error: %v", err)
+ return
}
if len(volumeInfo.GetFiles()) == 0 {
- return volumeInfo, false, nil
+ return
}
- return volumeInfo, true, nil
+ hasRemoteFile = true
+
+ return
}
func SaveVolumeInfo(fileName string, volumeInfo *volume_server_pb.VolumeInfo) error {
diff --git a/weed/s3api/s3api_object_handlers.go b/weed/s3api/s3api_object_handlers.go
index 55199d4eb..19d85c495 100644
--- a/weed/s3api/s3api_object_handlers.go
+++ b/weed/s3api/s3api_object_handlers.go
@@ -185,7 +185,7 @@ func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *h
for _, object := range deleteObjects.Objects {
lastSeparator := strings.LastIndex(object.ObjectName, "/")
- parentDirectoryPath, entryName, isDeleteData, isRecursive := "", object.ObjectName, true, true
+ parentDirectoryPath, entryName, isDeleteData, isRecursive := "", object.ObjectName, true, false
if lastSeparator > 0 && lastSeparator+1 < len(object.ObjectName) {
entryName = object.ObjectName[lastSeparator+1:]
parentDirectoryPath = "/" + object.ObjectName[:lastSeparator]
diff --git a/weed/server/filer_server_handlers_write_autochunk.go b/weed/server/filer_server_handlers_write_autochunk.go
index d3ce7e605..318399281 100644
--- a/weed/server/filer_server_handlers_write_autochunk.go
+++ b/weed/server/filer_server_handlers_write_autochunk.go
@@ -38,10 +38,10 @@ func (fs *FilerServer) autoChunk(ctx context.Context, w http.ResponseWriter, r *
chunkSize := 1024 * 1024 * maxMB
- stats.FilerRequestCounter.WithLabelValues("postAutoChunk").Inc()
+ stats.FilerRequestCounter.WithLabelValues("chunk").Inc()
start := time.Now()
defer func() {
- stats.FilerRequestHistogram.WithLabelValues("postAutoChunk").Observe(time.Since(start).Seconds())
+ stats.FilerRequestHistogram.WithLabelValues("chunk").Observe(time.Since(start).Seconds())
}()
var reply *FilerPostResult
@@ -302,13 +302,16 @@ func (fs *FilerServer) uploadReaderToChunks(w http.ResponseWriter, r *http.Reque
func (fs *FilerServer) doUpload(urlLocation string, w http.ResponseWriter, r *http.Request, limitedReader io.Reader, fileName string, contentType string, pairMap map[string]string, auth security.EncodedJwt) (*operation.UploadResult, error, []byte) {
- stats.FilerRequestCounter.WithLabelValues("postAutoChunkUpload").Inc()
+ stats.FilerRequestCounter.WithLabelValues("chunkUpload").Inc()
start := time.Now()
defer func() {
- stats.FilerRequestHistogram.WithLabelValues("postAutoChunkUpload").Observe(time.Since(start).Seconds())
+ stats.FilerRequestHistogram.WithLabelValues("chunkUpload").Observe(time.Since(start).Seconds())
}()
uploadResult, err, data := operation.Upload(urlLocation, fileName, fs.option.Cipher, limitedReader, false, contentType, pairMap, auth)
+ if uploadResult != nil && uploadResult.RetryCount > 0 {
+ stats.FilerRequestCounter.WithLabelValues("chunkUploadRetry").Add(float64(uploadResult.RetryCount))
+ }
return uploadResult, err, data
}
diff --git a/weed/server/volume_server_tcp_handlers_write.go b/weed/server/volume_server_tcp_handlers_write.go
index 7c2f1a77f..a009611da 100644
--- a/weed/server/volume_server_tcp_handlers_write.go
+++ b/weed/server/volume_server_tcp_handlers_write.go
@@ -6,6 +6,7 @@ import (
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/storage/needle"
"github.com/chrislusf/seaweedfs/weed/util"
+ "io"
"net"
"strings"
)
@@ -21,9 +22,12 @@ func (vs *VolumeServer) HandleTcpConnection(c net.Conn) {
for {
cmd, err := bufReader.ReadString('\n')
if err != nil {
- glog.Errorf("read command from %s: %v", c.RemoteAddr().String(), err)
+ if err != io.EOF {
+ glog.Errorf("read command from %s: %v", c.RemoteAddr().String(), err)
+ }
return
}
+ cmd = cmd[:len(cmd)-1]
switch cmd[0] {
case '+':
fileId := cmd[1:]
diff --git a/weed/storage/erasure_coding/ec_volume.go b/weed/storage/erasure_coding/ec_volume.go
index 85d6a5fc8..171db92a4 100644
--- a/weed/storage/erasure_coding/ec_volume.go
+++ b/weed/storage/erasure_coding/ec_volume.go
@@ -63,7 +63,7 @@ func NewEcVolume(diskType types.DiskType, dir string, dirIdx string, collection
// read volume info
ev.Version = needle.Version3
- if volumeInfo, found, _ := pb.MaybeLoadVolumeInfo(dataBaseFileName + ".vif"); found {
+ if volumeInfo, _, found, _ := pb.MaybeLoadVolumeInfo(dataBaseFileName + ".vif"); found {
ev.Version = needle.Version(volumeInfo.Version)
} else {
pb.SaveVolumeInfo(dataBaseFileName+".vif", &volume_server_pb.VolumeInfo{Version: uint32(ev.Version)})
diff --git a/weed/storage/needle/crc.go b/weed/storage/needle/crc.go
index e1bac829a..22456faa2 100644
--- a/weed/storage/needle/crc.go
+++ b/weed/storage/needle/crc.go
@@ -2,7 +2,6 @@ package needle
import (
"fmt"
- "hash"
"io"
"github.com/klauspost/crc32"
@@ -35,21 +34,21 @@ func (n *Needle) Etag() string {
func NewCRCwriter(w io.Writer) *CRCwriter {
return &CRCwriter{
- h: crc32.New(table),
+ crc: CRC(0),
w: w,
}
}
type CRCwriter struct {
- h hash.Hash32
- w io.Writer
+ crc CRC
+ w io.Writer
}
func (c *CRCwriter) Write(p []byte) (n int, err error) {
n, err = c.w.Write(p) // with each write ...
- c.h.Write(p) // ... update the hash
+ c.crc = c.crc.Update(p)
return
}
-func (c *CRCwriter) Sum() uint32 { return c.h.Sum32() } // final hash
+func (c *CRCwriter) Sum() uint32 { return c.crc.Value() } // final hash
diff --git a/weed/storage/needle_map_leveldb.go b/weed/storage/needle_map_leveldb.go
index 9716e9729..31c86d124 100644
--- a/weed/storage/needle_map_leveldb.go
+++ b/weed/storage/needle_map_leveldb.go
@@ -152,8 +152,10 @@ func (m *LevelDbNeedleMap) Close() {
glog.Warningf("close index file %s failed: %v", indexFileName, err)
}
- if err := m.db.Close(); err != nil {
- glog.Warningf("close levelDB failed: %v", err)
+ if m.db != nil {
+ if err := m.db.Close(); err != nil {
+ glog.Warningf("close levelDB failed: %v", err)
+ }
}
}
diff --git a/weed/storage/needle_map_sorted_file.go b/weed/storage/needle_map_sorted_file.go
index 3449ff9dc..662b90531 100644
--- a/weed/storage/needle_map_sorted_file.go
+++ b/weed/storage/needle_map_sorted_file.go
@@ -94,8 +94,12 @@ func (m *SortedFileNeedleMap) Delete(key NeedleId, offset Offset) error {
}
func (m *SortedFileNeedleMap) Close() {
- m.indexFile.Close()
- m.dbFile.Close()
+ if m.indexFile != nil {
+ m.indexFile.Close()
+ }
+ if m.dbFile != nil {
+ m.dbFile.Close()
+ }
}
func (m *SortedFileNeedleMap) Destroy() error {
diff --git a/weed/storage/store.go b/weed/storage/store.go
index 47829666a..fb33a708c 100644
--- a/weed/storage/store.go
+++ b/weed/storage/store.go
@@ -220,20 +220,30 @@ func (s *Store) CollectHeartbeat() *master_pb.Heartbeat {
if maxFileKey < curMaxFileKey {
maxFileKey = curMaxFileKey
}
+ deleteVolume := false
if !v.expired(volumeMessage.Size, s.GetVolumeSizeLimit()) {
volumeMessages = append(volumeMessages, volumeMessage)
} else {
if v.expiredLongEnough(MAX_TTL_VOLUME_REMOVAL_DELAY) {
deleteVids = append(deleteVids, v.Id)
+ deleteVolume = true
} else {
glog.V(0).Infof("volume %d is expired", v.Id)
}
if v.lastIoError != nil {
deleteVids = append(deleteVids, v.Id)
+ deleteVolume = true
glog.Warningf("volume %d has IO error: %v", v.Id, v.lastIoError)
}
}
- collectionVolumeSize[v.Collection] += volumeMessage.Size
+
+ if _, exist := collectionVolumeSize[v.Collection]; !exist {
+ collectionVolumeSize[v.Collection] = 0
+ }
+ if !deleteVolume {
+ collectionVolumeSize[v.Collection] += volumeMessage.Size
+ }
+
if _, exist := collectionVolumeReadOnlyCount[v.Collection]; !exist {
collectionVolumeReadOnlyCount[v.Collection] = map[string]uint8{
"IsReadOnly": 0,
@@ -242,7 +252,7 @@ func (s *Store) CollectHeartbeat() *master_pb.Heartbeat {
"isDiskSpaceLow": 0,
}
}
- if v.IsReadOnly() {
+ if !deleteVolume && v.IsReadOnly() {
collectionVolumeReadOnlyCount[v.Collection]["IsReadOnly"] += 1
if v.noWriteOrDelete {
collectionVolumeReadOnlyCount[v.Collection]["noWriteOrDelete"] += 1
@@ -267,7 +277,7 @@ func (s *Store) CollectHeartbeat() *master_pb.Heartbeat {
glog.V(0).Infof("volume %d is deleted", vid)
}
} else {
- glog.V(0).Infof("delete volume %d: %v", vid, err)
+ glog.Warningf("delete volume %d: %v", vid, err)
}
}
location.volumesLock.Unlock()
@@ -446,7 +456,7 @@ func (s *Store) ConfigureVolume(i needle.VolumeId, replication string) error {
// load, modify, save
baseFileName := strings.TrimSuffix(fileInfo.Name(), filepath.Ext(fileInfo.Name()))
vifFile := filepath.Join(location.Directory, baseFileName+".vif")
- volumeInfo, _, err := pb.MaybeLoadVolumeInfo(vifFile)
+ volumeInfo, _, _, err := pb.MaybeLoadVolumeInfo(vifFile)
if err != nil {
return fmt.Errorf("volume %d fail to load vif", i)
}
diff --git a/weed/storage/volume_read_write.go b/weed/storage/volume_read_write.go
index 07376bc88..1853e458a 100644
--- a/weed/storage/volume_read_write.go
+++ b/weed/storage/volume_read_write.go
@@ -104,47 +104,8 @@ func (v *Volume) syncWrite(n *needle.Needle) (offset uint64, size Size, isUnchan
err = fmt.Errorf("volume size limit %d exceeded! current size is %d", MaxPossibleVolumeSize, v.nm.ContentSize())
return
}
- if v.isFileUnchanged(n) {
- size = Size(n.DataSize)
- isUnchanged = true
- return
- }
-
- // check whether existing needle cookie matches
- nv, ok := v.nm.Get(n.Id)
- if ok {
- existingNeedle, _, _, existingNeedleReadErr := needle.ReadNeedleHeader(v.DataBackend, v.Version(), nv.Offset.ToActualOffset())
- if existingNeedleReadErr != nil {
- err = fmt.Errorf("reading existing needle: %v", existingNeedleReadErr)
- return
- }
- if existingNeedle.Cookie != n.Cookie {
- glog.V(0).Infof("write cookie mismatch: existing %x, new %x", existingNeedle.Cookie, n.Cookie)
- err = fmt.Errorf("mismatching cookie %x", n.Cookie)
- return
- }
- }
-
- // append to dat file
- n.AppendAtNs = uint64(time.Now().UnixNano())
- offset, size, _, err = n.Append(v.DataBackend, v.Version())
- v.checkReadWriteError(err)
- if err != nil {
- return
- }
-
- v.lastAppendAtNs = n.AppendAtNs
- // add to needle map
- if !ok || uint64(nv.Offset.ToActualOffset()) < offset {
- if err = v.nm.Put(n.Id, ToOffset(int64(offset)), n.Size); err != nil {
- glog.V(4).Infof("failed to save in needle map %d: %v", n.Id, err)
- }
- }
- if v.lastModifiedTsSeconds < n.LastModified {
- v.lastModifiedTsSeconds = n.LastModified
- }
- return
+ return v.doWriteRequest(n)
}
func (v *Volume) writeNeedle2(n *needle.Needle, fsync bool) (offset uint64, size Size, isUnchanged bool, err error) {
@@ -223,24 +184,7 @@ func (v *Volume) syncDelete(n *needle.Needle) (Size, error) {
return 0, err
}
- nv, ok := v.nm.Get(n.Id)
- // fmt.Println("key", n.Id, "volume offset", nv.Offset, "data_size", n.Size, "cached size", nv.Size)
- if ok && nv.Size.IsValid() {
- size := nv.Size
- n.Data = nil
- n.AppendAtNs = uint64(time.Now().UnixNano())
- offset, _, _, err := n.Append(v.DataBackend, v.Version())
- v.checkReadWriteError(err)
- if err != nil {
- return size, err
- }
- v.lastAppendAtNs = n.AppendAtNs
- if err = v.nm.Delete(n.Id, ToOffset(int64(offset))); err != nil {
- return size, err
- }
- return size, err
- }
- return 0, nil
+ return v.doDeleteRequest(n)
}
func (v *Volume) deleteNeedle2(n *needle.Needle) (Size, error) {
diff --git a/weed/storage/volume_stream_write.go b/weed/storage/volume_stream_write.go
index f619de30b..955875aa2 100644
--- a/weed/storage/volume_stream_write.go
+++ b/weed/storage/volume_stream_write.go
@@ -48,8 +48,6 @@ func (v *Volume) StreamWrite(n *needle.Needle, data io.Reader, dataSize uint32)
// data checksum
util.Uint32toBytes(header[0:needle.NeedleChecksumSize], crcWriter.Sum())
- df.Write(header[0:needle.NeedleChecksumSize])
-
// write timestamp, padding
n.AppendAtNs = uint64(time.Now().UnixNano())
util.Uint64toBytes(header[needle.NeedleChecksumSize:needle.NeedleChecksumSize+TimestampSize], n.AppendAtNs)
diff --git a/weed/storage/volume_tier.go b/weed/storage/volume_tier.go
index da93221b2..23160906b 100644
--- a/weed/storage/volume_tier.go
+++ b/weed/storage/volume_tier.go
@@ -16,7 +16,7 @@ func (v *Volume) GetVolumeInfo() *volume_server_pb.VolumeInfo {
func (v *Volume) maybeLoadVolumeInfo() (found bool) {
var err error
- v.volumeInfo, v.hasRemoteFile, err = pb.MaybeLoadVolumeInfo(v.FileName(".vif"))
+ v.volumeInfo, v.hasRemoteFile, found, err = pb.MaybeLoadVolumeInfo(v.FileName(".vif"))
if v.volumeInfo.Version == 0 {
v.volumeInfo.Version = uint32(needle.CurrentVersion)
@@ -29,10 +29,10 @@ func (v *Volume) maybeLoadVolumeInfo() (found bool) {
if err != nil {
glog.Warningf("load volume %d.vif file: %v", v.Id, err)
- return false
+ return
}
- return true
+ return
}
diff --git a/weed/util/constants.go b/weed/util/constants.go
index 2a7e57c5d..1cd2f160a 100644
--- a/weed/util/constants.go
+++ b/weed/util/constants.go
@@ -5,7 +5,7 @@ import (
)
var (
- VERSION = fmt.Sprintf("%s %d.%02d", sizeLimit, 2, 29)
+ VERSION = fmt.Sprintf("%s %d.%02d", sizeLimit, 2, 31)
COMMIT = ""
)
diff --git a/weed/wdclient/net2/base_connection_pool.go b/weed/wdclient/net2/base_connection_pool.go
new file mode 100644
index 000000000..5cc037d0f
--- /dev/null
+++ b/weed/wdclient/net2/base_connection_pool.go
@@ -0,0 +1,159 @@
+package net2
+
+import (
+ "net"
+ "strings"
+ "time"
+
+ rp "github.com/chrislusf/seaweedfs/weed/wdclient/resource_pool"
+)
+
+const defaultDialTimeout = 1 * time.Second
+
+func defaultDialFunc(network string, address string) (net.Conn, error) {
+ return net.DialTimeout(network, address, defaultDialTimeout)
+}
+
+func parseResourceLocation(resourceLocation string) (
+ network string,
+ address string) {
+
+ idx := strings.Index(resourceLocation, " ")
+ if idx >= 0 {
+ return resourceLocation[:idx], resourceLocation[idx+1:]
+ }
+
+ return "", resourceLocation
+}
+
+// A thin wrapper around the underlying resource pool.
+type connectionPoolImpl struct {
+ options ConnectionOptions
+
+ pool rp.ResourcePool
+}
+
+// This returns a connection pool where all connections are connected
+// to the same (network, address)
+func newBaseConnectionPool(
+ options ConnectionOptions,
+ createPool func(rp.Options) rp.ResourcePool) ConnectionPool {
+
+ dial := options.Dial
+ if dial == nil {
+ dial = defaultDialFunc
+ }
+
+ openFunc := func(loc string) (interface{}, error) {
+ network, address := parseResourceLocation(loc)
+ return dial(network, address)
+ }
+
+ closeFunc := func(handle interface{}) error {
+ return handle.(net.Conn).Close()
+ }
+
+ poolOptions := rp.Options{
+ MaxActiveHandles: options.MaxActiveConnections,
+ MaxIdleHandles: options.MaxIdleConnections,
+ MaxIdleTime: options.MaxIdleTime,
+ OpenMaxConcurrency: options.DialMaxConcurrency,
+ Open: openFunc,
+ Close: closeFunc,
+ NowFunc: options.NowFunc,
+ }
+
+ return &connectionPoolImpl{
+ options: options,
+ pool: createPool(poolOptions),
+ }
+}
+
+// This returns a connection pool where all connections are connected
+// to the same (network, address)
+func NewSimpleConnectionPool(options ConnectionOptions) ConnectionPool {
+ return newBaseConnectionPool(options, rp.NewSimpleResourcePool)
+}
+
+// This returns a connection pool that manages multiple (network, address)
+// entries. The connections to each (network, address) entry acts
+// independently. For example ("tcp", "localhost:11211") could act as memcache
+// shard 0 and ("tcp", "localhost:11212") could act as memcache shard 1.
+func NewMultiConnectionPool(options ConnectionOptions) ConnectionPool {
+ return newBaseConnectionPool(
+ options,
+ func(poolOptions rp.Options) rp.ResourcePool {
+ return rp.NewMultiResourcePool(poolOptions, nil)
+ })
+}
+
+// See ConnectionPool for documentation.
+func (p *connectionPoolImpl) NumActive() int32 {
+ return p.pool.NumActive()
+}
+
+// See ConnectionPool for documentation.
+func (p *connectionPoolImpl) ActiveHighWaterMark() int32 {
+ return p.pool.ActiveHighWaterMark()
+}
+
+// This returns the number of alive idle connections. This method is not part
+// of ConnectionPool's API. It is used only for testing.
+func (p *connectionPoolImpl) NumIdle() int {
+ return p.pool.NumIdle()
+}
+
+// BaseConnectionPool can only register a single (network, address) entry.
+// Register should be call before any Get calls.
+func (p *connectionPoolImpl) Register(network string, address string) error {
+ return p.pool.Register(network + " " + address)
+}
+
+// BaseConnectionPool has nothing to do on Unregister.
+func (p *connectionPoolImpl) Unregister(network string, address string) error {
+ return nil
+}
+
+func (p *connectionPoolImpl) ListRegistered() []NetworkAddress {
+ result := make([]NetworkAddress, 0, 1)
+ for _, location := range p.pool.ListRegistered() {
+ network, address := parseResourceLocation(location)
+
+ result = append(
+ result,
+ NetworkAddress{
+ Network: network,
+ Address: address,
+ })
+ }
+ return result
+}
+
+// This gets an active connection from the connection pool. Note that network
+// and address arguments are ignored (The connections with point to the
+// network/address provided by the first Register call).
+func (p *connectionPoolImpl) Get(
+ network string,
+ address string) (ManagedConn, error) {
+
+ handle, err := p.pool.Get(network + " " + address)
+ if err != nil {
+ return nil, err
+ }
+ return NewManagedConn(network, address, handle, p, p.options), nil
+}
+
+// See ConnectionPool for documentation.
+func (p *connectionPoolImpl) Release(conn ManagedConn) error {
+ return conn.ReleaseConnection()
+}
+
+// See ConnectionPool for documentation.
+func (p *connectionPoolImpl) Discard(conn ManagedConn) error {
+ return conn.DiscardConnection()
+}
+
+// See ConnectionPool for documentation.
+func (p *connectionPoolImpl) EnterLameDuckMode() {
+ p.pool.EnterLameDuckMode()
+}
diff --git a/weed/wdclient/net2/connection_pool.go b/weed/wdclient/net2/connection_pool.go
new file mode 100644
index 000000000..5b8d4d232
--- /dev/null
+++ b/weed/wdclient/net2/connection_pool.go
@@ -0,0 +1,97 @@
+package net2
+
+import (
+ "net"
+ "time"
+)
+
+type ConnectionOptions struct {
+ // The maximum number of connections that can be active per host at any
+ // given time (A non-positive value indicates the number of connections
+ // is unbounded).
+ MaxActiveConnections int32
+
+ // The maximum number of idle connections per host that are kept alive by
+ // the connection pool.
+ MaxIdleConnections uint32
+
+ // The maximum amount of time an idle connection can alive (if specified).
+ MaxIdleTime *time.Duration
+
+ // This limits the number of concurrent Dial calls (there's no limit when
+ // DialMaxConcurrency is non-positive).
+ DialMaxConcurrency int
+
+ // Dial specifies the dial function for creating network connections.
+ // If Dial is nil, net.DialTimeout is used, with timeout set to 1 second.
+ Dial func(network string, address string) (net.Conn, error)
+
+ // This specifies the now time function. When the function is non-nil, the
+ // connection pool will use the specified function instead of time.Now to
+ // generate the current time.
+ NowFunc func() time.Time
+
+ // This specifies the timeout for any Read() operation.
+ // Note that setting this to 0 (i.e. not setting it) will make
+ // read operations block indefinitely.
+ ReadTimeout time.Duration
+
+ // This specifies the timeout for any Write() operation.
+ // Note that setting this to 0 (i.e. not setting it) will make
+ // write operations block indefinitely.
+ WriteTimeout time.Duration
+}
+
+func (o ConnectionOptions) getCurrentTime() time.Time {
+ if o.NowFunc == nil {
+ return time.Now()
+ } else {
+ return o.NowFunc()
+ }
+}
+
+// A generic interface for managed connection pool. All connection pool
+// implementations must be threadsafe.
+type ConnectionPool interface {
+ // This returns the number of active connections that are on loan.
+ NumActive() int32
+
+ // This returns the highest number of active connections for the entire
+ // lifetime of the pool.
+ ActiveHighWaterMark() int32
+
+ // This returns the number of idle connections that are in the pool.
+ NumIdle() int
+
+ // This associates (network, address) to the connection pool; afterwhich,
+ // the user can get connections to (network, address).
+ Register(network string, address string) error
+
+ // This dissociate (network, address) from the connection pool;
+ // afterwhich, the user can no longer get connections to
+ // (network, address).
+ Unregister(network string, address string) error
+
+ // This returns the list of registered (network, address) entries.
+ ListRegistered() []NetworkAddress
+
+ // This gets an active connection from the connection pool. The connection
+ // will remain active until one of the following is called:
+ // 1. conn.ReleaseConnection()
+ // 2. conn.DiscardConnection()
+ // 3. pool.Release(conn)
+ // 4. pool.Discard(conn)
+ Get(network string, address string) (ManagedConn, error)
+
+ // This releases an active connection back to the connection pool.
+ Release(conn ManagedConn) error
+
+ // This discards an active connection from the connection pool.
+ Discard(conn ManagedConn) error
+
+ // Enter the connection pool into lame duck mode. The connection pool
+ // will no longer return connections, and all idle connections are closed
+ // immediately (including active connections that are released back to the
+ // pool afterward).
+ EnterLameDuckMode()
+}
diff --git a/weed/wdclient/net2/doc.go b/weed/wdclient/net2/doc.go
new file mode 100644
index 000000000..f4d6552e4
--- /dev/null
+++ b/weed/wdclient/net2/doc.go
@@ -0,0 +1,6 @@
+// net2 is a collection of functions meant to supplement the capabilities
+// provided by the standard "net" package.
+package net2
+
+// copied from https://github.com/dropbox/godropbox/tree/master/net2
+// removed other dependencies \ No newline at end of file
diff --git a/weed/wdclient/net2/ip.go b/weed/wdclient/net2/ip.go
new file mode 100644
index 000000000..60e46342f
--- /dev/null
+++ b/weed/wdclient/net2/ip.go
@@ -0,0 +1,177 @@
+package net2
+
+import (
+ "fmt"
+ "log"
+ "net"
+ "os"
+ "strings"
+ "sync"
+)
+
+var myHostname string
+var myHostnameOnce sync.Once
+
+// Like os.Hostname but caches first successful result, making it cheap to call it
+// over and over.
+// It will also crash whole process if fetching Hostname fails!
+func MyHostname() string {
+ myHostnameOnce.Do(func() {
+ var err error
+ myHostname, err = os.Hostname()
+ if err != nil {
+ log.Fatal(err)
+ }
+ })
+ return myHostname
+}
+
+var myIp4 *net.IPAddr
+var myIp4Once sync.Once
+
+// Resolves `MyHostname()` to an Ip4 address. Caches first successful result, making it
+// cheap to call it over and over.
+// It will also crash whole process if resolving the IP fails!
+func MyIp4() *net.IPAddr {
+ myIp4Once.Do(func() {
+ var err error
+ myIp4, err = net.ResolveIPAddr("ip4", MyHostname())
+ if err != nil {
+ log.Fatal(err)
+ }
+ })
+ return myIp4
+}
+
+var myIp6 *net.IPAddr
+var myIp6Once sync.Once
+
+// Resolves `MyHostname()` to an Ip6 address. Caches first successful result, making it
+// cheap to call it over and over.
+// It will also crash whole process if resolving the IP fails!
+func MyIp6() *net.IPAddr {
+ myIp6Once.Do(func() {
+ var err error
+ myIp6, err = net.ResolveIPAddr("ip6", MyHostname())
+ if err != nil {
+ log.Fatal(err)
+ }
+ })
+ return myIp6
+}
+
+// This returns the list of local ip addresses which other hosts can connect
+// to (NOTE: Loopback ip is ignored).
+// Also resolves Hostname to an address and adds it to the list too, so
+// IPs from /etc/hosts can work too.
+func GetLocalIPs() ([]*net.IP, error) {
+ hostname, err := os.Hostname()
+ if err != nil {
+ return nil, fmt.Errorf("Failed to lookup hostname: %v", err)
+ }
+ // Resolves IP Address from Hostname, this way overrides in /etc/hosts
+ // can work too for IP resolution.
+ ipInfo, err := net.ResolveIPAddr("ip4", hostname)
+ if err != nil {
+ return nil, fmt.Errorf("Failed to resolve ip: %v", err)
+ }
+ ips := []*net.IP{&ipInfo.IP}
+
+ // TODO(zviad): Is rest of the code really necessary?
+ addrs, err := net.InterfaceAddrs()
+ if err != nil {
+ return nil, fmt.Errorf( "Failed to get interface addresses: %v", err)
+ }
+ for _, addr := range addrs {
+ ipnet, ok := addr.(*net.IPNet)
+ if !ok {
+ continue
+ }
+
+ if ipnet.IP.IsLoopback() {
+ continue
+ }
+
+ ips = append(ips, &ipnet.IP)
+ }
+ return ips, nil
+}
+
+var localhostIPNets []*net.IPNet
+
+func init() {
+ for _, mask := range []string{"127.0.0.1/8", "::1/128"} {
+ _, ipnet, err := net.ParseCIDR(mask)
+ if err != nil {
+ panic(err)
+ }
+ localhostIPNets = append(localhostIPNets, ipnet)
+ }
+}
+
+func IsLocalhostIp(ipStr string) bool {
+ ip := net.ParseIP(ipStr)
+ if ip == nil {
+ return false
+ }
+ for _, ipnet := range localhostIPNets {
+ if ipnet.Contains(ip) {
+ return true
+ }
+ }
+ return false
+}
+
+// Given a host string, return true if the host is an ip (v4/v6) localhost.
+func IsLocalhost(host string) bool {
+ return IsLocalhostIp(host) ||
+ host == "localhost" ||
+ host == "ip6-localhost" ||
+ host == "ipv6-localhost"
+}
+
+// Resolves hostnames in addresses to actual IP4 addresses. Skips all invalid addresses
+// and all addresses that can't be resolved.
+// `addrs` are assumed to be of form: ["<hostname>:<port>", ...]
+// Returns an error in addition to resolved addresses if not all resolutions succeed.
+func ResolveIP4s(addrs []string) ([]string, error) {
+ resolvedAddrs := make([]string, 0, len(addrs))
+ var lastErr error
+
+ for _, server := range addrs {
+ hostPort := strings.Split(server, ":")
+ if len(hostPort) != 2 {
+ lastErr = fmt.Errorf("Skipping invalid address: %s", server)
+ continue
+ }
+
+ ip, err := net.ResolveIPAddr("ip4", hostPort[0])
+ if err != nil {
+ lastErr = err
+ continue
+ }
+ resolvedAddrs = append(resolvedAddrs, ip.IP.String()+":"+hostPort[1])
+ }
+ return resolvedAddrs, lastErr
+}
+
+func LookupValidAddrs() (map[string]bool, error) {
+ hostName, err := os.Hostname()
+ if err != nil {
+ return nil, err
+ }
+ addrs, err := net.LookupHost(hostName)
+ if err != nil {
+ return nil, err
+ }
+ validAddrs := make(map[string]bool)
+ validAddrs[hostName] = true
+ for _, addr := range addrs {
+ validAddrs[addr] = true
+ }
+ // Special case localhost/127.0.0.1 so that this works on devVMs. It should
+ // have no affect in production.
+ validAddrs["127.0.0.1"] = true
+ validAddrs["localhost"] = true
+ return validAddrs, nil
+}
diff --git a/weed/wdclient/net2/managed_connection.go b/weed/wdclient/net2/managed_connection.go
new file mode 100644
index 000000000..a886210d1
--- /dev/null
+++ b/weed/wdclient/net2/managed_connection.go
@@ -0,0 +1,185 @@
+package net2
+
+import (
+ "fmt"
+ "net"
+ "time"
+
+ "errors"
+ "github.com/chrislusf/seaweedfs/weed/wdclient/resource_pool"
+)
+
+// Dial's arguments.
+type NetworkAddress struct {
+ Network string
+ Address string
+}
+
+// A connection managed by a connection pool. NOTE: SetDeadline,
+// SetReadDeadline and SetWriteDeadline are disabled for managed connections.
+// (The deadlines are set by the connection pool).
+type ManagedConn interface {
+ net.Conn
+
+ // This returns the original (network, address) entry used for creating
+ // the connection.
+ Key() NetworkAddress
+
+ // This returns the underlying net.Conn implementation.
+ RawConn() net.Conn
+
+ // This returns the connection pool which owns this connection.
+ Owner() ConnectionPool
+
+ // This indictes a user is done with the connection and releases the
+ // connection back to the connection pool.
+ ReleaseConnection() error
+
+ // This indicates the connection is an invalid state, and that the
+ // connection should be discarded from the connection pool.
+ DiscardConnection() error
+}
+
+// A physical implementation of ManagedConn
+type managedConnImpl struct {
+ addr NetworkAddress
+ handle resource_pool.ManagedHandle
+ pool ConnectionPool
+ options ConnectionOptions
+}
+
+// This creates a managed connection wrapper.
+func NewManagedConn(
+ network string,
+ address string,
+ handle resource_pool.ManagedHandle,
+ pool ConnectionPool,
+ options ConnectionOptions) ManagedConn {
+
+ addr := NetworkAddress{
+ Network: network,
+ Address: address,
+ }
+
+ return &managedConnImpl{
+ addr: addr,
+ handle: handle,
+ pool: pool,
+ options: options,
+ }
+}
+
+func (c *managedConnImpl) rawConn() (net.Conn, error) {
+ h, err := c.handle.Handle()
+ return h.(net.Conn), err
+}
+
+// See ManagedConn for documentation.
+func (c *managedConnImpl) RawConn() net.Conn {
+ h, _ := c.handle.Handle()
+ return h.(net.Conn)
+}
+
+// See ManagedConn for documentation.
+func (c *managedConnImpl) Key() NetworkAddress {
+ return c.addr
+}
+
+// See ManagedConn for documentation.
+func (c *managedConnImpl) Owner() ConnectionPool {
+ return c.pool
+}
+
+// See ManagedConn for documentation.
+func (c *managedConnImpl) ReleaseConnection() error {
+ return c.handle.Release()
+}
+
+// See ManagedConn for documentation.
+func (c *managedConnImpl) DiscardConnection() error {
+ return c.handle.Discard()
+}
+
+// See net.Conn for documentation
+func (c *managedConnImpl) Read(b []byte) (n int, err error) {
+ conn, err := c.rawConn()
+ if err != nil {
+ return 0, err
+ }
+
+ if c.options.ReadTimeout > 0 {
+ deadline := c.options.getCurrentTime().Add(c.options.ReadTimeout)
+ _ = conn.SetReadDeadline(deadline)
+ }
+ n, err = conn.Read(b)
+ if err != nil {
+ var localAddr string
+ if conn.LocalAddr() != nil {
+ localAddr = conn.LocalAddr().String()
+ } else {
+ localAddr = "(nil)"
+ }
+
+ var remoteAddr string
+ if conn.RemoteAddr() != nil {
+ remoteAddr = conn.RemoteAddr().String()
+ } else {
+ remoteAddr = "(nil)"
+ }
+ err = fmt.Errorf("Read error from host: %s <-> %s: %v", localAddr, remoteAddr, err)
+ }
+ return
+}
+
+// See net.Conn for documentation
+func (c *managedConnImpl) Write(b []byte) (n int, err error) {
+ conn, err := c.rawConn()
+ if err != nil {
+ return 0, err
+ }
+
+ if c.options.WriteTimeout > 0 {
+ deadline := c.options.getCurrentTime().Add(c.options.WriteTimeout)
+ _ = conn.SetWriteDeadline(deadline)
+ }
+ n, err = conn.Write(b)
+ if err != nil {
+ err = fmt.Errorf("Write error: %v", err)
+ }
+ return
+}
+
+// See net.Conn for documentation
+func (c *managedConnImpl) Close() error {
+ return c.handle.Discard()
+}
+
+// See net.Conn for documentation
+func (c *managedConnImpl) LocalAddr() net.Addr {
+ conn, _ := c.rawConn()
+ return conn.LocalAddr()
+}
+
+// See net.Conn for documentation
+func (c *managedConnImpl) RemoteAddr() net.Addr {
+ conn, _ := c.rawConn()
+ return conn.RemoteAddr()
+}
+
+// SetDeadline is disabled for managed connection (The deadline is set by
+// us, with respect to the read/write timeouts specified in ConnectionOptions).
+func (c *managedConnImpl) SetDeadline(t time.Time) error {
+ return errors.New("Cannot set deadline for managed connection")
+}
+
+// SetReadDeadline is disabled for managed connection (The deadline is set by
+// us with respect to the read timeout specified in ConnectionOptions).
+func (c *managedConnImpl) SetReadDeadline(t time.Time) error {
+ return errors.New("Cannot set read deadline for managed connection")
+}
+
+// SetWriteDeadline is disabled for managed connection (The deadline is set by
+// us with respect to the write timeout specified in ConnectionOptions).
+func (c *managedConnImpl) SetWriteDeadline(t time.Time) error {
+ return errors.New("Cannot set write deadline for managed connection")
+}
diff --git a/weed/wdclient/net2/port.go b/weed/wdclient/net2/port.go
new file mode 100644
index 000000000..f83adba28
--- /dev/null
+++ b/weed/wdclient/net2/port.go
@@ -0,0 +1,19 @@
+package net2
+
+import (
+ "net"
+ "strconv"
+)
+
+// Returns the port information.
+func GetPort(addr net.Addr) (int, error) {
+ _, lport, err := net.SplitHostPort(addr.String())
+ if err != nil {
+ return -1, err
+ }
+ lportInt, err := strconv.Atoi(lport)
+ if err != nil {
+ return -1, err
+ }
+ return lportInt, nil
+}
diff --git a/weed/wdclient/resource_pool/doc.go b/weed/wdclient/resource_pool/doc.go
new file mode 100644
index 000000000..c17b17c6c
--- /dev/null
+++ b/weed/wdclient/resource_pool/doc.go
@@ -0,0 +1,5 @@
+// A generic resource pool for managing resources such as network connections.
+package resource_pool
+
+// copied from https://github.com/dropbox/godropbox/tree/master/resource_pool
+// removed other dependencies \ No newline at end of file
diff --git a/weed/wdclient/resource_pool/managed_handle.go b/weed/wdclient/resource_pool/managed_handle.go
new file mode 100644
index 000000000..e1d82ca7b
--- /dev/null
+++ b/weed/wdclient/resource_pool/managed_handle.go
@@ -0,0 +1,97 @@
+package resource_pool
+
+import (
+ "sync/atomic"
+
+ "errors"
+)
+
+// A resource handle managed by a resource pool.
+type ManagedHandle interface {
+ // This returns the handle's resource location.
+ ResourceLocation() string
+
+ // This returns the underlying resource handle (or error if the handle
+ // is no longer active).
+ Handle() (interface{}, error)
+
+ // This returns the resource pool which owns this handle.
+ Owner() ResourcePool
+
+ // The releases the underlying resource handle to the caller and marks the
+ // managed handle as inactive. The caller is responsible for cleaning up
+ // the released handle. This returns nil if the managed handle no longer
+ // owns the resource.
+ ReleaseUnderlyingHandle() interface{}
+
+ // This indictes a user is done with the handle and releases the handle
+ // back to the resource pool.
+ Release() error
+
+ // This indicates the handle is an invalid state, and that the
+ // connection should be discarded from the connection pool.
+ Discard() error
+}
+
+// A physical implementation of ManagedHandle
+type managedHandleImpl struct {
+ location string
+ handle interface{}
+ pool ResourcePool
+ isActive int32 // atomic bool
+ options Options
+}
+
+// This creates a managed handle wrapper.
+func NewManagedHandle(
+ resourceLocation string,
+ handle interface{},
+ pool ResourcePool,
+ options Options) ManagedHandle {
+
+ h := &managedHandleImpl{
+ location: resourceLocation,
+ handle: handle,
+ pool: pool,
+ options: options,
+ }
+ atomic.StoreInt32(&h.isActive, 1)
+
+ return h
+}
+
+// See ManagedHandle for documentation.
+func (c *managedHandleImpl) ResourceLocation() string {
+ return c.location
+}
+
+// See ManagedHandle for documentation.
+func (c *managedHandleImpl) Handle() (interface{}, error) {
+ if atomic.LoadInt32(&c.isActive) == 0 {
+ return c.handle, errors.New("Resource handle is no longer valid")
+ }
+ return c.handle, nil
+}
+
+// See ManagedHandle for documentation.
+func (c *managedHandleImpl) Owner() ResourcePool {
+ return c.pool
+}
+
+// See ManagedHandle for documentation.
+func (c *managedHandleImpl) ReleaseUnderlyingHandle() interface{} {
+ if atomic.CompareAndSwapInt32(&c.isActive, 1, 0) {
+ return c.handle
+ }
+ return nil
+}
+
+// See ManagedHandle for documentation.
+func (c *managedHandleImpl) Release() error {
+ return c.pool.Release(c)
+}
+
+// See ManagedHandle for documentation.
+func (c *managedHandleImpl) Discard() error {
+ return c.pool.Discard(c)
+}
diff --git a/weed/wdclient/resource_pool/multi_resource_pool.go b/weed/wdclient/resource_pool/multi_resource_pool.go
new file mode 100644
index 000000000..9ac25526d
--- /dev/null
+++ b/weed/wdclient/resource_pool/multi_resource_pool.go
@@ -0,0 +1,200 @@
+package resource_pool
+
+import (
+ "fmt"
+ "sync"
+
+ "errors"
+)
+
+// A resource pool implementation that manages multiple resource location
+// entries. The handles to each resource location entry acts independently.
+// For example "tcp localhost:11211" could act as memcache
+// shard 0 and "tcp localhost:11212" could act as memcache shard 1.
+type multiResourcePool struct {
+ options Options
+
+ createPool func(Options) ResourcePool
+
+ rwMutex sync.RWMutex
+ isLameDuck bool // guarded by rwMutex
+ // NOTE: the locationPools is guarded by rwMutex, but the pool entries
+ // are not.
+ locationPools map[string]ResourcePool
+}
+
+// This returns a MultiResourcePool, which manages multiple
+// resource location entries. The handles to each resource location
+// entry acts independently.
+//
+// When createPool is nil, NewSimpleResourcePool is used as default.
+func NewMultiResourcePool(
+ options Options,
+ createPool func(Options) ResourcePool) ResourcePool {
+
+ if createPool == nil {
+ createPool = NewSimpleResourcePool
+ }
+
+ return &multiResourcePool{
+ options: options,
+ createPool: createPool,
+ rwMutex: sync.RWMutex{},
+ isLameDuck: false,
+ locationPools: make(map[string]ResourcePool),
+ }
+}
+
+// See ResourcePool for documentation.
+func (p *multiResourcePool) NumActive() int32 {
+ total := int32(0)
+
+ p.rwMutex.RLock()
+ defer p.rwMutex.RUnlock()
+
+ for _, pool := range p.locationPools {
+ total += pool.NumActive()
+ }
+ return total
+}
+
+// See ResourcePool for documentation.
+func (p *multiResourcePool) ActiveHighWaterMark() int32 {
+ high := int32(0)
+
+ p.rwMutex.RLock()
+ defer p.rwMutex.RUnlock()
+
+ for _, pool := range p.locationPools {
+ val := pool.ActiveHighWaterMark()
+ if val > high {
+ high = val
+ }
+ }
+ return high
+}
+
+// See ResourcePool for documentation.
+func (p *multiResourcePool) NumIdle() int {
+ total := 0
+
+ p.rwMutex.RLock()
+ defer p.rwMutex.RUnlock()
+
+ for _, pool := range p.locationPools {
+ total += pool.NumIdle()
+ }
+ return total
+}
+
+// See ResourcePool for documentation.
+func (p *multiResourcePool) Register(resourceLocation string) error {
+ if resourceLocation == "" {
+ return errors.New("Registering invalid resource location")
+ }
+
+ p.rwMutex.Lock()
+ defer p.rwMutex.Unlock()
+
+ if p.isLameDuck {
+ return fmt.Errorf(
+ "Cannot register %s to lame duck resource pool",
+ resourceLocation)
+ }
+
+ if _, inMap := p.locationPools[resourceLocation]; inMap {
+ return nil
+ }
+
+ pool := p.createPool(p.options)
+ if err := pool.Register(resourceLocation); err != nil {
+ return err
+ }
+
+ p.locationPools[resourceLocation] = pool
+ return nil
+}
+
+// See ResourcePool for documentation.
+func (p *multiResourcePool) Unregister(resourceLocation string) error {
+ p.rwMutex.Lock()
+ defer p.rwMutex.Unlock()
+
+ if pool, inMap := p.locationPools[resourceLocation]; inMap {
+ _ = pool.Unregister("")
+ pool.EnterLameDuckMode()
+ delete(p.locationPools, resourceLocation)
+ }
+ return nil
+}
+
+func (p *multiResourcePool) ListRegistered() []string {
+ p.rwMutex.RLock()
+ defer p.rwMutex.RUnlock()
+
+ result := make([]string, 0, len(p.locationPools))
+ for key, _ := range p.locationPools {
+ result = append(result, key)
+ }
+
+ return result
+}
+
+// See ResourcePool for documentation.
+func (p *multiResourcePool) Get(
+ resourceLocation string) (ManagedHandle, error) {
+
+ pool := p.getPool(resourceLocation)
+ if pool == nil {
+ return nil, fmt.Errorf(
+ "%s is not registered in the resource pool",
+ resourceLocation)
+ }
+ return pool.Get(resourceLocation)
+}
+
+// See ResourcePool for documentation.
+func (p *multiResourcePool) Release(handle ManagedHandle) error {
+ pool := p.getPool(handle.ResourceLocation())
+ if pool == nil {
+ return errors.New(
+ "Resource pool cannot take control of a handle owned " +
+ "by another resource pool")
+ }
+
+ return pool.Release(handle)
+}
+
+// See ResourcePool for documentation.
+func (p *multiResourcePool) Discard(handle ManagedHandle) error {
+ pool := p.getPool(handle.ResourceLocation())
+ if pool == nil {
+ return errors.New(
+ "Resource pool cannot take control of a handle owned " +
+ "by another resource pool")
+ }
+
+ return pool.Discard(handle)
+}
+
+// See ResourcePool for documentation.
+func (p *multiResourcePool) EnterLameDuckMode() {
+ p.rwMutex.Lock()
+ defer p.rwMutex.Unlock()
+
+ p.isLameDuck = true
+
+ for _, pool := range p.locationPools {
+ pool.EnterLameDuckMode()
+ }
+}
+
+func (p *multiResourcePool) getPool(resourceLocation string) ResourcePool {
+ p.rwMutex.RLock()
+ defer p.rwMutex.RUnlock()
+
+ if pool, inMap := p.locationPools[resourceLocation]; inMap {
+ return pool
+ }
+ return nil
+}
diff --git a/weed/wdclient/resource_pool/resource_pool.go b/weed/wdclient/resource_pool/resource_pool.go
new file mode 100644
index 000000000..26c433f50
--- /dev/null
+++ b/weed/wdclient/resource_pool/resource_pool.go
@@ -0,0 +1,96 @@
+package resource_pool
+
+import (
+ "time"
+)
+
+type Options struct {
+ // The maximum number of active resource handles per resource location. (A
+ // non-positive value indicates the number of active resource handles is
+ // unbounded).
+ MaxActiveHandles int32
+
+ // The maximum number of idle resource handles per resource location that
+ // are kept alive by the resource pool.
+ MaxIdleHandles uint32
+
+ // The maximum amount of time an idle resource handle can remain alive (if
+ // specified).
+ MaxIdleTime *time.Duration
+
+ // This limits the number of concurrent Open calls (there's no limit when
+ // OpenMaxConcurrency is non-positive).
+ OpenMaxConcurrency int
+
+ // This function creates a resource handle (e.g., a connection) for a
+ // resource location. The function must be thread-safe.
+ Open func(resourceLocation string) (
+ handle interface{},
+ err error)
+
+ // This function destroys a resource handle and performs the necessary
+ // cleanup to free up resources. The function must be thread-safe.
+ Close func(handle interface{}) error
+
+ // This specifies the now time function. When the function is non-nil, the
+ // resource pool will use the specified function instead of time.Now to
+ // generate the current time.
+ NowFunc func() time.Time
+}
+
+func (o Options) getCurrentTime() time.Time {
+ if o.NowFunc == nil {
+ return time.Now()
+ } else {
+ return o.NowFunc()
+ }
+}
+
+// A generic interface for managed resource pool. All resource pool
+// implementations must be threadsafe.
+type ResourcePool interface {
+ // This returns the number of active resource handles.
+ NumActive() int32
+
+ // This returns the highest number of actives handles for the entire
+ // lifetime of the pool. If the pool contains multiple sub-pools, the
+ // high water mark is the max of the sub-pools' high water marks.
+ ActiveHighWaterMark() int32
+
+ // This returns the number of alive idle handles. NOTE: This is only used
+ // for testing.
+ NumIdle() int
+
+ // This associates a resource location to the resource pool; afterwhich,
+ // the user can get resource handles for the resource location.
+ Register(resourceLocation string) error
+
+ // This dissociates a resource location from the resource pool; afterwhich,
+ // the user can no longer get resource handles for the resource location.
+ // If the given resource location corresponds to a sub-pool, the unregistered
+ // sub-pool will enter lame duck mode.
+ Unregister(resourceLocation string) error
+
+ // This returns the list of registered resource location entries.
+ ListRegistered() []string
+
+ // This gets an active resource handle from the resource pool. The
+ // handle will remain active until one of the following is called:
+ // 1. handle.Release()
+ // 2. handle.Discard()
+ // 3. pool.Release(handle)
+ // 4. pool.Discard(handle)
+ Get(key string) (ManagedHandle, error)
+
+ // This releases an active resource handle back to the resource pool.
+ Release(handle ManagedHandle) error
+
+ // This discards an active resource from the resource pool.
+ Discard(handle ManagedHandle) error
+
+ // Enter the resource pool into lame duck mode. The resource pool
+ // will no longer return resource handles, and all idle resource handles
+ // are closed immediately (including active resource handles that are
+ // released back to the pool afterward).
+ EnterLameDuckMode()
+}
diff --git a/weed/wdclient/resource_pool/semaphore.go b/weed/wdclient/resource_pool/semaphore.go
new file mode 100644
index 000000000..ff35d5bc5
--- /dev/null
+++ b/weed/wdclient/resource_pool/semaphore.go
@@ -0,0 +1,154 @@
+package resource_pool
+
+import (
+ "fmt"
+ "sync"
+ "sync/atomic"
+ "time"
+)
+
+type Semaphore interface {
+ // Increment the semaphore counter by one.
+ Release()
+
+ // Decrement the semaphore counter by one, and block if counter < 0
+ Acquire()
+
+ // Decrement the semaphore counter by one, and block if counter < 0
+ // Wait for up to the given duration. Returns true if did not timeout
+ TryAcquire(timeout time.Duration) bool
+}
+
+// A simple counting Semaphore.
+type boundedSemaphore struct {
+ slots chan struct{}
+}
+
+// Create a bounded semaphore. The count parameter must be a positive number.
+// NOTE: The bounded semaphore will panic if the user tries to Release
+// beyond the specified count.
+func NewBoundedSemaphore(count uint) Semaphore {
+ sem := &boundedSemaphore{
+ slots: make(chan struct{}, int(count)),
+ }
+ for i := 0; i < cap(sem.slots); i++ {
+ sem.slots <- struct{}{}
+ }
+ return sem
+}
+
+// Acquire returns on successful acquisition.
+func (sem *boundedSemaphore) Acquire() {
+ <-sem.slots
+}
+
+// TryAcquire returns true if it acquires a resource slot within the
+// timeout, false otherwise.
+func (sem *boundedSemaphore) TryAcquire(timeout time.Duration) bool {
+ if timeout > 0 {
+ // Wait until we get a slot or timeout expires.
+ tm := time.NewTimer(timeout)
+ defer tm.Stop()
+ select {
+ case <-sem.slots:
+ return true
+ case <-tm.C:
+ // Timeout expired. In very rare cases this might happen even if
+ // there is a slot available, e.g. GC pause after we create the timer
+ // and select randomly picked this one out of the two available channels.
+ // We should do one final immediate check below.
+ }
+ }
+
+ // Return true if we have a slot available immediately and false otherwise.
+ select {
+ case <-sem.slots:
+ return true
+ default:
+ return false
+ }
+}
+
+// Release the acquired semaphore. You must not release more than you
+// have acquired.
+func (sem *boundedSemaphore) Release() {
+ select {
+ case sem.slots <- struct{}{}:
+ default:
+ // slots is buffered. If a send blocks, it indicates a programming
+ // error.
+ panic(fmt.Errorf("too many releases for boundedSemaphore"))
+ }
+}
+
+// This returns an unbound counting semaphore with the specified initial count.
+// The semaphore counter can be arbitrary large (i.e., Release can be called
+// unlimited amount of times).
+//
+// NOTE: In general, users should use bounded semaphore since it is more
+// efficient than unbounded semaphore.
+func NewUnboundedSemaphore(initialCount int) Semaphore {
+ res := &unboundedSemaphore{
+ counter: int64(initialCount),
+ }
+ res.cond.L = &res.lock
+ return res
+}
+
+type unboundedSemaphore struct {
+ lock sync.Mutex
+ cond sync.Cond
+ counter int64
+}
+
+func (s *unboundedSemaphore) Release() {
+ s.lock.Lock()
+ s.counter += 1
+ if s.counter > 0 {
+ // Not broadcasting here since it's unlike we can satify all waiting
+ // goroutines. Instead, we will Signal again if there are left over
+ // quota after Acquire, in case of lost wakeups.
+ s.cond.Signal()
+ }
+ s.lock.Unlock()
+}
+
+func (s *unboundedSemaphore) Acquire() {
+ s.lock.Lock()
+ for s.counter < 1 {
+ s.cond.Wait()
+ }
+ s.counter -= 1
+ if s.counter > 0 {
+ s.cond.Signal()
+ }
+ s.lock.Unlock()
+}
+
+func (s *unboundedSemaphore) TryAcquire(timeout time.Duration) bool {
+ done := make(chan bool, 1)
+ // Gate used to communicate between the threads and decide what the result
+ // is. If the main thread decides, we have timed out, otherwise we succeed.
+ decided := new(int32)
+ atomic.StoreInt32(decided, 0)
+ go func() {
+ s.Acquire()
+ if atomic.SwapInt32(decided, 1) == 0 {
+ // Acquire won the race
+ done <- true
+ } else {
+ // If we already decided the result, and this thread did not win
+ s.Release()
+ }
+ }()
+ select {
+ case <-done:
+ return true
+ case <-time.After(timeout):
+ if atomic.SwapInt32(decided, 1) == 1 {
+ // The other thread already decided the result
+ return true
+ }
+ return false
+ }
+}
diff --git a/weed/wdclient/resource_pool/simple_resource_pool.go b/weed/wdclient/resource_pool/simple_resource_pool.go
new file mode 100644
index 000000000..b0c539100
--- /dev/null
+++ b/weed/wdclient/resource_pool/simple_resource_pool.go
@@ -0,0 +1,343 @@
+package resource_pool
+
+import (
+ "errors"
+ "fmt"
+ "sync"
+ "sync/atomic"
+ "time"
+)
+
+type idleHandle struct {
+ handle interface{}
+ keepUntil *time.Time
+}
+
+type TooManyHandles struct {
+ location string
+}
+
+func (t TooManyHandles) Error() string {
+ return fmt.Sprintf("Too many handles to %s", t.location)
+}
+
+type OpenHandleError struct {
+ location string
+ err error
+}
+
+func (o OpenHandleError) Error() string {
+ return fmt.Sprintf("Failed to open resource handle: %s (%v)", o.location, o.err)
+}
+
+// A resource pool implementation where all handles are associated to the
+// same resource location.
+type simpleResourcePool struct {
+ options Options
+
+ numActive *int32 // atomic counter
+
+ activeHighWaterMark *int32 // atomic / monotonically increasing value
+
+ openTokens Semaphore
+
+ mutex sync.Mutex
+ location string // guard by mutex
+ idleHandles []*idleHandle // guarded by mutex
+ isLameDuck bool // guarded by mutex
+}
+
+// This returns a SimpleResourcePool, where all handles are associated to a
+// single resource location.
+func NewSimpleResourcePool(options Options) ResourcePool {
+ numActive := new(int32)
+ atomic.StoreInt32(numActive, 0)
+
+ activeHighWaterMark := new(int32)
+ atomic.StoreInt32(activeHighWaterMark, 0)
+
+ var tokens Semaphore
+ if options.OpenMaxConcurrency > 0 {
+ tokens = NewBoundedSemaphore(uint(options.OpenMaxConcurrency))
+ }
+
+ return &simpleResourcePool{
+ location: "",
+ options: options,
+ numActive: numActive,
+ activeHighWaterMark: activeHighWaterMark,
+ openTokens: tokens,
+ mutex: sync.Mutex{},
+ idleHandles: make([]*idleHandle, 0, 0),
+ isLameDuck: false,
+ }
+}
+
+// See ResourcePool for documentation.
+func (p *simpleResourcePool) NumActive() int32 {
+ return atomic.LoadInt32(p.numActive)
+}
+
+// See ResourcePool for documentation.
+func (p *simpleResourcePool) ActiveHighWaterMark() int32 {
+ return atomic.LoadInt32(p.activeHighWaterMark)
+}
+
+// See ResourcePool for documentation.
+func (p *simpleResourcePool) NumIdle() int {
+ p.mutex.Lock()
+ defer p.mutex.Unlock()
+ return len(p.idleHandles)
+}
+
+// SimpleResourcePool can only register a single (network, address) entry.
+// Register should be call before any Get calls.
+func (p *simpleResourcePool) Register(resourceLocation string) error {
+ if resourceLocation == "" {
+ return errors.New("Invalid resource location")
+ }
+
+ p.mutex.Lock()
+ defer p.mutex.Unlock()
+
+ if p.isLameDuck {
+ return fmt.Errorf(
+ "cannot register %s to lame duck resource pool",
+ resourceLocation)
+ }
+
+ if p.location == "" {
+ p.location = resourceLocation
+ return nil
+ }
+ return errors.New("SimpleResourcePool can only register one location")
+}
+
+// SimpleResourcePool will enter lame duck mode upon calling Unregister.
+func (p *simpleResourcePool) Unregister(resourceLocation string) error {
+ p.EnterLameDuckMode()
+ return nil
+}
+
+func (p *simpleResourcePool) ListRegistered() []string {
+ p.mutex.Lock()
+ defer p.mutex.Unlock()
+
+ if p.location != "" {
+ return []string{p.location}
+ }
+ return []string{}
+}
+
+func (p *simpleResourcePool) getLocation() (string, error) {
+ p.mutex.Lock()
+ defer p.mutex.Unlock()
+
+ if p.location == "" {
+ return "", fmt.Errorf(
+ "resource location is not set for SimpleResourcePool")
+ }
+
+ if p.isLameDuck {
+ return "", fmt.Errorf(
+ "lame duck resource pool cannot return handles to %s",
+ p.location)
+ }
+
+ return p.location, nil
+}
+
+// This gets an active resource from the resource pool. Note that the
+// resourceLocation argument is ignored (The handles are associated to the
+// resource location provided by the first Register call).
+func (p *simpleResourcePool) Get(unused string) (ManagedHandle, error) {
+ activeCount := atomic.AddInt32(p.numActive, 1)
+ if p.options.MaxActiveHandles > 0 &&
+ activeCount > p.options.MaxActiveHandles {
+
+ atomic.AddInt32(p.numActive, -1)
+ return nil, TooManyHandles{p.location}
+ }
+
+ highest := atomic.LoadInt32(p.activeHighWaterMark)
+ for activeCount > highest &&
+ !atomic.CompareAndSwapInt32(
+ p.activeHighWaterMark,
+ highest,
+ activeCount) {
+
+ highest = atomic.LoadInt32(p.activeHighWaterMark)
+ }
+
+ if h := p.getIdleHandle(); h != nil {
+ return h, nil
+ }
+
+ location, err := p.getLocation()
+ if err != nil {
+ atomic.AddInt32(p.numActive, -1)
+ return nil, err
+ }
+
+ if p.openTokens != nil {
+ // Current implementation does not wait for tokens to become available.
+ // If that causes availability hits, we could increase the wait,
+ // similar to simple_pool.go.
+ if p.openTokens.TryAcquire(0) {
+ defer p.openTokens.Release()
+ } else {
+ // We could not immediately acquire a token.
+ // Instead of waiting
+ atomic.AddInt32(p.numActive, -1)
+ return nil, OpenHandleError{
+ p.location, errors.New("Open Error: reached OpenMaxConcurrency")}
+ }
+ }
+
+ handle, err := p.options.Open(location)
+ if err != nil {
+ atomic.AddInt32(p.numActive, -1)
+ return nil, OpenHandleError{p.location, err}
+ }
+
+ return NewManagedHandle(p.location, handle, p, p.options), nil
+}
+
+// See ResourcePool for documentation.
+func (p *simpleResourcePool) Release(handle ManagedHandle) error {
+ if pool, ok := handle.Owner().(*simpleResourcePool); !ok || pool != p {
+ return errors.New(
+ "Resource pool cannot take control of a handle owned " +
+ "by another resource pool")
+ }
+
+ h := handle.ReleaseUnderlyingHandle()
+ if h != nil {
+ // We can unref either before or after queuing the idle handle.
+ // The advantage of unref-ing before queuing is that there is
+ // a higher chance of successful Get when number of active handles
+ // is close to the limit (but potentially more handle creation).
+ // The advantage of queuing before unref-ing is that there's a
+ // higher chance of reusing handle (but potentially more Get failures).
+ atomic.AddInt32(p.numActive, -1)
+ p.queueIdleHandles(h)
+ }
+
+ return nil
+}
+
+// See ResourcePool for documentation.
+func (p *simpleResourcePool) Discard(handle ManagedHandle) error {
+ if pool, ok := handle.Owner().(*simpleResourcePool); !ok || pool != p {
+ return errors.New(
+ "Resource pool cannot take control of a handle owned " +
+ "by another resource pool")
+ }
+
+ h := handle.ReleaseUnderlyingHandle()
+ if h != nil {
+ atomic.AddInt32(p.numActive, -1)
+ if err := p.options.Close(h); err != nil {
+ return fmt.Errorf("failed to close resource handle: %v", err)
+ }
+ }
+ return nil
+}
+
+// See ResourcePool for documentation.
+func (p *simpleResourcePool) EnterLameDuckMode() {
+ p.mutex.Lock()
+
+ toClose := p.idleHandles
+ p.isLameDuck = true
+ p.idleHandles = []*idleHandle{}
+
+ p.mutex.Unlock()
+
+ p.closeHandles(toClose)
+}
+
+// This returns an idle resource, if there is one.
+func (p *simpleResourcePool) getIdleHandle() ManagedHandle {
+ var toClose []*idleHandle
+ defer func() {
+ // NOTE: Must keep the closure around to late bind the toClose slice.
+ p.closeHandles(toClose)
+ }()
+
+ now := p.options.getCurrentTime()
+
+ p.mutex.Lock()
+ defer p.mutex.Unlock()
+
+ var i int
+ for i = 0; i < len(p.idleHandles); i++ {
+ idle := p.idleHandles[i]
+ if idle.keepUntil == nil || now.Before(*idle.keepUntil) {
+ break
+ }
+ }
+ if i > 0 {
+ toClose = p.idleHandles[0:i]
+ }
+
+ if i < len(p.idleHandles) {
+ idle := p.idleHandles[i]
+ p.idleHandles = p.idleHandles[i+1:]
+ return NewManagedHandle(p.location, idle.handle, p, p.options)
+ }
+
+ if len(p.idleHandles) > 0 {
+ p.idleHandles = []*idleHandle{}
+ }
+ return nil
+}
+
+// This adds an idle resource to the pool.
+func (p *simpleResourcePool) queueIdleHandles(handle interface{}) {
+ var toClose []*idleHandle
+ defer func() {
+ // NOTE: Must keep the closure around to late bind the toClose slice.
+ p.closeHandles(toClose)
+ }()
+
+ now := p.options.getCurrentTime()
+ var keepUntil *time.Time
+ if p.options.MaxIdleTime != nil {
+ // NOTE: Assign to temp variable first to work around compiler bug
+ x := now.Add(*p.options.MaxIdleTime)
+ keepUntil = &x
+ }
+
+ p.mutex.Lock()
+ defer p.mutex.Unlock()
+
+ if p.isLameDuck {
+ toClose = []*idleHandle{
+ {handle: handle},
+ }
+ return
+ }
+
+ p.idleHandles = append(
+ p.idleHandles,
+ &idleHandle{
+ handle: handle,
+ keepUntil: keepUntil,
+ })
+
+ nIdleHandles := uint32(len(p.idleHandles))
+ if nIdleHandles > p.options.MaxIdleHandles {
+ handlesToClose := nIdleHandles - p.options.MaxIdleHandles
+ toClose = p.idleHandles[0:handlesToClose]
+ p.idleHandles = p.idleHandles[handlesToClose:nIdleHandles]
+ }
+}
+
+// Closes resources, at this point it is assumed that this resources
+// are no longer referenced from the main idleHandles slice.
+func (p *simpleResourcePool) closeHandles(handles []*idleHandle) {
+ for _, handle := range handles {
+ _ = p.options.Close(handle.handle)
+ }
+}
diff --git a/weed/wdclient/volume_tcp_client.go b/weed/wdclient/volume_tcp_client.go
new file mode 100644
index 000000000..afebd71eb
--- /dev/null
+++ b/weed/wdclient/volume_tcp_client.go
@@ -0,0 +1,97 @@
+package wdclient
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/pb"
+ "github.com/chrislusf/seaweedfs/weed/util"
+ "github.com/chrislusf/seaweedfs/weed/wdclient/net2"
+ "io"
+ "net"
+ "time"
+)
+
+// VolumeTcpClient put/get/delete file chunks directly on volume servers without replication
+type VolumeTcpClient struct {
+ cp net2.ConnectionPool
+}
+
+type VolumeTcpConn struct {
+ net.Conn
+ bufWriter *bufio.Writer
+ bufReader *bufio.Reader
+}
+
+func NewVolumeTcpClient() *VolumeTcpClient {
+ MaxIdleTime := 10 * time.Second
+ return &VolumeTcpClient{
+ cp: net2.NewMultiConnectionPool(net2.ConnectionOptions{
+ MaxActiveConnections: 16,
+ MaxIdleConnections: 1,
+ MaxIdleTime: &MaxIdleTime,
+ DialMaxConcurrency: 0,
+ Dial: func(network string, address string) (net.Conn, error) {
+ conn, err := net.Dial(network, address)
+ return &VolumeTcpConn{
+ conn,
+ bufio.NewWriter(conn),
+ bufio.NewReader(conn),
+ }, err
+ },
+ NowFunc: nil,
+ ReadTimeout: 0,
+ WriteTimeout: 0,
+ }),
+ }
+}
+func (c *VolumeTcpClient) PutFileChunk(volumeServerAddress string, fileId string, fileSize uint32, fileReader io.Reader) (err error) {
+
+ tcpAddress, parseErr := pb.ParseServerAddress(volumeServerAddress, 20000)
+ if parseErr != nil {
+ return parseErr
+ }
+
+ c.cp.Register("tcp", tcpAddress)
+ tcpConn, getErr := c.cp.Get("tcp", tcpAddress)
+ if getErr != nil {
+ return fmt.Errorf("get connection to %s: %v", tcpAddress, getErr)
+ }
+ conn := tcpConn.RawConn().(*VolumeTcpConn)
+ defer func() {
+ if err != nil {
+ tcpConn.DiscardConnection()
+ } else {
+ tcpConn.ReleaseConnection()
+ }
+ }()
+
+ buf := []byte("+" + fileId + "\n")
+ _, err = conn.bufWriter.Write([]byte(buf))
+ if err != nil {
+ return
+ }
+ util.Uint32toBytes(buf[0:4], fileSize)
+ _, err = conn.bufWriter.Write(buf[0:4])
+ if err != nil {
+ return
+ }
+ _, err = io.Copy(conn.bufWriter, fileReader)
+ if err != nil {
+ return
+ }
+ conn.bufWriter.Write([]byte("!\n"))
+ conn.bufWriter.Flush()
+
+ ret, _, err := conn.bufReader.ReadLine()
+ if err != nil {
+ glog.V(0).Infof("upload by tcp: %v", err)
+ return
+ }
+ if !bytes.HasPrefix(ret, []byte("+OK")) {
+ glog.V(0).Infof("upload by tcp: %v", string(ret))
+ }
+
+ return nil
+}