author Chris Lu <chris.lu@gmail.com> 2016-06-02 18:09:14 -0700
committer Chris Lu <chris.lu@gmail.com> 2016-06-02 18:09:14 -0700
commit 5ce6bbf07672bf3f3c8d26cd2ce0e3e853a47c44 (patch)
tree 2e4dd2ad0a618ab2b7cdebcdb9c503526c31e2e8 /weed
parent caeffa3998adc060fa66c4cd77af971ff2d26c57 (diff)
directory structure change to work with glide
Glide has its own layout requirements. My previous workaround caused some code check-in errors, so this needs to be fixed.
Diffstat (limited to 'weed')
-rw-r--r-- weed/command/backup.go | 90
-rw-r--r-- weed/command/benchmark.go | 532
-rw-r--r-- weed/command/command.go | 71
-rw-r--r-- weed/command/compact.go | 45
-rw-r--r-- weed/command/download.go | 130
-rw-r--r-- weed/command/export.go | 213
-rw-r--r-- weed/command/filer.go | 105
-rw-r--r-- weed/command/fix.go | 70
-rw-r--r-- weed/command/master.go | 91
-rw-r--r-- weed/command/mount.go | 35
-rw-r--r-- weed/command/mount_notsupported.go | 15
-rw-r--r-- weed/command/mount_std.go | 106
-rw-r--r-- weed/command/server.go | 291
-rw-r--r-- weed/command/shell.go | 61
-rw-r--r-- weed/command/signal_handling.go | 31
-rw-r--r-- weed/command/signal_handling_notsupported.go | 6
-rw-r--r-- weed/command/upload.go | 108
-rw-r--r-- weed/command/version.go | 24
-rw-r--r-- weed/command/volume.go | 165
-rw-r--r-- weed/command/volume_test.go | 13
-rw-r--r-- weed/compress/compression_test.go | 45
-rw-r--r-- weed/compress/delta_binary_pack32.go | 32
-rw-r--r-- weed/filer/cassandra_store/cassandra_store.go | 87
-rw-r--r-- weed/filer/cassandra_store/schema.cql | 22
-rw-r--r-- weed/filer/client_operations.go | 70
-rw-r--r-- weed/filer/embedded_filer/design.txt | 26
-rw-r--r-- weed/filer/embedded_filer/directory.go | 15
-rw-r--r-- weed/filer/embedded_filer/directory_in_map.go | 310
-rw-r--r-- weed/filer/embedded_filer/directory_test.go | 86
-rw-r--r-- weed/filer/embedded_filer/filer_embedded.go | 141
-rw-r--r-- weed/filer/embedded_filer/files_in_leveldb.go | 85
-rw-r--r-- weed/filer/filer.go | 28
-rw-r--r-- weed/filer/flat_namespace/flat_namespace_filer.go | 50
-rw-r--r-- weed/filer/flat_namespace/flat_namespace_store.go | 9
-rw-r--r-- weed/filer/redis_store/redis_store.go | 48
-rw-r--r-- weed/glide.lock | 93
-rw-r--r-- weed/glide.yaml | 31
-rw-r--r-- weed/glog/LICENSE | 191
-rw-r--r-- weed/glog/README | 44
-rw-r--r-- weed/glog/convenient_api.go | 6
-rw-r--r-- weed/glog/glog.go | 1181
-rw-r--r-- weed/glog/glog_file.go | 124
-rw-r--r-- weed/glog/glog_test.go | 415
-rw-r--r-- weed/images/favicon.go | 236
-rw-r--r-- weed/images/favicon/favicon.ico | bin 0 -> 3638 bytes
-rw-r--r-- weed/images/orientation.go | 182
-rw-r--r-- weed/images/orientation_test.go | 17
-rw-r--r-- weed/images/preprocess.go | 27
-rw-r--r-- weed/images/resizing.go | 46
-rw-r--r-- weed/images/sample1.jpg | bin 0 -> 2068480 bytes
-rw-r--r-- weed/operation/assign_file_id.go | 48
-rw-r--r-- weed/operation/chunked_file.go | 213
-rw-r--r-- weed/operation/compress.go | 59
-rw-r--r-- weed/operation/data_struts.go | 7
-rw-r--r-- weed/operation/delete_content.go | 117
-rw-r--r-- weed/operation/list_masters.go | 32
-rw-r--r-- weed/operation/lookup.go | 118
-rw-r--r-- weed/operation/lookup_vid_cache.go | 51
-rw-r--r-- weed/operation/lookup_vid_cache_test.go | 26
-rw-r--r-- weed/operation/submit.go | 194
-rw-r--r-- weed/operation/sync_volume.go | 54
-rw-r--r-- weed/operation/system_message.pb.go | 203
-rw-r--r-- weed/operation/system_message_test.go | 59
-rw-r--r-- weed/operation/upload_content.go | 96
-rw-r--r-- weed/proto/Makefile | 4
-rw-r--r-- weed/proto/system_message.proto | 27
-rw-r--r-- weed/security/guard.go | 162
-rw-r--r-- weed/security/jwt.go | 72
-rw-r--r-- weed/sequence/memory_sequencer.go | 36
-rw-r--r-- weed/sequence/sequence.go | 7
-rw-r--r-- weed/server/common.go | 179
-rw-r--r-- weed/server/filer_server.go | 67
-rw-r--r-- weed/server/filer_server_handlers.go | 265
-rw-r--r-- weed/server/filer_server_handlers_admin.go | 29
-rw-r--r-- weed/server/master_server.go | 131
-rw-r--r-- weed/server/master_server_handlers.go | 104
-rw-r--r-- weed/server/master_server_handlers_admin.go | 193
-rw-r--r-- weed/server/master_server_handlers_ui.go | 30
-rw-r--r-- weed/server/master_ui/templates.go | 102
-rw-r--r-- weed/server/raft_server.go | 217
-rw-r--r-- weed/server/raft_server_handlers.go | 64
-rw-r--r-- weed/server/volume_server.go | 125
-rw-r--r-- weed/server/volume_server_handlers.go | 57
-rw-r--r-- weed/server/volume_server_handlers_admin.go | 50
-rw-r--r-- weed/server/volume_server_handlers_helper.go | 115
-rw-r--r-- weed/server/volume_server_handlers_read.go | 301
-rw-r--r-- weed/server/volume_server_handlers_sync.go | 87
-rw-r--r-- weed/server/volume_server_handlers_ui.go | 38
-rw-r--r-- weed/server/volume_server_handlers_vacuum.go | 35
-rw-r--r-- weed/server/volume_server_handlers_write.go | 165
-rw-r--r-- weed/server/volume_server_ui/templates.go | 135
-rw-r--r-- weed/stats/disk.go | 14
-rw-r--r-- weed/stats/disk_notsupported.go | 7
-rw-r--r-- weed/stats/disk_supported.go | 19
-rw-r--r-- weed/stats/duration_counter.go | 94
-rw-r--r-- weed/stats/duration_counter_test.go | 19
-rw-r--r-- weed/stats/memory.go | 28
-rw-r--r-- weed/stats/memory_notsupported.go | 7
-rw-r--r-- weed/stats/memory_supported.go | 18
-rw-r--r-- weed/stats/stats.go | 113
-rw-r--r-- weed/storage/compact_map.go | 207
-rw-r--r-- weed/storage/compact_map_perf_test.go | 45
-rw-r--r-- weed/storage/compact_map_test.go | 77
-rw-r--r-- weed/storage/crc.go | 30
-rw-r--r-- weed/storage/disk_location.go | 73
-rw-r--r-- weed/storage/file_id.go | 43
-rw-r--r-- weed/storage/needle.go | 231
-rw-r--r-- weed/storage/needle_byte_cache.go | 75
-rw-r--r-- weed/storage/needle_map.go | 123
-rw-r--r-- weed/storage/needle_map_boltdb.go | 165
-rw-r--r-- weed/storage/needle_map_leveldb.go | 134
-rw-r--r-- weed/storage/needle_map_memory.go | 106
-rw-r--r-- weed/storage/needle_read_write.go | 291
-rw-r--r-- weed/storage/needle_test.go | 45
-rw-r--r-- weed/storage/replica_placement.go | 53
-rw-r--r-- weed/storage/replica_placement_test.go | 14
-rw-r--r-- weed/storage/store.go | 340
-rw-r--r-- weed/storage/store_vacuum.go | 44
-rw-r--r-- weed/storage/volume.go | 430
-rw-r--r-- weed/storage/volume_id.go | 18
-rw-r--r-- weed/storage/volume_info.go | 65
-rw-r--r-- weed/storage/volume_info_test.go | 23
-rw-r--r-- weed/storage/volume_super_block.go | 81
-rw-r--r-- weed/storage/volume_super_block_test.go | 23
-rw-r--r-- weed/storage/volume_sync.go | 213
-rw-r--r-- weed/storage/volume_ttl.go | 135
-rw-r--r-- weed/storage/volume_ttl_test.go | 60
-rw-r--r-- weed/storage/volume_vacuum.go | 93
-rw-r--r-- weed/storage/volume_version.go | 9
-rw-r--r-- weed/tools/read_index.go | 28
-rw-r--r-- weed/topology/allocate_volume.go | 35
-rw-r--r-- weed/topology/cluster_commands.go | 31
-rw-r--r-- weed/topology/collection.go | 57
-rw-r--r-- weed/topology/configuration.go | 65
-rw-r--r-- weed/topology/configuration_test.go | 42
-rw-r--r-- weed/topology/data_center.go | 40
-rw-r--r-- weed/topology/data_node.go | 115
-rw-r--r-- weed/topology/node.go | 272
-rw-r--r-- weed/topology/rack.go | 65
-rw-r--r-- weed/topology/store_replicate.go | 150
-rw-r--r-- weed/topology/topo_test.go | 17
-rw-r--r-- weed/topology/topology.go | 189
-rw-r--r-- weed/topology/topology_event_handling.go | 74
-rw-r--r-- weed/topology/topology_map.go | 53
-rw-r--r-- weed/topology/topology_vacuum.go | 158
-rw-r--r-- weed/topology/volume_growth.go | 211
-rw-r--r-- weed/topology/volume_growth_test.go | 135
-rw-r--r-- weed/topology/volume_layout.go | 226
-rw-r--r-- weed/topology/volume_location_list.go | 65
-rw-r--r-- weed/util/bytes.go | 45
-rw-r--r-- weed/util/bytes_pool.go | 127
-rw-r--r-- weed/util/bytes_pool_test.go | 41
-rw-r--r-- weed/util/concurrent_read_map.go | 60
-rw-r--r-- weed/util/config.go | 130
-rw-r--r-- weed/util/constants.go | 5
-rw-r--r-- weed/util/file_util.go | 38
-rw-r--r-- weed/util/http_util.go | 163
-rw-r--r-- weed/util/net_timeout.go | 81
-rw-r--r-- weed/util/parse.go | 26
-rw-r--r-- weed/weed.go | 170
160 files changed, 16402 insertions, 0 deletions
diff --git a/weed/command/backup.go b/weed/command/backup.go
new file mode 100644
index 000000000..0b3994027
--- /dev/null
+++ b/weed/command/backup.go
@@ -0,0 +1,90 @@
+package command
+
+import (
+ "fmt"
+
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+)
+
+var (
+ s BackupOptions
+)
+
+type BackupOptions struct {
+ master *string
+ collection *string
+ dir *string
+ volumeId *int
+}
+
+func init() {
+ cmdBackup.Run = runBackup // break init cycle
+ s.master = cmdBackup.Flag.String("server", "localhost:9333", "SeaweedFS master location")
+ s.collection = cmdBackup.Flag.String("collection", "", "collection name")
+ s.dir = cmdBackup.Flag.String("dir", ".", "directory to store volume data files")
+ s.volumeId = cmdBackup.Flag.Int("volumeId", -1, "a volume id. The volume .dat and .idx files should already exist in the dir.")
+}
+
+var cmdBackup = &Command{
+ UsageLine: "backup -dir=. -volumeId=234 -server=localhost:9333",
+ Short: "incrementally backup a volume to local folder",
+ Long: `Incrementally backup volume data.
+
+ It is expected that you use this inside a script, to loop through
+ all possible volume ids that need to be backed up to a local folder.
+
+ The volume id does not need to exist locally or even remotely.
+ This helps to back up future new volumes as well.
+
+ Usually backing up is just copying the .dat (and .idx) files.
+ But it's tricky to incrementally copy only the differences.
+
+ The complexity comes when there are multiple additions, deletions and compactions.
+ This tool handles them correctly and efficiently, avoiding unnecessary data transfer.
+ `,
+}
+
+func runBackup(cmd *Command, args []string) bool {
+ if *s.volumeId == -1 {
+ return false
+ }
+ vid := storage.VolumeId(*s.volumeId)
+
+ // find volume location, replication, ttl info
+ lookup, err := operation.Lookup(*s.master, vid.String())
+ if err != nil {
+ fmt.Printf("Error looking up volume %d: %v\n", vid, err)
+ return true
+ }
+ volumeServer := lookup.Locations[0].Url
+
+ stats, err := operation.GetVolumeSyncStatus(volumeServer, vid.String())
+ if err != nil {
+ fmt.Printf("Error get volume %d status: %v\n", vid, err)
+ return true
+ }
+ ttl, err := storage.ReadTTL(stats.Ttl)
+ if err != nil {
+ fmt.Printf("Error get volume %d ttl %s: %v\n", vid, stats.Ttl, err)
+ return true
+ }
+ replication, err := storage.NewReplicaPlacementFromString(stats.Replication)
+ if err != nil {
+ fmt.Printf("Error get volume %d replication %s : %v\n", vid, stats.Replication, err)
+ return true
+ }
+
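+ // open or create the local copy of the volume, then incrementally pull the changes from the volume server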
+ v, err := storage.NewVolume(*s.dir, *s.collection, vid, storage.NeedleMapInMemory, replication, ttl)
+ if err != nil {
+ fmt.Printf("Error creating or reading from volume %d: %v\n", vid, err)
+ return true
+ }
+
+ if err := v.Synchronize(volumeServer); err != nil {
+ fmt.Printf("Error synchronizing volume %d: %v\n", vid, err)
+ return true
+ }
+
+ return true
+}
diff --git a/weed/command/benchmark.go b/weed/command/benchmark.go
new file mode 100644
index 000000000..7e0802e30
--- /dev/null
+++ b/weed/command/benchmark.go
@@ -0,0 +1,532 @@
+package command
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "math"
+ "math/rand"
+ "os"
+ "runtime"
+ "runtime/pprof"
+ "sort"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "github.com/chrislusf/seaweedfs/weed/security"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+type BenchmarkOptions struct {
+ server *string
+ concurrency *int
+ numberOfFiles *int
+ fileSize *int
+ idListFile *string
+ write *bool
+ deletePercentage *int
+ read *bool
+ sequentialRead *bool
+ collection *string
+ cpuprofile *string
+ maxCpu *int
+ secretKey *string
+}
+
+var (
+ b BenchmarkOptions
+ sharedBytes []byte
+)
+
+func init() {
+ cmdBenchmark.Run = runbenchmark // break init cycle
+ cmdBenchmark.IsDebug = cmdBenchmark.Flag.Bool("debug", false, "verbose debug information")
+ b.server = cmdBenchmark.Flag.String("server", "localhost:9333", "SeaweedFS master location")
+ b.concurrency = cmdBenchmark.Flag.Int("c", 16, "number of concurrent write or read processes")
+ b.fileSize = cmdBenchmark.Flag.Int("size", 1024, "simulated file size in bytes, with random(0~63) bytes padding")
+ b.numberOfFiles = cmdBenchmark.Flag.Int("n", 1024*1024, "number of files to write for each thread")
+ b.idListFile = cmdBenchmark.Flag.String("list", os.TempDir()+"/benchmark_list.txt", "list of uploaded file ids")
+ b.write = cmdBenchmark.Flag.Bool("write", true, "enable write")
+ b.deletePercentage = cmdBenchmark.Flag.Int("deletePercent", 0, "the percent of writes that are deletes")
+ b.read = cmdBenchmark.Flag.Bool("read", true, "enable read")
+ b.sequentialRead = cmdBenchmark.Flag.Bool("readSequentially", false, "read file ids in order from the \"-list\" specified file, instead of in random order")
+ b.collection = cmdBenchmark.Flag.String("collection", "benchmark", "write data to this collection")
+ b.cpuprofile = cmdBenchmark.Flag.String("cpuprofile", "", "cpu profile output file")
+ b.maxCpu = cmdBenchmark.Flag.Int("maxCpu", 0, "maximum number of CPUs. 0 means all available CPUs")
+ b.secretKey = cmdBenchmark.Flag.String("secure.secret", "", "secret to encrypt Json Web Token(JWT)")
+ sharedBytes = make([]byte, 1024)
+}
+
+var cmdBenchmark = &Command{
+ UsageLine: "benchmark -server=localhost:9333 -c=10 -n=100000",
+ Short: "benchmark on writing millions of files and read out",
+ Long: `benchmark on an empty SeaweedFS file system.
+
+ Two tests during benchmark:
+ 1) write lots of small files to the system
+ 2) read the files out
+
+ The file content is mostly zero, but no compression is done.
+
+ You can choose to only benchmark read or write.
+ During write, the list of uploaded file ids is stored in the "-list" specified file.
+ You can also use your own list of file ids to run the read test.
+
+ Write speed and read speed will be collected.
+ The numbers are used to get a sense of the system.
+ Usually your network or the hard drive is the real bottleneck.
+
+ Another thing to watch is whether the volumes are evenly distributed
+ across the volume servers. Because the 7 benchmark volumes are randomly distributed
+ to servers with free slots, it is quite possible that some servers end up with an uneven
+ number of benchmark volumes. To remedy this, you can grow the benchmark volumes
+ before starting the benchmark command:
+ http://localhost:9333/vol/grow?collection=benchmark&count=5
+
+ After benchmarking, you can clean up the written data by deleting the benchmark collection
+ http://localhost:9333/col/delete?collection=benchmark
+
+ `,
+}
+
+var (
+ wait sync.WaitGroup
+ writeStats *stats
+ readStats *stats
+)
+
+func runbenchmark(cmd *Command, args []string) bool {
+ fmt.Printf("This is SeaweedFS version %s %s %s\n", util.VERSION, runtime.GOOS, runtime.GOARCH)
+ if *b.maxCpu < 1 {
+ *b.maxCpu = runtime.NumCPU()
+ }
+ runtime.GOMAXPROCS(*b.maxCpu)
+ if *b.cpuprofile != "" {
+ f, err := os.Create(*b.cpuprofile)
+ if err != nil {
+ glog.Fatal(err)
+ }
+ pprof.StartCPUProfile(f)
+ defer pprof.StopCPUProfile()
+ }
+
+ if *b.write {
+ bench_write()
+ }
+
+ if *b.read {
+ bench_read()
+ }
+
+ return true
+}
+
+func bench_write() {
+ fileIdLineChan := make(chan string)
+ finishChan := make(chan bool)
+ writeStats = newStats(*b.concurrency)
+ idChan := make(chan int)
+ go writeFileIds(*b.idListFile, fileIdLineChan, finishChan)
+ for i := 0; i < *b.concurrency; i++ {
+ wait.Add(1)
+ go writeFiles(idChan, fileIdLineChan, &writeStats.localStats[i])
+ }
+ writeStats.start = time.Now()
+ writeStats.total = *b.numberOfFiles
+ go writeStats.checkProgress("Writing Benchmark", finishChan)
+ for i := 0; i < *b.numberOfFiles; i++ {
+ idChan <- i
+ }
+ close(idChan)
+ wait.Wait()
+ writeStats.end = time.Now()
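+ // signal both the file id writer and the progress reporter to stop, then wait for them to finish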
+ wait.Add(2)
+ finishChan <- true
+ finishChan <- true
+ wait.Wait()
+ close(finishChan)
+ writeStats.printStats()
+}
+
+func bench_read() {
+ fileIdLineChan := make(chan string)
+ finishChan := make(chan bool)
+ readStats = newStats(*b.concurrency)
+ go readFileIds(*b.idListFile, fileIdLineChan)
+ readStats.start = time.Now()
+ readStats.total = *b.numberOfFiles
+ go readStats.checkProgress("Randomly Reading Benchmark", finishChan)
+ for i := 0; i < *b.concurrency; i++ {
+ wait.Add(1)
+ go readFiles(fileIdLineChan, &readStats.localStats[i])
+ }
+ wait.Wait()
+ wait.Add(1)
+ finishChan <- true
+ wait.Wait()
+ close(finishChan)
+ readStats.end = time.Now()
+ readStats.printStats()
+}
+
+type delayedFile struct {
+ enterTime time.Time
+ fp *operation.FilePart
+}
+
+func writeFiles(idChan chan int, fileIdLineChan chan string, s *stat) {
+ defer wait.Done()
+ delayedDeleteChan := make(chan *delayedFile, 100)
+ var waitForDeletions sync.WaitGroup
+ secret := security.Secret(*b.secretKey)
+
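+ // a few goroutines delete a fraction of the uploaded files after a one second delay, mixing deletes into the write workload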
+ for i := 0; i < 7; i++ {
+ waitForDeletions.Add(1)
+ go func() {
+ defer waitForDeletions.Done()
+ for df := range delayedDeleteChan {
+ if df.enterTime.After(time.Now()) {
+ time.Sleep(df.enterTime.Sub(time.Now()))
+ }
+ if e := util.Delete("http://"+df.fp.Server+"/"+df.fp.Fid,
+ security.GenJwt(secret, df.fp.Fid)); e == nil {
+ s.completed++
+ } else {
+ s.failed++
+ }
+ }
+ }()
+ }
+
+ for id := range idChan {
+ start := time.Now()
+ fileSize := int64(*b.fileSize + rand.Intn(64))
+ fp := &operation.FilePart{Reader: &FakeReader{id: uint64(id), size: fileSize}, FileSize: fileSize}
+ if assignResult, err := operation.Assign(*b.server, 1, "", *b.collection, ""); err == nil {
+ fp.Server, fp.Fid, fp.Collection = assignResult.Url, assignResult.Fid, *b.collection
+ if _, err := fp.Upload(0, *b.server, secret); err == nil {
+ if rand.Intn(100) < *b.deletePercentage {
+ s.total++
+ delayedDeleteChan <- &delayedFile{time.Now().Add(time.Second), fp}
+ } else {
+ fileIdLineChan <- fp.Fid
+ }
+ s.completed++
+ s.transferred += fileSize
+ } else {
+ s.failed++
+ fmt.Printf("Failed to write with error:%v\n", err)
+ }
+ writeStats.addSample(time.Now().Sub(start))
+ if *cmdBenchmark.IsDebug {
+ fmt.Printf("writing %d file %s\n", id, fp.Fid)
+ }
+ } else {
+ s.failed++
+ println("writing file error:", err.Error())
+ }
+ }
+ close(delayedDeleteChan)
+ waitForDeletions.Wait()
+}
+
+func readFiles(fileIdLineChan chan string, s *stat) {
+ defer wait.Done()
+ for fid := range fileIdLineChan {
+ if len(fid) == 0 {
+ continue
+ }
+ if fid[0] == '#' {
+ continue
+ }
+ if *cmdBenchmark.IsDebug {
+ fmt.Printf("reading file %s\n", fid)
+ }
+ parts := strings.SplitN(fid, ",", 2)
+ vid := parts[0]
+ start := time.Now()
+ ret, err := operation.Lookup(*b.server, vid)
+ if err != nil || len(ret.Locations) == 0 {
+ s.failed++
+ println("!!!! volume id ", vid, " location not found!!!!!")
+ continue
+ }
+ server := ret.Locations[rand.Intn(len(ret.Locations))].Url
+ url := "http://" + server + "/" + fid
+ if bytesRead, err := util.Get(url); err == nil {
+ s.completed++
+ s.transferred += int64(len(bytesRead))
+ readStats.addSample(time.Now().Sub(start))
+ } else {
+ s.failed++
+ fmt.Printf("Failed to read %s error:%v\n", url, err)
+ }
+ }
+}
+
+func writeFileIds(fileName string, fileIdLineChan chan string, finishChan chan bool) {
+ file, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+ if err != nil {
+ glog.Fatalf("File to create file %s: %s\n", fileName, err)
+ }
+ defer file.Close()
+
+ for {
+ select {
+ case <-finishChan:
+ wait.Done()
+ return
+ case line := <-fileIdLineChan:
+ file.Write([]byte(line))
+ file.Write([]byte("\n"))
+ }
+ }
+}
+
+func readFileIds(fileName string, fileIdLineChan chan string) {
+ file, err := os.Open(fileName) // For read access.
+ if err != nil {
+ glog.Fatalf("File to read file %s: %s\n", fileName, err)
+ }
+ defer file.Close()
+
+ r := bufio.NewReader(file)
+ if *b.sequentialRead {
+ for {
+ if line, err := Readln(r); err == nil {
+ fileIdLineChan <- string(line)
+ } else {
+ break
+ }
+ }
+ } else {
+ lines := make([]string, 0, readStats.total)
+ for {
+ if line, err := Readln(r); err == nil {
+ lines = append(lines, string(line))
+ } else {
+ break
+ }
+ }
+ if len(lines) > 0 {
+ for i := 0; i < readStats.total; i++ {
+ fileIdLineChan <- lines[rand.Intn(len(lines))]
+ }
+ }
+ }
+
+ close(fileIdLineChan)
+}
+
+const (
+ benchResolution = 10000 // histogram buckets covering one second, i.e. 0.1 millisecond per bucket
+ benchBucket = 1000000000 / benchResolution
+)
+
+// stats efficiently collects and renders the benchmark statistics
+type stats struct {
+ data []int
+ overflow []int
+ localStats []stat
+ start time.Time
+ end time.Time
+ total int
+}
+type stat struct {
+ completed int
+ failed int
+ total int
+ transferred int64
+}
+
+var percentages = []int{50, 66, 75, 80, 90, 95, 98, 99, 100}
+
+func newStats(n int) *stats {
+ return &stats{
+ data: make([]int, benchResolution),
+ overflow: make([]int, 0),
+ localStats: make([]stat, n),
+ }
+}
+
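+// addSample buckets one request latency into 0.1 millisecond histogram slots; samples beyond the histogram range go to the overflow list.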
+func (s *stats) addSample(d time.Duration) {
+ index := int(d / benchBucket)
+ if index < 0 {
+ fmt.Printf("This request takes %3.1f seconds, skipping!\n", float64(index)/10000)
+ } else if index < len(s.data) {
+ s.data[int(d/benchBucket)]++
+ } else {
+ s.overflow = append(s.overflow, index)
+ }
+}
+
+func (s *stats) checkProgress(testName string, finishChan chan bool) {
+ fmt.Printf("\n------------ %s ----------\n", testName)
+ ticker := time.Tick(time.Second)
+ lastCompleted, lastTransferred, lastTime := 0, int64(0), time.Now()
+ for {
+ select {
+ case <-finishChan:
+ wait.Done()
+ return
+ case t := <-ticker:
+ completed, transferred, taken, total := 0, int64(0), t.Sub(lastTime), s.total
+ for _, localStat := range s.localStats {
+ completed += localStat.completed
+ transferred += localStat.transferred
+ total += localStat.total
+ }
+ fmt.Printf("Completed %d of %d requests, %3.1f%% %3.1f/s %3.1fMB/s\n",
+ completed, total, float64(completed)*100/float64(total),
+ float64(completed-lastCompleted)*float64(int64(time.Second))/float64(int64(taken)),
+ float64(transferred-lastTransferred)*float64(int64(time.Second))/float64(int64(taken))/float64(1024*1024),
+ )
+ lastCompleted, lastTransferred, lastTime = completed, transferred, t
+ }
+ }
+}
+
+func (s *stats) printStats() {
+ completed, failed, transferred, total := 0, 0, int64(0), s.total
+ for _, localStat := range s.localStats {
+ completed += localStat.completed
+ failed += localStat.failed
+ transferred += localStat.transferred
+ total += localStat.total
+ }
+ timeTaken := float64(int64(s.end.Sub(s.start))) / 1000000000
+ fmt.Printf("\nConcurrency Level: %d\n", *b.concurrency)
+ fmt.Printf("Time taken for tests: %.3f seconds\n", timeTaken)
+ fmt.Printf("Complete requests: %d\n", completed)
+ fmt.Printf("Failed requests: %d\n", failed)
+ fmt.Printf("Total transferred: %d bytes\n", transferred)
+ fmt.Printf("Requests per second: %.2f [#/sec]\n", float64(completed)/timeTaken)
+ fmt.Printf("Transfer rate: %.2f [Kbytes/sec]\n", float64(transferred)/1024/timeTaken)
+ n, sum := 0, 0
+ min, max := 10000000, 0
+ for i := 0; i < len(s.data); i++ {
+ n += s.data[i]
+ sum += s.data[i] * i
+ if s.data[i] > 0 {
+ if min > i {
+ min = i
+ }
+ if max < i {
+ max = i
+ }
+ }
+ }
+ n += len(s.overflow)
+ for i := 0; i < len(s.overflow); i++ {
+ sum += s.overflow[i]
+ if min > s.overflow[i] {
+ min = s.overflow[i]
+ }
+ if max < s.overflow[i] {
+ max = s.overflow[i]
+ }
+ }
+ avg := float64(sum) / float64(n)
+ varianceSum := 0.0
+ for i := 0; i < len(s.data); i++ {
+ if s.data[i] > 0 {
+ d := float64(i) - avg
+ varianceSum += d * d * float64(s.data[i])
+ }
+ }
+ for i := 0; i < len(s.overflow); i++ {
+ d := float64(s.overflow[i]) - avg
+ varianceSum += d * d
+ }
+ std := math.Sqrt(varianceSum / float64(n))
+ fmt.Printf("\nConnection Times (ms)\n")
+ fmt.Printf(" min avg max std\n")
+ fmt.Printf("Total: %2.1f %3.1f %3.1f %3.1f\n", float32(min)/10, float32(avg)/10, float32(max)/10, std/10)
+ //printing percentiles
+ fmt.Printf("\nPercentage of the requests served within a certain time (ms)\n")
+ percentiles := make([]int, len(percentages))
+ for i := 0; i < len(percentages); i++ {
+ percentiles[i] = n * percentages[i] / 100
+ }
+ percentiles[len(percentiles)-1] = n
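+ // walk the histogram (and then the sorted overflow samples) cumulatively, printing each percentile once its threshold count is reached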
+ percentileIndex := 0
+ currentSum := 0
+ for i := 0; i < len(s.data); i++ {
+ currentSum += s.data[i]
+ if s.data[i] > 0 && percentileIndex < len(percentiles) && currentSum >= percentiles[percentileIndex] {
+ fmt.Printf(" %3d%% %5.1f ms\n", percentages[percentileIndex], float32(i)/10.0)
+ percentileIndex++
+ for percentileIndex < len(percentiles) && currentSum >= percentiles[percentileIndex] {
+ percentileIndex++
+ }
+ }
+ }
+ sort.Ints(s.overflow)
+ for i := 0; i < len(s.overflow); i++ {
+ currentSum++
+ if percentileIndex < len(percentiles) && currentSum >= percentiles[percentileIndex] {
+ fmt.Printf(" %3d%% %5.1f ms\n", percentages[percentileIndex], float32(s.overflow[i])/10.0)
+ percentileIndex++
+ for percentileIndex < len(percentiles) && currentSum >= percentiles[percentileIndex] {
+ percentileIndex++
+ }
+ }
+ }
+}
+
+// a fake reader to generate content to upload
+type FakeReader struct {
+ id uint64 // an id number
+ size int64 // max bytes
+}
+
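+// Read fills p with up to l.size remaining bytes, stamping the file id (little-endian) into the first 8 bytes of each chunk so the generated content is identifiable.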
+func (l *FakeReader) Read(p []byte) (n int, err error) {
+ if l.size <= 0 {
+ return 0, io.EOF
+ }
+ if int64(len(p)) > l.size {
+ n = int(l.size)
+ } else {
+ n = len(p)
+ }
+ if n >= 8 {
+ for i := 0; i < 8; i++ {
+ p[i] = byte(l.id >> uint(i*8))
+ }
+ }
+ l.size -= int64(n)
+ return
+}
+
+func (l *FakeReader) WriteTo(w io.Writer) (n int64, err error) {
+ size := int(l.size)
+ bufferSize := len(sharedBytes)
+ for size > 0 {
+ tempBuffer := sharedBytes
+ if size < bufferSize {
+ tempBuffer = sharedBytes[0:size]
+ }
+ count, e := w.Write(tempBuffer)
+ if e != nil {
+ return int64(size), e
+ }
+ size -= count
+ }
+ return l.size, nil
+}
+
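+// Readln reads one full line from r, re-assembling it when bufio.Reader returns the line in prefix chunks.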
+func Readln(r *bufio.Reader) ([]byte, error) {
+ var (
+ isPrefix = true
+ err error
+ line, ln []byte
+ )
+ for isPrefix && err == nil {
+ line, isPrefix, err = r.ReadLine()
+ ln = append(ln, line...)
+ }
+ return ln, err
+}
diff --git a/weed/command/command.go b/weed/command/command.go
new file mode 100644
index 000000000..d654f57cd
--- /dev/null
+++ b/weed/command/command.go
@@ -0,0 +1,71 @@
+package command
+
+import (
+ "flag"
+ "fmt"
+ "os"
+ "strings"
+)
+
+var Commands = []*Command{
+ cmdBenchmark,
+ cmdBackup,
+ cmdCompact,
+ cmdFix,
+ cmdServer,
+ cmdMaster,
+ cmdFiler,
+ cmdUpload,
+ cmdDownload,
+ cmdShell,
+ cmdVersion,
+ cmdVolume,
+ cmdExport,
+ cmdMount,
+}
+
+type Command struct {
+ // Run runs the command.
+ // The args are the arguments after the command name.
+ Run func(cmd *Command, args []string) bool
+
+ // UsageLine is the one-line usage message.
+ // The first word in the line is taken to be the command name.
+ UsageLine string
+
+ // Short is the short description shown in the 'go help' output.
+ Short string
+
+ // Long is the long message shown in the 'go help <this-command>' output.
+ Long string
+
+ // Flag is a set of flags specific to this command.
+ Flag flag.FlagSet
+
+ IsDebug *bool
+}
+
+// Name returns the command's name: the first word in the usage line.
+func (c *Command) Name() string {
+ name := c.UsageLine
+ i := strings.Index(name, " ")
+ if i >= 0 {
+ name = name[:i]
+ }
+ return name
+}
+
+func (c *Command) Usage() {
+ fmt.Fprintf(os.Stderr, "Example: weed %s\n", c.UsageLine)
+ fmt.Fprintf(os.Stderr, "Default Usage:\n")
+ c.Flag.PrintDefaults()
+ fmt.Fprintf(os.Stderr, "Description:\n")
+ fmt.Fprintf(os.Stderr, " %s\n", strings.TrimSpace(c.Long))
+ os.Exit(2)
+}
+
+// Runnable reports whether the command can be run; otherwise
+// it is a documentation pseudo-command such as importpath.
+func (c *Command) Runnable() bool {
+ return c.Run != nil
+}
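The comments on the Command struct above describe how subcommands plug into the CLI. As a minimal sketch of that pattern (the "hello" command and its flag are hypothetical, not part of this commit), a new subcommand would look roughly like the following and would also be appended to the Commands slice so the CLI can dispatch to it:

package command

import "fmt"

// a hypothetical example command, not part of this commit
var cmdHello = &Command{
	UsageLine: "hello -name=world",
	Short:     "print a greeting (illustrative example only)",
	Long:      `Print a greeting. This only illustrates the Command plumbing.`,
}

var helloName = cmdHello.Flag.String("name", "world", "name to greet")

func init() {
	cmdHello.Run = runHello // break init cycle
}

func runHello(cmd *Command, args []string) bool {
	fmt.Printf("hello, %s\n", *helloName)
	return true
}

Assigning Run inside init() breaks the initialization cycle between the command value and the function that uses its flags, the same trick every command file in this commit uses.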
diff --git a/weed/command/compact.go b/weed/command/compact.go
new file mode 100644
index 000000000..ba2fbf867
--- /dev/null
+++ b/weed/command/compact.go
@@ -0,0 +1,45 @@
+package command
+
+import (
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+)
+
+func init() {
+ cmdCompact.Run = runCompact // break init cycle
+}
+
+var cmdCompact = &Command{
+ UsageLine: "compact -dir=/tmp -volumeId=234",
+ Short: "run weed tool compact on volume file",
+ Long: `Force a compaction to remove deleted files from volume files.
+ The compacted .dat file is stored as .cpd file.
+ The compacted .idx file is stored as .cpx file.
+
+ `,
+}
+
+var (
+ compactVolumePath = cmdCompact.Flag.String("dir", ".", "data directory to store files")
+ compactVolumeCollection = cmdCompact.Flag.String("collection", "", "volume collection name")
+ compactVolumeId = cmdCompact.Flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir.")
+)
+
+func runCompact(cmd *Command, args []string) bool {
+
+ if *compactVolumeId == -1 {
+ return false
+ }
+
+ vid := storage.VolumeId(*compactVolumeId)
+ v, err := storage.NewVolume(*compactVolumePath, *compactVolumeCollection, vid,
+ storage.NeedleMapInMemory, nil, nil)
+ if err != nil {
+ glog.Fatalf("Load Volume [ERROR] %s\n", err)
+ }
+ if err = v.Compact(); err != nil {
+ glog.Fatalf("Compact Volume [ERROR] %s\n", err)
+ }
+
+ return true
+}
diff --git a/weed/command/download.go b/weed/command/download.go
new file mode 100644
index 000000000..39ed2b38e
--- /dev/null
+++ b/weed/command/download.go
@@ -0,0 +1,130 @@
+package command
+
+import (
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+ "path"
+ "strings"
+
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+var (
+ d DownloadOptions
+)
+
+type DownloadOptions struct {
+ server *string
+ dir *string
+}
+
+func init() {
+ cmdDownload.Run = runDownload // break init cycle
+ d.server = cmdDownload.Flag.String("server", "localhost:9333", "SeaweedFS master location")
+ d.dir = cmdDownload.Flag.String("dir", ".", "Download the whole folder recursively if specified.")
+}
+
+var cmdDownload = &Command{
+ UsageLine: "download -server=localhost:9333 -dir=one_directory fid1 [fid2 fid3 ...]",
+ Short: "download files by file id",
+ Long: `download files by file id.
+
+ Usually you just need to use curl to look up the file's volume server, and then download the file directly.
+ This download tool combines the two steps into one.
+
+ What's more, if you used the "weed upload -maxMB=..." option to upload a big file divided into chunks, you can
+ use this tool to download the chunks and merge them back into the original file automatically.
+
+ `,
+}
+
+func runDownload(cmd *Command, args []string) bool {
+ for _, fid := range args {
+ if e := downloadToFile(*d.server, fid, *d.dir); e != nil {
+ fmt.Println("Download Error: ", fid, e)
+ }
+ }
+ return true
+}
+
+func downloadToFile(server, fileId, saveDir string) error {
+ fileUrl, lookupError := operation.LookupFileId(server, fileId)
+ if lookupError != nil {
+ return lookupError
+ }
+ filename, rc, err := util.DownloadUrl(fileUrl)
+ if err != nil {
+ return err
+ }
+ defer rc.Close()
+ if filename == "" {
+ filename = fileId
+ }
+ isFileList := false
+ if strings.HasSuffix(filename, "-list") {
+ // old command compatible
+ isFileList = true
+ filename = filename[0 : len(filename)-len("-list")]
+ }
+ f, err := os.OpenFile(path.Join(saveDir, filename), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ if isFileList {
+ content, err := ioutil.ReadAll(rc)
+ if err != nil {
+ return err
+ }
+ fids := strings.Split(string(content), "\n")
+ for _, partId := range fids {
+ var n int
+ _, part, err := fetchContent(*d.server, partId)
+ if err == nil {
+ n, err = f.Write(part)
+ }
+ if err == nil && n < len(part) {
+ err = io.ErrShortWrite
+ }
+ if err != nil {
+ return err
+ }
+ }
+ } else {
+ if _, err = io.Copy(f, rc); err != nil {
+ return err
+ }
+
+ }
+ return nil
+}
+
+func fetchContent(server string, fileId string) (filename string, content []byte, e error) {
+ fileUrl, lookupError := operation.LookupFileId(server, fileId)
+ if lookupError != nil {
+ return "", nil, lookupError
+ }
+ var rc io.ReadCloser
+ if filename, rc, e = util.DownloadUrl(fileUrl); e != nil {
+ return "", nil, e
+ }
+ content, e = ioutil.ReadAll(rc)
+ rc.Close()
+ return
+}
+
+func WriteFile(filename string, data []byte, perm os.FileMode) error {
+ f, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm)
+ if err != nil {
+ return err
+ }
+ n, err := f.Write(data)
+ f.Close()
+ if err == nil && n < len(data) {
+ err = io.ErrShortWrite
+ }
+ return err
+}
diff --git a/weed/command/export.go b/weed/command/export.go
new file mode 100644
index 000000000..481aa111b
--- /dev/null
+++ b/weed/command/export.go
@@ -0,0 +1,213 @@
+package command
+
+import (
+ "archive/tar"
+ "bytes"
+ "fmt"
+ "os"
+ "path"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "text/template"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+)
+
+const (
+ defaultFnFormat = `{{.Mime}}/{{.Id}}:{{.Name}}`
+ timeFormat = "2006-01-02T15:04:05"
+)
+
+var (
+ export ExportOptions
+)
+
+type ExportOptions struct {
+ dir *string
+ collection *string
+ volumeId *int
+}
+
+var cmdExport = &Command{
+ UsageLine: "export -dir=/tmp -volumeId=234 -o=/dir/name.tar -fileNameFormat={{.Name}} -newer='" + timeFormat + "'",
+ Short: "list or export files from one volume data file",
+ Long: `List all files in a volume, or export all files in a volume to a tar file if an output file is specified.
+
+ The format of file name in the tar file can be customized. Default is {{.Mime}}/{{.Id}}:{{.Name}}. Also available is {{.Key}}.
+
+ `,
+}
+
+func init() {
+ cmdExport.Run = runExport // break init cycle
+ export.dir = cmdExport.Flag.String("dir", ".", "input data directory to store volume data files")
+ export.collection = cmdExport.Flag.String("collection", "", "the volume collection name")
+ export.volumeId = cmdExport.Flag.Int("volumeId", -1, "a volume id. The volume .dat and .idx files should already exist in the dir.")
+}
+
+var (
+ output = cmdExport.Flag.String("o", "", "output tar file name, must end with .tar, or just a \"-\" for stdout")
+ format = cmdExport.Flag.String("fileNameFormat", defaultFnFormat, "filename formatted with {{.Mime}} {{.Id}} {{.Name}} {{.Ext}}")
+ newer = cmdExport.Flag.String("newer", "", "export only files newer than this time, default is all files. Must be specified in RFC3339 without timezone")
+
+ tarOutputFile *tar.Writer
+ tarHeader tar.Header
+ fileNameTemplate *template.Template
+ fileNameTemplateBuffer = bytes.NewBuffer(nil)
+ newerThan time.Time
+ newerThanUnix int64 = -1
+ localLocation, _ = time.LoadLocation("Local")
+)
+
+func runExport(cmd *Command, args []string) bool {
+
+ var err error
+
+ if *newer != "" {
+ if newerThan, err = time.ParseInLocation(timeFormat, *newer, localLocation); err != nil {
+ fmt.Println("cannot parse 'newer' argument: " + err.Error())
+ return false
+ }
+ newerThanUnix = newerThan.Unix()
+ }
+
+ if *export.volumeId == -1 {
+ return false
+ }
+
+ if *output != "" {
+ if *output != "-" && !strings.HasSuffix(*output, ".tar") {
+ fmt.Println("the output file", *output, "should be '-' or end with .tar")
+ return false
+ }
+
+ if fileNameTemplate, err = template.New("name").Parse(*format); err != nil {
+ fmt.Println("cannot parse format " + *format + ": " + err.Error())
+ return false
+ }
+
+ var outputFile *os.File
+ if *output == "-" {
+ outputFile = os.Stdout
+ } else {
+ if outputFile, err = os.Create(*output); err != nil {
+ glog.Fatalf("cannot open output tar %s: %s", *output, err)
+ }
+ }
+ defer outputFile.Close()
+ tarOutputFile = tar.NewWriter(outputFile)
+ defer tarOutputFile.Close()
+ t := time.Now()
+ tarHeader = tar.Header{Mode: 0644,
+ ModTime: t, Uid: os.Getuid(), Gid: os.Getgid(),
+ Typeflag: tar.TypeReg,
+ AccessTime: t, ChangeTime: t}
+ }
+
+ fileName := strconv.Itoa(*export.volumeId)
+ if *export.collection != "" {
+ fileName = *export.collection + "_" + fileName
+ }
+ vid := storage.VolumeId(*export.volumeId)
+ indexFile, err := os.OpenFile(path.Join(*export.dir, fileName+".idx"), os.O_RDONLY, 0644)
+ if err != nil {
+ glog.Fatalf("Create Volume Index [ERROR] %s\n", err)
+ }
+ defer indexFile.Close()
+
+ needleMap, err := storage.LoadNeedleMap(indexFile)
+ if err != nil {
+ glog.Fatalf("cannot load needle map from %s: %s", indexFile.Name(), err)
+ }
+
+ var version storage.Version
+
+ err = storage.ScanVolumeFile(*export.dir, *export.collection, vid,
+ storage.NeedleMapInMemory,
+ func(superBlock storage.SuperBlock) error {
+ version = superBlock.Version()
+ return nil
+ }, true, func(n *storage.Needle, offset int64) error {
+ nv, ok := needleMap.Get(n.Id)
+ glog.V(3).Infof("key %d offset %d size %d disk_size %d gzip %v ok %v nv %+v",
+ n.Id, offset, n.Size, n.DiskSize(), n.IsGzipped(), ok, nv)
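+ // the needle index stores offsets in units of 8 bytes (NeedlePaddingSize); only export this entry if it is still the live version at this offset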
+ if ok && nv.Size > 0 && int64(nv.Offset)*8 == offset {
+ if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) {
+ glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d",
+ n.LastModified, newerThanUnix)
+ return nil
+ }
+ return walker(vid, n, version)
+ }
+ if !ok {
+ glog.V(2).Infof("This seems deleted %d size %d", n.Id, n.Size)
+ } else {
+ glog.V(2).Infof("Skipping later-updated Id %d size %d", n.Id, n.Size)
+ }
+ return nil
+ })
+ if err != nil {
+ glog.Fatalf("Export Volume File [ERROR] %s\n", err)
+ }
+ return true
+}
+
+type nameParams struct {
+ Name string
+ Id uint64
+ Mime string
+ Key string
+ Ext string
+}
+
+func walker(vid storage.VolumeId, n *storage.Needle, version storage.Version) (err error) {
+ key := storage.NewFileIdFromNeedle(vid, n).String()
+ if tarOutputFile != nil {
+ fileNameTemplateBuffer.Reset()
+ if err = fileNameTemplate.Execute(fileNameTemplateBuffer,
+ nameParams{
+ Name: string(n.Name),
+ Id: n.Id,
+ Mime: string(n.Mime),
+ Key: key,
+ Ext: filepath.Ext(string(n.Name)),
+ },
+ ); err != nil {
+ return err
+ }
+
+ fileName := fileNameTemplateBuffer.String()
+
+ if n.IsGzipped() && path.Ext(fileName) != ".gz" {
+ fileName = fileName + ".gz"
+ }
+
+ tarHeader.Name, tarHeader.Size = fileName, int64(len(n.Data))
+ if n.HasLastModifiedDate() {
+ tarHeader.ModTime = time.Unix(int64(n.LastModified), 0)
+ } else {
+ tarHeader.ModTime = time.Unix(0, 0)
+ }
+ tarHeader.ChangeTime = tarHeader.ModTime
+ if err = tarOutputFile.WriteHeader(&tarHeader); err != nil {
+ return err
+ }
+ _, err = tarOutputFile.Write(n.Data)
+ } else {
+ size := n.DataSize
+ if version == storage.Version1 {
+ size = n.Size
+ }
+ fmt.Printf("key=%s Name=%s Size=%d gzip=%t mime=%s\n",
+ key,
+ n.Name,
+ size,
+ n.IsGzipped(),
+ n.Mime,
+ )
+ }
+ return
+}
diff --git a/weed/command/filer.go b/weed/command/filer.go
new file mode 100644
index 000000000..582d4e9c8
--- /dev/null
+++ b/weed/command/filer.go
@@ -0,0 +1,105 @@
+package command
+
+import (
+ "net/http"
+ "os"
+ "strconv"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/server"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+var (
+ f FilerOptions
+)
+
+type FilerOptions struct {
+ master *string
+ ip *string
+ port *int
+ collection *string
+ defaultReplicaPlacement *string
+ dir *string
+ redirectOnRead *bool
+ disableDirListing *bool
+ secretKey *string
+ cassandra_server *string
+ cassandra_keyspace *string
+ redis_server *string
+ redis_password *string
+ redis_database *int
+}
+
+func init() {
+ cmdFiler.Run = runFiler // break init cycle
+ f.master = cmdFiler.Flag.String("master", "localhost:9333", "master server location")
+ f.collection = cmdFiler.Flag.String("collection", "", "all data will be stored in this collection")
+ f.ip = cmdFiler.Flag.String("ip", "", "filer server http listen ip address")
+ f.port = cmdFiler.Flag.Int("port", 8888, "filer server http listen port")
+ f.dir = cmdFiler.Flag.String("dir", os.TempDir(), "directory to store meta data")
+ f.defaultReplicaPlacement = cmdFiler.Flag.String("defaultReplicaPlacement", "000", "default replication type if not specified")
+ f.redirectOnRead = cmdFiler.Flag.Bool("redirectOnRead", false, "whether proxy or redirect to volume server during file GET request")
+ f.disableDirListing = cmdFiler.Flag.Bool("disableDirListing", false, "turn off directory listing")
+ f.cassandra_server = cmdFiler.Flag.String("cassandra.server", "", "host[:port] of the cassandra server")
+ f.cassandra_keyspace = cmdFiler.Flag.String("cassandra.keyspace", "seaweed", "keyspace of the cassandra server")
+ f.redis_server = cmdFiler.Flag.String("redis.server", "", "host:port of the redis server, e.g., 127.0.0.1:6379")
+ f.redis_password = cmdFiler.Flag.String("redis.password", "", "password in clear text")
+ f.redis_database = cmdFiler.Flag.Int("redis.database", 0, "the database on the redis server")
+ f.secretKey = cmdFiler.Flag.String("secure.secret", "", "secret to encrypt Json Web Token(JWT)")
+
+}
+
+var cmdFiler = &Command{
+ UsageLine: "filer -port=8888 -dir=/tmp -master=<ip:port>",
+ Short: "start a file server that points to a master server",
+ Long: `start a file server which accepts REST operations for any files.
+
+ //create or overwrite the file, the directories /path/to will be automatically created
+ POST /path/to/file
+ //get the file content
+ GET /path/to/file
+ //create or overwrite the file, the filename in the multipart request will be used
+ POST /path/to/
+ //return a json format subdirectory and files listing
+ GET /path/to/
+
+ The current <fullpath~fileid> mapping metadata store is a local embedded leveldb.
+ It should scale to hundreds of millions of files on a modest machine.
+
+ In the future we will ensure it can avoid being a single point of failure.
+
+ `,
+}
+
+func runFiler(cmd *Command, args []string) bool {
+
+ if err := util.TestFolderWritable(*f.dir); err != nil {
+ glog.Fatalf("Check Meta Folder (-dir) Writable %s : %s", *f.dir, err)
+ }
+
+ r := http.NewServeMux()
+ _, nfs_err := weed_server.NewFilerServer(r, *f.ip, *f.port, *f.master, *f.dir, *f.collection,
+ *f.defaultReplicaPlacement, *f.redirectOnRead, *f.disableDirListing,
+ *f.secretKey,
+ *f.cassandra_server, *f.cassandra_keyspace,
+ *f.redis_server, *f.redis_password, *f.redis_database,
+ )
+ if nfs_err != nil {
+ glog.Fatalf("Filer startup error: %v", nfs_err)
+ }
+ glog.V(0).Infoln("Start Seaweed Filer", util.VERSION, "at port", strconv.Itoa(*f.port))
+ filerListener, e := util.NewListener(
+ ":"+strconv.Itoa(*f.port),
+ time.Duration(10)*time.Second,
+ )
+ if e != nil {
+ glog.Fatalf("Filer listener error: %v", e)
+ }
+ if e := http.Serve(filerListener, r); e != nil {
+ glog.Fatalf("Filer Fail to serve: %v", e)
+ }
+
+ return true
+}
diff --git a/weed/command/fix.go b/weed/command/fix.go
new file mode 100644
index 000000000..2ec74d026
--- /dev/null
+++ b/weed/command/fix.go
@@ -0,0 +1,70 @@
+package command
+
+import (
+ "os"
+ "path"
+ "strconv"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+)
+
+func init() {
+ cmdFix.Run = runFix // break init cycle
+}
+
+var cmdFix = &Command{
+ UsageLine: "fix -dir=/tmp -volumeId=234",
+ Short: "run weed tool fix on index file if corrupted",
+ Long: `Fix runs the SeaweedFS fix command to re-create the index .idx file.
+
+ `,
+}
+
+var (
+ fixVolumePath = cmdFix.Flag.String("dir", ".", "data directory to store files")
+ fixVolumeCollection = cmdFix.Flag.String("collection", "", "the volume collection name")
+ fixVolumeId = cmdFix.Flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. The volume index file should not exist.")
+)
+
+func runFix(cmd *Command, args []string) bool {
+
+ if *fixVolumeId == -1 {
+ return false
+ }
+
+ fileName := strconv.Itoa(*fixVolumeId)
+ if *fixVolumeCollection != "" {
+ fileName = *fixVolumeCollection + "_" + fileName
+ }
+ indexFile, err := os.OpenFile(path.Join(*fixVolumePath, fileName+".idx"), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+ if err != nil {
+ glog.Fatalf("Create Volume Index [ERROR] %s\n", err)
+ }
+ defer indexFile.Close()
+
+ nm := storage.NewNeedleMap(indexFile)
+ defer nm.Close()
+
+ vid := storage.VolumeId(*fixVolumeId)
+ err = storage.ScanVolumeFile(*fixVolumePath, *fixVolumeCollection, vid,
+ storage.NeedleMapInMemory,
+ func(superBlock storage.SuperBlock) error {
+ return nil
+ }, false, func(n *storage.Needle, offset int64) error {
+ glog.V(2).Infof("key %d offset %d size %d disk_size %d gzip %v", n.Id, offset, n.Size, n.DiskSize(), n.IsGzipped())
+ if n.Size > 0 {
+ pe := nm.Put(n.Id, uint32(offset/storage.NeedlePaddingSize), n.Size)
+ glog.V(2).Infof("saved %d with error %v", n.Size, pe)
+ } else {
+ glog.V(2).Infof("skipping deleted file ...")
+ return nm.Delete(n.Id)
+ }
+ return nil
+ })
+ if err != nil {
+ glog.Fatalf("Export Volume File [ERROR] %s\n", err)
+ }
+
+ return true
+}
diff --git a/weed/command/master.go b/weed/command/master.go
new file mode 100644
index 000000000..aed8fc793
--- /dev/null
+++ b/weed/command/master.go
@@ -0,0 +1,91 @@
+package command
+
+import (
+ "net/http"
+ "os"
+ "runtime"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/server"
+ "github.com/chrislusf/seaweedfs/weed/util"
+ "github.com/gorilla/mux"
+)
+
+func init() {
+ cmdMaster.Run = runMaster // break init cycle
+}
+
+var cmdMaster = &Command{
+ UsageLine: "master -port=9333",
+ Short: "start a master server",
+ Long: `start a master server to provide volume=>location mapping service
+ and sequence number of file ids
+
+ `,
+}
+
+var (
+ mport = cmdMaster.Flag.Int("port", 9333, "http listen port")
+ masterIp = cmdMaster.Flag.String("ip", "localhost", "master <ip>|<server> address")
+ masterBindIp = cmdMaster.Flag.String("ip.bind", "0.0.0.0", "ip address to bind to")
+ metaFolder = cmdMaster.Flag.String("mdir", os.TempDir(), "data directory to store meta data")
+ masterPeers = cmdMaster.Flag.String("peers", "", "other master nodes in comma separated ip:port list, example: 127.0.0.1:9093,127.0.0.1:9094")
+ volumeSizeLimitMB = cmdMaster.Flag.Uint("volumeSizeLimitMB", 30*1000, "Master stops directing writes to oversized volumes.")
+ mpulse = cmdMaster.Flag.Int("pulseSeconds", 5, "number of seconds between heartbeats")
+ confFile = cmdMaster.Flag.String("conf", "/etc/weedfs/weedfs.conf", "Deprecating! xml configuration file")
+ defaultReplicaPlacement = cmdMaster.Flag.String("defaultReplication", "000", "Default replication type if not specified.")
+ mTimeout = cmdMaster.Flag.Int("idleTimeout", 10, "connection idle seconds")
+ mMaxCpu = cmdMaster.Flag.Int("maxCpu", 0, "maximum number of CPUs. 0 means all available CPUs")
+ garbageThreshold = cmdMaster.Flag.String("garbageThreshold", "0.3", "threshold to vacuum and reclaim spaces")
+ masterWhiteListOption = cmdMaster.Flag.String("whiteList", "", "comma separated Ip addresses having write permission. No limit if empty.")
+ masterSecureKey = cmdMaster.Flag.String("secure.secret", "", "secret to encrypt Json Web Token(JWT)")
+
+ masterWhiteList []string
+)
+
+func runMaster(cmd *Command, args []string) bool {
+ if *mMaxCpu < 1 {
+ *mMaxCpu = runtime.NumCPU()
+ }
+ runtime.GOMAXPROCS(*mMaxCpu)
+ if err := util.TestFolderWritable(*metaFolder); err != nil {
+ glog.Fatalf("Check Meta Folder (-mdir) Writable %s : %s", *metaFolder, err)
+ }
+ if *masterWhiteListOption != "" {
+ masterWhiteList = strings.Split(*masterWhiteListOption, ",")
+ }
+
+ r := mux.NewRouter()
+ ms := weed_server.NewMasterServer(r, *mport, *metaFolder,
+ *volumeSizeLimitMB, *mpulse, *confFile, *defaultReplicaPlacement, *garbageThreshold,
+ masterWhiteList, *masterSecureKey,
+ )
+
+ listeningAddress := *masterBindIp + ":" + strconv.Itoa(*mport)
+
+ glog.V(0).Infoln("Start Seaweed Master", util.VERSION, "at", listeningAddress)
+
+ listener, e := util.NewListener(listeningAddress, time.Duration(*mTimeout)*time.Second)
+ if e != nil {
+ glog.Fatalf("Master startup error: %v", e)
+ }
+
+ go func() {
+ time.Sleep(100 * time.Millisecond)
+ myMasterAddress := *masterIp + ":" + strconv.Itoa(*mport)
+ var peers []string
+ if *masterPeers != "" {
+ peers = strings.Split(*masterPeers, ",")
+ }
+ raftServer := weed_server.NewRaftServer(r, peers, myMasterAddress, *metaFolder, ms.Topo, *mpulse)
+ ms.SetRaftServer(raftServer)
+ }()
+
+ if e := http.Serve(listener, r); e != nil {
+ glog.Fatalf("Fail to serve: %v", e)
+ }
+ return true
+}
diff --git a/weed/command/mount.go b/weed/command/mount.go
new file mode 100644
index 000000000..d6e87d76c
--- /dev/null
+++ b/weed/command/mount.go
@@ -0,0 +1,35 @@
+package command
+
+type MountOptions struct {
+ filer *string
+ dir *string
+}
+
+var (
+ mountOptions MountOptions
+)
+
+func init() {
+ cmdMount.Run = runMount // break init cycle
+ cmdMount.IsDebug = cmdMount.Flag.Bool("debug", false, "verbose debug information")
+ mountOptions.filer = cmdMount.Flag.String("filer", "localhost:8888", "weed filer location")
+ mountOptions.dir = cmdMount.Flag.String("dir", ".", "mount weed filer to this directory")
+}
+
+var cmdMount = &Command{
+ UsageLine: "mount -filer=localhost:8888 -dir=/some/dir",
+ Short: "mount weed filer to a directory as file system in userspace(FUSE)",
+ Long: `mount weed filer to userspace.
+
+ Pre-requisites:
+ 1) have SeaweedFS master and volume servers running
+ 2) have a "weed filer" running
+ These 2 requirements can be achieved with one command "weed server -filer=true"
+
+ This uses bazil.org/fuse, which enables writing FUSE file systems on
+ Linux and OS X.
+
+ On OS X, it requires OSXFUSE (http://osxfuse.github.com/).
+
+ `,
+}
diff --git a/weed/command/mount_notsupported.go b/weed/command/mount_notsupported.go
new file mode 100644
index 000000000..3bf22ddc4
--- /dev/null
+++ b/weed/command/mount_notsupported.go
@@ -0,0 +1,15 @@
+// +build !linux
+// +build !darwin
+
+package command
+
+import (
+ "fmt"
+ "runtime"
+)
+
+func runMount(cmd *Command, args []string) bool {
+ fmt.Printf("Mount is not supported on %s %s\n", runtime.GOOS, runtime.GOARCH)
+
+ return true
+}
diff --git a/weed/command/mount_std.go b/weed/command/mount_std.go
new file mode 100644
index 000000000..b086d8cbf
--- /dev/null
+++ b/weed/command/mount_std.go
@@ -0,0 +1,106 @@
+// +build linux darwin
+
+package command
+
+import (
+ "fmt"
+ "runtime"
+
+ "bazil.org/fuse"
+ "bazil.org/fuse/fs"
+ "github.com/chrislusf/seaweedfs/weed/filer"
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+ "github.com/chrislusf/seaweedfs/weed/util"
+ "golang.org/x/net/context"
+)
+
+func runMount(cmd *Command, args []string) bool {
+ fmt.Printf("This is SeaweedFS version %s %s %s\n", util.VERSION, runtime.GOOS, runtime.GOARCH)
+ if *mountOptions.dir == "" {
+ fmt.Printf("Please specify the mount directory via \"-dir\"")
+ return false
+ }
+
+ c, err := fuse.Mount(*mountOptions.dir)
+ if err != nil {
+ glog.Fatal(err)
+ return false
+ }
+
+ OnInterrupt(func() {
+ fuse.Unmount(*mountOptions.dir)
+ c.Close()
+ })
+
+ err = fs.Serve(c, WFS{})
+ if err != nil {
+ fuse.Unmount(*mountOptions.dir)
+ }
+
+ // check if the mount process has an error to report
+ <-c.Ready
+ if err := c.MountError; err != nil {
+ glog.Fatal(err)
+ }
+
+ return true
+}
+
+type File struct {
+ FileId filer.FileId
+ Name string
+}
+
+func (File) Attr(context context.Context, attr *fuse.Attr) error {
+ return nil
+}
+func (File) ReadAll(ctx context.Context) ([]byte, error) {
+ return []byte("hello, world\n"), nil
+}
+
+type Dir struct {
+ Path string
+ Id uint64
+}
+
+func (dir Dir) Attr(context context.Context, attr *fuse.Attr) error {
+ return nil
+}
+
+func (dir Dir) Lookup(ctx context.Context, name string) (fs.Node, error) {
+ files_result, e := filer.ListFiles(*mountOptions.filer, dir.Path, name)
+ if e != nil {
+ return nil, fuse.ENOENT
+ }
+ if len(files_result.Files) > 0 {
+ return File{files_result.Files[0].Id, files_result.Files[0].Name}, nil
+ }
+ return nil, fmt.Errorf("File Not Found for %s", name)
+}
+
+type WFS struct{}
+
+func (WFS) Root() (fs.Node, error) {
+ return Dir{}, nil
+}
+
+func (dir *Dir) ReadDir(ctx context.Context) ([]fuse.Dirent, error) {
+ var ret []fuse.Dirent
+ if dirs, e := filer.ListDirectories(*mountOptions.filer, dir.Path); e == nil {
+ for _, d := range dirs.Directories {
+ dirId := uint64(d.Id)
+ ret = append(ret, fuse.Dirent{Inode: dirId, Name: d.Name, Type: fuse.DT_Dir})
+ }
+ }
+ if files, e := filer.ListFiles(*mountOptions.filer, dir.Path, ""); e == nil {
+ for _, f := range files.Files {
+ if fileId, e := storage.ParseFileId(string(f.Id)); e == nil {
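+ // synthesize an inode number: volume id in the high 16 bits, needle key in the low 48 bits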
+ fileInode := uint64(fileId.VolumeId)<<48 + fileId.Key
+ ret = append(ret, fuse.Dirent{Inode: fileInode, Name: f.Name, Type: fuse.DT_File})
+ }
+
+ }
+ }
+ return ret, nil
+}
diff --git a/weed/command/server.go b/weed/command/server.go
new file mode 100644
index 000000000..6ed1e5228
--- /dev/null
+++ b/weed/command/server.go
@@ -0,0 +1,291 @@
+package command
+
+import (
+ "net/http"
+ "os"
+ "runtime"
+ "runtime/pprof"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/server"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+ "github.com/chrislusf/seaweedfs/weed/util"
+ "github.com/gorilla/mux"
+)
+
+type ServerOptions struct {
+ cpuprofile *string
+}
+
+var (
+ serverOptions ServerOptions
+ filerOptions FilerOptions
+)
+
+func init() {
+ cmdServer.Run = runServer // break init cycle
+}
+
+var cmdServer = &Command{
+ UsageLine: "server -port=8080 -dir=/tmp -volume.max=5 -ip=server_name",
+ Short: "start a server, including volume server, and automatically elect a master server",
+ Long: `start both a volume server to provide storage spaces
+ and a master server to provide volume=>location mapping service and sequence number of file ids
+
+ This is provided as a convenient way to start both the volume server and the master server.
+ The servers behave exactly the same as when started separately.
+
+ Other volume servers can also use this embedded master server.
+
+ Optionally, one filer server can be started. Filer servers are not meant to run as a cluster:
+ each one keeps its own metadata on local disk, nothing is shared, so each filer server is independent.
+
+ `,
+}
+
+var (
+ serverIp = cmdServer.Flag.String("ip", "localhost", "ip or server name")
+ serverBindIp = cmdServer.Flag.String("ip.bind", "0.0.0.0", "ip address to bind to")
+ serverMaxCpu = cmdServer.Flag.Int("maxCpu", 0, "maximum number of CPUs. 0 means all available CPUs")
+ serverTimeout = cmdServer.Flag.Int("idleTimeout", 10, "connection idle seconds")
+ serverDataCenter = cmdServer.Flag.String("dataCenter", "", "current volume server's data center name")
+ serverRack = cmdServer.Flag.String("rack", "", "current volume server's rack name")
+ serverWhiteListOption = cmdServer.Flag.String("whiteList", "", "comma separated Ip addresses having write permission. No limit if empty.")
+ serverPeers = cmdServer.Flag.String("master.peers", "", "other master nodes in comma separated ip:masterPort list")
+ serverSecureKey = cmdServer.Flag.String("secure.secret", "", "secret to encrypt Json Web Token(JWT)")
+ serverGarbageThreshold = cmdServer.Flag.String("garbageThreshold", "0.3", "threshold to vacuum and reclaim spaces")
+ masterPort = cmdServer.Flag.Int("master.port", 9333, "master server http listen port")
+ masterMetaFolder = cmdServer.Flag.String("master.dir", "", "data directory to store meta data, default to same as -dir specified")
+ masterVolumeSizeLimitMB = cmdServer.Flag.Uint("master.volumeSizeLimitMB", 30*1000, "Master stops directing writes to oversized volumes.")
+ masterConfFile = cmdServer.Flag.String("master.conf", "/etc/weedfs/weedfs.conf", "xml configuration file")
+ masterDefaultReplicaPlacement = cmdServer.Flag.String("master.defaultReplicaPlacement", "000", "Default replication type if not specified.")
+ volumePort = cmdServer.Flag.Int("volume.port", 8080, "volume server http listen port")
+ volumePublicPort = cmdServer.Flag.Int("volume.port.public", 0, "volume server public port")
+ volumeDataFolders = cmdServer.Flag.String("dir", os.TempDir(), "directories to store data files. dir[,dir]...")
+ volumeMaxDataVolumeCounts = cmdServer.Flag.String("volume.max", "7", "maximum numbers of volumes, count[,count]...")
+ volumePulse = cmdServer.Flag.Int("pulseSeconds", 5, "number of seconds between heartbeats")
+ volumeIndexType = cmdServer.Flag.String("volume.index", "memory", "Choose [memory|leveldb|boltdb] mode for memory~performance balance.")
+ volumeFixJpgOrientation = cmdServer.Flag.Bool("volume.images.fix.orientation", true, "Adjust jpg orientation when uploading.")
+ volumeReadRedirect = cmdServer.Flag.Bool("volume.read.redirect", true, "Redirect moved or non-local volumes.")
+ volumeServerPublicUrl = cmdServer.Flag.String("volume.publicUrl", "", "publicly accessible address")
+ isStartingFiler = cmdServer.Flag.Bool("filer", false, "whether to start filer")
+
+ serverWhiteList []string
+)
+
+func init() {
+ serverOptions.cpuprofile = cmdServer.Flag.String("cpuprofile", "", "cpu profile output file")
+ filerOptions.master = cmdServer.Flag.String("filer.master", "", "default to current master server")
+ filerOptions.collection = cmdServer.Flag.String("filer.collection", "", "all data will be stored in this collection")
+ filerOptions.port = cmdServer.Flag.Int("filer.port", 8888, "filer server http listen port")
+ filerOptions.dir = cmdServer.Flag.String("filer.dir", "", "directory to store meta data, defaults to a 'filer' sub-directory of the -master.dir directory")
+ filerOptions.defaultReplicaPlacement = cmdServer.Flag.String("filer.defaultReplicaPlacement", "", "Default replication type if not specified during runtime.")
+ filerOptions.redirectOnRead = cmdServer.Flag.Bool("filer.redirectOnRead", false, "whether proxy or redirect to volume server during file GET request")
+ filerOptions.disableDirListing = cmdServer.Flag.Bool("filer.disableDirListing", false, "turn off directory listing")
+ filerOptions.cassandra_server = cmdServer.Flag.String("filer.cassandra.server", "", "host[:port] of the cassandra server")
+ filerOptions.cassandra_keyspace = cmdServer.Flag.String("filer.cassandra.keyspace", "seaweed", "keyspace of the cassandra server")
+ filerOptions.redis_server = cmdServer.Flag.String("filer.redis.server", "", "host:port of the redis server, e.g., 127.0.0.1:6379")
+ filerOptions.redis_password = cmdServer.Flag.String("filer.redis.password", "", "redis password in clear text")
+ filerOptions.redis_database = cmdServer.Flag.Int("filer.redis.database", 0, "the database on the redis server")
+}
+
+func runServer(cmd *Command, args []string) bool {
+ filerOptions.secretKey = serverSecureKey
+ if *serverOptions.cpuprofile != "" {
+ f, err := os.Create(*serverOptions.cpuprofile)
+ if err != nil {
+ glog.Fatal(err)
+ }
+ pprof.StartCPUProfile(f)
+ defer pprof.StopCPUProfile()
+ }
+
+ if *filerOptions.redirectOnRead {
+ *isStartingFiler = true
+ }
+
+ *filerOptions.master = *serverIp + ":" + strconv.Itoa(*masterPort)
+
+ if *filerOptions.defaultReplicaPlacement == "" {
+ *filerOptions.defaultReplicaPlacement = *masterDefaultReplicaPlacement
+ }
+
+ if *volumePublicPort == 0 {
+ *volumePublicPort = *volumePort
+ }
+
+ if *serverMaxCpu < 1 {
+ *serverMaxCpu = runtime.NumCPU()
+ }
+ runtime.GOMAXPROCS(*serverMaxCpu)
+
+ folders := strings.Split(*volumeDataFolders, ",")
+ maxCountStrings := strings.Split(*volumeMaxDataVolumeCounts, ",")
+ var maxCounts []int
+ for _, maxString := range maxCountStrings {
+ if max, e := strconv.Atoi(maxString); e == nil {
+ maxCounts = append(maxCounts, max)
+ } else {
+			glog.Fatalf("The max specified in -max is not a valid number: %s", maxString)
+ }
+ }
+ if len(folders) != len(maxCounts) {
+		glog.Fatalf("%d directories specified in -dir, but only %d max values specified in -max", len(folders), len(maxCounts))
+ }
+ for _, folder := range folders {
+ if err := util.TestFolderWritable(folder); err != nil {
+ glog.Fatalf("Check Data Folder(-dir) Writable %s : %s", folder, err)
+ }
+ }
+
+ if *masterMetaFolder == "" {
+ *masterMetaFolder = folders[0]
+ }
+ if *isStartingFiler {
+ if *filerOptions.dir == "" {
+ *filerOptions.dir = *masterMetaFolder + "/filer"
+ os.MkdirAll(*filerOptions.dir, 0700)
+ }
+ if err := util.TestFolderWritable(*filerOptions.dir); err != nil {
+ glog.Fatalf("Check Mapping Meta Folder (-filer.dir=\"%s\") Writable: %s", *filerOptions.dir, err)
+ }
+ }
+ if err := util.TestFolderWritable(*masterMetaFolder); err != nil {
+ glog.Fatalf("Check Meta Folder (-mdir=\"%s\") Writable: %s", *masterMetaFolder, err)
+ }
+
+ if *serverWhiteListOption != "" {
+ serverWhiteList = strings.Split(*serverWhiteListOption, ",")
+ }
+
+ if *isStartingFiler {
+ go func() {
+ r := http.NewServeMux()
+ _, nfs_err := weed_server.NewFilerServer(r, *serverBindIp, *filerOptions.port, *filerOptions.master, *filerOptions.dir, *filerOptions.collection,
+ *filerOptions.defaultReplicaPlacement,
+ *filerOptions.redirectOnRead, *filerOptions.disableDirListing,
+ *filerOptions.secretKey,
+ *filerOptions.cassandra_server, *filerOptions.cassandra_keyspace,
+ *filerOptions.redis_server, *filerOptions.redis_password, *filerOptions.redis_database,
+ )
+ if nfs_err != nil {
+ glog.Fatalf("Filer startup error: %v", nfs_err)
+ }
+ glog.V(0).Infoln("Start Seaweed Filer", util.VERSION, "at port", strconv.Itoa(*filerOptions.port))
+ filerListener, e := util.NewListener(
+ ":"+strconv.Itoa(*filerOptions.port),
+ time.Duration(10)*time.Second,
+ )
+ if e != nil {
+ glog.Fatalf("Filer listener error: %v", e)
+ }
+ if e := http.Serve(filerListener, r); e != nil {
+ glog.Fatalf("Filer Fail to serve: %v", e)
+ }
+ }()
+ }
+
+ var raftWaitForMaster sync.WaitGroup
+ var volumeWait sync.WaitGroup
+
+ raftWaitForMaster.Add(1)
+ volumeWait.Add(1)
+
+ go func() {
+ r := mux.NewRouter()
+ ms := weed_server.NewMasterServer(r, *masterPort, *masterMetaFolder,
+ *masterVolumeSizeLimitMB, *volumePulse, *masterConfFile, *masterDefaultReplicaPlacement, *serverGarbageThreshold,
+ serverWhiteList, *serverSecureKey,
+ )
+
+ glog.V(0).Infoln("Start Seaweed Master", util.VERSION, "at", *serverIp+":"+strconv.Itoa(*masterPort))
+ masterListener, e := util.NewListener(*serverBindIp+":"+strconv.Itoa(*masterPort), time.Duration(*serverTimeout)*time.Second)
+ if e != nil {
+ glog.Fatalf("Master startup error: %v", e)
+ }
+
+ go func() {
+ raftWaitForMaster.Wait()
+ time.Sleep(100 * time.Millisecond)
+ myAddress := *serverIp + ":" + strconv.Itoa(*masterPort)
+ var peers []string
+ if *serverPeers != "" {
+ peers = strings.Split(*serverPeers, ",")
+ }
+ raftServer := weed_server.NewRaftServer(r, peers, myAddress, *masterMetaFolder, ms.Topo, *volumePulse)
+ ms.SetRaftServer(raftServer)
+ volumeWait.Done()
+ }()
+
+ raftWaitForMaster.Done()
+ if e := http.Serve(masterListener, r); e != nil {
+ glog.Fatalf("Master Fail to serve:%s", e.Error())
+ }
+ }()
+
+ volumeWait.Wait()
+ time.Sleep(100 * time.Millisecond)
+ if *volumePublicPort == 0 {
+ *volumePublicPort = *volumePort
+ }
+ if *volumeServerPublicUrl == "" {
+ *volumeServerPublicUrl = *serverIp + ":" + strconv.Itoa(*volumePublicPort)
+ }
+ isSeperatedPublicPort := *volumePublicPort != *volumePort
+ volumeMux := http.NewServeMux()
+ publicVolumeMux := volumeMux
+ if isSeperatedPublicPort {
+ publicVolumeMux = http.NewServeMux()
+ }
+ volumeNeedleMapKind := storage.NeedleMapInMemory
+ switch *volumeIndexType {
+ case "leveldb":
+ volumeNeedleMapKind = storage.NeedleMapLevelDb
+ case "boltdb":
+ volumeNeedleMapKind = storage.NeedleMapBoltDb
+ }
+ volumeServer := weed_server.NewVolumeServer(volumeMux, publicVolumeMux,
+ *serverIp, *volumePort, *volumeServerPublicUrl,
+ folders, maxCounts,
+ volumeNeedleMapKind,
+ *serverIp+":"+strconv.Itoa(*masterPort), *volumePulse, *serverDataCenter, *serverRack,
+ serverWhiteList, *volumeFixJpgOrientation, *volumeReadRedirect,
+ )
+
+ glog.V(0).Infoln("Start Seaweed volume server", util.VERSION, "at", *serverIp+":"+strconv.Itoa(*volumePort))
+ volumeListener, eListen := util.NewListener(
+ *serverBindIp+":"+strconv.Itoa(*volumePort),
+ time.Duration(*serverTimeout)*time.Second,
+ )
+ if eListen != nil {
+ glog.Fatalf("Volume server listener error: %v", eListen)
+ }
+ if isSeperatedPublicPort {
+ publicListeningAddress := *serverIp + ":" + strconv.Itoa(*volumePublicPort)
+ glog.V(0).Infoln("Start Seaweed volume server", util.VERSION, "public at", publicListeningAddress)
+ publicListener, e := util.NewListener(publicListeningAddress, time.Duration(*serverTimeout)*time.Second)
+ if e != nil {
+ glog.Fatalf("Volume server listener error:%v", e)
+ }
+ go func() {
+ if e := http.Serve(publicListener, publicVolumeMux); e != nil {
+ glog.Fatalf("Volume server fail to serve public: %v", e)
+ }
+ }()
+ }
+
+ OnInterrupt(func() {
+ volumeServer.Shutdown()
+ pprof.StopCPUProfile()
+ })
+
+ if e := http.Serve(volumeListener, volumeMux); e != nil {
+ glog.Fatalf("Volume server fail to serve:%v", e)
+ }
+
+ return true
+}
diff --git a/weed/command/shell.go b/weed/command/shell.go
new file mode 100644
index 000000000..19c5049c5
--- /dev/null
+++ b/weed/command/shell.go
@@ -0,0 +1,61 @@
+package command
+
+import (
+ "bufio"
+ "fmt"
+ "os"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+func init() {
+ cmdShell.Run = runShell // break init cycle
+}
+
+var cmdShell = &Command{
+ UsageLine: "shell",
+ Short: "run interactive commands, now just echo",
+ Long: `run interactive commands.
+
+ `,
+}
+
+func runShell(command *Command, args []string) bool {
+ r := bufio.NewReader(os.Stdin)
+ o := bufio.NewWriter(os.Stdout)
+ e := bufio.NewWriter(os.Stderr)
+ prompt := func() {
+ var err error
+ if _, err = o.WriteString("> "); err != nil {
+ glog.V(0).Infoln("error writing to stdout:", err)
+ }
+ if err = o.Flush(); err != nil {
+ glog.V(0).Infoln("error flushing stdout:", err)
+ }
+ }
+ readLine := func() string {
+ ret, err := r.ReadString('\n')
+ if err != nil {
+ fmt.Fprint(e, err)
+ os.Exit(1)
+ }
+ return ret
+ }
+ execCmd := func(cmd string) int {
+ if cmd != "" {
+ if _, err := o.WriteString(cmd); err != nil {
+ glog.V(0).Infoln("error writing to stdout:", err)
+ }
+ }
+ return 0
+ }
+
+ cmd := ""
+ for {
+ prompt()
+ cmd = readLine()
+ execCmd(cmd)
+ }
+}
diff --git a/weed/command/signal_handling.go b/weed/command/signal_handling.go
new file mode 100644
index 000000000..182e2754d
--- /dev/null
+++ b/weed/command/signal_handling.go
@@ -0,0 +1,31 @@
+// +build !plan9
+
+package command
+
+import (
+ "os"
+ "os/signal"
+ "syscall"
+)
+
+func OnInterrupt(fn func()) {
+	// deal with control+c, etc.
+	signalChan := make(chan os.Signal, 1)
+	// ignore SIGHUP so that closing the controlling terminal does not stop the daemon
+	signal.Ignore(syscall.SIGHUP)
+ signal.Notify(signalChan,
+ os.Interrupt,
+ os.Kill,
+ syscall.SIGALRM,
+ // syscall.SIGHUP,
+ syscall.SIGINT,
+ syscall.SIGTERM,
+ // syscall.SIGQUIT,
+ )
+ go func() {
+		for range signalChan {
+ fn()
+ os.Exit(0)
+ }
+ }()
+}
diff --git a/weed/command/signal_handling_notsupported.go b/weed/command/signal_handling_notsupported.go
new file mode 100644
index 000000000..dfcc24a3e
--- /dev/null
+++ b/weed/command/signal_handling_notsupported.go
@@ -0,0 +1,6 @@
+// +build plan9
+
+package command
+
+func OnInterrupt(fn func()) {
+}
diff --git a/weed/command/upload.go b/weed/command/upload.go
new file mode 100644
index 000000000..0dfa115bb
--- /dev/null
+++ b/weed/command/upload.go
@@ -0,0 +1,108 @@
+package command
+
+import (
+ "encoding/json"
+ "fmt"
+ "os"
+ "path/filepath"
+
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "github.com/chrislusf/seaweedfs/weed/security"
+)
+
+var (
+ upload UploadOptions
+)
+
+type UploadOptions struct {
+ server *string
+ dir *string
+ include *string
+ replication *string
+ collection *string
+ ttl *string
+ maxMB *int
+ secretKey *string
+}
+
+func init() {
+ cmdUpload.Run = runUpload // break init cycle
+ cmdUpload.IsDebug = cmdUpload.Flag.Bool("debug", false, "verbose debug information")
+ upload.server = cmdUpload.Flag.String("server", "localhost:9333", "SeaweedFS master location")
+ upload.dir = cmdUpload.Flag.String("dir", "", "Upload the whole folder recursively if specified.")
+	upload.include = cmdUpload.Flag.String("include", "", "patterns of files to upload, e.g., *.pdf, *.html, ab?d.txt, works together with -dir")
+ upload.replication = cmdUpload.Flag.String("replication", "", "replication type")
+ upload.collection = cmdUpload.Flag.String("collection", "", "optional collection name")
+ upload.ttl = cmdUpload.Flag.String("ttl", "", "time to live, e.g.: 1m, 1h, 1d, 1M, 1y")
+ upload.maxMB = cmdUpload.Flag.Int("maxMB", 0, "split files larger than the limit")
+ upload.secretKey = cmdUpload.Flag.String("secure.secret", "", "secret to encrypt Json Web Token(JWT)")
+}
+
+var cmdUpload = &Command{
+ UsageLine: "upload -server=localhost:9333 file1 [file2 file3]\n weed upload -server=localhost:9333 -dir=one_directory -include=*.pdf",
+ Short: "upload one or a list of files",
+ Long: `upload one or a list of files, or batch upload one whole folder recursively.
+
+ If uploading a list of files:
+  It uses consecutive file keys for the list of files.
+  e.g., if file1 uses key k, file2 can be read via k_1
+
+ If uploading a whole folder recursively:
+ All files under the folder and subfolders will be uploaded, each with its own file key.
+ Optional parameter "-include" allows you to specify the file name patterns.
+
+  If any file has a ".gz" extension, the content is considered already gzipped and will be stored as is.
+  This saves the volume server's gzip processing and allows a customizable gzip compression level.
+  The ".gz" suffix is stripped from the stored file name. For example, "jquery.js.gz" will be stored as "jquery.js".
+
+  If "maxMB" is set to a positive number, files larger than it will be split into chunks and uploaded separately.
+  The list of file ids of those chunks is stored in an additional chunk, and this additional chunk's file id is returned.
+
+ `,
+}
+
+func runUpload(cmd *Command, args []string) bool {
+ secret := security.Secret(*upload.secretKey)
+ if len(cmdUpload.Flag.Args()) == 0 {
+ if *upload.dir == "" {
+ return false
+ }
+ filepath.Walk(*upload.dir, func(path string, info os.FileInfo, err error) error {
+ if err == nil {
+ if !info.IsDir() {
+ if *upload.include != "" {
+ if ok, _ := filepath.Match(*upload.include, filepath.Base(path)); !ok {
+ return nil
+ }
+ }
+ parts, e := operation.NewFileParts([]string{path})
+ if e != nil {
+ return e
+ }
+ results, e := operation.SubmitFiles(*upload.server, parts,
+ *upload.replication, *upload.collection,
+ *upload.ttl, *upload.maxMB, secret)
+ bytes, _ := json.Marshal(results)
+ fmt.Println(string(bytes))
+ if e != nil {
+ return e
+ }
+ }
+ } else {
+ fmt.Println(err)
+ }
+ return err
+ })
+ } else {
+ parts, e := operation.NewFileParts(args)
+		if e != nil {
+			fmt.Println(e.Error())
+			return false
+		}
+ results, _ := operation.SubmitFiles(*upload.server, parts,
+ *upload.replication, *upload.collection,
+ *upload.ttl, *upload.maxMB, secret)
+ bytes, _ := json.Marshal(results)
+ fmt.Println(string(bytes))
+ }
+ return true
+}
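The upload behavior described in the command help above can also be driven programmatically; below is a minimal sketch mirroring the calls made in runUpload. The master address and file names are placeholders, and the empty replication/collection/ttl arguments are assumed to fall back to the master's defaults.

package main

import (
	"encoding/json"
	"fmt"

	"github.com/chrislusf/seaweedfs/weed/operation"
	"github.com/chrislusf/seaweedfs/weed/security"
)

func main() {
	// Placeholder master address and local files to upload.
	master := "localhost:9333"
	files := []string{"one.txt", "two.pdf"}

	// Wrap the local paths, the same way runUpload does.
	parts, err := operation.NewFileParts(files)
	if err != nil {
		fmt.Println("prepare parts:", err)
		return
	}

	// Empty replication/collection/ttl are assumed to use the master defaults;
	// maxMB=0 disables chunking and an empty secret disables JWT signing.
	results, err := operation.SubmitFiles(master, parts, "", "", "", 0, security.Secret(""))
	if err != nil {
		fmt.Println("submit:", err)
		return
	}
	b, _ := json.Marshal(results)
	fmt.Println(string(b))
}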
diff --git a/weed/command/version.go b/weed/command/version.go
new file mode 100644
index 000000000..8fdd68ec8
--- /dev/null
+++ b/weed/command/version.go
@@ -0,0 +1,24 @@
+package command
+
+import (
+ "fmt"
+ "runtime"
+
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+var cmdVersion = &Command{
+ Run: runVersion,
+ UsageLine: "version",
+ Short: "print SeaweedFS version",
+ Long: `Version prints the SeaweedFS version`,
+}
+
+func runVersion(cmd *Command, args []string) bool {
+ if len(args) != 0 {
+ cmd.Usage()
+ }
+
+ fmt.Printf("version %s %s %s\n", util.VERSION, runtime.GOOS, runtime.GOARCH)
+ return true
+}
diff --git a/weed/command/volume.go b/weed/command/volume.go
new file mode 100644
index 000000000..21369cbe9
--- /dev/null
+++ b/weed/command/volume.go
@@ -0,0 +1,165 @@
+package command
+
+import (
+ "net/http"
+ "os"
+ "runtime"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/server"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+var (
+ v VolumeServerOptions
+)
+
+type VolumeServerOptions struct {
+ port *int
+ publicPort *int
+ folders []string
+ folderMaxLimits []int
+ ip *string
+ publicUrl *string
+ bindIp *string
+ master *string
+ pulseSeconds *int
+ idleConnectionTimeout *int
+ maxCpu *int
+ dataCenter *string
+ rack *string
+ whiteList []string
+ indexType *string
+ fixJpgOrientation *bool
+ readRedirect *bool
+}
+
+func init() {
+ cmdVolume.Run = runVolume // break init cycle
+ v.port = cmdVolume.Flag.Int("port", 8080, "http listen port")
+ v.publicPort = cmdVolume.Flag.Int("port.public", 0, "port opened to public")
+ v.ip = cmdVolume.Flag.String("ip", "", "ip or server name")
+ v.publicUrl = cmdVolume.Flag.String("publicUrl", "", "Publicly accessible address")
+ v.bindIp = cmdVolume.Flag.String("ip.bind", "0.0.0.0", "ip address to bind to")
+ v.master = cmdVolume.Flag.String("mserver", "localhost:9333", "master server location")
+ v.pulseSeconds = cmdVolume.Flag.Int("pulseSeconds", 5, "number of seconds between heartbeats, must be smaller than or equal to the master's setting")
+ v.idleConnectionTimeout = cmdVolume.Flag.Int("idleTimeout", 10, "connection idle seconds")
+ v.maxCpu = cmdVolume.Flag.Int("maxCpu", 0, "maximum number of CPUs. 0 means all available CPUs")
+ v.dataCenter = cmdVolume.Flag.String("dataCenter", "", "current volume server's data center name")
+ v.rack = cmdVolume.Flag.String("rack", "", "current volume server's rack name")
+ v.indexType = cmdVolume.Flag.String("index", "memory", "Choose [memory|leveldb|boltdb] mode for memory~performance balance.")
+ v.fixJpgOrientation = cmdVolume.Flag.Bool("images.fix.orientation", true, "Adjust jpg orientation when uploading.")
+ v.readRedirect = cmdVolume.Flag.Bool("read.redirect", true, "Redirect moved or non-local volumes.")
+}
+
+var cmdVolume = &Command{
+ UsageLine: "volume -port=8080 -dir=/tmp -max=5 -ip=server_name -mserver=localhost:9333",
+ Short: "start a volume server",
+ Long: `start a volume server to provide storage spaces
+
+ `,
+}
+
+var (
+ volumeFolders = cmdVolume.Flag.String("dir", os.TempDir(), "directories to store data files. dir[,dir]...")
+ maxVolumeCounts = cmdVolume.Flag.String("max", "7", "maximum numbers of volumes, count[,count]...")
+ volumeWhiteListOption = cmdVolume.Flag.String("whiteList", "", "comma separated Ip addresses having write permission. No limit if empty.")
+)
+
+func runVolume(cmd *Command, args []string) bool {
+ if *v.maxCpu < 1 {
+ *v.maxCpu = runtime.NumCPU()
+ }
+ runtime.GOMAXPROCS(*v.maxCpu)
+
+	//Set multiple folders, and each folder's max volume count limit
+ v.folders = strings.Split(*volumeFolders, ",")
+ maxCountStrings := strings.Split(*maxVolumeCounts, ",")
+ for _, maxString := range maxCountStrings {
+ if max, e := strconv.Atoi(maxString); e == nil {
+ v.folderMaxLimits = append(v.folderMaxLimits, max)
+ } else {
+			glog.Fatalf("The max specified in -max is not a valid number: %s", maxString)
+ }
+ }
+ if len(v.folders) != len(v.folderMaxLimits) {
+		glog.Fatalf("%d directories specified in -dir, but only %d max values specified in -max", len(v.folders), len(v.folderMaxLimits))
+ }
+ for _, folder := range v.folders {
+ if err := util.TestFolderWritable(folder); err != nil {
+ glog.Fatalf("Check Data Folder(-dir) Writable %s : %s", folder, err)
+ }
+ }
+
+ //security related white list configuration
+ if *volumeWhiteListOption != "" {
+ v.whiteList = strings.Split(*volumeWhiteListOption, ",")
+ }
+
+ if *v.ip == "" {
+ *v.ip = "127.0.0.1"
+ }
+
+ if *v.publicPort == 0 {
+ *v.publicPort = *v.port
+ }
+ if *v.publicUrl == "" {
+ *v.publicUrl = *v.ip + ":" + strconv.Itoa(*v.publicPort)
+ }
+ isSeperatedPublicPort := *v.publicPort != *v.port
+
+ volumeMux := http.NewServeMux()
+ publicVolumeMux := volumeMux
+ if isSeperatedPublicPort {
+ publicVolumeMux = http.NewServeMux()
+ }
+
+ volumeNeedleMapKind := storage.NeedleMapInMemory
+ switch *v.indexType {
+ case "leveldb":
+ volumeNeedleMapKind = storage.NeedleMapLevelDb
+ case "boltdb":
+ volumeNeedleMapKind = storage.NeedleMapBoltDb
+ }
+ volumeServer := weed_server.NewVolumeServer(volumeMux, publicVolumeMux,
+ *v.ip, *v.port, *v.publicUrl,
+ v.folders, v.folderMaxLimits,
+ volumeNeedleMapKind,
+ *v.master, *v.pulseSeconds, *v.dataCenter, *v.rack,
+ v.whiteList,
+ *v.fixJpgOrientation, *v.readRedirect,
+ )
+
+ listeningAddress := *v.bindIp + ":" + strconv.Itoa(*v.port)
+ glog.V(0).Infoln("Start Seaweed volume server", util.VERSION, "at", listeningAddress)
+ listener, e := util.NewListener(listeningAddress, time.Duration(*v.idleConnectionTimeout)*time.Second)
+ if e != nil {
+ glog.Fatalf("Volume server listener error:%v", e)
+ }
+ if isSeperatedPublicPort {
+ publicListeningAddress := *v.bindIp + ":" + strconv.Itoa(*v.publicPort)
+ glog.V(0).Infoln("Start Seaweed volume server", util.VERSION, "public at", publicListeningAddress)
+ publicListener, e := util.NewListener(publicListeningAddress, time.Duration(*v.idleConnectionTimeout)*time.Second)
+ if e != nil {
+ glog.Fatalf("Volume server listener error:%v", e)
+ }
+ go func() {
+ if e := http.Serve(publicListener, publicVolumeMux); e != nil {
+ glog.Fatalf("Volume server fail to serve public: %v", e)
+ }
+ }()
+ }
+
+ OnInterrupt(func() {
+ volumeServer.Shutdown()
+ })
+
+ if e := http.Serve(listener, volumeMux); e != nil {
+ glog.Fatalf("Volume server fail to serve: %v", e)
+ }
+ return true
+}
diff --git a/weed/command/volume_test.go b/weed/command/volume_test.go
new file mode 100644
index 000000000..7399f1248
--- /dev/null
+++ b/weed/command/volume_test.go
@@ -0,0 +1,13 @@
+package command
+
+import (
+ "net/http"
+ "testing"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+func TestXYZ(t *testing.T) {
+ glog.V(0).Infoln("Last-Modified", time.Unix(int64(1373273596), 0).UTC().Format(http.TimeFormat))
+}
diff --git a/weed/compress/compression_test.go b/weed/compress/compression_test.go
new file mode 100644
index 000000000..83b7c0055
--- /dev/null
+++ b/weed/compress/compression_test.go
@@ -0,0 +1,45 @@
+package compress
+
+import (
+ "math/rand"
+ "testing"
+)
+
+func TestSortedData(t *testing.T) {
+ data := make([]int32, 102400)
+ for i := 1; i < len(data); i++ {
+ data[i] = data[i-1] + rand.Int31n(15)
+ }
+ testCompressAndUncompress(t, data, "Sorted data")
+}
+
+func TestUnsortedData(t *testing.T) {
+ data := make([]int32, 102400)
+ for i := 0; i < len(data); i++ {
+ data[i] = rand.Int31n(255)
+ }
+ testCompressAndUncompress(t, data, "Unsorted data")
+}
+
+func testCompressAndUncompress(t *testing.T, data []int32, desc string) {
+
+ compressed_data, err := Compress32(data)
+ if err != nil {
+ t.Fatal("Compress error", err.Error())
+ }
+ uncompressed_data, err := Uncompress32(compressed_data, make([]int32, len(data)*2))
+ if err != nil {
+		t.Fatal("Uncompress error", err.Error())
+ }
+ if len(uncompressed_data) != len(data) {
+ t.Fatal("Len differs", len(data), len(uncompressed_data))
+ }
+ for i := 0; i < len(data); i++ {
+ if data[i] != uncompressed_data[i] {
+ t.Fatal("Data differs:", i, data[i], uncompressed_data[i])
+ }
+ }
+
+ println(desc, " Data length:", len(data), " => Compressed length:", len(compressed_data))
+
+}
diff --git a/weed/compress/delta_binary_pack32.go b/weed/compress/delta_binary_pack32.go
new file mode 100644
index 000000000..42ae8d42d
--- /dev/null
+++ b/weed/compress/delta_binary_pack32.go
@@ -0,0 +1,32 @@
+package compress
+
+import (
+ "github.com/reducedb/encoding/cursor"
+ "github.com/reducedb/encoding/delta/bp32"
+)
+
+// Compress32 compresses in []int32 to out []int32
+func Compress32(in []int32) (out []int32, err error) {
+ out = make([]int32, len(in)*2)
+ inpos := cursor.New()
+ outpos := cursor.New()
+
+ if err = bp32.New().Compress(in, inpos, len(in), out, outpos); err != nil {
+ return nil, err
+ }
+
+ return out[:outpos.Get()], nil
+}
+
+// Uncompress32 uncompresses in []int32 into buffer and returns the result as out []int32
+func Uncompress32(in []int32, buffer []int32) (out []int32, err error) {
+ out = buffer
+ inpos := cursor.New()
+ outpos := cursor.New()
+
+ if err = bp32.New().Uncompress(in, inpos, len(in), out, outpos); err != nil {
+ return nil, err
+ }
+
+ return out[:outpos.Get()], nil
+}
diff --git a/weed/filer/cassandra_store/cassandra_store.go b/weed/filer/cassandra_store/cassandra_store.go
new file mode 100644
index 000000000..4ee2f65be
--- /dev/null
+++ b/weed/filer/cassandra_store/cassandra_store.go
@@ -0,0 +1,87 @@
+package cassandra_store
+
+import (
+ "fmt"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+
+ "github.com/gocql/gocql"
+)
+
+/*
+
+Basically you need a table just like this:
+
+CREATE TABLE seaweed_files (
+ path varchar,
+ fids list<varchar>,
+ PRIMARY KEY (path)
+);
+
+Need to match flat_namespace.FlatNamespaceStore interface
+ Put(fullFileName string, fid string) (err error)
+ Get(fullFileName string) (fid string, err error)
+ Delete(fullFileName string) (fid string, err error)
+
+*/
+type CassandraStore struct {
+ cluster *gocql.ClusterConfig
+ session *gocql.Session
+}
+
+func NewCassandraStore(keyspace string, hosts ...string) (c *CassandraStore, err error) {
+ c = &CassandraStore{}
+ c.cluster = gocql.NewCluster(hosts...)
+ c.cluster.Keyspace = keyspace
+ c.cluster.Consistency = gocql.Quorum
+ c.session, err = c.cluster.CreateSession()
+ if err != nil {
+ glog.V(0).Infof("Failed to open cassandra store, hosts %v, keyspace %s", hosts, keyspace)
+ }
+ return
+}
+
+func (c *CassandraStore) Put(fullFileName string, fid string) (err error) {
+ var input []string
+ input = append(input, fid)
+ if err := c.session.Query(
+ `INSERT INTO seaweed_files (path, fids) VALUES (?, ?)`,
+ fullFileName, input).Exec(); err != nil {
+ glog.V(0).Infof("Failed to save file %s with id %s: %v", fullFileName, fid, err)
+ return err
+ }
+ return nil
+}
+func (c *CassandraStore) Get(fullFileName string) (fid string, err error) {
+ var output []string
+ if err := c.session.Query(
+ `select fids FROM seaweed_files WHERE path = ? LIMIT 1`,
+ fullFileName).Consistency(gocql.One).Scan(&output); err != nil {
+ if err != gocql.ErrNotFound {
+			glog.V(0).Infof("Failed to find file %s: %v", fullFileName, err)
+ }
+ }
+ if len(output) == 0 {
+ return "", fmt.Errorf("No file id found for %s", fullFileName)
+ }
+ return output[0], nil
+}
+
+// Currently the fid is not returned
+func (c *CassandraStore) Delete(fullFileName string) (fid string, err error) {
+ if err := c.session.Query(
+ `DELETE FROM seaweed_files WHERE path = ?`,
+ fullFileName).Exec(); err != nil {
+ if err != gocql.ErrNotFound {
+ glog.V(0).Infof("Failed to delete file %s: %v", fullFileName, err)
+ }
+ return "", err
+ }
+ return "", nil
+}
+
+func (c *CassandraStore) Close() {
+ if c.session != nil {
+ c.session.Close()
+ }
+}
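A minimal usage sketch for the store above, assuming the seaweed_files table from schema.cql already exists and Cassandra is reachable at the placeholder host; the path and file id are example values only.

package main

import (
	"fmt"

	"github.com/chrislusf/seaweedfs/weed/filer/cassandra_store"
)

func main() {
	// Placeholder keyspace and host; adjust to the actual cluster.
	store, err := cassandra_store.NewCassandraStore("seaweed", "127.0.0.1")
	if err != nil {
		fmt.Println("connect:", err)
		return
	}
	defer store.Close()

	// Map a full file name to a file id, then read the mapping back.
	if err := store.Put("/images/logo.png", "3,01637037d6"); err != nil {
		fmt.Println("put:", err)
		return
	}
	fid, err := store.Get("/images/logo.png")
	if err != nil {
		fmt.Println("get:", err)
		return
	}
	fmt.Println("fid:", fid)
}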
diff --git a/weed/filer/cassandra_store/schema.cql b/weed/filer/cassandra_store/schema.cql
new file mode 100644
index 000000000..d6f2bb093
--- /dev/null
+++ b/weed/filer/cassandra_store/schema.cql
@@ -0,0 +1,22 @@
+/*
+
+Here is the CQL to create the table used by CassandraStore.
+
+Optionally you can adjust the keyspace name and replication settings.
+
+For a production server, you will very likely want to set replication_factor to 3.
+
+*/
+
+create keyspace seaweed WITH replication = {
+ 'class':'SimpleStrategy',
+ 'replication_factor':1
+};
+
+use seaweed;
+
+CREATE TABLE seaweed_files (
+ path varchar,
+ fids list<varchar>,
+ PRIMARY KEY (path)
+);
diff --git a/weed/filer/client_operations.go b/weed/filer/client_operations.go
new file mode 100644
index 000000000..13e4854a4
--- /dev/null
+++ b/weed/filer/client_operations.go
@@ -0,0 +1,70 @@
+package filer
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+
+ "github.com/chrislusf/seaweedfs/weed/util"
+
+ "net/url"
+)
+
+type ApiRequest struct {
+ Command string //"listFiles", "listDirectories"
+ Directory string
+ FileName string
+}
+
+type ListFilesResult struct {
+ Files []FileEntry
+ Error string `json:"error,omitempty"`
+}
+
+func ListFiles(server string, directory string, fileName string) (*ListFilesResult, error) {
+ var ret ListFilesResult
+ if err := call(server, ApiRequest{Command: "listFiles", Directory: directory, FileName: fileName}, &ret); err == nil {
+ if ret.Error != "" {
+ return nil, errors.New(ret.Error)
+ }
+ return &ret, nil
+ } else {
+ return nil, err
+ }
+}
+
+type ListDirectoriesResult struct {
+ Directories []DirectoryEntry
+ Error string `json:"error,omitempty"`
+}
+
+func ListDirectories(server string, directory string) (*ListDirectoriesResult, error) {
+ var ret ListDirectoriesResult
+ if err := call(server, ApiRequest{Command: "listDirectories", Directory: directory}, &ret); err == nil {
+ if ret.Error != "" {
+ return nil, errors.New(ret.Error)
+ }
+ return &ret, nil
+ } else {
+ return nil, err
+ }
+}
+
+func call(server string, request ApiRequest, ret interface{}) error {
+ b, err := json.Marshal(request)
+ if err != nil {
+		fmt.Println("error:", err)
+		return err
+ }
+ values := make(url.Values)
+ values.Add("request", string(b))
+ jsonBlob, err := util.Post("http://"+server+"/__api__", values)
+ if err != nil {
+ return err
+ }
+ err = json.Unmarshal(jsonBlob, ret)
+ if err != nil {
+ return err
+ }
+ return nil
+}
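A short sketch of calling these client operations against a running filer; the filer address and directory paths are placeholders, and the empty FileName argument is assumed to list from the start.

package main

import (
	"fmt"

	"github.com/chrislusf/seaweedfs/weed/filer"
)

func main() {
	// Placeholder filer address.
	server := "localhost:8888"

	// List the sub directories of /sources.
	if dirs, err := filer.ListDirectories(server, "/sources"); err == nil {
		for _, d := range dirs.Directories {
			fmt.Println("dir:", d.Name, "id:", d.Id)
		}
	} else {
		fmt.Println("list directories:", err)
	}

	// List the files under /sources/weed/go.
	if files, err := filer.ListFiles(server, "/sources/weed/go", ""); err == nil {
		for _, f := range files.Files {
			fmt.Println("file:", f.Name, "fid:", f.Id)
		}
	} else {
		fmt.Println("list files:", err)
	}
}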
diff --git a/weed/filer/embedded_filer/design.txt b/weed/filer/embedded_filer/design.txt
new file mode 100644
index 000000000..45fec8fbe
--- /dev/null
+++ b/weed/filer/embedded_filer/design.txt
@@ -0,0 +1,26 @@
+Design Assumptions:
+1. the number of directories is orders of magnitude smaller than the number of files
+2. unlimited number of files under any directory
+Philosophy:
+  metadata for directories and files should be separated
+Design:
+  Store directories in a normal map
+    hopefully all directories can fit in memory
+    efficient to move/rename/list_directories
+  Log directory changes to an append-only log file
+  Store files in a sorted string table in <dir_id/filename> format
+    efficient to list_files, just a simple iterator
+    efficient to locate files, binary search
+
+Testing:
+1. starting server, "weed server -filer=true"
+2. posting files to different folders
+curl -F "filename=@design.txt" "http://localhost:8888/sources/"
+curl -F "filename=@design.txt" "http://localhost:8888/design/"
+curl -F "filename=@directory.go" "http://localhost:8888/sources/weed/go/"
+curl -F "filename=@directory.go" "http://localhost:8888/sources/testing/go/"
+curl -F "filename=@filer.go" "http://localhost:8888/sources/weed/go/"
+curl -F "filename=@filer_in_leveldb.go" "http://localhost:8888/sources/weed/go/"
+curl "http://localhost:8888/?pretty=y"
+curl "http://localhost:8888/sources/weed/go/?pretty=y"
+curl "http://localhost:8888/sources/weed/go/?pretty=y"
diff --git a/weed/filer/embedded_filer/directory.go b/weed/filer/embedded_filer/directory.go
new file mode 100644
index 000000000..4d4bd1c59
--- /dev/null
+++ b/weed/filer/embedded_filer/directory.go
@@ -0,0 +1,15 @@
+package embedded_filer
+
+import (
+ "github.com/chrislusf/seaweedfs/weed/filer"
+)
+
+type DirectoryManager interface {
+ FindDirectory(dirPath string) (filer.DirectoryId, error)
+ ListDirectories(dirPath string) (dirs []filer.DirectoryEntry, err error)
+ MakeDirectory(currentDirPath string, dirName string) (filer.DirectoryId, error)
+ MoveUnderDirectory(oldDirPath string, newParentDirPath string) error
+ DeleteDirectory(dirPath string) error
+ //functions used by FUSE
+ FindDirectoryById(filer.DirectoryId, error)
+}
diff --git a/weed/filer/embedded_filer/directory_in_map.go b/weed/filer/embedded_filer/directory_in_map.go
new file mode 100644
index 000000000..5100f3531
--- /dev/null
+++ b/weed/filer/embedded_filer/directory_in_map.go
@@ -0,0 +1,310 @@
+package embedded_filer
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "sync"
+
+ "github.com/chrislusf/seaweedfs/weed/filer"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+var writeLock sync.Mutex //serialize changes to dir.log
+
+type DirectoryEntryInMap struct {
+ sync.Mutex
+ Name string
+ Parent *DirectoryEntryInMap
+ subDirectories map[string]*DirectoryEntryInMap
+ Id filer.DirectoryId
+}
+
+func (de *DirectoryEntryInMap) getChild(dirName string) (*DirectoryEntryInMap, bool) {
+ de.Lock()
+ defer de.Unlock()
+ child, ok := de.subDirectories[dirName]
+ return child, ok
+}
+func (de *DirectoryEntryInMap) addChild(dirName string, child *DirectoryEntryInMap) {
+ de.Lock()
+ defer de.Unlock()
+ de.subDirectories[dirName] = child
+}
+func (de *DirectoryEntryInMap) removeChild(dirName string) {
+ de.Lock()
+ defer de.Unlock()
+ delete(de.subDirectories, dirName)
+}
+func (de *DirectoryEntryInMap) hasChildren() bool {
+ de.Lock()
+ defer de.Unlock()
+ return len(de.subDirectories) > 0
+}
+func (de *DirectoryEntryInMap) children() (dirNames []filer.DirectoryEntry) {
+ de.Lock()
+ defer de.Unlock()
+ for k, v := range de.subDirectories {
+ dirNames = append(dirNames, filer.DirectoryEntry{Name: k, Id: v.Id})
+ }
+ return dirNames
+}
+
+type DirectoryManagerInMap struct {
+ Root *DirectoryEntryInMap
+ max filer.DirectoryId
+ logFile *os.File
+ isLoading bool
+}
+
+func (dm *DirectoryManagerInMap) newDirectoryEntryInMap(parent *DirectoryEntryInMap, name string) (d *DirectoryEntryInMap, err error) {
+ d = &DirectoryEntryInMap{Name: name, Parent: parent, subDirectories: make(map[string]*DirectoryEntryInMap)}
+ var parts []string
+ for p := d; p != nil && p.Name != ""; p = p.Parent {
+ parts = append(parts, p.Name)
+ }
+ n := len(parts)
+ if n <= 0 {
+ return nil, fmt.Errorf("Failed to create folder %s/%s", parent.Name, name)
+ }
+ for i := 0; i < n/2; i++ {
+ parts[i], parts[n-1-i] = parts[n-1-i], parts[i]
+ }
+ dm.max++
+ d.Id = dm.max
+ dm.log("add", "/"+strings.Join(parts, "/"), strconv.Itoa(int(d.Id)))
+ return d, nil
+}
+
+func (dm *DirectoryManagerInMap) log(words ...string) {
+ if !dm.isLoading {
+ dm.logFile.WriteString(strings.Join(words, "\t") + "\n")
+ }
+}
+
+func NewDirectoryManagerInMap(dirLogFile string) (dm *DirectoryManagerInMap, err error) {
+ dm = &DirectoryManagerInMap{}
+	//dm.Root does not use newDirectoryEntryInMap, since dm.max would be changed
+ dm.Root = &DirectoryEntryInMap{subDirectories: make(map[string]*DirectoryEntryInMap)}
+ if dm.logFile, err = os.OpenFile(dirLogFile, os.O_RDWR|os.O_CREATE, 0644); err != nil {
+ return nil, fmt.Errorf("cannot write directory log file %s: %v", dirLogFile, err)
+ }
+ return dm, dm.load()
+}
+
+func (dm *DirectoryManagerInMap) processEachLine(line string) error {
+ if strings.HasPrefix(line, "#") {
+ return nil
+ }
+ if line == "" {
+ return nil
+ }
+ parts := strings.Split(line, "\t")
+ if len(parts) == 0 {
+ return nil
+ }
+ switch parts[0] {
+ case "add":
+ v, pe := strconv.Atoi(parts[2])
+ if pe != nil {
+ return pe
+ }
+ if e := dm.loadDirectory(parts[1], filer.DirectoryId(v)); e != nil {
+ return e
+ }
+ case "mov":
+ newName := ""
+ if len(parts) >= 4 {
+ newName = parts[3]
+ }
+ if e := dm.MoveUnderDirectory(parts[1], parts[2], newName); e != nil {
+ return e
+ }
+ case "del":
+ if e := dm.DeleteDirectory(parts[1]); e != nil {
+ return e
+ }
+ default:
+ fmt.Printf("line %s has %s!\n", line, parts[0])
+ return nil
+ }
+ return nil
+}
+func (dm *DirectoryManagerInMap) load() error {
+ dm.max = 0
+ lines := bufio.NewReader(dm.logFile)
+ dm.isLoading = true
+ defer func() { dm.isLoading = false }()
+ for {
+ line, err := util.Readln(lines)
+ if err != nil && err != io.EOF {
+ return err
+ }
+ if pe := dm.processEachLine(string(line)); pe != nil {
+ return pe
+ }
+ if err == io.EOF {
+ return nil
+ }
+ }
+}
+
+func (dm *DirectoryManagerInMap) findDirectory(dirPath string) (*DirectoryEntryInMap, error) {
+ if dirPath == "" {
+ return dm.Root, nil
+ }
+ dirPath = CleanFilePath(dirPath)
+ if dirPath == "/" {
+ return dm.Root, nil
+ }
+ parts := strings.Split(dirPath, "/")
+ dir := dm.Root
+ for i := 1; i < len(parts); i++ {
+ if sub, ok := dir.getChild(parts[i]); ok {
+ dir = sub
+ } else {
+ return dm.Root, fmt.Errorf("Directory %s Not Found", dirPath)
+ }
+ }
+ return dir, nil
+}
+func (dm *DirectoryManagerInMap) FindDirectory(dirPath string) (filer.DirectoryId, error) {
+ d, e := dm.findDirectory(dirPath)
+ if e == nil {
+ return d.Id, nil
+ }
+ return dm.Root.Id, e
+}
+
+func (dm *DirectoryManagerInMap) loadDirectory(dirPath string, dirId filer.DirectoryId) error {
+ dirPath = CleanFilePath(dirPath)
+ if dirPath == "/" {
+ return nil
+ }
+ parts := strings.Split(dirPath, "/")
+ dir := dm.Root
+ for i := 1; i < len(parts); i++ {
+ sub, ok := dir.getChild(parts[i])
+ if !ok {
+ writeLock.Lock()
+ if sub2, createdByOtherThread := dir.getChild(parts[i]); createdByOtherThread {
+ sub = sub2
+ } else {
+ if i != len(parts)-1 {
+ writeLock.Unlock()
+ return fmt.Errorf("%s should be created after parent %s", dirPath, parts[i])
+ }
+ var err error
+ sub, err = dm.newDirectoryEntryInMap(dir, parts[i])
+ if err != nil {
+ writeLock.Unlock()
+ return err
+ }
+ if sub.Id != dirId {
+ writeLock.Unlock()
+					// the dir.log should be in the same order as the in-memory directory ids
+					return fmt.Errorf("%s should have id %v instead of %v", dirPath, sub.Id, dirId)
+ }
+ dir.addChild(parts[i], sub)
+ }
+ writeLock.Unlock()
+ }
+ dir = sub
+ }
+ return nil
+}
+
+func (dm *DirectoryManagerInMap) makeDirectory(dirPath string) (dir *DirectoryEntryInMap, created bool) {
+ dirPath = CleanFilePath(dirPath)
+ if dirPath == "/" {
+ return dm.Root, false
+ }
+ parts := strings.Split(dirPath, "/")
+ dir = dm.Root
+ for i := 1; i < len(parts); i++ {
+ sub, ok := dir.getChild(parts[i])
+ if !ok {
+ writeLock.Lock()
+ if sub2, createdByOtherThread := dir.getChild(parts[i]); createdByOtherThread {
+ sub = sub2
+ } else {
+ var err error
+ sub, err = dm.newDirectoryEntryInMap(dir, parts[i])
+ if err != nil {
+ writeLock.Unlock()
+ return nil, false
+ }
+ dir.addChild(parts[i], sub)
+ created = true
+ }
+ writeLock.Unlock()
+ }
+ dir = sub
+ }
+ return dir, created
+}
+
+func (dm *DirectoryManagerInMap) MakeDirectory(dirPath string) (filer.DirectoryId, error) {
+ dir, _ := dm.makeDirectory(dirPath)
+ return dir.Id, nil
+}
+
+func (dm *DirectoryManagerInMap) MoveUnderDirectory(oldDirPath string, newParentDirPath string, newName string) error {
+ writeLock.Lock()
+ defer writeLock.Unlock()
+ oldDir, oe := dm.findDirectory(oldDirPath)
+ if oe != nil {
+ return oe
+ }
+ parentDir, pe := dm.findDirectory(newParentDirPath)
+ if pe != nil {
+ return pe
+ }
+ dm.log("mov", oldDirPath, newParentDirPath, newName)
+ oldDir.Parent.removeChild(oldDir.Name)
+ if newName == "" {
+ newName = oldDir.Name
+ }
+ parentDir.addChild(newName, oldDir)
+ oldDir.Name = newName
+ oldDir.Parent = parentDir
+ return nil
+}
+
+func (dm *DirectoryManagerInMap) ListDirectories(dirPath string) (dirNames []filer.DirectoryEntry, err error) {
+ d, e := dm.findDirectory(dirPath)
+ if e != nil {
+ return dirNames, e
+ }
+ return d.children(), nil
+}
+func (dm *DirectoryManagerInMap) DeleteDirectory(dirPath string) error {
+ writeLock.Lock()
+ defer writeLock.Unlock()
+ if dirPath == "/" {
+ return fmt.Errorf("Can not delete %s", dirPath)
+ }
+ d, e := dm.findDirectory(dirPath)
+ if e != nil {
+ return e
+ }
+ if d.hasChildren() {
+ return fmt.Errorf("dir %s still has sub directories", dirPath)
+ }
+ d.Parent.removeChild(d.Name)
+ d.Parent = nil
+ dm.log("del", dirPath)
+ return nil
+}
+
+func CleanFilePath(fp string) string {
+ ret := filepath.Clean(fp)
+ if os.PathSeparator == '\\' {
+ return strings.Replace(ret, "\\", "/", -1)
+ }
+ return ret
+}
diff --git a/weed/filer/embedded_filer/directory_test.go b/weed/filer/embedded_filer/directory_test.go
new file mode 100644
index 000000000..c8b3f1f30
--- /dev/null
+++ b/weed/filer/embedded_filer/directory_test.go
@@ -0,0 +1,86 @@
+package embedded_filer
+
+import (
+ "os"
+ "strings"
+ "testing"
+)
+
+func TestDirectory(t *testing.T) {
+ dm, _ := NewDirectoryManagerInMap("/tmp/dir.log")
+ defer func() {
+ if true {
+ os.Remove("/tmp/dir.log")
+ }
+ }()
+ dm.MakeDirectory("/a/b/c")
+ dm.MakeDirectory("/a/b/d")
+ dm.MakeDirectory("/a/b/e")
+ dm.MakeDirectory("/a/b/e/f")
+ dm.MakeDirectory("/a/b/e/f/g")
+ dm.MoveUnderDirectory("/a/b/e/f/g", "/a/b", "t")
+ if _, err := dm.FindDirectory("/a/b/e/f/g"); err == nil {
+ t.Fatal("/a/b/e/f/g should not exist any more after moving")
+ }
+ if _, err := dm.FindDirectory("/a/b/t"); err != nil {
+ t.Fatal("/a/b/t should exist after moving")
+ }
+ if _, err := dm.FindDirectory("/a/b/g"); err == nil {
+ t.Fatal("/a/b/g should not exist after moving")
+ }
+ dm.MoveUnderDirectory("/a/b/e/f", "/a/b", "")
+ if _, err := dm.FindDirectory("/a/b/f"); err != nil {
+		t.Fatal("/a/b/f should exist after moving")
+ }
+ dm.MakeDirectory("/a/b/g/h/i")
+ dm.DeleteDirectory("/a/b/e/f")
+ dm.DeleteDirectory("/a/b/e")
+ dirNames, _ := dm.ListDirectories("/a/b/e")
+ for _, v := range dirNames {
+ println("sub1 dir:", v.Name, "id", v.Id)
+ }
+ dm.logFile.Close()
+
+ var path []string
+ printTree(dm.Root, path)
+
+ dm2, e := NewDirectoryManagerInMap("/tmp/dir.log")
+ if e != nil {
+ println("load error", e.Error())
+ }
+ if !compare(dm.Root, dm2.Root) {
+ t.Fatal("restored dir not the same!")
+ }
+ printTree(dm2.Root, path)
+}
+
+func printTree(node *DirectoryEntryInMap, path []string) {
+ println(strings.Join(path, "/") + "/" + node.Name)
+ path = append(path, node.Name)
+ for _, v := range node.subDirectories {
+ printTree(v, path)
+ }
+}
+
+func compare(root1 *DirectoryEntryInMap, root2 *DirectoryEntryInMap) bool {
+ if len(root1.subDirectories) != len(root2.subDirectories) {
+ return false
+ }
+ if root1.Name != root2.Name {
+ return false
+ }
+ if root1.Id != root2.Id {
+ return false
+ }
+ if !(root1.Parent == nil && root2.Parent == nil) {
+ if root1.Parent.Id != root2.Parent.Id {
+ return false
+ }
+ }
+ for k, v := range root1.subDirectories {
+ if !compare(v, root2.subDirectories[k]) {
+ return false
+ }
+ }
+ return true
+}
diff --git a/weed/filer/embedded_filer/filer_embedded.go b/weed/filer/embedded_filer/filer_embedded.go
new file mode 100644
index 000000000..27299eb40
--- /dev/null
+++ b/weed/filer/embedded_filer/filer_embedded.go
@@ -0,0 +1,141 @@
+package embedded_filer
+
+import (
+ "errors"
+ "fmt"
+ "path/filepath"
+ "strings"
+
+ "github.com/chrislusf/seaweedfs/weed/filer"
+ "github.com/chrislusf/seaweedfs/weed/operation"
+)
+
+type FilerEmbedded struct {
+ master string
+ directories *DirectoryManagerInMap
+ files *FileListInLevelDb
+}
+
+func NewFilerEmbedded(master string, dir string) (filer *FilerEmbedded, err error) {
+ dm, de := NewDirectoryManagerInMap(filepath.Join(dir, "dir.log"))
+ if de != nil {
+ return nil, de
+ }
+ fl, fe := NewFileListInLevelDb(dir)
+ if fe != nil {
+ return nil, fe
+ }
+ filer = &FilerEmbedded{
+ master: master,
+ directories: dm,
+ files: fl,
+ }
+ return
+}
+
+func (filer *FilerEmbedded) CreateFile(filePath string, fid string) (err error) {
+ dir, file := filepath.Split(filePath)
+ dirId, e := filer.directories.MakeDirectory(dir)
+ if e != nil {
+ return e
+ }
+ return filer.files.CreateFile(dirId, file, fid)
+}
+func (filer *FilerEmbedded) FindFile(filePath string) (fid string, err error) {
+ dir, file := filepath.Split(filePath)
+ dirId, e := filer.directories.FindDirectory(dir)
+ if e != nil {
+ return "", e
+ }
+ return filer.files.FindFile(dirId, file)
+}
+func (filer *FilerEmbedded) FindDirectory(dirPath string) (dirId filer.DirectoryId, err error) {
+ return filer.directories.FindDirectory(dirPath)
+}
+func (filer *FilerEmbedded) ListDirectories(dirPath string) (dirs []filer.DirectoryEntry, err error) {
+ return filer.directories.ListDirectories(dirPath)
+}
+func (filer *FilerEmbedded) ListFiles(dirPath string, lastFileName string, limit int) (files []filer.FileEntry, err error) {
+ dirId, e := filer.directories.FindDirectory(dirPath)
+ if e != nil {
+ return nil, e
+ }
+ return filer.files.ListFiles(dirId, lastFileName, limit), nil
+}
+func (filer *FilerEmbedded) DeleteDirectory(dirPath string, recursive bool) (err error) {
+ dirId, e := filer.directories.FindDirectory(dirPath)
+ if e != nil {
+ return e
+ }
+ if sub_dirs, sub_err := filer.directories.ListDirectories(dirPath); sub_err == nil {
+ if len(sub_dirs) > 0 && !recursive {
+ return fmt.Errorf("Fail to delete directory %s: %d sub directories found!", dirPath, len(sub_dirs))
+ }
+ for _, sub := range sub_dirs {
+ if delete_sub_err := filer.DeleteDirectory(filepath.Join(dirPath, sub.Name), recursive); delete_sub_err != nil {
+ return delete_sub_err
+ }
+ }
+ }
+ list := filer.files.ListFiles(dirId, "", 100)
+	if len(list) != 0 && !recursive {
+		return fmt.Errorf("Fail to delete non-empty directory %s!", dirPath)
+	}
+ for {
+ if len(list) == 0 {
+ return filer.directories.DeleteDirectory(dirPath)
+ }
+ var fids []string
+ for _, fileEntry := range list {
+ fids = append(fids, string(fileEntry.Id))
+ }
+ if result_list, delete_file_err := operation.DeleteFiles(filer.master, fids); delete_file_err != nil {
+ return delete_file_err
+ } else {
+ if len(result_list.Errors) > 0 {
+ return errors.New(strings.Join(result_list.Errors, "\n"))
+ }
+ }
+ lastFile := list[len(list)-1]
+ list = filer.files.ListFiles(dirId, lastFile.Name, 100)
+ }
+
+}
+
+func (filer *FilerEmbedded) DeleteFile(filePath string) (fid string, err error) {
+ dir, file := filepath.Split(filePath)
+ dirId, e := filer.directories.FindDirectory(dir)
+ if e != nil {
+ return "", e
+ }
+ return filer.files.DeleteFile(dirId, file)
+}
+
+/*
+Move a folder or a file, with 4 Use cases:
+mv fromDir toNewDir
+mv fromDir toOldDir
+mv fromFile toDir
+mv fromFile toFile
+*/
+func (filer *FilerEmbedded) Move(fromPath string, toPath string) error {
+ if _, dir_err := filer.FindDirectory(fromPath); dir_err == nil {
+ if _, err := filer.FindDirectory(toPath); err == nil {
+ // move folder under an existing folder
+ return filer.directories.MoveUnderDirectory(fromPath, toPath, "")
+ }
+ // move folder to a new folder
+ return filer.directories.MoveUnderDirectory(fromPath, filepath.Dir(toPath), filepath.Base(toPath))
+ }
+ if fid, file_err := filer.DeleteFile(fromPath); file_err == nil {
+ if _, err := filer.FindDirectory(toPath); err == nil {
+ // move file under an existing folder
+ return filer.CreateFile(filepath.Join(toPath, filepath.Base(fromPath)), fid)
+ }
+ // move to a folder with new name
+ return filer.CreateFile(toPath, fid)
+ }
+ return fmt.Errorf("File %s is not found!", fromPath)
+}
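A hedged sketch of driving the embedded filer directly, exercising two of the Move use cases listed above (file into an existing folder, folder rename). The master address, metadata directory, and file id are placeholders.

package main

import (
	"fmt"
	"os"

	"github.com/chrislusf/seaweedfs/weed/filer/embedded_filer"
)

func main() {
	// Placeholder master address and metadata directory.
	dir := "/tmp/filer_example"
	os.MkdirAll(dir, 0700)
	f, err := embedded_filer.NewFilerEmbedded("localhost:9333", dir)
	if err != nil {
		fmt.Println("open filer:", err)
		return
	}

	// Register a placeholder file id under a path; parent directories are created as needed.
	if err := f.CreateFile("/sources/weed/go/filer.go", "3,01637037d6"); err != nil {
		fmt.Println("create:", err)
		return
	}

	// mv fromFile toDir: move the file under an existing folder.
	if err := f.Move("/sources/weed/go/filer.go", "/sources/weed"); err != nil {
		fmt.Println("move file:", err)
	}

	// mv fromDir toNewDir: rename the now empty folder.
	if err := f.Move("/sources/weed/go", "/sources/weed/golang"); err != nil {
		fmt.Println("move dir:", err)
	}

	fid, err := f.FindFile("/sources/weed/filer.go")
	fmt.Println("fid:", fid, "err:", err)
}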
diff --git a/weed/filer/embedded_filer/files_in_leveldb.go b/weed/filer/embedded_filer/files_in_leveldb.go
new file mode 100644
index 000000000..19f6dd7e8
--- /dev/null
+++ b/weed/filer/embedded_filer/files_in_leveldb.go
@@ -0,0 +1,85 @@
+package embedded_filer
+
+import (
+ "bytes"
+
+ "github.com/chrislusf/seaweedfs/weed/filer"
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/syndtr/goleveldb/leveldb"
+ "github.com/syndtr/goleveldb/leveldb/util"
+)
+
+/*
+The entry in level db has this format:
+ key: genKey(dirId, fileName)
+ value: []byte(fid)
+And genKey(dirId, fileName) uses the first 4 bytes to store dirId, and the rest for fileName
+*/
+
+type FileListInLevelDb struct {
+ db *leveldb.DB
+}
+
+func NewFileListInLevelDb(dir string) (fl *FileListInLevelDb, err error) {
+ fl = &FileListInLevelDb{}
+ if fl.db, err = leveldb.OpenFile(dir, nil); err != nil {
+ return
+ }
+ return
+}
+
+func genKey(dirId filer.DirectoryId, fileName string) []byte {
+ ret := make([]byte, 0, 4+len(fileName))
+ for i := 3; i >= 0; i-- {
+ ret = append(ret, byte(dirId>>(uint(i)*8)))
+ }
+ ret = append(ret, []byte(fileName)...)
+ return ret
+}
+
+func (fl *FileListInLevelDb) CreateFile(dirId filer.DirectoryId, fileName string, fid string) (err error) {
+ glog.V(4).Infoln("directory", dirId, "fileName", fileName, "fid", fid)
+ return fl.db.Put(genKey(dirId, fileName), []byte(fid), nil)
+}
+func (fl *FileListInLevelDb) DeleteFile(dirId filer.DirectoryId, fileName string) (fid string, err error) {
+ if fid, err = fl.FindFile(dirId, fileName); err != nil {
+ if err == leveldb.ErrNotFound {
+ return "", nil
+ }
+ return
+ }
+ err = fl.db.Delete(genKey(dirId, fileName), nil)
+ return fid, err
+}
+func (fl *FileListInLevelDb) FindFile(dirId filer.DirectoryId, fileName string) (fid string, err error) {
+ data, e := fl.db.Get(genKey(dirId, fileName), nil)
+ if e != nil {
+ return "", e
+ }
+ return string(data), nil
+}
+func (fl *FileListInLevelDb) ListFiles(dirId filer.DirectoryId, lastFileName string, limit int) (files []filer.FileEntry) {
+ glog.V(4).Infoln("directory", dirId, "lastFileName", lastFileName, "limit", limit)
+ dirKey := genKey(dirId, "")
+ iter := fl.db.NewIterator(&util.Range{Start: genKey(dirId, lastFileName)}, nil)
+ limitCounter := 0
+ for iter.Next() {
+ key := iter.Key()
+ if !bytes.HasPrefix(key, dirKey) {
+ break
+ }
+ fileName := string(key[len(dirKey):])
+ if fileName == lastFileName {
+ continue
+ }
+ limitCounter++
+ if limit > 0 {
+ if limitCounter > limit {
+ break
+ }
+ }
+ files = append(files, filer.FileEntry{Name: fileName, Id: filer.FileId(string(iter.Value()))})
+ }
+ iter.Release()
+ return
+}
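A minimal sketch of paging through one directory with the store above, assuming a scratch leveldb directory; lastFileName carries the cursor between calls, since ListFiles skips the entry equal to it. The directory id and file ids are placeholders.

package main

import (
	"fmt"

	"github.com/chrislusf/seaweedfs/weed/filer"
	"github.com/chrislusf/seaweedfs/weed/filer/embedded_filer"
)

func main() {
	// Placeholder directory for the leveldb files.
	fl, err := embedded_filer.NewFileListInLevelDb("/tmp/filelist_example")
	if err != nil {
		fmt.Println("open:", err)
		return
	}

	// Insert a few entries under directory id 1 with placeholder file ids.
	dirId := filer.DirectoryId(1)
	for i := 0; i < 5; i++ {
		fl.CreateFile(dirId, fmt.Sprintf("file%02d.txt", i), fmt.Sprintf("3,%02d", i))
	}

	// Page through the directory two entries at a time.
	lastFileName := ""
	for {
		page := fl.ListFiles(dirId, lastFileName, 2)
		if len(page) == 0 {
			break
		}
		for _, entry := range page {
			fmt.Println(entry.Name, "=>", entry.Id)
		}
		lastFileName = page[len(page)-1].Name
	}
}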
diff --git a/weed/filer/filer.go b/weed/filer/filer.go
new file mode 100644
index 000000000..fd23e119c
--- /dev/null
+++ b/weed/filer/filer.go
@@ -0,0 +1,28 @@
+package filer
+
+type FileId string //file id in SeaweedFS
+
+type FileEntry struct {
+ Name string `json:"name,omitempty"` //file name without path
+ Id FileId `json:"fid,omitempty"`
+}
+
+type DirectoryId int32
+
+type DirectoryEntry struct {
+ Name string //dir name without path
+ Id DirectoryId
+}
+
+type Filer interface {
+ CreateFile(fullFileName string, fid string) (err error)
+ FindFile(fullFileName string) (fid string, err error)
+ DeleteFile(fullFileName string) (fid string, err error)
+
+ //Optional functions. embedded filer support these
+ FindDirectory(dirPath string) (dirId DirectoryId, err error)
+ ListDirectories(dirPath string) (dirs []DirectoryEntry, err error)
+ ListFiles(dirPath string, lastFileName string, limit int) (files []FileEntry, err error)
+ DeleteDirectory(dirPath string, recursive bool) (err error)
+ Move(fromPath string, toPath string) (err error)
+}
diff --git a/weed/filer/flat_namespace/flat_namespace_filer.go b/weed/filer/flat_namespace/flat_namespace_filer.go
new file mode 100644
index 000000000..c20fd2521
--- /dev/null
+++ b/weed/filer/flat_namespace/flat_namespace_filer.go
@@ -0,0 +1,50 @@
+package flat_namespace
+
+import (
+ "errors"
+
+ "github.com/chrislusf/seaweedfs/weed/filer"
+)
+
+type FlatNamespaceFiler struct {
+ master string
+ store FlatNamespaceStore
+}
+
+var (
+ ErrNotImplemented = errors.New("Not Implemented for flat namespace meta data store")
+)
+
+func NewFlatNamespaceFiler(master string, store FlatNamespaceStore) *FlatNamespaceFiler {
+ return &FlatNamespaceFiler{
+ master: master,
+ store: store,
+ }
+}
+
+func (filer *FlatNamespaceFiler) CreateFile(fullFileName string, fid string) (err error) {
+ return filer.store.Put(fullFileName, fid)
+}
+func (filer *FlatNamespaceFiler) FindFile(fullFileName string) (fid string, err error) {
+ return filer.store.Get(fullFileName)
+}
+func (filer *FlatNamespaceFiler) FindDirectory(dirPath string) (dirId filer.DirectoryId, err error) {
+ return 0, ErrNotImplemented
+}
+func (filer *FlatNamespaceFiler) ListDirectories(dirPath string) (dirs []filer.DirectoryEntry, err error) {
+ return nil, ErrNotImplemented
+}
+func (filer *FlatNamespaceFiler) ListFiles(dirPath string, lastFileName string, limit int) (files []filer.FileEntry, err error) {
+ return nil, ErrNotImplemented
+}
+func (filer *FlatNamespaceFiler) DeleteDirectory(dirPath string, recursive bool) (err error) {
+ return ErrNotImplemented
+}
+
+func (filer *FlatNamespaceFiler) DeleteFile(fullFileName string) (fid string, err error) {
+ return filer.store.Delete(fullFileName)
+}
+
+func (filer *FlatNamespaceFiler) Move(fromPath string, toPath string) error {
+ return ErrNotImplemented
+}
diff --git a/weed/filer/flat_namespace/flat_namespace_store.go b/weed/filer/flat_namespace/flat_namespace_store.go
new file mode 100644
index 000000000..832b70e40
--- /dev/null
+++ b/weed/filer/flat_namespace/flat_namespace_store.go
@@ -0,0 +1,9 @@
+package flat_namespace
+
+type FlatNamespaceStore interface {
+ Put(fullFileName string, fid string) (err error)
+ Get(fullFileName string) (fid string, err error)
+ Delete(fullFileName string) (fid string, err error)
+}
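The interface above is small enough to sketch with a plain map. The in-memory store below is for illustration only, showing the contract a backing store such as the Redis or Cassandra implementations has to satisfy; it could then be handed to NewFlatNamespaceFiler defined earlier in this change.

package flat_namespace

import (
	"fmt"
	"sync"
)

// MemoryStore is a toy FlatNamespaceStore backed by a map, for illustration only.
type MemoryStore struct {
	mu   sync.RWMutex
	fids map[string]string
}

func NewMemoryStore() *MemoryStore {
	return &MemoryStore{fids: make(map[string]string)}
}

func (m *MemoryStore) Put(fullFileName string, fid string) error {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.fids[fullFileName] = fid
	return nil
}

func (m *MemoryStore) Get(fullFileName string) (string, error) {
	m.mu.RLock()
	defer m.mu.RUnlock()
	fid, ok := m.fids[fullFileName]
	if !ok {
		return "", fmt.Errorf("not found: %s", fullFileName)
	}
	return fid, nil
}

// Delete removes the mapping; like the other stores, the fid is not returned.
func (m *MemoryStore) Delete(fullFileName string) (string, error) {
	m.mu.Lock()
	defer m.mu.Unlock()
	delete(m.fids, fullFileName)
	return "", nil
}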
diff --git a/weed/filer/redis_store/redis_store.go b/weed/filer/redis_store/redis_store.go
new file mode 100644
index 000000000..939172557
--- /dev/null
+++ b/weed/filer/redis_store/redis_store.go
@@ -0,0 +1,48 @@
+package redis_store
+
+import (
+ redis "gopkg.in/redis.v2"
+)
+
+type RedisStore struct {
+ Client *redis.Client
+}
+
+func NewRedisStore(hostPort string, password string, database int) *RedisStore {
+ client := redis.NewTCPClient(&redis.Options{
+ Addr: hostPort,
+ Password: password,
+ DB: int64(database),
+ })
+ return &RedisStore{Client: client}
+}
+
+func (s *RedisStore) Get(fullFileName string) (fid string, err error) {
+ fid, err = s.Client.Get(fullFileName).Result()
+ if err == redis.Nil {
+ err = nil
+ }
+ return fid, err
+}
+func (s *RedisStore) Put(fullFileName string, fid string) (err error) {
+ _, err = s.Client.Set(fullFileName, fid).Result()
+ if err == redis.Nil {
+ err = nil
+ }
+ return err
+}
+
+// Currently the fid is not returned
+func (s *RedisStore) Delete(fullFileName string) (fid string, err error) {
+ _, err = s.Client.Del(fullFileName).Result()
+ if err == redis.Nil {
+ err = nil
+ }
+ return "", err
+}
+
+func (s *RedisStore) Close() {
+ if s.Client != nil {
+ s.Client.Close()
+ }
+}
diff --git a/weed/glide.lock b/weed/glide.lock
new file mode 100644
index 000000000..047c89eae
--- /dev/null
+++ b/weed/glide.lock
@@ -0,0 +1,93 @@
+hash: fc2f4ccfa5d703b62b9c93047bae2295e6926ce1603c2164510c9010ff829945
+updated: 2016-06-02T12:26:29.018990826-07:00
+imports:
+- name: bazil.org/fuse
+ version: 5d02b06737b3b3c2e6a44e03348b6f2b44aa6835
+ subpackages:
+ - fs
+ - fuseutil
+- name: github.com/boltdb/bolt
+ version: dfb21201d9270c1082d5fb0f07f500311ff72f18
+- name: github.com/chrislusf/raft
+ version: 5f7ddd8f479583daf05879d3d3b174aa202c8fb7
+ subpackages:
+ - protobuf
+- name: github.com/dgrijalva/jwt-go
+ version: 9b486c879bab3fde556ce8c27d9a2bb05d5b2c60
+- name: github.com/disintegration/imaging
+ version: d8bbae1de109b518dabc98c6c1633eb358c148a4
+- name: github.com/gocql/gocql
+ version: b7b8a0e04b0cb0ca0b379421c58ec6fab9939b85
+ subpackages:
+ - internal/lru
+ - internal/murmur
+ - internal/streams
+- name: github.com/gogo/protobuf
+ version: 7883e1468d48d969e1c3ce4bcde89b6a7dd4adc4
+ subpackages:
+ - proto
+- name: github.com/golang/protobuf
+ version: 3b06fc7a4cad73efce5fe6217ab6c33e7231ab4a
+ subpackages:
+ - proto
+- name: github.com/golang/snappy
+ version: d9eb7a3d35ec988b8585d4a0068e462c27d28380
+- name: github.com/gorilla/context
+ version: aed02d124ae4a0e94fea4541c8effd05bf0c8296
+- name: github.com/gorilla/mux
+ version: bd09be08ed4377796d312df0a45314e11b8f5dc1
+- name: github.com/hailocab/go-hostpool
+ version: e80d13ce29ede4452c43dea11e79b9bc8a15b478
+- name: github.com/hashicorp/golang-lru
+ version: a0d98a5f288019575c6d1f4bb1573fef2d1fcdc4
+ subpackages:
+ - simplelru
+- name: github.com/klauspost/crc32
+ version: 19b0b332c9e4516a6370a0456e6182c3b5036720
+- name: github.com/reducedb/encoding
+ version: e31efcfdf1b19f4698709a6f522feaf101b1b1f4
+ subpackages:
+ - cursor
+ - delta/bp32
+ - bitpacking
+- name: github.com/rwcarlsen/goexif
+ version: 709fab3d192d7c62f86043caff1e7e3fb0f42bd8
+ subpackages:
+ - exif
+ - tiff
+- name: github.com/syndtr/goleveldb
+ version: cfa635847112c5dc4782e128fa7e0d05fdbfb394
+ subpackages:
+ - leveldb
+ - leveldb/util
+ - leveldb/cache
+ - leveldb/comparer
+ - leveldb/errors
+ - leveldb/filter
+ - leveldb/iterator
+ - leveldb/journal
+ - leveldb/memdb
+ - leveldb/opt
+ - leveldb/storage
+ - leveldb/table
+- name: golang.org/x/image
+ version: 97680175a5267bb8b31f1923e7a66df98013b11a
+ subpackages:
+ - bmp
+ - tiff
+ - tiff/lzw
+- name: golang.org/x/net
+ version: c4c3ea71919de159c9e246d7be66deb7f0a39a58
+ subpackages:
+ - context
+- name: golang.org/x/sys
+ version: 076b546753157f758b316e59bcb51e6807c04057
+ subpackages:
+ - unix
+- name: gopkg.in/bufio.v1
+ version: 567b2bfa514e796916c4747494d6ff5132a1dfce
+- name: gopkg.in/inf.v0
+ version: 3887ee99ecf07df5b447e9b00d9c0b2adaa9f3e4
+- name: gopkg.in/redis.v2
+ version: e6179049628164864e6e84e973cfb56335748dea
+devImports: []
diff --git a/weed/glide.yaml b/weed/glide.yaml
new file mode 100644
index 000000000..e58b3679e
--- /dev/null
+++ b/weed/glide.yaml
@@ -0,0 +1,31 @@
+package: github.com/chrislusf/seaweedfs/weed
+import:
+- package: bazil.org/fuse
+ subpackages:
+ - fs
+- package: github.com/boltdb/bolt
+- package: github.com/chrislusf/raft
+- package: github.com/dgrijalva/jwt-go
+- package: github.com/disintegration/imaging
+- package: github.com/gocql/gocql
+- package: github.com/golang/protobuf
+ subpackages:
+ - proto
+- package: github.com/gorilla/mux
+- package: github.com/hashicorp/golang-lru
+- package: github.com/klauspost/crc32
+- package: github.com/reducedb/encoding
+ subpackages:
+ - cursor
+ - delta/bp32
+- package: github.com/rwcarlsen/goexif
+ subpackages:
+ - exif
+- package: github.com/syndtr/goleveldb
+ subpackages:
+ - leveldb
+ - leveldb/util
+- package: golang.org/x/net
+ subpackages:
+ - context
+- package: gopkg.in/redis.v2
diff --git a/weed/glog/LICENSE b/weed/glog/LICENSE
new file mode 100644
index 000000000..37ec93a14
--- /dev/null
+++ b/weed/glog/LICENSE
@@ -0,0 +1,191 @@
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction, and
+distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by the copyright
+owner that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all other entities
+that control, are controlled by, or are under common control with that entity.
+For the purposes of this definition, "control" means (i) the power, direct or
+indirect, to cause the direction or management of such entity, whether by
+contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
+outstanding shares, or (iii) beneficial ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity exercising
+permissions granted by this License.
+
+"Source" form shall mean the preferred form for making modifications, including
+but not limited to software source code, documentation source, and configuration
+files.
+
+"Object" form shall mean any form resulting from mechanical transformation or
+translation of a Source form, including but not limited to compiled object code,
+generated documentation, and conversions to other media types.
+
+"Work" shall mean the work of authorship, whether in Source or Object form, made
+available under the License, as indicated by a copyright notice that is included
+in or attached to the work (an example is provided in the Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object form, that
+is based on (or derived from) the Work and for which the editorial revisions,
+annotations, elaborations, or other modifications represent, as a whole, an
+original work of authorship. For the purposes of this License, Derivative Works
+shall not include works that remain separable from, or merely link (or bind by
+name) to the interfaces of, the Work and Derivative Works thereof.
+
+"Contribution" shall mean any work of authorship, including the original version
+of the Work and any modifications or additions to that Work or Derivative Works
+thereof, that is intentionally submitted to Licensor for inclusion in the Work
+by the copyright owner or by an individual or Legal Entity authorized to submit
+on behalf of the copyright owner. For the purposes of this definition,
+"submitted" means any form of electronic, verbal, or written communication sent
+to the Licensor or its representatives, including but not limited to
+communication on electronic mailing lists, source code control systems, and
+issue tracking systems that are managed by, or on behalf of, the Licensor for
+the purpose of discussing and improving the Work, but excluding communication
+that is conspicuously marked or otherwise designated in writing by the copyright
+owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
+of whom a Contribution has been received by Licensor and subsequently
+incorporated within the Work.
+
+2. Grant of Copyright License.
+
+Subject to the terms and conditions of this License, each Contributor hereby
+grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
+irrevocable copyright license to reproduce, prepare Derivative Works of,
+publicly display, publicly perform, sublicense, and distribute the Work and such
+Derivative Works in Source or Object form.
+
+3. Grant of Patent License.
+
+Subject to the terms and conditions of this License, each Contributor hereby
+grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
+irrevocable (except as stated in this section) patent license to make, have
+made, use, offer to sell, sell, import, and otherwise transfer the Work, where
+such license applies only to those patent claims licensable by such Contributor
+that are necessarily infringed by their Contribution(s) alone or by combination
+of their Contribution(s) with the Work to which such Contribution(s) was
+submitted. If You institute patent litigation against any entity (including a
+cross-claim or counterclaim in a lawsuit) alleging that the Work or a
+Contribution incorporated within the Work constitutes direct or contributory
+patent infringement, then any patent licenses granted to You under this License
+for that Work shall terminate as of the date such litigation is filed.
+
+4. Redistribution.
+
+You may reproduce and distribute copies of the Work or Derivative Works thereof
+in any medium, with or without modifications, and in Source or Object form,
+provided that You meet the following conditions:
+
+You must give any other recipients of the Work or Derivative Works a copy of
+this License; and
+You must cause any modified files to carry prominent notices stating that You
+changed the files; and
+You must retain, in the Source form of any Derivative Works that You distribute,
+all copyright, patent, trademark, and attribution notices from the Source form
+of the Work, excluding those notices that do not pertain to any part of the
+Derivative Works; and
+If the Work includes a "NOTICE" text file as part of its distribution, then any
+Derivative Works that You distribute must include a readable copy of the
+attribution notices contained within such NOTICE file, excluding those notices
+that do not pertain to any part of the Derivative Works, in at least one of the
+following places: within a NOTICE text file distributed as part of the
+Derivative Works; within the Source form or documentation, if provided along
+with the Derivative Works; or, within a display generated by the Derivative
+Works, if and wherever such third-party notices normally appear. The contents of
+the NOTICE file are for informational purposes only and do not modify the
+License. You may add Your own attribution notices within Derivative Works that
+You distribute, alongside or as an addendum to the NOTICE text from the Work,
+provided that such additional attribution notices cannot be construed as
+modifying the License.
+You may add Your own copyright statement to Your modifications and may provide
+additional or different license terms and conditions for use, reproduction, or
+distribution of Your modifications, or for any such Derivative Works as a whole,
+provided Your use, reproduction, and distribution of the Work otherwise complies
+with the conditions stated in this License.
+
+5. Submission of Contributions.
+
+Unless You explicitly state otherwise, any Contribution intentionally submitted
+for inclusion in the Work by You to the Licensor shall be under the terms and
+conditions of this License, without any additional terms or conditions.
+Notwithstanding the above, nothing herein shall supersede or modify the terms of
+any separate license agreement you may have executed with Licensor regarding
+such Contributions.
+
+6. Trademarks.
+
+This License does not grant permission to use the trade names, trademarks,
+service marks, or product names of the Licensor, except as required for
+reasonable and customary use in describing the origin of the Work and
+reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty.
+
+Unless required by applicable law or agreed to in writing, Licensor provides the
+Work (and each Contributor provides its Contributions) on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
+including, without limitation, any warranties or conditions of TITLE,
+NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
+solely responsible for determining the appropriateness of using or
+redistributing the Work and assume any risks associated with Your exercise of
+permissions under this License.
+
+8. Limitation of Liability.
+
+In no event and under no legal theory, whether in tort (including negligence),
+contract, or otherwise, unless required by applicable law (such as deliberate
+and grossly negligent acts) or agreed to in writing, shall any Contributor be
+liable to You for damages, including any direct, indirect, special, incidental,
+or consequential damages of any character arising as a result of this License or
+out of the use or inability to use the Work (including but not limited to
+damages for loss of goodwill, work stoppage, computer failure or malfunction, or
+any and all other commercial damages or losses), even if such Contributor has
+been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability.
+
+While redistributing the Work or Derivative Works thereof, You may choose to
+offer, and charge a fee for, acceptance of support, warranty, indemnity, or
+other liability obligations and/or rights consistent with this License. However,
+in accepting such obligations, You may act only on Your own behalf and on Your
+sole responsibility, not on behalf of any other Contributor, and only if You
+agree to indemnify, defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason of your
+accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work
+
+To apply the Apache License to your work, attach the following boilerplate
+notice, with the fields enclosed by brackets "[]" replaced with your own
+identifying information. (Don't include the brackets!) The text should be
+enclosed in the appropriate comment syntax for the file format. We also
+recommend that a file or class name and description of purpose be included on
+the same "printed page" as the copyright notice for easier identification within
+third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/weed/glog/README b/weed/glog/README
new file mode 100644
index 000000000..5f9c11485
--- /dev/null
+++ b/weed/glog/README
@@ -0,0 +1,44 @@
+glog
+====
+
+Leveled execution logs for Go.
+
+This is an efficient pure Go implementation of leveled logs in the
+manner of the open source C++ package
+ http://code.google.com/p/google-glog
+
+By binding methods to booleans it is possible to use the log package
+without paying the expense of evaluating the arguments to the log.
+Through the -vmodule flag, the package also provides fine-grained
+control over logging at the file level.
+
+The comment from glog.go introduces the ideas:
+
+ Package glog implements logging analogous to the Google-internal
+ C++ INFO/ERROR/V setup. It provides functions Info, Warning,
+ Error, Fatal, plus formatting variants such as Infof. It
+ also provides V-style logging controlled by the -v and
+ -vmodule=file=2 flags.
+
+ Basic examples:
+
+ glog.Info("Prepare to repel boarders")
+
+ glog.Fatalf("Initialization failed: %s", err)
+
+ See the documentation for the V function for an explanation
+ of these examples:
+
+ if glog.V(2) {
+ glog.Info("Starting transaction...")
+ }
+
+ glog.V(2).Infoln("Processed", nItems, "elements")
+
+
+The repository contains an open source version of the log package
+used inside Google. The master copy of the source lives inside
+Google, not here. The code in this repo is for export only and is not itself
+under development. Feature requests will be ignored.
+
+Send bug reports to golang-nuts@googlegroups.com.
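A minimal sketch of the usage described above, assuming the vendored import path github.com/chrislusf/seaweedfs/weed/glog (the message strings and numbers are illustrative):

	package main

	import (
		"flag"

		"github.com/chrislusf/seaweedfs/weed/glog"
	)

	func main() {
		// The glog flags (-v, -vmodule, -logtostderr, ...) are registered in
		// the package's init, so flag.Parse must run before any logging.
		flag.Parse()
		defer glog.Flush() // output is buffered; flush before exiting

		glog.Info("Prepare to repel boarders")
		glog.Infof("Initialization took %d ms", 42)

		// The guarded forms only evaluate their arguments when -v=2 or
		// higher, or when -vmodule enables this file.
		if glog.V(2) {
			glog.Info("Starting transaction...")
		}
		glog.V(2).Infoln("Processed", 42, "elements")
	}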
diff --git a/weed/glog/convenient_api.go b/weed/glog/convenient_api.go
new file mode 100644
index 000000000..cb43d60e2
--- /dev/null
+++ b/weed/glog/convenient_api.go
@@ -0,0 +1,6 @@
+package glog
+
+/*
+This is a copy of the original glog, because the original is missing several convenient features.
+1. the nanosecond portion of the time is removed from the log format
+*/
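The practical effect of change 1 shows up in the log header: formatHeader in glog.go below writes Lmmdd hh:mm:ss with no fractional-second field, matching the "Log line format: [IWEF]mmdd hh:mm:ss threadid file:line] msg" banner that rotateFile writes at the top of every log file. An illustrative line (file name, pid and message are made up):

	I0602 18:09:14  1234 volume.go:123] compaction done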
diff --git a/weed/glog/glog.go b/weed/glog/glog.go
new file mode 100644
index 000000000..abd5678d4
--- /dev/null
+++ b/weed/glog/glog.go
@@ -0,0 +1,1181 @@
+// Go support for leveled logs, analogous to https://code.google.com/p/google-glog/
+//
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package glog implements logging analogous to the Google-internal C++ INFO/ERROR/V setup.
+// It provides functions Info, Warning, Error, Fatal, plus formatting variants such as
+// Infof. It also provides V-style logging controlled by the -v and -vmodule=file=2 flags.
+//
+// Basic examples:
+//
+// glog.Info("Prepare to repel boarders")
+//
+// glog.Fatalf("Initialization failed: %s", err)
+//
+// See the documentation for the V function for an explanation of these examples:
+//
+// if glog.V(2) {
+// glog.Info("Starting transaction...")
+// }
+//
+// glog.V(2).Infoln("Processed", nItems, "elements")
+//
+// Log output is buffered and written periodically using Flush. Programs
+// should call Flush before exiting to guarantee all log output is written.
+//
+// By default, all log statements write to files in a temporary directory.
+// This package provides several flags that modify this behavior.
+// As a result, flag.Parse must be called before any logging is done.
+//
+// -logtostderr=false
+// Logs are written to standard error instead of to files.
+// -alsologtostderr=false
+// Logs are written to standard error as well as to files.
+// -stderrthreshold=ERROR
+// Log events at or above this severity are logged to standard
+// error as well as to files.
+// -log_dir=""
+// Log files will be written to this directory instead of the
+// default temporary directory.
+//
+// Other flags provide aids to debugging.
+//
+// -log_backtrace_at=""
+// When set to a file and line number holding a logging statement,
+// such as
+// -log_backtrace_at=gopherflakes.go:234
+// a stack trace will be written to the Info log whenever execution
+// hits that statement. (Unlike with -vmodule, the ".go" must be
+// present.)
+// -v=0
+// Enable V-leveled logging at the specified level.
+// -vmodule=""
+// The syntax of the argument is a comma-separated list of pattern=N,
+// where pattern is a literal file name (minus the ".go" suffix) or
+// "glob" pattern and N is a V level. For instance,
+// -vmodule=gopher*=3
+// sets the V level to 3 in all Go files whose names begin "gopher".
+//
+package glog
+
+import (
+ "bufio"
+ "bytes"
+ "errors"
+ "flag"
+ "fmt"
+ "io"
+ stdLog "log"
+ "os"
+ "path/filepath"
+ "runtime"
+ "strconv"
+ "strings"
+ "sync"
+ "sync/atomic"
+ "time"
+)
+
+// severity identifies the sort of log: info, warning etc. It also implements
+// the flag.Value interface. The -stderrthreshold flag is of type severity and
+// should be modified only through the flag.Value interface. The values match
+// the corresponding constants in C++.
+type severity int32 // sync/atomic int32
+
+// These constants identify the log levels in order of increasing severity.
+// A message written to a high-severity log file is also written to each
+// lower-severity log file.
+const (
+ infoLog severity = iota
+ warningLog
+ errorLog
+ fatalLog
+ numSeverity = 4
+)
+
+const severityChar = "IWEF"
+
+var severityName = []string{
+ infoLog: "INFO",
+ warningLog: "WARNING",
+ errorLog: "ERROR",
+ fatalLog: "FATAL",
+}
+
+// get returns the value of the severity.
+func (s *severity) get() severity {
+ return severity(atomic.LoadInt32((*int32)(s)))
+}
+
+// set sets the value of the severity.
+func (s *severity) set(val severity) {
+ atomic.StoreInt32((*int32)(s), int32(val))
+}
+
+// String is part of the flag.Value interface.
+func (s *severity) String() string {
+ return strconv.FormatInt(int64(*s), 10)
+}
+
+// Get is part of the flag.Value interface.
+func (s *severity) Get() interface{} {
+ return *s
+}
+
+// Set is part of the flag.Value interface.
+func (s *severity) Set(value string) error {
+ var threshold severity
+ // Is it a known name?
+ if v, ok := severityByName(value); ok {
+ threshold = v
+ } else {
+ v, err := strconv.Atoi(value)
+ if err != nil {
+ return err
+ }
+ threshold = severity(v)
+ }
+ logging.stderrThreshold.set(threshold)
+ return nil
+}
+
+func severityByName(s string) (severity, bool) {
+ s = strings.ToUpper(s)
+ for i, name := range severityName {
+ if name == s {
+ return severity(i), true
+ }
+ }
+ return 0, false
+}
+
+// OutputStats tracks the number of output lines and bytes written.
+type OutputStats struct {
+ lines int64
+ bytes int64
+}
+
+// Lines returns the number of lines written.
+func (s *OutputStats) Lines() int64 {
+ return atomic.LoadInt64(&s.lines)
+}
+
+// Bytes returns the number of bytes written.
+func (s *OutputStats) Bytes() int64 {
+ return atomic.LoadInt64(&s.bytes)
+}
+
+// Stats tracks the number of lines of output and number of bytes
+// per severity level. Values must be read with atomic.LoadInt64.
+var Stats struct {
+ Info, Warning, Error OutputStats
+}
+
+var severityStats = [numSeverity]*OutputStats{
+ infoLog: &Stats.Info,
+ warningLog: &Stats.Warning,
+ errorLog: &Stats.Error,
+}
+
+// Level is exported because it appears in the arguments to V and is
+// the type of the v flag, which can be set programmatically.
+// It's a distinct type because we want to discriminate it from logType.
+// Variables of type level are only changed under logging.mu.
+// The -v flag is read only with atomic ops, so the state of the logging
+// module is consistent.
+
+// Level is treated as a sync/atomic int32.
+
+// Level specifies a level of verbosity for V logs. *Level implements
+// flag.Value; the -v flag is of type Level and should be modified
+// only through the flag.Value interface.
+type Level int32
+
+// get returns the value of the Level.
+func (l *Level) get() Level {
+ return Level(atomic.LoadInt32((*int32)(l)))
+}
+
+// set sets the value of the Level.
+func (l *Level) set(val Level) {
+ atomic.StoreInt32((*int32)(l), int32(val))
+}
+
+// String is part of the flag.Value interface.
+func (l *Level) String() string {
+ return strconv.FormatInt(int64(*l), 10)
+}
+
+// Get is part of the flag.Value interface.
+func (l *Level) Get() interface{} {
+ return *l
+}
+
+// Set is part of the flag.Value interface.
+func (l *Level) Set(value string) error {
+ v, err := strconv.Atoi(value)
+ if err != nil {
+ return err
+ }
+ logging.mu.Lock()
+ defer logging.mu.Unlock()
+ logging.setVState(Level(v), logging.vmodule.filter, false)
+ return nil
+}
+
+// moduleSpec represents the setting of the -vmodule flag.
+type moduleSpec struct {
+ filter []modulePat
+}
+
+// modulePat contains a filter for the -vmodule flag.
+// It holds a verbosity level and a file pattern to match.
+type modulePat struct {
+ pattern string
+ literal bool // The pattern is a literal string
+ level Level
+}
+
+// match reports whether the file matches the pattern. It uses a string
+// comparison if the pattern contains no metacharacters.
+func (m *modulePat) match(file string) bool {
+ if m.literal {
+ return file == m.pattern
+ }
+ match, _ := filepath.Match(m.pattern, file)
+ return match
+}
+
+func (m *moduleSpec) String() string {
+ // Lock because the type is not atomic. TODO: clean this up.
+ logging.mu.Lock()
+ defer logging.mu.Unlock()
+ var b bytes.Buffer
+ for i, f := range m.filter {
+ if i > 0 {
+ b.WriteRune(',')
+ }
+ fmt.Fprintf(&b, "%s=%d", f.pattern, f.level)
+ }
+ return b.String()
+}
+
+// Get is part of the (Go 1.2) flag.Getter interface. It always returns nil for this flag type since the
+// struct is not exported.
+func (m *moduleSpec) Get() interface{} {
+ return nil
+}
+
+var errVmoduleSyntax = errors.New("syntax error: expect comma-separated list of filename=N")
+
+// Syntax: -vmodule=recordio=2,file=1,gfs*=3
+func (m *moduleSpec) Set(value string) error {
+ var filter []modulePat
+ for _, pat := range strings.Split(value, ",") {
+ if len(pat) == 0 {
+ // Empty strings such as from a trailing comma can be ignored.
+ continue
+ }
+ patLev := strings.Split(pat, "=")
+ if len(patLev) != 2 || len(patLev[0]) == 0 || len(patLev[1]) == 0 {
+ return errVmoduleSyntax
+ }
+ pattern := patLev[0]
+ v, err := strconv.Atoi(patLev[1])
+ if err != nil {
+ return errors.New("syntax error: expect comma-separated list of filename=N")
+ }
+ if v < 0 {
+ return errors.New("negative value for vmodule level")
+ }
+ if v == 0 {
+ continue // Ignore. It's harmless but no point in paying the overhead.
+ }
+ // TODO: check syntax of filter?
+ filter = append(filter, modulePat{pattern, isLiteral(pattern), Level(v)})
+ }
+ logging.mu.Lock()
+ defer logging.mu.Unlock()
+ logging.setVState(logging.verbosity, filter, true)
+ return nil
+}
+
+// isLiteral reports whether the pattern is a literal string, that is, has no metacharacters
+// that require filepath.Match to be called to match the pattern.
+func isLiteral(pattern string) bool {
+ return !strings.ContainsAny(pattern, `\*?[]`)
+}
+
+// traceLocation represents the setting of the -log_backtrace_at flag.
+type traceLocation struct {
+ file string
+ line int
+}
+
+// isSet reports whether the trace location has been specified.
+// logging.mu is held.
+func (t *traceLocation) isSet() bool {
+ return t.line > 0
+}
+
+// match reports whether the specified file and line matches the trace location.
+// The argument file name is the full path, not the basename specified in the flag.
+// logging.mu is held.
+func (t *traceLocation) match(file string, line int) bool {
+ if t.line != line {
+ return false
+ }
+ if i := strings.LastIndex(file, "/"); i >= 0 {
+ file = file[i+1:]
+ }
+ return t.file == file
+}
+
+func (t *traceLocation) String() string {
+ // Lock because the type is not atomic. TODO: clean this up.
+ logging.mu.Lock()
+ defer logging.mu.Unlock()
+ return fmt.Sprintf("%s:%d", t.file, t.line)
+}
+
+// Get is part of the (Go 1.2) flag.Getter interface. It always returns nil for this flag type since the
+// struct is not exported.
+func (t *traceLocation) Get() interface{} {
+ return nil
+}
+
+var errTraceSyntax = errors.New("syntax error: expect file.go:234")
+
+// Syntax: -log_backtrace_at=gopherflakes.go:234
+// Note that unlike vmodule the file extension is included here.
+func (t *traceLocation) Set(value string) error {
+	if value == "" {
+		// Unset.
+		logging.mu.Lock()
+		defer logging.mu.Unlock()
+		t.line = 0
+		t.file = ""
+		return nil
+	}
+ fields := strings.Split(value, ":")
+ if len(fields) != 2 {
+ return errTraceSyntax
+ }
+ file, line := fields[0], fields[1]
+ if !strings.Contains(file, ".") {
+ return errTraceSyntax
+ }
+ v, err := strconv.Atoi(line)
+ if err != nil {
+ return errTraceSyntax
+ }
+ if v <= 0 {
+ return errors.New("negative or zero value for level")
+ }
+ logging.mu.Lock()
+ defer logging.mu.Unlock()
+ t.line = v
+ t.file = file
+ return nil
+}
+
+// flushSyncWriter is the interface satisfied by logging destinations.
+type flushSyncWriter interface {
+ Flush() error
+ Sync() error
+ io.Writer
+}
+
+func init() {
+ flag.BoolVar(&logging.toStderr, "logtostderr", false, "log to standard error instead of files")
+ flag.BoolVar(&logging.alsoToStderr, "alsologtostderr", true, "log to standard error as well as files")
+ flag.Var(&logging.verbosity, "v", "log level for V logs")
+ flag.Var(&logging.stderrThreshold, "stderrthreshold", "logs at or above this threshold go to stderr")
+ flag.Var(&logging.vmodule, "vmodule", "comma-separated list of pattern=N settings for file-filtered logging")
+ flag.Var(&logging.traceLocation, "log_backtrace_at", "when logging hits line file:N, emit a stack trace")
+
+ // Default stderrThreshold is ERROR.
+ logging.stderrThreshold = errorLog
+
+ logging.setVState(0, nil, false)
+ go logging.flushDaemon()
+}
+
+// Flush flushes all pending log I/O.
+func Flush() {
+ logging.lockAndFlushAll()
+}
+
+// loggingT collects all the global state of the logging setup.
+type loggingT struct {
+ // Boolean flags. Not handled atomically because the flag.Value interface
+ // does not let us avoid the =true, and that shorthand is necessary for
+ // compatibility. TODO: does this matter enough to fix? Seems unlikely.
+ toStderr bool // The -logtostderr flag.
+ alsoToStderr bool // The -alsologtostderr flag.
+
+ // Level flag. Handled atomically.
+ stderrThreshold severity // The -stderrthreshold flag.
+
+ // freeList is a list of byte buffers, maintained under freeListMu.
+ freeList *buffer
+ // freeListMu maintains the free list. It is separate from the main mutex
+ // so buffers can be grabbed and printed to without holding the main lock,
+ // for better parallelization.
+ freeListMu sync.Mutex
+
+ // mu protects the remaining elements of this structure and is
+ // used to synchronize logging.
+ mu sync.Mutex
+ // file holds writer for each of the log types.
+ file [numSeverity]flushSyncWriter
+ // pcs is used in V to avoid an allocation when computing the caller's PC.
+ pcs [1]uintptr
+ // vmap is a cache of the V Level for each V() call site, identified by PC.
+ // It is wiped whenever the vmodule flag changes state.
+ vmap map[uintptr]Level
+ // filterLength stores the length of the vmodule filter chain. If greater
+ // than zero, it means vmodule is enabled. It may be read safely
+ // using sync.LoadInt32, but is only modified under mu.
+ filterLength int32
+ // traceLocation is the state of the -log_backtrace_at flag.
+ traceLocation traceLocation
+ // These flags are modified only under lock, although verbosity may be fetched
+ // safely using atomic.LoadInt32.
+ vmodule moduleSpec // The state of the -vmodule flag.
+	verbosity Level      // V logging level, the value of the -v flag.
+
+ // added by seaweedfs
+ exited bool
+}
+
+// buffer holds a byte Buffer for reuse. The zero value is ready for use.
+type buffer struct {
+ bytes.Buffer
+ tmp [64]byte // temporary byte array for creating headers.
+ next *buffer
+}
+
+var logging loggingT
+
+// setVState sets a consistent state for V logging.
+// l.mu is held.
+func (l *loggingT) setVState(verbosity Level, filter []modulePat, setFilter bool) {
+ // Turn verbosity off so V will not fire while we are in transition.
+ logging.verbosity.set(0)
+ // Ditto for filter length.
+ atomic.StoreInt32(&logging.filterLength, 0)
+
+ // Set the new filters and wipe the pc->Level map if the filter has changed.
+ if setFilter {
+ logging.vmodule.filter = filter
+ logging.vmap = make(map[uintptr]Level)
+ }
+
+ // Things are consistent now, so enable filtering and verbosity.
+ // They are enabled in order opposite to that in V.
+ atomic.StoreInt32(&logging.filterLength, int32(len(filter)))
+ logging.verbosity.set(verbosity)
+}
+
+// getBuffer returns a new, ready-to-use buffer.
+func (l *loggingT) getBuffer() *buffer {
+ l.freeListMu.Lock()
+ b := l.freeList
+ if b != nil {
+ l.freeList = b.next
+ }
+ l.freeListMu.Unlock()
+ if b == nil {
+ b = new(buffer)
+ } else {
+ b.next = nil
+ b.Reset()
+ }
+ return b
+}
+
+// putBuffer returns a buffer to the free list.
+func (l *loggingT) putBuffer(b *buffer) {
+ if b.Len() >= 256 {
+ // Let big buffers die a natural death.
+ return
+ }
+ l.freeListMu.Lock()
+ b.next = l.freeList
+ l.freeList = b
+ l.freeListMu.Unlock()
+}
+
+var timeNow = time.Now // Stubbed out for testing.
+
+/*
+header formats a log header as defined by the C++ implementation.
+It returns a buffer containing the formatted header and the user's file and line number.
+The depth specifies how many stack frames above lives the source line to be identified in the log message.
+
+Log lines have this form:
+	Lmmdd hh:mm:ss threadid file:line] msg...
+where the fields are defined as follows:
+ L A single character, representing the log level (eg 'I' for INFO)
+ mm The month (zero padded; ie May is '05')
+ dd The day (zero padded)
+	hh:mm:ss         Time in hours, minutes and seconds
+ threadid The space-padded thread ID as returned by GetTID()
+ file The file name
+ line The line number
+ msg The user-supplied message
+*/
+func (l *loggingT) header(s severity, depth int) (*buffer, string, int) {
+ _, file, line, ok := runtime.Caller(3 + depth)
+ if !ok {
+ file = "???"
+ line = 1
+ } else {
+ slash := strings.LastIndex(file, "/")
+ if slash >= 0 {
+ file = file[slash+1:]
+ }
+ }
+ return l.formatHeader(s, file, line), file, line
+}
+
+// formatHeader formats a log header using the provided file name and line number.
+func (l *loggingT) formatHeader(s severity, file string, line int) *buffer {
+ now := timeNow()
+ if line < 0 {
+ line = 0 // not a real line number, but acceptable to someDigits
+ }
+ if s > fatalLog {
+ s = infoLog // for safety.
+ }
+ buf := l.getBuffer()
+
+ // Avoid Fprintf, for speed. The format is so simple that we can do it quickly by hand.
+ // It's worth about 3X. Fprintf is hard.
+ _, month, day := now.Date()
+ hour, minute, second := now.Clock()
+	// Lmmdd hh:mm:ss threadid file:line]
+ buf.tmp[0] = severityChar[s]
+ buf.twoDigits(1, int(month))
+ buf.twoDigits(3, day)
+ buf.tmp[5] = ' '
+ buf.twoDigits(6, hour)
+ buf.tmp[8] = ':'
+ buf.twoDigits(9, minute)
+ buf.tmp[11] = ':'
+ buf.twoDigits(12, second)
+ buf.tmp[14] = ' '
+ buf.nDigits(5, 15, pid, ' ') // TODO: should be TID
+ buf.tmp[20] = ' '
+ buf.Write(buf.tmp[:21])
+ buf.WriteString(file)
+ buf.tmp[0] = ':'
+ n := buf.someDigits(1, line)
+ buf.tmp[n+1] = ']'
+ buf.tmp[n+2] = ' '
+ buf.Write(buf.tmp[:n+3])
+ return buf
+}
+
+// Some custom tiny helper functions to print the log header efficiently.
+
+const digits = "0123456789"
+
+// twoDigits formats a zero-prefixed two-digit integer at buf.tmp[i].
+func (buf *buffer) twoDigits(i, d int) {
+ buf.tmp[i+1] = digits[d%10]
+ d /= 10
+ buf.tmp[i] = digits[d%10]
+}
+
+// nDigits formats an n-digit integer at buf.tmp[i],
+// padding with pad on the left.
+// It assumes d >= 0.
+func (buf *buffer) nDigits(n, i, d int, pad byte) {
+ j := n - 1
+ for ; j >= 0 && d > 0; j-- {
+ buf.tmp[i+j] = digits[d%10]
+ d /= 10
+ }
+ for ; j >= 0; j-- {
+ buf.tmp[i+j] = pad
+ }
+}
+
+// someDigits formats a zero-prefixed variable-width integer at buf.tmp[i].
+func (buf *buffer) someDigits(i, d int) int {
+ // Print into the top, then copy down. We know there's space for at least
+ // a 10-digit number.
+ j := len(buf.tmp)
+ for {
+ j--
+ buf.tmp[j] = digits[d%10]
+ d /= 10
+ if d == 0 {
+ break
+ }
+ }
+ return copy(buf.tmp[i:], buf.tmp[j:])
+}
+
+func (l *loggingT) println(s severity, args ...interface{}) {
+ buf, file, line := l.header(s, 0)
+ fmt.Fprintln(buf, args...)
+ l.output(s, buf, file, line, false)
+}
+
+func (l *loggingT) print(s severity, args ...interface{}) {
+ l.printDepth(s, 1, args...)
+}
+
+func (l *loggingT) printDepth(s severity, depth int, args ...interface{}) {
+ buf, file, line := l.header(s, depth)
+ fmt.Fprint(buf, args...)
+ if buf.Bytes()[buf.Len()-1] != '\n' {
+ buf.WriteByte('\n')
+ }
+ l.output(s, buf, file, line, false)
+}
+
+func (l *loggingT) printf(s severity, format string, args ...interface{}) {
+ buf, file, line := l.header(s, 0)
+ fmt.Fprintf(buf, format, args...)
+ if buf.Bytes()[buf.Len()-1] != '\n' {
+ buf.WriteByte('\n')
+ }
+ l.output(s, buf, file, line, false)
+}
+
+// printWithFileLine behaves like print but uses the provided file and line number. If
+// alsoLogToStderr is true, the log message always appears on standard error; it
+// will also appear in the log file unless --logtostderr is set.
+func (l *loggingT) printWithFileLine(s severity, file string, line int, alsoToStderr bool, args ...interface{}) {
+ buf := l.formatHeader(s, file, line)
+ fmt.Fprint(buf, args...)
+ if buf.Bytes()[buf.Len()-1] != '\n' {
+ buf.WriteByte('\n')
+ }
+ l.output(s, buf, file, line, alsoToStderr)
+}
+
+// output writes the data to the log files and releases the buffer.
+func (l *loggingT) output(s severity, buf *buffer, file string, line int, alsoToStderr bool) {
+ l.mu.Lock()
+ if l.traceLocation.isSet() {
+ if l.traceLocation.match(file, line) {
+ buf.Write(stacks(false))
+ }
+ }
+ data := buf.Bytes()
+ if l.toStderr {
+ os.Stderr.Write(data)
+ } else {
+ if alsoToStderr || l.alsoToStderr || s >= l.stderrThreshold.get() {
+ os.Stderr.Write(data)
+ }
+ if l.file[s] == nil {
+ if err := l.createFiles(s); err != nil {
+ os.Stderr.Write(data) // Make sure the message appears somewhere.
+ l.exit(err)
+ }
+ }
+ switch s {
+ case fatalLog:
+ l.file[fatalLog].Write(data)
+ fallthrough
+ case errorLog:
+ l.file[errorLog].Write(data)
+ fallthrough
+ case warningLog:
+ l.file[warningLog].Write(data)
+ fallthrough
+ case infoLog:
+ l.file[infoLog].Write(data)
+ }
+ }
+ if s == fatalLog {
+ // If we got here via Exit rather than Fatal, print no stacks.
+ if atomic.LoadUint32(&fatalNoStacks) > 0 {
+ l.mu.Unlock()
+ timeoutFlush(10 * time.Second)
+ os.Exit(1)
+ }
+ // Dump all goroutine stacks before exiting.
+ // First, make sure we see the trace for the current goroutine on standard error.
+ // If -logtostderr has been specified, the loop below will do that anyway
+ // as the first stack in the full dump.
+ if !l.toStderr {
+ os.Stderr.Write(stacks(false))
+ }
+ // Write the stack trace for all goroutines to the files.
+ trace := stacks(true)
+ logExitFunc = func(error) {} // If we get a write error, we'll still exit below.
+ for log := fatalLog; log >= infoLog; log-- {
+ if f := l.file[log]; f != nil { // Can be nil if -logtostderr is set.
+ f.Write(trace)
+ }
+ }
+ l.mu.Unlock()
+ timeoutFlush(10 * time.Second)
+ os.Exit(255) // C++ uses -1, which is silly because it's anded with 255 anyway.
+ }
+ l.putBuffer(buf)
+ l.mu.Unlock()
+ if stats := severityStats[s]; stats != nil {
+ atomic.AddInt64(&stats.lines, 1)
+ atomic.AddInt64(&stats.bytes, int64(len(data)))
+ }
+}
+
+// timeoutFlush calls Flush and returns when it completes or after timeout
+// elapses, whichever happens first. This is needed because the hooks invoked
+// by Flush may deadlock when glog.Fatal is called from a hook that holds
+// a lock.
+func timeoutFlush(timeout time.Duration) {
+ done := make(chan bool, 1)
+ go func() {
+ Flush() // calls logging.lockAndFlushAll()
+ done <- true
+ }()
+ select {
+ case <-done:
+ case <-time.After(timeout):
+ fmt.Fprintln(os.Stderr, "glog: Flush took longer than", timeout)
+ }
+}
+
+// stacks is a wrapper for runtime.Stack that attempts to recover the data for all goroutines.
+func stacks(all bool) []byte {
+ // We don't know how big the traces are, so grow a few times if they don't fit. Start large, though.
+ n := 10000
+ if all {
+ n = 100000
+ }
+ var trace []byte
+ for i := 0; i < 5; i++ {
+ trace = make([]byte, n)
+ nbytes := runtime.Stack(trace, all)
+ if nbytes < len(trace) {
+ return trace[:nbytes]
+ }
+ n *= 2
+ }
+ return trace
+}
+
+// logExitFunc provides a simple mechanism to override the default behavior
+// of exiting on error. Used in testing and to guarantee we reach a required exit
+// for fatal logs. Instead, exit could be a function rather than a method but that
+// would make its use clumsier.
+var logExitFunc func(error)
+
+// exit is called if there is trouble creating or writing log files.
+// It flushes the logs and exits the program; there's no point in hanging around.
+// l.mu is held.
+func (l *loggingT) exit(err error) {
+ fmt.Fprintf(os.Stderr, "glog: exiting because of error: %s\n", err)
+ // If logExitFunc is set, we do that instead of exiting.
+ if logExitFunc != nil {
+ logExitFunc(err)
+ return
+ }
+ l.flushAll()
+ l.exited = true // os.Exit(2)
+}
+
+// syncBuffer joins a bufio.Writer to its underlying file, providing access to the
+// file's Sync method and providing a wrapper for the Write method that provides log
+// file rotation. There are conflicting methods, so the file cannot be embedded.
+// l.mu is held for all its methods.
+type syncBuffer struct {
+ logger *loggingT
+ *bufio.Writer
+ file *os.File
+ sev severity
+ nbytes uint64 // The number of bytes written to this file
+}
+
+func (sb *syncBuffer) Sync() error {
+ return sb.file.Sync()
+}
+
+func (sb *syncBuffer) Write(p []byte) (n int, err error) {
+ if sb.logger.exited {
+ return
+ }
+ if sb.nbytes+uint64(len(p)) >= MaxSize {
+ if err := sb.rotateFile(time.Now()); err != nil {
+ sb.logger.exit(err)
+ }
+ }
+ n, err = sb.Writer.Write(p)
+ sb.nbytes += uint64(n)
+ if err != nil {
+ sb.logger.exit(err)
+ }
+ return
+}
+
+// rotateFile closes the syncBuffer's file and starts a new one.
+func (sb *syncBuffer) rotateFile(now time.Time) error {
+ if sb.file != nil {
+ sb.Flush()
+ sb.file.Close()
+ }
+ var err error
+ sb.file, _, err = create(severityName[sb.sev], now)
+ sb.nbytes = 0
+ if err != nil {
+ return err
+ }
+
+ sb.Writer = bufio.NewWriterSize(sb.file, bufferSize)
+
+ // Write header.
+ var buf bytes.Buffer
+ fmt.Fprintf(&buf, "Log file created at: %s\n", now.Format("2006/01/02 15:04:05"))
+ fmt.Fprintf(&buf, "Running on machine: %s\n", host)
+ fmt.Fprintf(&buf, "Binary: Built with %s %s for %s/%s\n", runtime.Compiler, runtime.Version(), runtime.GOOS, runtime.GOARCH)
+ fmt.Fprintf(&buf, "Log line format: [IWEF]mmdd hh:mm:ss threadid file:line] msg\n")
+ n, err := sb.file.Write(buf.Bytes())
+ sb.nbytes += uint64(n)
+ return err
+}
+
+// bufferSize sizes the buffer associated with each log file. It's large
+// so that log records can accumulate without the logging thread blocking
+// on disk I/O. The flushDaemon will block instead.
+const bufferSize = 256 * 1024
+
+// createFiles creates all the log files for severity from sev down to infoLog.
+// l.mu is held.
+func (l *loggingT) createFiles(sev severity) error {
+ now := time.Now()
+ // Files are created in decreasing severity order, so as soon as we find one
+ // has already been created, we can stop.
+ for s := sev; s >= infoLog && l.file[s] == nil; s-- {
+ sb := &syncBuffer{
+ logger: l,
+ sev: s,
+ }
+ if err := sb.rotateFile(now); err != nil {
+ return err
+ }
+ l.file[s] = sb
+ }
+ return nil
+}
+
+const flushInterval = 30 * time.Second
+
+// flushDaemon periodically flushes the log file buffers.
+func (l *loggingT) flushDaemon() {
+	for range time.NewTicker(flushInterval).C {
+ l.lockAndFlushAll()
+ }
+}
+
+// lockAndFlushAll is like flushAll but locks l.mu first.
+func (l *loggingT) lockAndFlushAll() {
+ l.mu.Lock()
+ l.flushAll()
+ l.mu.Unlock()
+}
+
+// flushAll flushes all the logs and attempts to "sync" their data to disk.
+// l.mu is held.
+func (l *loggingT) flushAll() {
+ // Flush from fatal down, in case there's trouble flushing.
+ for s := fatalLog; s >= infoLog; s-- {
+ file := l.file[s]
+ if file != nil {
+ file.Flush() // ignore error
+ file.Sync() // ignore error
+ }
+ }
+}
+
+// CopyStandardLogTo arranges for messages written to the Go "log" package's
+// default logs to also appear in the Google logs for the named and lower
+// severities. Subsequent changes to the standard log's default output location
+// or format may break this behavior.
+//
+// Valid names are "INFO", "WARNING", "ERROR", and "FATAL". If the name is not
+// recognized, CopyStandardLogTo panics.
+func CopyStandardLogTo(name string) {
+ sev, ok := severityByName(name)
+ if !ok {
+ panic(fmt.Sprintf("log.CopyStandardLogTo(%q): unrecognized severity name", name))
+ }
+ // Set a log format that captures the user's file and line:
+ // d.go:23: message
+ stdLog.SetFlags(stdLog.Lshortfile)
+ stdLog.SetOutput(logBridge(sev))
+}
+
+// logBridge provides the Write method that enables CopyStandardLogTo to connect
+// Go's standard logs to the logs provided by this package.
+type logBridge severity
+
+// Write parses the standard logging line and passes its components to the
+// logger for severity(lb).
+func (lb logBridge) Write(b []byte) (n int, err error) {
+ var (
+ file = "???"
+ line = 1
+ text string
+ )
+ // Split "d.go:23: message" into "d.go", "23", and "message".
+ if parts := bytes.SplitN(b, []byte{':'}, 3); len(parts) != 3 || len(parts[0]) < 1 || len(parts[2]) < 1 {
+ text = fmt.Sprintf("bad log format: %s", b)
+ } else {
+ file = string(parts[0])
+ text = string(parts[2][1:]) // skip leading space
+ line, err = strconv.Atoi(string(parts[1]))
+ if err != nil {
+ text = fmt.Sprintf("bad line number: %s", b)
+ line = 1
+ }
+ }
+ // printWithFileLine with alsoToStderr=true, so standard log messages
+ // always appear on standard error.
+ logging.printWithFileLine(severity(lb), file, line, true, text)
+ return len(b), nil
+}
+
+// setV computes and remembers the V level for a given PC
+// when vmodule is enabled.
+// File pattern matching takes the basename of the file, stripped
+// of its .go suffix, and uses filepath.Match, which is a little more
+// general than the *? matching used in C++.
+// l.mu is held.
+func (l *loggingT) setV(pc uintptr) Level {
+ fn := runtime.FuncForPC(pc)
+ file, _ := fn.FileLine(pc)
+ // The file is something like /a/b/c/d.go. We want just the d.
+ if strings.HasSuffix(file, ".go") {
+ file = file[:len(file)-3]
+ }
+ if slash := strings.LastIndex(file, "/"); slash >= 0 {
+ file = file[slash+1:]
+ }
+ for _, filter := range l.vmodule.filter {
+ if filter.match(file) {
+ l.vmap[pc] = filter.level
+ return filter.level
+ }
+ }
+ l.vmap[pc] = 0
+ return 0
+}
+
+// Verbose is a boolean type that implements Infof (like Printf) etc.
+// See the documentation of V for more information.
+type Verbose bool
+
+// V reports whether verbosity at the call site is at least the requested level.
+// The returned value is a boolean of type Verbose, which implements Info, Infoln
+// and Infof. These methods will write to the Info log if called.
+// Thus, one may write either
+// if glog.V(2) { glog.Info("log this") }
+// or
+// glog.V(2).Info("log this")
+// The second form is shorter but the first is cheaper if logging is off because it does
+// not evaluate its arguments.
+//
+// Whether an individual call to V generates a log record depends on the setting of
+// the -v and --vmodule flags; both are off by default. If the level in the call to
+// V is at least the value of -v, or of -vmodule for the source file containing the
+// call, the V call will log.
+func V(level Level) Verbose {
+ // This function tries hard to be cheap unless there's work to do.
+ // The fast path is two atomic loads and compares.
+
+ // Here is a cheap but safe test to see if V logging is enabled globally.
+ if logging.verbosity.get() >= level {
+ return Verbose(true)
+ }
+
+	// It's off globally but vmodule may still be set.
+ // Here is another cheap but safe test to see if vmodule is enabled.
+ if atomic.LoadInt32(&logging.filterLength) > 0 {
+ // Now we need a proper lock to use the logging structure. The pcs field
+ // is shared so we must lock before accessing it. This is fairly expensive,
+ // but if V logging is enabled we're slow anyway.
+ logging.mu.Lock()
+ defer logging.mu.Unlock()
+ if runtime.Callers(2, logging.pcs[:]) == 0 {
+ return Verbose(false)
+ }
+ v, ok := logging.vmap[logging.pcs[0]]
+ if !ok {
+ v = logging.setV(logging.pcs[0])
+ }
+ return Verbose(v >= level)
+ }
+ return Verbose(false)
+}
+
+// Info is equivalent to the global Info function, guarded by the value of v.
+// See the documentation of V for usage.
+func (v Verbose) Info(args ...interface{}) {
+ if v {
+ logging.print(infoLog, args...)
+ }
+}
+
+// Infoln is equivalent to the global Infoln function, guarded by the value of v.
+// See the documentation of V for usage.
+func (v Verbose) Infoln(args ...interface{}) {
+ if v {
+ logging.println(infoLog, args...)
+ }
+}
+
+// Infof is equivalent to the global Infof function, guarded by the value of v.
+// See the documentation of V for usage.
+func (v Verbose) Infof(format string, args ...interface{}) {
+ if v {
+ logging.printf(infoLog, format, args...)
+ }
+}
+
+// Info logs to the INFO log.
+// Arguments are handled in the manner of fmt.Print; a newline is appended if missing.
+func Info(args ...interface{}) {
+ logging.print(infoLog, args...)
+}
+
+// InfoDepth acts as Info but uses depth to determine which call frame to log.
+// InfoDepth(0, "msg") is the same as Info("msg").
+func InfoDepth(depth int, args ...interface{}) {
+ logging.printDepth(infoLog, depth, args...)
+}
+
+// Infoln logs to the INFO log.
+// Arguments are handled in the manner of fmt.Println; a newline is appended if missing.
+func Infoln(args ...interface{}) {
+ logging.println(infoLog, args...)
+}
+
+// Infof logs to the INFO log.
+// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing.
+func Infof(format string, args ...interface{}) {
+ logging.printf(infoLog, format, args...)
+}
+
+// Warning logs to the WARNING and INFO logs.
+// Arguments are handled in the manner of fmt.Print; a newline is appended if missing.
+func Warning(args ...interface{}) {
+ logging.print(warningLog, args...)
+}
+
+// WarningDepth acts as Warning but uses depth to determine which call frame to log.
+// WarningDepth(0, "msg") is the same as Warning("msg").
+func WarningDepth(depth int, args ...interface{}) {
+ logging.printDepth(warningLog, depth, args...)
+}
+
+// Warningln logs to the WARNING and INFO logs.
+// Arguments are handled in the manner of fmt.Println; a newline is appended if missing.
+func Warningln(args ...interface{}) {
+ logging.println(warningLog, args...)
+}
+
+// Warningf logs to the WARNING and INFO logs.
+// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing.
+func Warningf(format string, args ...interface{}) {
+ logging.printf(warningLog, format, args...)
+}
+
+// Error logs to the ERROR, WARNING, and INFO logs.
+// Arguments are handled in the manner of fmt.Print; a newline is appended if missing.
+func Error(args ...interface{}) {
+ logging.print(errorLog, args...)
+}
+
+// ErrorDepth acts as Error but uses depth to determine which call frame to log.
+// ErrorDepth(0, "msg") is the same as Error("msg").
+func ErrorDepth(depth int, args ...interface{}) {
+ logging.printDepth(errorLog, depth, args...)
+}
+
+// Errorln logs to the ERROR, WARNING, and INFO logs.
+// Arguments are handled in the manner of fmt.Println; a newline is appended if missing.
+func Errorln(args ...interface{}) {
+ logging.println(errorLog, args...)
+}
+
+// Errorf logs to the ERROR, WARNING, and INFO logs.
+// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing.
+func Errorf(format string, args ...interface{}) {
+ logging.printf(errorLog, format, args...)
+}
+
+// Fatal logs to the FATAL, ERROR, WARNING, and INFO logs,
+// including a stack trace of all running goroutines, then calls os.Exit(255).
+// Arguments are handled in the manner of fmt.Print; a newline is appended if missing.
+func Fatal(args ...interface{}) {
+ logging.print(fatalLog, args...)
+}
+
+// FatalDepth acts as Fatal but uses depth to determine which call frame to log.
+// FatalDepth(0, "msg") is the same as Fatal("msg").
+func FatalDepth(depth int, args ...interface{}) {
+ logging.printDepth(fatalLog, depth, args...)
+}
+
+// Fatalln logs to the FATAL, ERROR, WARNING, and INFO logs,
+// including a stack trace of all running goroutines, then calls os.Exit(255).
+// Arguments are handled in the manner of fmt.Println; a newline is appended if missing.
+func Fatalln(args ...interface{}) {
+ logging.println(fatalLog, args...)
+}
+
+// Fatalf logs to the FATAL, ERROR, WARNING, and INFO logs,
+// including a stack trace of all running goroutines, then calls os.Exit(255).
+// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing.
+func Fatalf(format string, args ...interface{}) {
+ logging.printf(fatalLog, format, args...)
+}
+
+// fatalNoStacks is non-zero if we are to exit without dumping goroutine stacks.
+// It allows Exit and relatives to use the Fatal logs.
+var fatalNoStacks uint32
+
+// Exit logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1).
+// Arguments are handled in the manner of fmt.Print; a newline is appended if missing.
+func Exit(args ...interface{}) {
+ atomic.StoreUint32(&fatalNoStacks, 1)
+ logging.print(fatalLog, args...)
+}
+
+// ExitDepth acts as Exit but uses depth to determine which call frame to log.
+// ExitDepth(0, "msg") is the same as Exit("msg").
+func ExitDepth(depth int, args ...interface{}) {
+ atomic.StoreUint32(&fatalNoStacks, 1)
+ logging.printDepth(fatalLog, depth, args...)
+}
+
+// Exitln logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1).
+func Exitln(args ...interface{}) {
+ atomic.StoreUint32(&fatalNoStacks, 1)
+ logging.println(fatalLog, args...)
+}
+
+// Exitf logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1).
+// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing.
+func Exitf(format string, args ...interface{}) {
+ atomic.StoreUint32(&fatalNoStacks, 1)
+ logging.printf(fatalLog, format, args...)
+}
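A short sketch of the CopyStandardLogTo bridge defined above, which routes messages written through the standard library's log package into glog's INFO log as well (import path as vendored here; the message text is illustrative):

	package main

	import (
		"flag"
		stdlog "log"

		"github.com/chrislusf/seaweedfs/weed/glog"
	)

	func main() {
		flag.Parse()
		defer glog.Flush()

		// Valid names are "INFO", "WARNING", "ERROR" and "FATAL"; anything
		// else makes CopyStandardLogTo panic.
		glog.CopyStandardLogTo("INFO")

		stdlog.Println("written via the standard log package, captured by glog")
	}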
diff --git a/weed/glog/glog_file.go b/weed/glog/glog_file.go
new file mode 100644
index 000000000..65075d281
--- /dev/null
+++ b/weed/glog/glog_file.go
@@ -0,0 +1,124 @@
+// Go support for leveled logs, analogous to https://code.google.com/p/google-glog/
+//
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// File I/O for logs.
+
+package glog
+
+import (
+ "errors"
+ "flag"
+ "fmt"
+ "os"
+ "os/user"
+ "path/filepath"
+ "strings"
+ "sync"
+ "time"
+)
+
+// MaxSize is the maximum size of a log file in bytes.
+var MaxSize uint64 = 1024 * 1024 * 1800
+
+// logDirs lists the candidate directories for new log files.
+var logDirs []string
+
+// If non-empty, overrides the choice of directory in which to write logs.
+// See createLogDirs for the full list of possible destinations.
+var logDir = flag.String("log_dir", "", "If non-empty, write log files in this directory")
+
+func createLogDirs() {
+ if *logDir != "" {
+ logDirs = append(logDirs, *logDir)
+ }
+ logDirs = append(logDirs, os.TempDir())
+}
+
+var (
+ pid = os.Getpid()
+ program = filepath.Base(os.Args[0])
+ host = "unknownhost"
+ userName = "unknownuser"
+)
+
+func init() {
+ h, err := os.Hostname()
+ if err == nil {
+ host = shortHostname(h)
+ }
+
+ current, err := user.Current()
+ if err == nil {
+ userName = current.Username
+ }
+
+ // Sanitize userName since it may contain filepath separators on Windows.
+ userName = strings.Replace(userName, `\`, "_", -1)
+}
+
+// shortHostname returns its argument, truncating at the first period.
+// For instance, given "www.google.com" it returns "www".
+func shortHostname(hostname string) string {
+ if i := strings.Index(hostname, "."); i >= 0 {
+ return hostname[:i]
+ }
+ return hostname
+}
+
+// logName returns a new log file name containing tag, with start time t, and
+// the name for the symlink for tag.
+func logName(tag string, t time.Time) (name, link string) {
+ name = fmt.Sprintf("%s.%s.%s.log.%s.%04d%02d%02d-%02d%02d%02d.%d",
+ program,
+ host,
+ userName,
+ tag,
+ t.Year(),
+ t.Month(),
+ t.Day(),
+ t.Hour(),
+ t.Minute(),
+ t.Second(),
+ pid)
+ return name, program + "." + tag
+}
+
+var onceLogDirs sync.Once
+
+// create creates a new log file and returns the file and its filename, which
+// contains tag ("INFO", "FATAL", etc.) and t. If the file is created
+// successfully, create also attempts to update the symlink for that tag, ignoring
+// errors.
+func create(tag string, t time.Time) (f *os.File, filename string, err error) {
+ onceLogDirs.Do(createLogDirs)
+ if len(logDirs) == 0 {
+ return nil, "", errors.New("log: no log dirs")
+ }
+ name, link := logName(tag, t)
+ var lastErr error
+ for _, dir := range logDirs {
+ fname := filepath.Join(dir, name)
+ f, err := os.Create(fname)
+ if err == nil {
+ symlink := filepath.Join(dir, link)
+ os.Remove(symlink) // ignore err
+ os.Symlink(name, symlink) // ignore err
+ return f, fname, nil
+ }
+ lastErr = err
+ }
+ return nil, "", fmt.Errorf("log: cannot create log: %v", lastErr)
+}
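Given logName above, each severity gets its own file named <program>.<host>.<user>.log.<TAG>.<yyyymmdd-hhmmss>.<pid>, plus a <program>.<TAG> symlink pointing at the newest file for that tag. Files are written to -log_dir when it is set, otherwise to os.TempDir(). A hypothetical example: weed.host1.chris.log.INFO.20160602-180914.1234, with symlink weed.INFO.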
diff --git a/weed/glog/glog_test.go b/weed/glog/glog_test.go
new file mode 100644
index 000000000..12c3acf3d
--- /dev/null
+++ b/weed/glog/glog_test.go
@@ -0,0 +1,415 @@
+// Go support for leveled logs, analogous to https://code.google.com/p/google-glog/
+//
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package glog
+
+import (
+ "bytes"
+ "fmt"
+ stdLog "log"
+ "path/filepath"
+ "runtime"
+ "strconv"
+ "strings"
+ "testing"
+ "time"
+)
+
+// Test that shortHostname works as advertised.
+func TestShortHostname(t *testing.T) {
+ for hostname, expect := range map[string]string{
+ "": "",
+ "host": "host",
+ "host.google.com": "host",
+ } {
+ if got := shortHostname(hostname); expect != got {
+ t.Errorf("shortHostname(%q): expected %q, got %q", hostname, expect, got)
+ }
+ }
+}
+
+// flushBuffer wraps a bytes.Buffer to satisfy flushSyncWriter.
+type flushBuffer struct {
+ bytes.Buffer
+}
+
+func (f *flushBuffer) Flush() error {
+ return nil
+}
+
+func (f *flushBuffer) Sync() error {
+ return nil
+}
+
+// swap sets the log writers and returns the old array.
+func (l *loggingT) swap(writers [numSeverity]flushSyncWriter) (old [numSeverity]flushSyncWriter) {
+ l.mu.Lock()
+ defer l.mu.Unlock()
+ old = l.file
+ for i, w := range writers {
+ logging.file[i] = w
+ }
+ return
+}
+
+// newBuffers sets the log writers to all new byte buffers and returns the old array.
+func (l *loggingT) newBuffers() [numSeverity]flushSyncWriter {
+ return l.swap([numSeverity]flushSyncWriter{new(flushBuffer), new(flushBuffer), new(flushBuffer), new(flushBuffer)})
+}
+
+// contents returns the specified log value as a string.
+func contents(s severity) string {
+ return logging.file[s].(*flushBuffer).String()
+}
+
+// contains reports whether the string is contained in the log.
+func contains(s severity, str string, t *testing.T) bool {
+ return strings.Contains(contents(s), str)
+}
+
+// setFlags configures the logging flags the way the test expects them.
+func setFlags() {
+ logging.toStderr = false
+}
+
+// Test that Info works as advertised.
+func TestInfo(t *testing.T) {
+ setFlags()
+ defer logging.swap(logging.newBuffers())
+ Info("test")
+ if !contains(infoLog, "I", t) {
+ t.Errorf("Info has wrong character: %q", contents(infoLog))
+ }
+ if !contains(infoLog, "test", t) {
+ t.Error("Info failed")
+ }
+}
+
+func TestInfoDepth(t *testing.T) {
+ setFlags()
+ defer logging.swap(logging.newBuffers())
+
+ f := func() { InfoDepth(1, "depth-test1") }
+
+ // The next three lines must stay together
+ _, _, wantLine, _ := runtime.Caller(0)
+ InfoDepth(0, "depth-test0")
+ f()
+
+ msgs := strings.Split(strings.TrimSuffix(contents(infoLog), "\n"), "\n")
+ if len(msgs) != 2 {
+ t.Fatalf("Got %d lines, expected 2", len(msgs))
+ }
+
+ for i, m := range msgs {
+ if !strings.HasPrefix(m, "I") {
+ t.Errorf("InfoDepth[%d] has wrong character: %q", i, m)
+ }
+ w := fmt.Sprintf("depth-test%d", i)
+ if !strings.Contains(m, w) {
+ t.Errorf("InfoDepth[%d] missing %q: %q", i, w, m)
+ }
+
+ // pull out the line number (between : and ])
+ msg := m[strings.LastIndex(m, ":")+1:]
+ x := strings.Index(msg, "]")
+ if x < 0 {
+ t.Errorf("InfoDepth[%d]: missing ']': %q", i, m)
+ continue
+ }
+ line, err := strconv.Atoi(msg[:x])
+ if err != nil {
+ t.Errorf("InfoDepth[%d]: bad line number: %q", i, m)
+ continue
+ }
+ wantLine++
+ if wantLine != line {
+ t.Errorf("InfoDepth[%d]: got line %d, want %d", i, line, wantLine)
+ }
+ }
+}
+
+func init() {
+ CopyStandardLogTo("INFO")
+}
+
+// Test that CopyStandardLogTo panics on bad input.
+func TestCopyStandardLogToPanic(t *testing.T) {
+ defer func() {
+ if s, ok := recover().(string); !ok || !strings.Contains(s, "LOG") {
+ t.Errorf(`CopyStandardLogTo("LOG") should have panicked: %v`, s)
+ }
+ }()
+ CopyStandardLogTo("LOG")
+}
+
+// Test that using the standard log package logs to INFO.
+func TestStandardLog(t *testing.T) {
+ setFlags()
+ defer logging.swap(logging.newBuffers())
+ stdLog.Print("test")
+ if !contains(infoLog, "I", t) {
+ t.Errorf("Info has wrong character: %q", contents(infoLog))
+ }
+ if !contains(infoLog, "test", t) {
+ t.Error("Info failed")
+ }
+}
+
+// Test that the header has the correct format.
+func TestHeader(t *testing.T) {
+ setFlags()
+ defer logging.swap(logging.newBuffers())
+ defer func(previous func() time.Time) { timeNow = previous }(timeNow)
+ timeNow = func() time.Time {
+ return time.Date(2006, 1, 2, 15, 4, 5, .067890e9, time.Local)
+ }
+ pid = 1234
+ Info("test")
+ var line int
+ format := "I0102 15:04:05 1234 glog_test.go:%d] test\n"
+ n, err := fmt.Sscanf(contents(infoLog), format, &line)
+ if n != 1 || err != nil {
+ t.Errorf("log format error: %d elements, error %s:\n%s", n, err, contents(infoLog))
+ }
+ // Scanf treats multiple spaces as equivalent to a single space,
+ // so check for correct space-padding also.
+ want := fmt.Sprintf(format, line)
+ if contents(infoLog) != want {
+ t.Errorf("log format error: got:\n\t%q\nwant:\t%q", contents(infoLog), want)
+ }
+}
+
+// Test that an Error log goes to Warning and Info.
+// Even in the Info log, the source character will be E, so the data should
+// all be identical.
+func TestError(t *testing.T) {
+ setFlags()
+ defer logging.swap(logging.newBuffers())
+ Error("test")
+ if !contains(errorLog, "E", t) {
+ t.Errorf("Error has wrong character: %q", contents(errorLog))
+ }
+ if !contains(errorLog, "test", t) {
+ t.Error("Error failed")
+ }
+ str := contents(errorLog)
+ if !contains(warningLog, str, t) {
+ t.Error("Warning failed")
+ }
+ if !contains(infoLog, str, t) {
+ t.Error("Info failed")
+ }
+}
+
+// Test that a Warning log goes to Info.
+// Even in the Info log, the source character will be W, so the data should
+// all be identical.
+func TestWarning(t *testing.T) {
+ setFlags()
+ defer logging.swap(logging.newBuffers())
+ Warning("test")
+ if !contains(warningLog, "W", t) {
+ t.Errorf("Warning has wrong character: %q", contents(warningLog))
+ }
+ if !contains(warningLog, "test", t) {
+ t.Error("Warning failed")
+ }
+ str := contents(warningLog)
+ if !contains(infoLog, str, t) {
+ t.Error("Info failed")
+ }
+}
+
+// Test that a V log goes to Info.
+func TestV(t *testing.T) {
+ setFlags()
+ defer logging.swap(logging.newBuffers())
+ logging.verbosity.Set("2")
+ defer logging.verbosity.Set("0")
+ V(2).Info("test")
+ if !contains(infoLog, "I", t) {
+ t.Errorf("Info has wrong character: %q", contents(infoLog))
+ }
+ if !contains(infoLog, "test", t) {
+ t.Error("Info failed")
+ }
+}
+
+// Test that a vmodule enables a log in this file.
+func TestVmoduleOn(t *testing.T) {
+ setFlags()
+ defer logging.swap(logging.newBuffers())
+ logging.vmodule.Set("glog_test=2")
+ defer logging.vmodule.Set("")
+ if !V(1) {
+ t.Error("V not enabled for 1")
+ }
+ if !V(2) {
+ t.Error("V not enabled for 2")
+ }
+ if V(3) {
+ t.Error("V enabled for 3")
+ }
+ V(2).Info("test")
+ if !contains(infoLog, "I", t) {
+ t.Errorf("Info has wrong character: %q", contents(infoLog))
+ }
+ if !contains(infoLog, "test", t) {
+ t.Error("Info failed")
+ }
+}
+
+// Test that a vmodule of another file does not enable a log in this file.
+func TestVmoduleOff(t *testing.T) {
+ setFlags()
+ defer logging.swap(logging.newBuffers())
+ logging.vmodule.Set("notthisfile=2")
+ defer logging.vmodule.Set("")
+ for i := 1; i <= 3; i++ {
+ if V(Level(i)) {
+ t.Errorf("V enabled for %d", i)
+ }
+ }
+ V(2).Info("test")
+ if contents(infoLog) != "" {
+ t.Error("V logged incorrectly")
+ }
+}
+
+// vGlobs are patterns that match/don't match this file at V=2.
+var vGlobs = map[string]bool{
+ // Easy to test the numeric match here.
+ "glog_test=1": false, // If -vmodule sets V to 1, V(2) will fail.
+ "glog_test=2": true,
+ "glog_test=3": true, // If -vmodule sets V to 1, V(3) will succeed.
+ // These all use 2 and check the patterns. All are true.
+ "*=2": true,
+ "?l*=2": true,
+ "????_*=2": true,
+ "??[mno]?_*t=2": true,
+ // These all use 2 and check the patterns. All are false.
+ "*x=2": false,
+ "m*=2": false,
+ "??_*=2": false,
+ "?[abc]?_*t=2": false,
+}
+
+// Test that vmodule globbing works as advertised.
+func testVmoduleGlob(pat string, match bool, t *testing.T) {
+ setFlags()
+ defer logging.swap(logging.newBuffers())
+ defer logging.vmodule.Set("")
+ logging.vmodule.Set(pat)
+ if V(2) != Verbose(match) {
+ t.Errorf("incorrect match for %q: got %t expected %t", pat, V(2), match)
+ }
+}
+
+// Test that a vmodule globbing works as advertised.
+func TestVmoduleGlob(t *testing.T) {
+ for glob, match := range vGlobs {
+ testVmoduleGlob(glob, match, t)
+ }
+}
+
+func TestRollover(t *testing.T) {
+ setFlags()
+ var err error
+ defer func(previous func(error)) { logExitFunc = previous }(logExitFunc)
+ logExitFunc = func(e error) {
+ err = e
+ }
+ defer func(previous uint64) { MaxSize = previous }(MaxSize)
+ MaxSize = 512
+
+ Info("x") // Be sure we have a file.
+ info, ok := logging.file[infoLog].(*syncBuffer)
+ if !ok {
+ t.Fatal("info wasn't created")
+ }
+ if err != nil {
+ t.Fatalf("info has initial error: %v", err)
+ }
+ fname0 := info.file.Name()
+ Info(strings.Repeat("x", int(MaxSize))) // force a rollover
+ if err != nil {
+ t.Fatalf("info has error after big write: %v", err)
+ }
+
+ // Make sure the next log file gets a file name with a different
+ // time stamp.
+ //
+ // TODO: determine whether we need to support subsecond log
+ // rotation. C++ does not appear to handle this case (nor does it
+ // handle Daylight Savings Time properly).
+ time.Sleep(1 * time.Second)
+
+ Info("x") // create a new file
+ if err != nil {
+ t.Fatalf("error after rotation: %v", err)
+ }
+ fname1 := info.file.Name()
+ if fname0 == fname1 {
+ t.Errorf("info.f.Name did not change: %v", fname0)
+ }
+ if info.nbytes >= MaxSize {
+ t.Errorf("file size was not reset: %d", info.nbytes)
+ }
+}
+
+func TestLogBacktraceAt(t *testing.T) {
+ setFlags()
+ defer logging.swap(logging.newBuffers())
+ // The peculiar style of this code simplifies line counting and maintenance of the
+ // tracing block below.
+ var infoLine string
+ setTraceLocation := func(file string, line int, ok bool, delta int) {
+ if !ok {
+ t.Fatal("could not get file:line")
+ }
+ _, file = filepath.Split(file)
+ infoLine = fmt.Sprintf("%s:%d", file, line+delta)
+ err := logging.traceLocation.Set(infoLine)
+ if err != nil {
+ t.Fatal("error setting log_backtrace_at: ", err)
+ }
+ }
+ {
+ // Start of tracing block. These lines know about each other's relative position.
+ _, file, line, ok := runtime.Caller(0)
+ setTraceLocation(file, line, ok, +2) // Two lines between Caller and Info calls.
+ Info("we want a stack trace here")
+ }
+ numAppearances := strings.Count(contents(infoLog), infoLine)
+ if numAppearances < 2 {
+ // Need 2 appearances, one in the log header and one in the trace:
+ // log_test.go:281: I0511 16:36:06.952398 02238 log_test.go:280] we want a stack trace here
+ // ...
+ // github.com/glog/glog_test.go:280 (0x41ba91)
+ // ...
+ // We could be more precise but that would require knowing the details
+ // of the traceback format, which may not be dependable.
+ t.Fatal("got no trace back; log is ", contents(infoLog))
+ }
+}
+
+func BenchmarkHeader(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ buf, _, _ := logging.header(infoLog, 0)
+ logging.putBuffer(buf)
+ }
+}
diff --git a/weed/images/favicon.go b/weed/images/favicon.go
new file mode 100644
index 000000000..09504976c
--- /dev/null
+++ b/weed/images/favicon.go
@@ -0,0 +1,236 @@
+// Code generated by go-bindata.
+// sources:
+// favicon/favicon.ico
+
+package images
+
+import (
+ "bytes"
+ "compress/gzip"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "strings"
+ "time"
+)
+
+func bindataRead(data []byte, name string) ([]byte, error) {
+ gz, err := gzip.NewReader(bytes.NewBuffer(data))
+ if err != nil {
+ return nil, fmt.Errorf("Read %q: %v", name, err)
+ }
+
+ var buf bytes.Buffer
+ _, err = io.Copy(&buf, gz)
+ clErr := gz.Close()
+
+ if err != nil {
+ return nil, fmt.Errorf("Read %q: %v", name, err)
+ }
+ if clErr != nil {
+ return nil, err
+ }
+
+ return buf.Bytes(), nil
+}
+
+type asset struct {
+ bytes []byte
+ info os.FileInfo
+}
+
+type bindataFileInfo struct {
+ name string
+ size int64
+ mode os.FileMode
+ modTime time.Time
+}
+
+func (fi bindataFileInfo) Name() string {
+ return fi.name
+}
+func (fi bindataFileInfo) Size() int64 {
+ return fi.size
+}
+func (fi bindataFileInfo) Mode() os.FileMode {
+ return fi.mode
+}
+func (fi bindataFileInfo) ModTime() time.Time {
+ return fi.modTime
+}
+func (fi bindataFileInfo) IsDir() bool {
+ return false
+}
+func (fi bindataFileInfo) Sys() interface{} {
+ return nil
+}
+
+var _favicon = []byte("\x1f\x8b\x08\x00\x00\x09\x6e\x88\x00\xff\xec\x94\x5f\x48\x53\x51\x1c\xc7\xbf\x77\xce\xeb\xf1\xff\x9d\x53\x37\xe7\xbf\x4d\xcd\x2c\x08\xa2\x87\xea\xd1\x82\x9e\xa2\x88\x5e\x7a\x8a\x82\x1e\xf2\x21\x7a\x28\x42\xc9\x7a\xb0\x08\x22\x4d\x08\xfa\x43\x25\x61\x05\x4b\xac\xb7\x7a\x92\x8a\x82\xa2\x34\x59\x5c\x4d\x5b\x39\x2d\xfb\xb3\x9c\xe0\x52\xb7\x5a\x6e\x73\xeb\xdc\xbb\xed\xb6\x5d\xc7\x89\x7a\xa8\xa0\xfb\x81\x7b\xef\x39\xbf\xef\x3d\xf7\xfe\xce\x79\xf8\x00\x1c\x74\x10\x04\xe9\x49\xd0\x94\x09\xd4\x03\xb0\x5a\x63\xf3\x1e\x02\x74\xd2\x5a\x03\xad\x09\x52\x1d\xb1\xba\x0c\x87\x1f\x70\xb1\xab\xff\xd0\x2a\x0c\x9d\xde\x0e\xf1\xe4\x66\xbc\xba\xb2\x0f\xe1\xc0\x3c\x5e\x77\xef\x87\xe7\xe9\x4d\xcc\x3a\x1f\x21\x1a\x8d\x22\x1a\x89\xc0\x7d\xbf\x0b\x03\xcd\xab\x31\xde\x73\x18\x6f\x6e\xb5\x61\xea\xf1\x0d\x84\xbf\xf9\x31\x76\xfd\x20\xbe\x4e\xb9\x20\xb6\x6f\xc5\xb3\xd6\xb5\x78\x7e\x62\x13\x7d\xae\x83\xa3\xad\x11\x73\xae\x7e\xbc\x38\xb3\x03\x23\x67\x77\x62\x61\xd6\x03\x67\x57\x13\x1c\xc7\x37\x62\xa2\xf7\x28\x22\xe1\x10\x7c\x93\x22\x42\x7e\x2f\x82\x73\xd3\x18\x68\x59\x03\x97\xbd\x05\x43\x1d\xdb\xa0\xa1\xa1\xf1\xaf\x51\x5b\x97\x3c\xab\x29\x20\xe6\xa4\x69\x71\x35\x21\x56\x1b\x25\x3e\xaf\xac\x22\x31\x12\x2f\x94\x59\x48\x79\x05\x45\x7e\xb9\x84\xde\x4a\x4d\xca\xfa\x0c\x43\x11\xbd\xe7\x1a\xa5\x71\x3e\xa5\x80\x14\x0a\x89\x2c\xfe\x19\x92\x17\x9f\xf3\x94\x2c\x92\x9d\x93\x9b\xf4\xf3\x0c\x7d\x66\x4a\xa7\x9c\xee\xb7\x37\xf9\xcb\x48\x9e\xb5\xd2\xab\x11\x49\x9e\xd5\x27\x37\x03\xc5\xb3\x8e\x63\x1b\xe0\x79\xd2\x8b\x4f\x0f\xaf\xca\x2e\x95\x1c\x2b\xf9\x75\x7e\x7c\x50\x76\xe0\xe8\xf9\x5d\xb2\x2f\xbd\xc3\x77\x11\x59\x0c\x61\xf2\x76\x3b\x66\xc4\x3e\x0c\x1e\x59\x8f\xc0\xf4\x84\xec\xd6\x0f\x7d\xe7\xe0\x7f\x3f\x22\x7b\x33\xf4\xe5\xb3\xec\x54\xc9\x9d\x8b\xc1\x00\x75\x73\x2b\x86\xa9\xcb\xdf\xdd\xe9\x94\x1d\xed\xb2\x37\x53\x4f\xdb\xe1\x7b\x2b\xe2\xe3\xbd\x4b\x78\x79\x71\x0f\x82\xbe\x19\x38\x2f\xef\xa5\xdf\xee\xc0\xe8\x85\xdd\x58\xf0\xba\xe1\x7e\xd0\x8d\xb1\x6b\x07\x64\x9f\x8b\xa7\xb6\xfc\xb9\xc3\xd3\xd0\xd0\xf8\x7b\xac\xb4\x31\xe3\x86\x15\x15\x8c\x54\xbf\x9c\xe7\x19\x39\x97\xc5\xb3\xf2\x65\x44\x32\x79\xbd\x31\x81\x2a\xae\xcb\xe3\x53\x49\x8d\x6b\x79\x35\xaa\xf5\xb6\x1a\x76\x0e\x43\xb5\x54\xb5\xe6\x24\x58\xd2\xa0\xa9\x8a\xd9\x3f\xa5\xd2\x9c\x2e\x2f\xb7\x28\x43\x8b\x90\x26\xcf\x34\x97\x29\xe3\x42\x4b\x9a\x9c\xe7\x75\x26\xc6\x5f\x69\xce\x1b\x0d\x4b\xca\xca\x41\x16\x4b\xfb\x2a\x29\x55\xe7\xea\xf3\x2a\xfa\x49\x2e\x18\x98\x79\x41\xa1\x6a\xbd\x72\x90\xb9\x34\xcd\xcb\x67\xf6\x4f\xb2\x99\xfb\xcb\xe2\x18\x31\x74\x19\x7a\x56\x0c\xe6\xe2\xff\x97\xef\x01\x00\x00\xff\xff\x3e\xd4\x17\xe7\x36\x0e\x00\x00")
+
+func faviconBytes() ([]byte, error) {
+ return bindataRead(
+ _favicon,
+ "favicon/favicon.ico",
+ )
+}
+
+func favicon() (*asset, error) {
+ bytes, err := faviconBytes()
+ if err != nil {
+ return nil, err
+ }
+
+ info := bindataFileInfo{name: "favicon/favicon.ico", size: 3638, mode: os.FileMode(420), modTime: time.Unix(1460621502, 0)}
+ a := &asset{bytes: bytes, info: info}
+ return a, nil
+}
+
+// Asset loads and returns the asset for the given name.
+// It returns an error if the asset could not be found or
+// could not be loaded.
+func Asset(name string) ([]byte, error) {
+ cannonicalName := strings.Replace(name, "\\", "/", -1)
+ if f, ok := _bindata[cannonicalName]; ok {
+ a, err := f()
+ if err != nil {
+ return nil, fmt.Errorf("Asset %s can't read by error: %v", name, err)
+ }
+ return a.bytes, nil
+ }
+ return nil, fmt.Errorf("Asset %s not found", name)
+}
+
+// MustAsset is like Asset but panics when Asset would return an error.
+// It simplifies safe initialization of global variables.
+func MustAsset(name string) []byte {
+ a, err := Asset(name)
+ if err != nil {
+ panic("asset: Asset(" + name + "): " + err.Error())
+ }
+
+ return a
+}
+
+// AssetInfo loads and returns the asset info for the given name.
+// It returns an error if the asset could not be found or
+// could not be loaded.
+func AssetInfo(name string) (os.FileInfo, error) {
+ cannonicalName := strings.Replace(name, "\\", "/", -1)
+ if f, ok := _bindata[cannonicalName]; ok {
+ a, err := f()
+ if err != nil {
+ return nil, fmt.Errorf("AssetInfo %s can't read by error: %v", name, err)
+ }
+ return a.info, nil
+ }
+ return nil, fmt.Errorf("AssetInfo %s not found", name)
+}
+
+// AssetNames returns the names of the assets.
+func AssetNames() []string {
+ names := make([]string, 0, len(_bindata))
+ for name := range _bindata {
+ names = append(names, name)
+ }
+ return names
+}
+
+// _bindata is a table, holding each asset generator, mapped to its name.
+var _bindata = map[string]func() (*asset, error){
+ "favicon/favicon.ico": favicon,
+}
+
+// AssetDir returns the file names below a certain
+// directory embedded in the file by go-bindata.
+// For example if you run go-bindata on data/... and data contains the
+// following hierarchy:
+// data/
+// foo.txt
+// img/
+// a.png
+// b.png
+// then AssetDir("data") would return []string{"foo.txt", "img"}
+// AssetDir("data/img") would return []string{"a.png", "b.png"}
+// AssetDir("foo.txt") and AssetDir("notexist") would return an error
+// AssetDir("") will return []string{"data"}.
+func AssetDir(name string) ([]string, error) {
+ node := _bintree
+ if len(name) != 0 {
+ cannonicalName := strings.Replace(name, "\\", "/", -1)
+ pathList := strings.Split(cannonicalName, "/")
+ for _, p := range pathList {
+ node = node.Children[p]
+ if node == nil {
+ return nil, fmt.Errorf("Asset %s not found", name)
+ }
+ }
+ }
+ if node.Func != nil {
+ return nil, fmt.Errorf("Asset %s not found", name)
+ }
+ rv := make([]string, 0, len(node.Children))
+ for childName := range node.Children {
+ rv = append(rv, childName)
+ }
+ return rv, nil
+}
+
+type bintree struct {
+ Func func() (*asset, error)
+ Children map[string]*bintree
+}
+var _bintree = &bintree{nil, map[string]*bintree{
+ "favicon": &bintree{nil, map[string]*bintree{
+ "favicon.ico": &bintree{favicon, map[string]*bintree{}},
+ }},
+}}
+
+// RestoreAsset restores an asset under the given directory
+func RestoreAsset(dir, name string) error {
+ data, err := Asset(name)
+ if err != nil {
+ return err
+ }
+ info, err := AssetInfo(name)
+ if err != nil {
+ return err
+ }
+ err = os.MkdirAll(_filePath(dir, filepath.Dir(name)), os.FileMode(0755))
+ if err != nil {
+ return err
+ }
+ err = ioutil.WriteFile(_filePath(dir, name), data, info.Mode())
+ if err != nil {
+ return err
+ }
+ err = os.Chtimes(_filePath(dir, name), info.ModTime(), info.ModTime())
+ if err != nil {
+ return err
+ }
+ return nil
+}
+
+// RestoreAssets restores an asset under the given directory recursively
+func RestoreAssets(dir, name string) error {
+ children, err := AssetDir(name)
+ // File
+ if err != nil {
+ return RestoreAsset(dir, name)
+ }
+ // Dir
+ for _, child := range children {
+ err = RestoreAssets(dir, filepath.Join(name, child))
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func _filePath(dir, name string) string {
+ cannonicalName := strings.Replace(name, "\\", "/", -1)
+ return filepath.Join(append([]string{dir}, strings.Split(cannonicalName, "/")...)...)
+}
+
diff --git a/weed/images/favicon/favicon.ico b/weed/images/favicon/favicon.ico
new file mode 100644
index 000000000..cc6f531b3
--- /dev/null
+++ b/weed/images/favicon/favicon.ico
Binary files differ
diff --git a/weed/images/orientation.go b/weed/images/orientation.go
new file mode 100644
index 000000000..4bff89311
--- /dev/null
+++ b/weed/images/orientation.go
@@ -0,0 +1,182 @@
+package images
+
+import (
+ "bytes"
+ "image"
+ "image/draw"
+ "image/jpeg"
+ "log"
+
+ "github.com/rwcarlsen/goexif/exif"
+)
+
+// Much of this code is copied from http://camlistore.org/pkg/images/images.go
+func FixJpgOrientation(data []byte) (oriented []byte) {
+ ex, err := exif.Decode(bytes.NewReader(data))
+ if err != nil {
+ return data
+ }
+ tag, err := ex.Get(exif.Orientation)
+ if err != nil {
+ return data
+ }
+ angle := 0
+ flipMode := FlipDirection(0)
+ orient, err := tag.Int(0)
+ if err != nil {
+ return data
+ }
+ switch orient {
+ case topLeftSide:
+ // do nothing
+ return data
+ case topRightSide:
+ flipMode = 2
+ case bottomRightSide:
+ angle = 180
+ case bottomLeftSide:
+ angle = 180
+ flipMode = 2
+ case leftSideTop:
+ angle = -90
+ flipMode = 2
+ case rightSideTop:
+ angle = -90
+ case rightSideBottom:
+ angle = 90
+ flipMode = 2
+ case leftSideBottom:
+ angle = 90
+ }
+
+ if srcImage, _, err := image.Decode(bytes.NewReader(data)); err == nil {
+ dstImage := flip(rotate(srcImage, angle), flipMode)
+ var buf bytes.Buffer
+ jpeg.Encode(&buf, dstImage, nil)
+ return buf.Bytes()
+ }
+
+ return data
+}
+
+// Exif Orientation Tag values
+// http://sylvana.net/jpegcrop/exif_orientation.html
+const (
+ topLeftSide = 1
+ topRightSide = 2
+ bottomRightSide = 3
+ bottomLeftSide = 4
+ leftSideTop = 5
+ rightSideTop = 6
+ rightSideBottom = 7
+ leftSideBottom = 8
+)
+
+// The FlipDirection type is used by the Flip option in DecodeOpts
+// to indicate in which direction to flip an image.
+type FlipDirection int
+
+// FlipVertical and FlipHorizontal are two possible FlipDirections
+// values to indicate in which direction an image will be flipped.
+const (
+ FlipVertical FlipDirection = 1 << iota
+ FlipHorizontal
+)
+
+type DecodeOpts struct {
+ // Rotate specifies how to rotate the image.
+ // If nil, the image is rotated automatically based on EXIF metadata.
+ // If an int, Rotate is the number of degrees to rotate
+ // counter clockwise and must be one of 0, 90, -90, 180, or
+ // -180.
+ Rotate interface{}
+
+ // Flip specifies how to flip the image.
+ // If nil, the image is flipped automatically based on EXIF metadata.
+ // Otherwise, Flip is a FlipDirection bitfield indicating how to flip.
+ Flip interface{}
+}
+
+func rotate(im image.Image, angle int) image.Image {
+ var rotated *image.NRGBA
+ // trigonometric direction (i.e. counter-clockwise)
+ switch angle {
+ case 90:
+ newH, newW := im.Bounds().Dx(), im.Bounds().Dy()
+ rotated = image.NewNRGBA(image.Rect(0, 0, newW, newH))
+ for y := 0; y < newH; y++ {
+ for x := 0; x < newW; x++ {
+ rotated.Set(x, y, im.At(newH-1-y, x))
+ }
+ }
+ case -90:
+ newH, newW := im.Bounds().Dx(), im.Bounds().Dy()
+ rotated = image.NewNRGBA(image.Rect(0, 0, newW, newH))
+ for y := 0; y < newH; y++ {
+ for x := 0; x < newW; x++ {
+ rotated.Set(x, y, im.At(y, newW-1-x))
+ }
+ }
+ case 180, -180:
+ newW, newH := im.Bounds().Dx(), im.Bounds().Dy()
+ rotated = image.NewNRGBA(image.Rect(0, 0, newW, newH))
+ for y := 0; y < newH; y++ {
+ for x := 0; x < newW; x++ {
+ rotated.Set(x, y, im.At(newW-1-x, newH-1-y))
+ }
+ }
+ default:
+ return im
+ }
+ return rotated
+}
+
+// flip returns a flipped version of the image im, according to
+// the direction(s) in dir.
+// It may flip the input im in place and return it, or it may allocate a
+// new NRGBA (if im is an *image.YCbCr).
+func flip(im image.Image, dir FlipDirection) image.Image {
+ if dir == 0 {
+ return im
+ }
+ ycbcr := false
+ var nrgba image.Image
+ dx, dy := im.Bounds().Dx(), im.Bounds().Dy()
+ di, ok := im.(draw.Image)
+ if !ok {
+ if _, ok := im.(*image.YCbCr); !ok {
+ log.Printf("failed to flip image: input does not satisfy draw.Image")
+ return im
+ }
+ // because YCbCr does not implement Set, we replace it with a new NRGBA
+ ycbcr = true
+ nrgba = image.NewNRGBA(image.Rect(0, 0, dx, dy))
+ di, ok = nrgba.(draw.Image)
+ if !ok {
+ log.Print("failed to flip image: could not cast an NRGBA to a draw.Image")
+ return im
+ }
+ }
+ if dir&FlipHorizontal != 0 {
+ for y := 0; y < dy; y++ {
+ for x := 0; x < dx/2; x++ {
+ old := im.At(x, y)
+ di.Set(x, y, im.At(dx-1-x, y))
+ di.Set(dx-1-x, y, old)
+ }
+ }
+ }
+ if dir&FlipVertical != 0 {
+ for y := 0; y < dy/2; y++ {
+ for x := 0; x < dx; x++ {
+ old := im.At(x, y)
+ di.Set(x, y, im.At(x, dy-1-y))
+ di.Set(x, dy-1-y, old)
+ }
+ }
+ }
+ if ycbcr {
+ return nrgba
+ }
+ return im
+}
diff --git a/weed/images/orientation_test.go b/weed/images/orientation_test.go
new file mode 100644
index 000000000..adab17ff8
--- /dev/null
+++ b/weed/images/orientation_test.go
@@ -0,0 +1,17 @@
+package images
+
+import (
+ "io/ioutil"
+ "testing"
+)
+
+func TestXYZ(t *testing.T) {
+ fname := "sample1.jpg"
+
+ dat, _ := ioutil.ReadFile(fname)
+
+ fixed_data := FixJpgOrientation(dat)
+
+ ioutil.WriteFile("fixed1.jpg", fixed_data, 0644)
+
+}
diff --git a/weed/images/preprocess.go b/weed/images/preprocess.go
new file mode 100644
index 000000000..0d6cb2d9e
--- /dev/null
+++ b/weed/images/preprocess.go
@@ -0,0 +1,27 @@
+package images
+
+import (
+ "path/filepath"
+ "strings"
+)
+
+/*
+* Preprocess image files on the client side:
+* 1. possibly adjust the orientation
+* 2. resize the image to a width or height limit
+* 3. remove the EXIF data
+* Call this function on any file uploaded to SeaweedFS.
+ */
+func MaybePreprocessImage(filename string, data []byte, width, height int) (resized []byte, w int, h int) {
+ ext := filepath.Ext(filename)
+ ext = strings.ToLower(ext)
+ switch ext {
+ case ".png", ".gif":
+ return Resized(ext, data, width, height)
+ case ".jpg", ".jpeg":
+ data = FixJpgOrientation(data)
+ return Resized(ext, data, width, height)
+ }
+ return data, 0, 0
+}
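
The preprocessing helper above is meant to be called on the client before uploading. A minimal usage sketch, not part of this patch: the weed/images import path is inferred from the repo layout, and the file name and the 200x200 size limit are placeholder assumptions.

// Hypothetical usage sketch, not part of this commit.
package main

import (
	"io/ioutil"
	"log"

	"github.com/chrislusf/seaweedfs/weed/images"
)

func main() {
	data, err := ioutil.ReadFile("photo.jpg") // placeholder file name
	if err != nil {
		log.Fatal(err)
	}
	// Fix JPEG orientation if needed and apply a 200x200 size limit.
	resized, w, h := images.MaybePreprocessImage("photo.jpg", data, 200, 200)
	log.Printf("preprocessed to %dx%d, %d bytes", w, h, len(resized))
}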
diff --git a/weed/images/resizing.go b/weed/images/resizing.go
new file mode 100644
index 000000000..7e4a88c42
--- /dev/null
+++ b/weed/images/resizing.go
@@ -0,0 +1,46 @@
+package images
+
+import (
+ "bytes"
+ "image"
+ "image/gif"
+ "image/jpeg"
+ "image/png"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/disintegration/imaging"
+)
+
+func Resized(ext string, data []byte, width, height int) (resized []byte, w int, h int) {
+ if width == 0 && height == 0 {
+ return data, 0, 0
+ }
+ srcImage, _, err := image.Decode(bytes.NewReader(data))
+ if err == nil {
+ bounds := srcImage.Bounds()
+ var dstImage *image.NRGBA
+ if bounds.Dx() > width && width != 0 || bounds.Dy() > height && height != 0 {
+ if width == height && bounds.Dx() != bounds.Dy() {
+ dstImage = imaging.Thumbnail(srcImage, width, height, imaging.Lanczos)
+ w, h = width, height
+ } else {
+ dstImage = imaging.Resize(srcImage, width, height, imaging.Lanczos)
+ }
+ } else {
+ return data, bounds.Dx(), bounds.Dy()
+ }
+ var buf bytes.Buffer
+ switch ext {
+ case ".png":
+ png.Encode(&buf, dstImage)
+ case ".jpg", ".jpeg":
+ jpeg.Encode(&buf, dstImage, nil)
+ case ".gif":
+ gif.Encode(&buf, dstImage, nil)
+ }
+ return buf.Bytes(), dstImage.Bounds().Dx(), dstImage.Bounds().Dy()
+ } else {
+ glog.Error(err)
+ }
+ return data, 0, 0
+}
diff --git a/weed/images/sample1.jpg b/weed/images/sample1.jpg
new file mode 100644
index 000000000..991b59bac
--- /dev/null
+++ b/weed/images/sample1.jpg
Binary files differ
diff --git a/weed/operation/assign_file_id.go b/weed/operation/assign_file_id.go
new file mode 100644
index 000000000..acc2d3034
--- /dev/null
+++ b/weed/operation/assign_file_id.go
@@ -0,0 +1,48 @@
+package operation
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "net/url"
+ "strconv"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+type AssignResult struct {
+ Fid string `json:"fid,omitempty"`
+ Url string `json:"url,omitempty"`
+ PublicUrl string `json:"publicUrl,omitempty"`
+ Count uint64 `json:"count,omitempty"`
+ Error string `json:"error,omitempty"`
+}
+
+func Assign(server string, count uint64, replication string, collection string, ttl string) (*AssignResult, error) {
+ values := make(url.Values)
+ values.Add("count", strconv.FormatUint(count, 10))
+ if replication != "" {
+ values.Add("replication", replication)
+ }
+ if collection != "" {
+ values.Add("collection", collection)
+ }
+ if ttl != "" {
+ values.Add("ttl", ttl)
+ }
+ jsonBlob, err := util.Post("http://"+server+"/dir/assign", values)
+ glog.V(2).Info("assign result :", string(jsonBlob))
+ if err != nil {
+ return nil, err
+ }
+ var ret AssignResult
+ err = json.Unmarshal(jsonBlob, &ret)
+ if err != nil {
+ return nil, fmt.Errorf("/dir/assign result JSON unmarshal error:%v, json:%s", err, string(jsonBlob))
+ }
+ if ret.Count <= 0 {
+ return nil, errors.New(ret.Error)
+ }
+ return &ret, nil
+}
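
A minimal sketch of how a client might use Assign before uploading; the localhost:9333 master address is an assumption for illustration, not something this patch defines.

// Hypothetical usage sketch: ask the master for one file id and print the
// volume server URL where the file should be uploaded.
package main

import (
	"log"

	"github.com/chrislusf/seaweedfs/weed/operation"
)

func main() {
	// "localhost:9333" is an assumed master address.
	ret, err := operation.Assign("localhost:9333", 1, "", "", "")
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("upload to http://%s/%s", ret.Url, ret.Fid)
}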
diff --git a/weed/operation/chunked_file.go b/weed/operation/chunked_file.go
new file mode 100644
index 000000000..52086514a
--- /dev/null
+++ b/weed/operation/chunked_file.go
@@ -0,0 +1,213 @@
+package operation
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io"
+ "net/http"
+ "sort"
+
+ "sync"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+var (
+ // ErrRangeRequestsNotSupported is returned when the remote server does not allow range requests (Accept-Ranges was not set)
+ ErrRangeRequestsNotSupported = errors.New("Range requests are not supported by the remote server")
+ // ErrInvalidRange is returned by Read when trying to read past the end of the file
+ ErrInvalidRange = errors.New("Invalid range")
+)
+
+type ChunkInfo struct {
+ Fid string `json:"fid"`
+ Offset int64 `json:"offset"`
+ Size int64 `json:"size"`
+}
+
+type ChunkList []*ChunkInfo
+
+type ChunkManifest struct {
+ Name string `json:"name,omitempty"`
+ Mime string `json:"mime,omitempty"`
+ Size int64 `json:"size,omitempty"`
+ Chunks ChunkList `json:"chunks,omitempty"`
+}
+
+// ChunkedFileReader is a seekable reader over the chunks listed in a ChunkManifest.
+type ChunkedFileReader struct {
+ Manifest *ChunkManifest
+ Master string
+ pos int64
+ pr *io.PipeReader
+ pw *io.PipeWriter
+ mutex sync.Mutex
+}
+
+func (s ChunkList) Len() int { return len(s) }
+func (s ChunkList) Less(i, j int) bool { return s[i].Offset < s[j].Offset }
+func (s ChunkList) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+
+func LoadChunkManifest(buffer []byte, isGzipped bool) (*ChunkManifest, error) {
+ if isGzipped {
+ var err error
+ if buffer, err = UnGzipData(buffer); err != nil {
+ return nil, err
+ }
+ }
+ cm := ChunkManifest{}
+ if e := json.Unmarshal(buffer, &cm); e != nil {
+ return nil, e
+ }
+ sort.Sort(cm.Chunks)
+ return &cm, nil
+}
+
+func (cm *ChunkManifest) Marshal() ([]byte, error) {
+ return json.Marshal(cm)
+}
+
+func (cm *ChunkManifest) DeleteChunks(master string) error {
+ deleteError := 0
+ for _, ci := range cm.Chunks {
+ if e := DeleteFile(master, ci.Fid, ""); e != nil {
+ deleteError++
+ glog.V(0).Infof("Delete %s error: %v, master: %s", ci.Fid, e, master)
+ }
+ }
+ if deleteError > 0 {
+ return errors.New("Not all chunks deleted.")
+ }
+ return nil
+}
+
+func readChunkNeedle(fileUrl string, w io.Writer, offset int64) (written int64, e error) {
+ req, err := http.NewRequest("GET", fileUrl, nil)
+ if err != nil {
+ return written, err
+ }
+ if offset > 0 {
+ req.Header.Set("Range", fmt.Sprintf("bytes=%d-", offset))
+ }
+
+ resp, err := util.Do(req)
+ if err != nil {
+ return written, err
+ }
+ defer resp.Body.Close()
+
+ switch resp.StatusCode {
+ case http.StatusRequestedRangeNotSatisfiable:
+ return written, ErrInvalidRange
+ case http.StatusOK:
+ if offset > 0 {
+ return written, ErrRangeRequestsNotSupported
+ }
+ case http.StatusPartialContent:
+ break
+ default:
+ return written, fmt.Errorf("Read chunk needle error: [%d] %s", resp.StatusCode, fileUrl)
+
+ }
+ return io.Copy(w, resp.Body)
+}
+
+func (cf *ChunkedFileReader) Seek(offset int64, whence int) (int64, error) {
+ var err error
+ switch whence {
+ case 0:
+ case 1:
+ offset += cf.pos
+ case 2:
+ offset = cf.Manifest.Size - offset
+ }
+ if offset > cf.Manifest.Size {
+ err = ErrInvalidRange
+ }
+ if cf.pos != offset {
+ cf.Close()
+ }
+ cf.pos = offset
+ return cf.pos, err
+}
+
+func (cf *ChunkedFileReader) WriteTo(w io.Writer) (n int64, err error) {
+ cm := cf.Manifest
+ chunkIndex := -1
+ chunkStartOffset := int64(0)
+ for i, ci := range cm.Chunks {
+ if cf.pos >= ci.Offset && cf.pos < ci.Offset+ci.Size {
+ chunkIndex = i
+ chunkStartOffset = cf.pos - ci.Offset
+ break
+ }
+ }
+ if chunkIndex < 0 {
+ return n, ErrInvalidRange
+ }
+ for ; chunkIndex < cm.Chunks.Len(); chunkIndex++ {
+ ci := cm.Chunks[chunkIndex]
+ // TODO: should we read data from the local volume server first?
+ fileUrl, lookupError := LookupFileId(cf.Master, ci.Fid)
+ if lookupError != nil {
+ return n, lookupError
+ }
+ if wn, e := readChunkNeedle(fileUrl, w, chunkStartOffset); e != nil {
+ return n, e
+ } else {
+ n += wn
+ cf.pos += wn
+ }
+
+ chunkStartOffset = 0
+ }
+ return n, nil
+}
+
+func (cf *ChunkedFileReader) ReadAt(p []byte, off int64) (n int, err error) {
+ cf.Seek(off, 0)
+ return cf.Read(p)
+}
+
+func (cf *ChunkedFileReader) Read(p []byte) (int, error) {
+ return cf.getPipeReader().Read(p)
+}
+
+func (cf *ChunkedFileReader) Close() (e error) {
+ cf.mutex.Lock()
+ defer cf.mutex.Unlock()
+ return cf.closePipe()
+}
+
+func (cf *ChunkedFileReader) closePipe() (e error) {
+ if cf.pr != nil {
+ if err := cf.pr.Close(); err != nil {
+ e = err
+ }
+ }
+ cf.pr = nil
+ if cf.pw != nil {
+ if err := cf.pw.Close(); err != nil {
+ e = err
+ }
+ }
+ cf.pw = nil
+ return e
+}
+
+func (cf *ChunkedFileReader) getPipeReader() io.Reader {
+ cf.mutex.Lock()
+ defer cf.mutex.Unlock()
+ if cf.pr != nil && cf.pw != nil {
+ return cf.pr
+ }
+ cf.closePipe()
+ cf.pr, cf.pw = io.Pipe()
+ go func(pw *io.PipeWriter) {
+ _, e := cf.WriteTo(pw)
+ pw.CloseWithError(e)
+ }(cf.pw)
+ return cf.pr
+}
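
A minimal sketch of reading back a chunked file through ChunkedFileReader; the manifest JSON, file id, and master address below are made-up placeholders, not values produced by this patch.

// Hypothetical usage sketch: stream a chunked file to stdout given its manifest.
package main

import (
	"log"
	"os"

	"github.com/chrislusf/seaweedfs/weed/operation"
)

func main() {
	// A made-up one-chunk manifest for illustration only.
	manifestJson := []byte(`{"name":"big.bin","size":2,"chunks":[{"fid":"3,01637037d6","offset":0,"size":2}]}`)
	cm, err := operation.LoadChunkManifest(manifestJson, false)
	if err != nil {
		log.Fatal(err)
	}
	reader := &operation.ChunkedFileReader{Manifest: cm, Master: "localhost:9333"} // assumed master address
	defer reader.Close()
	if _, err := reader.WriteTo(os.Stdout); err != nil {
		log.Fatal(err)
	}
}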
diff --git a/weed/operation/compress.go b/weed/operation/compress.go
new file mode 100644
index 000000000..de62e5bf7
--- /dev/null
+++ b/weed/operation/compress.go
@@ -0,0 +1,59 @@
+package operation
+
+import (
+ "bytes"
+ "compress/flate"
+ "compress/gzip"
+ "io/ioutil"
+ "strings"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+/*
+* By default, do not gzip, since gzip can be done on the client side.
+ */
+func IsGzippable(ext, mtype string) bool {
+ if strings.HasPrefix(mtype, "text/") {
+ return true
+ }
+ switch ext {
+ case ".zip", ".rar", ".gz", ".bz2", ".xz":
+ return false
+ case ".pdf", ".txt", ".html", ".htm", ".css", ".js", ".json":
+ return true
+ }
+ if strings.HasPrefix(mtype, "application/") {
+ if strings.HasSuffix(mtype, "xml") {
+ return true
+ }
+ if strings.HasSuffix(mtype, "script") {
+ return true
+ }
+ }
+ return false
+}
+
+func GzipData(input []byte) ([]byte, error) {
+ buf := new(bytes.Buffer)
+ w, _ := gzip.NewWriterLevel(buf, flate.BestCompression)
+ if _, err := w.Write(input); err != nil {
+ glog.V(2).Infoln("error compressing data:", err)
+ return nil, err
+ }
+ if err := w.Close(); err != nil {
+ glog.V(2).Infoln("error closing compressed data:", err)
+ return nil, err
+ }
+ return buf.Bytes(), nil
+}
+func UnGzipData(input []byte) ([]byte, error) {
+ buf := bytes.NewBuffer(input)
+ r, _ := gzip.NewReader(buf)
+ defer r.Close()
+ output, err := ioutil.ReadAll(r)
+ if err != nil {
+ glog.V(2).Infoln("error uncompressing data:", err)
+ }
+ return output, err
+}
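
A minimal round-trip sketch for the helpers above; the import path is inferred from the repo layout and the payload is arbitrary.

// Hypothetical usage sketch: gzip a payload when its type is gzippable,
// then restore the original bytes.
package main

import (
	"log"

	"github.com/chrislusf/seaweedfs/weed/operation"
)

func main() {
	payload := []byte("hello, seaweedfs")
	if operation.IsGzippable(".txt", "text/plain") {
		zipped, err := operation.GzipData(payload)
		if err != nil {
			log.Fatal(err)
		}
		restored, err := operation.UnGzipData(zipped)
		if err != nil {
			log.Fatal(err)
		}
		log.Printf("%d bytes -> %d bytes -> %q", len(payload), len(zipped), restored)
	}
}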
diff --git a/weed/operation/data_struts.go b/weed/operation/data_struts.go
new file mode 100644
index 000000000..bfc53aa50
--- /dev/null
+++ b/weed/operation/data_struts.go
@@ -0,0 +1,7 @@
+package operation
+
+type JoinResult struct {
+ VolumeSizeLimit uint64 `json:"VolumeSizeLimit,omitempty"`
+ SecretKey string `json:"secretKey,omitempty"`
+ Error string `json:"error,omitempty"`
+}
diff --git a/weed/operation/delete_content.go b/weed/operation/delete_content.go
new file mode 100644
index 000000000..b78221da1
--- /dev/null
+++ b/weed/operation/delete_content.go
@@ -0,0 +1,117 @@
+package operation
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "net/url"
+ "strings"
+ "sync"
+
+ "net/http"
+
+ "github.com/chrislusf/seaweedfs/weed/security"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+type DeleteResult struct {
+ Fid string `json:"fid"`
+ Size int `json:"size"`
+ Status int `json:"status"`
+ Error string `json:"error,omitempty"`
+}
+
+func DeleteFile(master string, fileId string, jwt security.EncodedJwt) error {
+ fileUrl, err := LookupFileId(master, fileId)
+ if err != nil {
+ return fmt.Errorf("Failed to lookup %s:%v", fileId, err)
+ }
+ err = util.Delete(fileUrl, jwt)
+ if err != nil {
+ return fmt.Errorf("Failed to delete %s:%v", fileUrl, err)
+ }
+ return nil
+}
+
+func ParseFileId(fid string) (vid string, key_cookie string, err error) {
+ commaIndex := strings.Index(fid, ",")
+ if commaIndex <= 0 {
+ return "", "", errors.New("Wrong fid format.")
+ }
+ return fid[:commaIndex], fid[commaIndex+1:], nil
+}
+
+type DeleteFilesResult struct {
+ Errors []string
+ Results []DeleteResult
+}
+
+func DeleteFiles(master string, fileIds []string) (*DeleteFilesResult, error) {
+ vid_to_fileIds := make(map[string][]string)
+ ret := &DeleteFilesResult{}
+ var vids []string
+ for _, fileId := range fileIds {
+ vid, _, err := ParseFileId(fileId)
+ if err != nil {
+ ret.Results = append(ret.Results, DeleteResult{
+ Fid: vid,
+ Status: http.StatusBadRequest,
+ Error: err.Error()},
+ )
+ continue
+ }
+ if _, ok := vid_to_fileIds[vid]; !ok {
+ vid_to_fileIds[vid] = make([]string, 0)
+ vids = append(vids, vid)
+ }
+ vid_to_fileIds[vid] = append(vid_to_fileIds[vid], fileId)
+ }
+
+ lookupResults, err := LookupVolumeIds(master, vids)
+ if err != nil {
+ return ret, err
+ }
+
+ server_to_fileIds := make(map[string][]string)
+ for vid, result := range lookupResults {
+ if result.Error != "" {
+ ret.Errors = append(ret.Errors, result.Error)
+ continue
+ }
+ for _, location := range result.Locations {
+ if _, ok := server_to_fileIds[location.Url]; !ok {
+ server_to_fileIds[location.Url] = make([]string, 0)
+ }
+ server_to_fileIds[location.Url] = append(
+ server_to_fileIds[location.Url], vid_to_fileIds[vid]...)
+ }
+ }
+
+ var wg sync.WaitGroup
+
+ for server, fidList := range server_to_fileIds {
+ wg.Add(1)
+ go func(server string, fidList []string) {
+ defer wg.Done()
+ values := make(url.Values)
+ for _, fid := range fidList {
+ values.Add("fid", fid)
+ }
+ jsonBlob, err := util.Post("http://"+server+"/delete", values)
+ if err != nil {
+ ret.Errors = append(ret.Errors, err.Error()+" "+string(jsonBlob))
+ return
+ }
+ var result []DeleteResult
+ err = json.Unmarshal(jsonBlob, &result)
+ if err != nil {
+ ret.Errors = append(ret.Errors, err.Error()+" "+string(jsonBlob))
+ return
+ }
+ ret.Results = append(ret.Results, result...)
+ }(server, fidList)
+ }
+ wg.Wait()
+
+ return ret, nil
+}
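
A minimal sketch of batch deletion with DeleteFiles; the master address and file ids are placeholders for illustration.

// Hypothetical usage sketch: batch-delete two file ids and report any errors.
package main

import (
	"log"

	"github.com/chrislusf/seaweedfs/weed/operation"
)

func main() {
	// Master address and file ids are placeholders.
	result, err := operation.DeleteFiles("localhost:9333", []string{"3,01637037d6", "3,01637037d7"})
	if err != nil {
		log.Fatal(err)
	}
	for _, e := range result.Errors {
		log.Println("delete error:", e)
	}
	log.Printf("%d per-file results", len(result.Results))
}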
diff --git a/weed/operation/list_masters.go b/weed/operation/list_masters.go
new file mode 100644
index 000000000..0a15b0af8
--- /dev/null
+++ b/weed/operation/list_masters.go
@@ -0,0 +1,32 @@
+package operation
+
+import (
+ "encoding/json"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+type ClusterStatusResult struct {
+ IsLeader bool `json:"IsLeader,omitempty"`
+ Leader string `json:"Leader,omitempty"`
+ Peers []string `json:"Peers,omitempty"`
+}
+
+func ListMasters(server string) ([]string, error) {
+ jsonBlob, err := util.Get("http://" + server + "/cluster/status")
+ glog.V(2).Info("list masters result :", string(jsonBlob))
+ if err != nil {
+ return nil, err
+ }
+ var ret ClusterStatusResult
+ err = json.Unmarshal(jsonBlob, &ret)
+ if err != nil {
+ return nil, err
+ }
+ masters := ret.Peers
+ if ret.IsLeader {
+ masters = append(masters, ret.Leader)
+ }
+ return masters, nil
+}
diff --git a/weed/operation/lookup.go b/weed/operation/lookup.go
new file mode 100644
index 000000000..19d9dbb94
--- /dev/null
+++ b/weed/operation/lookup.go
@@ -0,0 +1,118 @@
+package operation
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "math/rand"
+ "net/url"
+ "strings"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+type Location struct {
+ Url string `json:"url,omitempty"`
+ PublicUrl string `json:"publicUrl,omitempty"`
+}
+type LookupResult struct {
+ VolumeId string `json:"volumeId,omitempty"`
+ Locations []Location `json:"locations,omitempty"`
+ Error string `json:"error,omitempty"`
+}
+
+func (lr *LookupResult) String() string {
+ return fmt.Sprintf("VolumeId:%s, Locations:%v, Error:%s", lr.VolumeId, lr.Locations, lr.Error)
+}
+
+var (
+ vc VidCache // caches volume locations, re-checked after 10 minutes
+)
+
+func Lookup(server string, vid string) (ret *LookupResult, err error) {
+ locations, cache_err := vc.Get(vid)
+ if cache_err != nil {
+ if ret, err = do_lookup(server, vid); err == nil {
+ vc.Set(vid, ret.Locations, 10*time.Minute)
+ }
+ } else {
+ ret = &LookupResult{VolumeId: vid, Locations: locations}
+ }
+ return
+}
+
+func do_lookup(server string, vid string) (*LookupResult, error) {
+ values := make(url.Values)
+ values.Add("volumeId", vid)
+ jsonBlob, err := util.Post("http://"+server+"/dir/lookup", values)
+ if err != nil {
+ return nil, err
+ }
+ var ret LookupResult
+ err = json.Unmarshal(jsonBlob, &ret)
+ if err != nil {
+ return nil, err
+ }
+ if ret.Error != "" {
+ return nil, errors.New(ret.Error)
+ }
+ return &ret, nil
+}
+
+func LookupFileId(server string, fileId string) (fullUrl string, err error) {
+ parts := strings.Split(fileId, ",")
+ if len(parts) != 2 {
+ return "", errors.New("Invalid fileId " + fileId)
+ }
+ lookup, lookupError := Lookup(server, parts[0])
+ if lookupError != nil {
+ return "", lookupError
+ }
+ if len(lookup.Locations) == 0 {
+ return "", errors.New("File Not Found")
+ }
+ return "http://" + lookup.Locations[rand.Intn(len(lookup.Locations))].Url + "/" + fileId, nil
+}
+
+// LookupVolumeIds find volume locations by cache and actual lookup
+func LookupVolumeIds(server string, vids []string) (map[string]LookupResult, error) {
+ ret := make(map[string]LookupResult)
+ var unknown_vids []string
+
+ //check vid cache first
+ for _, vid := range vids {
+ locations, cache_err := vc.Get(vid)
+ if cache_err == nil {
+ ret[vid] = LookupResult{VolumeId: vid, Locations: locations}
+ } else {
+ unknown_vids = append(unknown_vids, vid)
+ }
+ }
+ //return success if all volume ids are known
+ if len(unknown_vids) == 0 {
+ return ret, nil
+ }
+
+ //only query unknown_vids
+ values := make(url.Values)
+ for _, vid := range unknown_vids {
+ values.Add("volumeId", vid)
+ }
+ jsonBlob, err := util.Post("http://"+server+"/vol/lookup", values)
+ if err != nil {
+ return nil, err
+ }
+ err = json.Unmarshal(jsonBlob, &ret)
+ if err != nil {
+ return nil, errors.New(err.Error() + " " + string(jsonBlob))
+ }
+
+ //set newly checked vids to cache
+ for _, vid := range unknown_vids {
+ locations := ret[vid].Locations
+ vc.Set(vid, locations, 10*time.Minute)
+ }
+
+ return ret, nil
+}
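
A minimal sketch of resolving a file id to a servable URL with LookupFileId; the master address and file id are placeholders.

// Hypothetical usage sketch: resolve a file id to a full volume-server URL.
package main

import (
	"log"

	"github.com/chrislusf/seaweedfs/weed/operation"
)

func main() {
	// Master address and file id are placeholders.
	fullUrl, err := operation.LookupFileId("localhost:9333", "3,01637037d6")
	if err != nil {
		log.Fatal(err)
	}
	log.Println("file is served at", fullUrl)
}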
diff --git a/weed/operation/lookup_vid_cache.go b/weed/operation/lookup_vid_cache.go
new file mode 100644
index 000000000..1ed03613d
--- /dev/null
+++ b/weed/operation/lookup_vid_cache.go
@@ -0,0 +1,51 @@
+package operation
+
+import (
+ "errors"
+ "strconv"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+type VidInfo struct {
+ Locations []Location
+ NextRefreshTime time.Time
+}
+type VidCache struct {
+ cache []VidInfo
+}
+
+func (vc *VidCache) Get(vid string) ([]Location, error) {
+ id, err := strconv.Atoi(vid)
+ if err != nil {
+ glog.V(1).Infof("Unknown volume id %s", vid)
+ return nil, err
+ }
+ if 0 < id && id <= len(vc.cache) {
+ if vc.cache[id-1].Locations == nil {
+ return nil, errors.New("Not Set")
+ }
+ if vc.cache[id-1].NextRefreshTime.Before(time.Now()) {
+ return nil, errors.New("Expired")
+ }
+ return vc.cache[id-1].Locations, nil
+ }
+ return nil, errors.New("Not Found")
+}
+func (vc *VidCache) Set(vid string, locations []Location, duration time.Duration) {
+ id, err := strconv.Atoi(vid)
+ if err != nil {
+ glog.V(1).Infof("Unknown volume id %s", vid)
+ return
+ }
+ if id > len(vc.cache) {
+ for i := id - len(vc.cache); i > 0; i-- {
+ vc.cache = append(vc.cache, VidInfo{})
+ }
+ }
+ if id > 0 {
+ vc.cache[id-1].Locations = locations
+ vc.cache[id-1].NextRefreshTime = time.Now().Add(duration)
+ }
+}
diff --git a/weed/operation/lookup_vid_cache_test.go b/weed/operation/lookup_vid_cache_test.go
new file mode 100644
index 000000000..9c9e2affb
--- /dev/null
+++ b/weed/operation/lookup_vid_cache_test.go
@@ -0,0 +1,26 @@
+package operation
+
+import (
+ "fmt"
+ "testing"
+ "time"
+)
+
+func TestCaching(t *testing.T) {
+ var (
+ vc VidCache
+ )
+ var locations []Location
+ locations = append(locations, Location{Url: "a.com:8080"})
+ vc.Set("123", locations, time.Second)
+ ret, _ := vc.Get("123")
+ if ret == nil {
+ t.Fatal("Not found vid 123")
+ }
+ fmt.Printf("vid 123 locations = %v\n", ret)
+ time.Sleep(2 * time.Second)
+ ret, _ = vc.Get("123")
+ if ret != nil {
+ t.Fatal("Not found vid 123")
+ }
+}
diff --git a/weed/operation/submit.go b/weed/operation/submit.go
new file mode 100644
index 000000000..19bbd7a70
--- /dev/null
+++ b/weed/operation/submit.go
@@ -0,0 +1,194 @@
+package operation
+
+import (
+ "bytes"
+ "io"
+ "mime"
+ "net/url"
+ "os"
+ "path"
+ "strconv"
+ "strings"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/security"
+)
+
+type FilePart struct {
+ Reader io.Reader
+ FileName string
+ FileSize int64
+ IsGzipped bool
+ MimeType string
+ ModTime int64 //in seconds
+ Replication string
+ Collection string
+ Ttl string
+ Server string //this comes from assign result
+ Fid string //this comes from assign result, but customizable
+}
+
+type SubmitResult struct {
+ FileName string `json:"fileName,omitempty"`
+ FileUrl string `json:"fileUrl,omitempty"`
+ Fid string `json:"fid,omitempty"`
+ Size uint32 `json:"size,omitempty"`
+ Error string `json:"error,omitempty"`
+}
+
+func SubmitFiles(master string, files []FilePart,
+ replication string, collection string, ttl string, maxMB int,
+ secret security.Secret,
+) ([]SubmitResult, error) {
+ results := make([]SubmitResult, len(files))
+ for index, file := range files {
+ results[index].FileName = file.FileName
+ }
+ ret, err := Assign(master, uint64(len(files)), replication, collection, ttl)
+ if err != nil {
+ for index := range files {
+ results[index].Error = err.Error()
+ }
+ return results, err
+ }
+ for index, file := range files {
+ file.Fid = ret.Fid
+ if index > 0 {
+ file.Fid = file.Fid + "_" + strconv.Itoa(index)
+ }
+ file.Server = ret.Url
+ file.Replication = replication
+ file.Collection = collection
+ results[index].Size, err = file.Upload(maxMB, master, secret)
+ if err != nil {
+ results[index].Error = err.Error()
+ }
+ results[index].Fid = file.Fid
+ results[index].FileUrl = ret.PublicUrl + "/" + file.Fid
+ }
+ return results, nil
+}
+
+func NewFileParts(fullPathFilenames []string) (ret []FilePart, err error) {
+ ret = make([]FilePart, len(fullPathFilenames))
+ for index, file := range fullPathFilenames {
+ if ret[index], err = newFilePart(file); err != nil {
+ return
+ }
+ }
+ return
+}
+func newFilePart(fullPathFilename string) (ret FilePart, err error) {
+ fh, openErr := os.Open(fullPathFilename)
+ if openErr != nil {
+ glog.V(0).Info("Failed to open file: ", fullPathFilename)
+ return ret, openErr
+ }
+ ret.Reader = fh
+
+ if fi, fiErr := fh.Stat(); fiErr != nil {
+ glog.V(0).Info("Failed to stat file:", fullPathFilename)
+ return ret, fiErr
+ } else {
+ ret.ModTime = fi.ModTime().UTC().Unix()
+ ret.FileSize = fi.Size()
+ }
+ ext := strings.ToLower(path.Ext(fullPathFilename))
+ ret.IsGzipped = ext == ".gz"
+ if ret.IsGzipped {
+ ret.FileName = fullPathFilename[0 : len(fullPathFilename)-3]
+ }
+ ret.FileName = fullPathFilename
+ if ext != "" {
+ ret.MimeType = mime.TypeByExtension(ext)
+ }
+
+ return ret, nil
+}
+
+func (fi FilePart) Upload(maxMB int, master string, secret security.Secret) (retSize uint32, err error) {
+ jwt := security.GenJwt(secret, fi.Fid)
+ fileUrl := "http://" + fi.Server + "/" + fi.Fid
+ if fi.ModTime != 0 {
+ fileUrl += "?ts=" + strconv.Itoa(int(fi.ModTime))
+ }
+ if closer, ok := fi.Reader.(io.Closer); ok {
+ defer closer.Close()
+ }
+ baseName := path.Base(fi.FileName)
+ if maxMB > 0 && fi.FileSize > int64(maxMB*1024*1024) {
+ chunkSize := int64(maxMB * 1024 * 1024)
+ chunks := fi.FileSize/chunkSize + 1
+ cm := ChunkManifest{
+ Name: baseName,
+ Size: fi.FileSize,
+ Mime: fi.MimeType,
+ Chunks: make([]*ChunkInfo, 0, chunks),
+ }
+
+ for i := int64(0); i < chunks; i++ {
+ id, count, e := upload_one_chunk(
+ baseName+"-"+strconv.FormatInt(i+1, 10),
+ io.LimitReader(fi.Reader, chunkSize),
+ master, fi.Replication, fi.Collection, fi.Ttl,
+ jwt)
+ if e != nil {
+ // delete all uploaded chunks
+ cm.DeleteChunks(master)
+ return 0, e
+ }
+ cm.Chunks = append(cm.Chunks,
+ &ChunkInfo{
+ Offset: i * chunkSize,
+ Size: int64(count),
+ Fid: id,
+ },
+ )
+ retSize += count
+ }
+ err = upload_chunked_file_manifest(fileUrl, &cm, jwt)
+ if err != nil {
+ // delete all uploaded chunks
+ cm.DeleteChunks(master)
+ }
+ } else {
+ ret, e := Upload(fileUrl, baseName, fi.Reader, fi.IsGzipped, fi.MimeType, jwt)
+ if e != nil {
+ return 0, e
+ }
+ return ret.Size, e
+ }
+ return
+}
+
+func upload_one_chunk(filename string, reader io.Reader, master,
+ replication string, collection string, ttl string, jwt security.EncodedJwt,
+) (fid string, size uint32, e error) {
+ ret, err := Assign(master, 1, replication, collection, ttl)
+ if err != nil {
+ return "", 0, err
+ }
+ fileUrl, fid := "http://"+ret.Url+"/"+ret.Fid, ret.Fid
+ glog.V(4).Info("Uploading part ", filename, " to ", fileUrl, "...")
+ uploadResult, uploadError := Upload(fileUrl, filename, reader, false,
+ "application/octet-stream", jwt)
+ if uploadError != nil {
+ return fid, 0, uploadError
+ }
+ return fid, uploadResult.Size, nil
+}
+
+func upload_chunked_file_manifest(fileUrl string, manifest *ChunkManifest, jwt security.EncodedJwt) error {
+ buf, e := manifest.Marshal()
+ if e != nil {
+ return e
+ }
+ bufReader := bytes.NewReader(buf)
+ glog.V(4).Info("Uploading chunks manifest ", manifest.Name, " to ", fileUrl, "...")
+ u, _ := url.Parse(fileUrl)
+ q := u.Query()
+ q.Set("cm", "true")
+ u.RawQuery = q.Encode()
+ _, e = Upload(u.String(), manifest.Name, bufReader, false, "application/json", jwt)
+ return e
+}
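
A minimal sketch of a bulk upload with NewFileParts and SubmitFiles; the file paths, master address, and 32MB chunk limit are placeholder assumptions, and the zero-value secret disables JWT signing.

// Hypothetical usage sketch: upload two local files through the master,
// splitting anything larger than 32MB into chunks.
package main

import (
	"log"

	"github.com/chrislusf/seaweedfs/weed/operation"
	"github.com/chrislusf/seaweedfs/weed/security"
)

func main() {
	parts, err := operation.NewFileParts([]string{"a.txt", "b.jpg"}) // placeholder paths
	if err != nil {
		log.Fatal(err)
	}
	var secret security.Secret // zero value: no JWT signing
	results, err := operation.SubmitFiles("localhost:9333", parts, "", "", "", 32, secret)
	if err != nil {
		log.Fatal(err)
	}
	for _, r := range results {
		log.Printf("%s -> %s (%d bytes)", r.FileName, r.FileUrl, r.Size)
	}
}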
diff --git a/weed/operation/sync_volume.go b/weed/operation/sync_volume.go
new file mode 100644
index 000000000..b7a727fc7
--- /dev/null
+++ b/weed/operation/sync_volume.go
@@ -0,0 +1,54 @@
+package operation
+
+import (
+ "encoding/json"
+ "fmt"
+ "net/url"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+type SyncVolumeResponse struct {
+ Replication string `json:"Replication,omitempty"`
+ Ttl string `json:"Ttl,omitempty"`
+ TailOffset uint64 `json:"TailOffset,omitempty"`
+ CompactRevision uint16 `json:"CompactRevision,omitempty"`
+ IdxFileSize uint64 `json:"IdxFileSize,omitempty"`
+ Error string `json:"error,omitempty"`
+}
+
+func GetVolumeSyncStatus(server string, vid string) (*SyncVolumeResponse, error) {
+ values := make(url.Values)
+ values.Add("volume", vid)
+ jsonBlob, err := util.Post("http://"+server+"/admin/sync/status", values)
+ glog.V(2).Info("sync volume result :", string(jsonBlob))
+ if err != nil {
+ return nil, err
+ }
+ var ret SyncVolumeResponse
+ err = json.Unmarshal(jsonBlob, &ret)
+ if err != nil {
+ return nil, err
+ }
+ if ret.Error != "" {
+ return nil, fmt.Errorf("Volume %s get sync status error: %s", vid, ret.Error)
+ }
+ return &ret, nil
+}
+
+func GetVolumeIdxEntries(server string, vid string, eachEntryFn func(key uint64, offset, size uint32)) error {
+ values := make(url.Values)
+ values.Add("volume", vid)
+ line := make([]byte, 16)
+ err := util.GetBufferStream("http://"+server+"/admin/sync/index", values, line, func(bytes []byte) {
+ key := util.BytesToUint64(bytes[:8])
+ offset := util.BytesToUint32(bytes[8:12])
+ size := util.BytesToUint32(bytes[12:16])
+ eachEntryFn(key, offset, size)
+ })
+ if err != nil {
+ return err
+ }
+ return nil
+}
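
A minimal sketch of streaming a volume's index entries with the callback-based GetVolumeIdxEntries; the volume server address and volume id are placeholders.

// Hypothetical usage sketch: stream the index entries of volume "1" from a
// volume server and count them.
package main

import (
	"log"

	"github.com/chrislusf/seaweedfs/weed/operation"
)

func main() {
	count := 0
	// "localhost:8080" is an assumed volume server address.
	err := operation.GetVolumeIdxEntries("localhost:8080", "1", func(key uint64, offset, size uint32) {
		count++
	})
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("volume 1 has %d index entries", count)
}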
diff --git a/weed/operation/system_message.pb.go b/weed/operation/system_message.pb.go
new file mode 100644
index 000000000..742a1ca4e
--- /dev/null
+++ b/weed/operation/system_message.pb.go
@@ -0,0 +1,203 @@
+// Code generated by protoc-gen-go.
+// source: system_message.proto
+// DO NOT EDIT!
+
+/*
+Package operation is a generated protocol buffer package.
+
+It is generated from these files:
+ system_message.proto
+
+It has these top-level messages:
+ VolumeInformationMessage
+ JoinMessage
+*/
+package operation
+
+import proto "github.com/golang/protobuf/proto"
+import math "math"
+
+// Reference imports to suppress errors if they are not otherwise used.
+var _ = proto.Marshal
+var _ = math.Inf
+
+type VolumeInformationMessage struct {
+ Id *uint32 `protobuf:"varint,1,req,name=id" json:"id,omitempty"`
+ Size *uint64 `protobuf:"varint,2,req,name=size" json:"size,omitempty"`
+ Collection *string `protobuf:"bytes,3,opt,name=collection" json:"collection,omitempty"`
+ FileCount *uint64 `protobuf:"varint,4,req,name=file_count" json:"file_count,omitempty"`
+ DeleteCount *uint64 `protobuf:"varint,5,req,name=delete_count" json:"delete_count,omitempty"`
+ DeletedByteCount *uint64 `protobuf:"varint,6,req,name=deleted_byte_count" json:"deleted_byte_count,omitempty"`
+ ReadOnly *bool `protobuf:"varint,7,opt,name=read_only" json:"read_only,omitempty"`
+ ReplicaPlacement *uint32 `protobuf:"varint,8,req,name=replica_placement" json:"replica_placement,omitempty"`
+ Version *uint32 `protobuf:"varint,9,opt,name=version,def=2" json:"version,omitempty"`
+ Ttl *uint32 `protobuf:"varint,10,opt,name=ttl" json:"ttl,omitempty"`
+ XXX_unrecognized []byte `json:"-"`
+}
+
+func (m *VolumeInformationMessage) Reset() { *m = VolumeInformationMessage{} }
+func (m *VolumeInformationMessage) String() string { return proto.CompactTextString(m) }
+func (*VolumeInformationMessage) ProtoMessage() {}
+
+const Default_VolumeInformationMessage_Version uint32 = 2
+
+func (m *VolumeInformationMessage) GetId() uint32 {
+ if m != nil && m.Id != nil {
+ return *m.Id
+ }
+ return 0
+}
+
+func (m *VolumeInformationMessage) GetSize() uint64 {
+ if m != nil && m.Size != nil {
+ return *m.Size
+ }
+ return 0
+}
+
+func (m *VolumeInformationMessage) GetCollection() string {
+ if m != nil && m.Collection != nil {
+ return *m.Collection
+ }
+ return ""
+}
+
+func (m *VolumeInformationMessage) GetFileCount() uint64 {
+ if m != nil && m.FileCount != nil {
+ return *m.FileCount
+ }
+ return 0
+}
+
+func (m *VolumeInformationMessage) GetDeleteCount() uint64 {
+ if m != nil && m.DeleteCount != nil {
+ return *m.DeleteCount
+ }
+ return 0
+}
+
+func (m *VolumeInformationMessage) GetDeletedByteCount() uint64 {
+ if m != nil && m.DeletedByteCount != nil {
+ return *m.DeletedByteCount
+ }
+ return 0
+}
+
+func (m *VolumeInformationMessage) GetReadOnly() bool {
+ if m != nil && m.ReadOnly != nil {
+ return *m.ReadOnly
+ }
+ return false
+}
+
+func (m *VolumeInformationMessage) GetReplicaPlacement() uint32 {
+ if m != nil && m.ReplicaPlacement != nil {
+ return *m.ReplicaPlacement
+ }
+ return 0
+}
+
+func (m *VolumeInformationMessage) GetVersion() uint32 {
+ if m != nil && m.Version != nil {
+ return *m.Version
+ }
+ return Default_VolumeInformationMessage_Version
+}
+
+func (m *VolumeInformationMessage) GetTtl() uint32 {
+ if m != nil && m.Ttl != nil {
+ return *m.Ttl
+ }
+ return 0
+}
+
+type JoinMessage struct {
+ IsInit *bool `protobuf:"varint,1,opt,name=is_init" json:"is_init,omitempty"`
+ Ip *string `protobuf:"bytes,2,req,name=ip" json:"ip,omitempty"`
+ Port *uint32 `protobuf:"varint,3,req,name=port" json:"port,omitempty"`
+ PublicUrl *string `protobuf:"bytes,4,opt,name=public_url" json:"public_url,omitempty"`
+ MaxVolumeCount *uint32 `protobuf:"varint,5,req,name=max_volume_count" json:"max_volume_count,omitempty"`
+ MaxFileKey *uint64 `protobuf:"varint,6,req,name=max_file_key" json:"max_file_key,omitempty"`
+ DataCenter *string `protobuf:"bytes,7,opt,name=data_center" json:"data_center,omitempty"`
+ Rack *string `protobuf:"bytes,8,opt,name=rack" json:"rack,omitempty"`
+ Volumes []*VolumeInformationMessage `protobuf:"bytes,9,rep,name=volumes" json:"volumes,omitempty"`
+ AdminPort *uint32 `protobuf:"varint,10,opt,name=admin_port" json:"admin_port,omitempty"`
+ XXX_unrecognized []byte `json:"-"`
+}
+
+func (m *JoinMessage) Reset() { *m = JoinMessage{} }
+func (m *JoinMessage) String() string { return proto.CompactTextString(m) }
+func (*JoinMessage) ProtoMessage() {}
+
+func (m *JoinMessage) GetIsInit() bool {
+ if m != nil && m.IsInit != nil {
+ return *m.IsInit
+ }
+ return false
+}
+
+func (m *JoinMessage) GetIp() string {
+ if m != nil && m.Ip != nil {
+ return *m.Ip
+ }
+ return ""
+}
+
+func (m *JoinMessage) GetPort() uint32 {
+ if m != nil && m.Port != nil {
+ return *m.Port
+ }
+ return 0
+}
+
+func (m *JoinMessage) GetPublicUrl() string {
+ if m != nil && m.PublicUrl != nil {
+ return *m.PublicUrl
+ }
+ return ""
+}
+
+func (m *JoinMessage) GetMaxVolumeCount() uint32 {
+ if m != nil && m.MaxVolumeCount != nil {
+ return *m.MaxVolumeCount
+ }
+ return 0
+}
+
+func (m *JoinMessage) GetMaxFileKey() uint64 {
+ if m != nil && m.MaxFileKey != nil {
+ return *m.MaxFileKey
+ }
+ return 0
+}
+
+func (m *JoinMessage) GetDataCenter() string {
+ if m != nil && m.DataCenter != nil {
+ return *m.DataCenter
+ }
+ return ""
+}
+
+func (m *JoinMessage) GetRack() string {
+ if m != nil && m.Rack != nil {
+ return *m.Rack
+ }
+ return ""
+}
+
+func (m *JoinMessage) GetVolumes() []*VolumeInformationMessage {
+ if m != nil {
+ return m.Volumes
+ }
+ return nil
+}
+
+func (m *JoinMessage) GetAdminPort() uint32 {
+ if m != nil && m.AdminPort != nil {
+ return *m.AdminPort
+ }
+ return 0
+}
+
+func init() {
+}
diff --git a/weed/operation/system_message_test.go b/weed/operation/system_message_test.go
new file mode 100644
index 000000000..d18ca49a4
--- /dev/null
+++ b/weed/operation/system_message_test.go
@@ -0,0 +1,59 @@
+package operation
+
+import (
+ "encoding/json"
+ "log"
+ "testing"
+
+ "github.com/golang/protobuf/proto"
+)
+
+func TestSerialDeserial(t *testing.T) {
+ volumeMessage := &VolumeInformationMessage{
+ Id: proto.Uint32(12),
+ Size: proto.Uint64(2341234),
+ Collection: proto.String("benchmark"),
+ FileCount: proto.Uint64(2341234),
+ DeleteCount: proto.Uint64(234),
+ DeletedByteCount: proto.Uint64(21234),
+ ReadOnly: proto.Bool(false),
+ ReplicaPlacement: proto.Uint32(210),
+ Version: proto.Uint32(2),
+ }
+ var volumeMessages []*VolumeInformationMessage
+ volumeMessages = append(volumeMessages, volumeMessage)
+
+ joinMessage := &JoinMessage{
+ IsInit: proto.Bool(true),
+ Ip: proto.String("127.0.3.12"),
+ Port: proto.Uint32(34546),
+ PublicUrl: proto.String("localhost:2342"),
+ MaxVolumeCount: proto.Uint32(210),
+ MaxFileKey: proto.Uint64(324234423),
+ DataCenter: proto.String("dc1"),
+ Rack: proto.String("rack2"),
+ Volumes: volumeMessages,
+ }
+
+ data, err := proto.Marshal(joinMessage)
+ if err != nil {
+ log.Fatal("marshaling error: ", err)
+ }
+ newMessage := &JoinMessage{}
+ err = proto.Unmarshal(data, newMessage)
+ if err != nil {
+ log.Fatal("unmarshaling error: ", err)
+ }
+ log.Println("The pb data size is", len(data))
+
+ jsonData, jsonError := json.Marshal(joinMessage)
+ if jsonError != nil {
+ log.Fatal("json marshaling error: ", jsonError)
+ }
+ log.Println("The json data size is", len(jsonData), string(jsonData))
+
+ // Now joinMessage and newMessage should contain the same data.
+ if *joinMessage.PublicUrl != *newMessage.PublicUrl {
+ log.Fatalf("data mismatch %q != %q", *joinMessage.PublicUrl, *newMessage.PublicUrl)
+ }
+}
diff --git a/weed/operation/upload_content.go b/weed/operation/upload_content.go
new file mode 100644
index 000000000..a87784cad
--- /dev/null
+++ b/weed/operation/upload_content.go
@@ -0,0 +1,96 @@
+package operation
+
+import (
+ "bytes"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "mime"
+ "mime/multipart"
+ "net/http"
+ "net/textproto"
+ "path/filepath"
+ "strings"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/security"
+)
+
+type UploadResult struct {
+ Name string `json:"name,omitempty"`
+ Size uint32 `json:"size,omitempty"`
+ Error string `json:"error,omitempty"`
+}
+
+var (
+ client *http.Client
+)
+
+func init() {
+ client = &http.Client{Transport: &http.Transport{
+ MaxIdleConnsPerHost: 1024,
+ }}
+}
+
+var fileNameEscaper = strings.NewReplacer("\\", "\\\\", "\"", "\\\"")
+
+func Upload(uploadUrl string, filename string, reader io.Reader, isGzipped bool, mtype string, jwt security.EncodedJwt) (*UploadResult, error) {
+ return upload_content(uploadUrl, func(w io.Writer) (err error) {
+ _, err = io.Copy(w, reader)
+ return
+ }, filename, isGzipped, mtype, jwt)
+}
+func upload_content(uploadUrl string, fillBufferFunction func(w io.Writer) error, filename string, isGzipped bool, mtype string, jwt security.EncodedJwt) (*UploadResult, error) {
+ body_buf := bytes.NewBufferString("")
+ body_writer := multipart.NewWriter(body_buf)
+ h := make(textproto.MIMEHeader)
+ h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="file"; filename="%s"`, fileNameEscaper.Replace(filename)))
+ if mtype == "" {
+ mtype = mime.TypeByExtension(strings.ToLower(filepath.Ext(filename)))
+ }
+ if mtype != "" {
+ h.Set("Content-Type", mtype)
+ }
+ if isGzipped {
+ h.Set("Content-Encoding", "gzip")
+ }
+ if jwt != "" {
+ h.Set("Authorization", "BEARER "+string(jwt))
+ }
+ file_writer, cp_err := body_writer.CreatePart(h)
+ if cp_err != nil {
+ glog.V(0).Infoln("error creating form file", cp_err.Error())
+ return nil, cp_err
+ }
+ if err := fillBufferFunction(file_writer); err != nil {
+ glog.V(0).Infoln("error copying data", err)
+ return nil, err
+ }
+ content_type := body_writer.FormDataContentType()
+ if err := body_writer.Close(); err != nil {
+ glog.V(0).Infoln("error closing body", err)
+ return nil, err
+ }
+ resp, post_err := client.Post(uploadUrl, content_type, body_buf)
+ if post_err != nil {
+ glog.V(0).Infoln("failing to upload to", uploadUrl, post_err.Error())
+ return nil, post_err
+ }
+ defer resp.Body.Close()
+ resp_body, ra_err := ioutil.ReadAll(resp.Body)
+ if ra_err != nil {
+ return nil, ra_err
+ }
+ var ret UploadResult
+ unmarshal_err := json.Unmarshal(resp_body, &ret)
+ if unmarshal_err != nil {
+ glog.V(0).Infoln("failing to read upload resonse", uploadUrl, string(resp_body))
+ return nil, unmarshal_err
+ }
+ if ret.Error != "" {
+ return nil, errors.New(ret.Error)
+ }
+ return &ret, nil
+}
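A minimal sketch of calling Upload from a client, assuming a reachable volume server; the URL, file id, and file name below are placeholders, not values from this change.

package main

import (
	"fmt"
	"strings"

	"github.com/chrislusf/seaweedfs/weed/operation"
	"github.com/chrislusf/seaweedfs/weed/security"
)

func main() {
	// Hypothetical volume server URL plus an already assigned fid.
	uploadUrl := "http://127.0.0.1:8080/3,01637037d6"
	data := strings.NewReader("hello world")

	// Not gzipped, MIME type left for the server to infer, no JWT.
	ret, err := operation.Upload(uploadUrl, "hello.txt", data, false, "", security.EncodedJwt(""))
	if err != nil {
		panic(err)
	}
	fmt.Printf("stored %s (%d bytes)\n", ret.Name, ret.Size)
}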
diff --git a/weed/proto/Makefile b/weed/proto/Makefile
new file mode 100644
index 000000000..73af851dd
--- /dev/null
+++ b/weed/proto/Makefile
@@ -0,0 +1,4 @@
+TARG=../operation
+
+all:
+ protoc --go_out=$(TARG) system_message.proto
diff --git a/weed/proto/system_message.proto b/weed/proto/system_message.proto
new file mode 100644
index 000000000..548360b27
--- /dev/null
+++ b/weed/proto/system_message.proto
@@ -0,0 +1,27 @@
+package operation;
+
+message VolumeInformationMessage {
+ required uint32 id = 1;
+ required uint64 size = 2;
+ optional string collection = 3;
+ required uint64 file_count = 4;
+ required uint64 delete_count = 5;
+ required uint64 deleted_byte_count = 6;
+ optional bool read_only = 7;
+ required uint32 replica_placement = 8;
+ optional uint32 version = 9 [default=2];
+ optional uint32 ttl = 10;
+}
+
+message JoinMessage {
+ optional bool is_init = 1;
+ required string ip = 2;
+ required uint32 port = 3;
+ optional string public_url = 4;
+ required uint32 max_volume_count = 5;
+ required uint64 max_file_key = 6;
+ optional string data_center = 7;
+ optional string rack = 8;
+ repeated VolumeInformationMessage volumes = 9;
+ optional uint32 admin_port = 10;
+}
diff --git a/weed/security/guard.go b/weed/security/guard.go
new file mode 100644
index 000000000..76f64cc0e
--- /dev/null
+++ b/weed/security/guard.go
@@ -0,0 +1,162 @@
+package security
+
+import (
+ "errors"
+ "fmt"
+ "net"
+ "net/http"
+ "strings"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+var (
+ ErrUnauthorized = errors.New("unauthorized token")
+)
+
+/*
+Guard is to ensure data access security.
+There are 2 ways to check access:
+1. white list. It's checking request ip address.
+2. JSON Web Token(JWT) generated from secretKey.
+ The jwt can come from:
+ 1. url parameter jwt=...
+ 2. request header "Authorization"
+ 3. cookie with the name "jwt"
+
+The white list is checked first because it is easy.
+Then the JWT is checked.
+
+The Guard will also check these claims if provided:
+1. "exp" Expiration Time
+2. "nbf" Not Before
+
+Generating JWT:
+1. use HS256 to sign
+2. optionally set "exp", "nbf" fields, in Unix time,
+ the number of seconds elapsed since January 1, 1970 UTC.
+
+Referenced:
+https://github.com/pkieltyka/jwtauth/blob/master/jwtauth.go
+
+*/
+type Guard struct {
+ whiteList []string
+ SecretKey Secret
+
+ isActive bool
+}
+
+func NewGuard(whiteList []string, secretKey string) *Guard {
+ g := &Guard{whiteList: whiteList, SecretKey: Secret(secretKey)}
+ g.isActive = len(g.whiteList) != 0 || len(g.SecretKey) != 0
+ return g
+}
+
+func (g *Guard) WhiteList(f func(w http.ResponseWriter, r *http.Request)) func(w http.ResponseWriter, r *http.Request) {
+ if !g.isActive {
+ //if no security is needed, just skip all checks
+ return f
+ }
+ return func(w http.ResponseWriter, r *http.Request) {
+ if err := g.checkWhiteList(w, r); err != nil {
+ w.WriteHeader(http.StatusUnauthorized)
+ return
+ }
+ f(w, r)
+ }
+}
+
+func (g *Guard) Secure(f func(w http.ResponseWriter, r *http.Request)) func(w http.ResponseWriter, r *http.Request) {
+ if !g.isActive {
+ //if no security is needed, just skip all checks
+ return f
+ }
+ return func(w http.ResponseWriter, r *http.Request) {
+ if err := g.checkJwt(w, r); err != nil {
+ w.WriteHeader(http.StatusUnauthorized)
+ return
+ }
+ f(w, r)
+ }
+}
+
+func GetActualRemoteHost(r *http.Request) (host string, err error) {
+ host = r.Header.Get("HTTP_X_FORWARDED_FOR")
+ if host == "" {
+ host = r.Header.Get("X-FORWARDED-FOR")
+ }
+ if strings.Contains(host, ",") {
+ host = host[0:strings.Index(host, ",")]
+ }
+ if host == "" {
+ host, _, err = net.SplitHostPort(r.RemoteAddr)
+ }
+ return
+}
+
+func (g *Guard) checkWhiteList(w http.ResponseWriter, r *http.Request) error {
+ if len(g.whiteList) == 0 {
+ return nil
+ }
+
+ host, err := GetActualRemoteHost(r)
+ if err == nil {
+ for _, ip := range g.whiteList {
+
+ // If the whitelist entry contains a "/" it
+ // is a CIDR range, and we should check the
+ // remote host is within it
+ if strings.Contains(ip, "/") {
+ _, cidrnet, err := net.ParseCIDR(ip)
+ if err != nil {
+ panic(err)
+ }
+ remote := net.ParseIP(host)
+ if cidrnet.Contains(remote) {
+ return nil
+ }
+ }
+
+ //
+ // Otherwise we're looking for a literal match.
+ //
+ if ip == host {
+ return nil
+ }
+ }
+ }
+
+ glog.V(1).Infof("Not in whitelist: %s", r.RemoteAddr)
+ return fmt.Errorf("Not in whitelist: %s", r.RemoteAddr)
+}
+
+func (g *Guard) checkJwt(w http.ResponseWriter, r *http.Request) error {
+ if g.checkWhiteList(w, r) == nil {
+ return nil
+ }
+
+ if len(g.SecretKey) == 0 {
+ return nil
+ }
+
+ tokenStr := GetJwt(r)
+
+ if tokenStr == "" {
+ return ErrUnauthorized
+ }
+
+ // Verify the token
+ token, err := DecodeJwt(g.SecretKey, tokenStr)
+ if err != nil {
+ glog.V(1).Infof("Token verification error from %s: %v", r.RemoteAddr, err)
+ return ErrUnauthorized
+ }
+ if !token.Valid {
+ glog.V(1).Infof("Token invliad from %s: %v", r.RemoteAddr, tokenStr)
+ return ErrUnauthorized
+ }
+
+ glog.V(1).Infof("No permission from %s", r.RemoteAddr)
+ return fmt.Errorf("No write permission from %s", r.RemoteAddr)
+}
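A hedged sketch of how a server might wrap its handlers with Guard, assuming an operator-chosen whitelist and secret key (both values below are placeholders). Either a non-empty whitelist or a non-empty secret activates the guard; with both empty the wrappers return the handler unchanged.

package main

import (
	"net/http"

	"github.com/chrislusf/seaweedfs/weed/security"
)

func main() {
	// Placeholder whitelist (IPs and CIDR ranges) and placeholder secret.
	guard := security.NewGuard([]string{"127.0.0.1", "10.0.0.0/8"}, "some-secret-key")

	readHandler := func(w http.ResponseWriter, r *http.Request) { w.Write([]byte("read ok")) }
	writeHandler := func(w http.ResponseWriter, r *http.Request) { w.Write([]byte("write ok")) }

	mux := http.NewServeMux()
	mux.HandleFunc("/status", guard.WhiteList(readHandler)) // IP whitelist check only
	mux.HandleFunc("/submit", guard.Secure(writeHandler))   // whitelist first, then JWT
	http.ListenAndServe(":9333", mux)
}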
diff --git a/weed/security/jwt.go b/weed/security/jwt.go
new file mode 100644
index 000000000..a2472ca6e
--- /dev/null
+++ b/weed/security/jwt.go
@@ -0,0 +1,72 @@
+package security
+
+import (
+ "net/http"
+ "strings"
+
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ jwt "github.com/dgrijalva/jwt-go"
+)
+
+type EncodedJwt string
+type Secret string
+
+func GenJwt(secret Secret, fileId string) EncodedJwt {
+ if secret == "" {
+ return ""
+ }
+
+ t := jwt.New(jwt.GetSigningMethod("HS256"))
+ t.Claims["exp"] = time.Now().Unix() + 10
+ t.Claims["sub"] = fileId
+ encoded, e := t.SignedString(secret)
+ if e != nil {
+ glog.V(0).Infof("Failed to sign claims: %v", t.Claims)
+ return ""
+ }
+ return EncodedJwt(encoded)
+}
+
+func GetJwt(r *http.Request) EncodedJwt {
+
+ // Get token from query params
+ tokenStr := r.URL.Query().Get("jwt")
+
+ // Get token from authorization header
+ if tokenStr == "" {
+ bearer := r.Header.Get("Authorization")
+ if len(bearer) > 7 && strings.ToUpper(bearer[0:6]) == "BEARER" {
+ tokenStr = bearer[7:]
+ }
+ }
+
+ // Get token from cookie
+ if tokenStr == "" {
+ cookie, err := r.Cookie("jwt")
+ if err == nil {
+ tokenStr = cookie.Value
+ }
+ }
+
+ return EncodedJwt(tokenStr)
+}
+
+func EncodeJwt(secret Secret, claims map[string]interface{}) (EncodedJwt, error) {
+ if secret == "" {
+ return "", nil
+ }
+
+ t := jwt.New(jwt.GetSigningMethod("HS256"))
+ t.Claims = claims
+ encoded, e := t.SignedString(secret)
+ return EncodedJwt(encoded), e
+}
+
+func DecodeJwt(secret Secret, tokenString EncodedJwt) (token *jwt.Token, err error) {
+ // check exp, nbf
+ return jwt.Parse(string(tokenString), func(token *jwt.Token) (interface{}, error) {
+ return secret, nil
+ })
+}
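A short round-trip sketch of these helpers, assuming a placeholder shared secret and file id; per GenJwt above, the signed token carries the file id as "sub" and expires about 10 seconds after signing.

package main

import (
	"fmt"

	"github.com/chrislusf/seaweedfs/weed/security"
)

func main() {
	secret := security.Secret("some-secret-key") // placeholder secret shared by master and volume servers
	fileId := "3,01637037d6"                     // hypothetical file id used as the "sub" claim

	encoded := security.GenJwt(secret, fileId)
	fmt.Println("signed token:", encoded)

	token, err := security.DecodeJwt(secret, encoded)
	if err != nil || !token.Valid {
		fmt.Println("token rejected:", err)
		return
	}
	fmt.Println("subject claim:", token.Claims["sub"])
}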
diff --git a/weed/sequence/memory_sequencer.go b/weed/sequence/memory_sequencer.go
new file mode 100644
index 000000000..d727dc723
--- /dev/null
+++ b/weed/sequence/memory_sequencer.go
@@ -0,0 +1,36 @@
+package sequence
+
+import (
+ "sync"
+)
+
+// just for testing
+type MemorySequencer struct {
+ counter uint64
+ sequenceLock sync.Mutex
+}
+
+func NewMemorySequencer() (m *MemorySequencer) {
+ m = &MemorySequencer{counter: 1}
+ return
+}
+
+func (m *MemorySequencer) NextFileId(count uint64) (uint64, uint64) {
+ m.sequenceLock.Lock()
+ defer m.sequenceLock.Unlock()
+ ret := m.counter
+ m.counter += uint64(count)
+ return ret, count
+}
+
+func (m *MemorySequencer) SetMax(seenValue uint64) {
+ m.sequenceLock.Lock()
+ defer m.sequenceLock.Unlock()
+ if m.counter <= seenValue {
+ m.counter = seenValue + 1
+ }
+}
+
+func (m *MemorySequencer) Peek() uint64 {
+ return m.counter
+}
diff --git a/weed/sequence/sequence.go b/weed/sequence/sequence.go
new file mode 100644
index 000000000..fbdc3b8ef
--- /dev/null
+++ b/weed/sequence/sequence.go
@@ -0,0 +1,7 @@
+package sequence
+
+type Sequencer interface {
+ NextFileId(count uint64) (uint64, uint64)
+ SetMax(uint64)
+ Peek() uint64
+}
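A small sketch of the Sequencer contract using the in-memory implementation above: NextFileId reserves a contiguous block and returns its first id together with the requested count, and SetMax fast-forwards the counter past ids seen elsewhere.

package main

import (
	"fmt"

	"github.com/chrislusf/seaweedfs/weed/sequence"
)

func main() {
	var seq sequence.Sequencer = sequence.NewMemorySequencer()

	// Reserve a block of 5 ids: start is the first id, count echoes the request.
	start, count := seq.NextFileId(5)
	fmt.Println("reserved", count, "ids starting at", start) // reserved 5 ids starting at 1

	// A later reservation continues where the previous block ended.
	next, _ := seq.NextFileId(1)
	fmt.Println("next id:", next) // next id: 6

	// Skip ahead past ids already observed somewhere else.
	seq.SetMax(100)
	fmt.Println("peek:", seq.Peek()) // peek: 101
}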
diff --git a/weed/server/common.go b/weed/server/common.go
new file mode 100644
index 000000000..312bcea14
--- /dev/null
+++ b/weed/server/common.go
@@ -0,0 +1,179 @@
+package weed_server
+
+import (
+ "bytes"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "net/http"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "github.com/chrislusf/seaweedfs/weed/security"
+ "github.com/chrislusf/seaweedfs/weed/stats"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+var serverStats *stats.ServerStats
+var startTime = time.Now()
+
+func init() {
+ serverStats = stats.NewServerStats()
+ go serverStats.Start()
+
+}
+
+func writeJson(w http.ResponseWriter, r *http.Request, httpStatus int, obj interface{}) (err error) {
+ var bytes []byte
+ if r.FormValue("pretty") != "" {
+ bytes, err = json.MarshalIndent(obj, "", " ")
+ } else {
+ bytes, err = json.Marshal(obj)
+ }
+ if err != nil {
+ return
+ }
+ callback := r.FormValue("callback")
+ if callback == "" {
+ w.Header().Set("Content-Type", "application/json")
+ w.WriteHeader(httpStatus)
+ _, err = w.Write(bytes)
+ } else {
+ w.Header().Set("Content-Type", "application/javascript")
+ w.WriteHeader(httpStatus)
+ if _, err = w.Write([]uint8(callback)); err != nil {
+ return
+ }
+ if _, err = w.Write([]uint8("(")); err != nil {
+ return
+ }
+ fmt.Fprint(w, string(bytes))
+ if _, err = w.Write([]uint8(")")); err != nil {
+ return
+ }
+ }
+
+ return
+}
+
+// wrapper for writeJson - just logs errors
+func writeJsonQuiet(w http.ResponseWriter, r *http.Request, httpStatus int, obj interface{}) {
+ if err := writeJson(w, r, httpStatus, obj); err != nil {
+ glog.V(0).Infof("error writing JSON %s: %v", obj, err)
+ }
+}
+func writeJsonError(w http.ResponseWriter, r *http.Request, httpStatus int, err error) {
+ m := make(map[string]interface{})
+ m["error"] = err.Error()
+ writeJsonQuiet(w, r, httpStatus, m)
+}
+
+func debug(params ...interface{}) {
+ glog.V(4).Infoln(params)
+}
+
+func submitForClientHandler(w http.ResponseWriter, r *http.Request, masterUrl string) {
+ jwt := security.GetJwt(r)
+ m := make(map[string]interface{})
+ if r.Method != "POST" {
+ writeJsonError(w, r, http.StatusMethodNotAllowed, errors.New("Only submit via POST!"))
+ return
+ }
+
+ debug("parsing upload file...")
+ fname, data, mimeType, isGzipped, lastModified, _, _, pe := storage.ParseUpload(r)
+ if pe != nil {
+ writeJsonError(w, r, http.StatusBadRequest, pe)
+ return
+ }
+
+ debug("assigning file id for", fname)
+ r.ParseForm()
+ assignResult, ae := operation.Assign(masterUrl, 1, r.FormValue("replication"), r.FormValue("collection"), r.FormValue("ttl"))
+ if ae != nil {
+ writeJsonError(w, r, http.StatusInternalServerError, ae)
+ return
+ }
+
+ url := "http://" + assignResult.Url + "/" + assignResult.Fid
+ if lastModified != 0 {
+ url = url + "?ts=" + strconv.FormatUint(lastModified, 10)
+ }
+
+ debug("upload file to store", url)
+ uploadResult, err := operation.Upload(url, fname, bytes.NewReader(data), isGzipped, mimeType, jwt)
+ if err != nil {
+ writeJsonError(w, r, http.StatusInternalServerError, err)
+ return
+ }
+
+ m["fileName"] = fname
+ m["fid"] = assignResult.Fid
+ m["fileUrl"] = assignResult.PublicUrl + "/" + assignResult.Fid
+ m["size"] = uploadResult.Size
+ writeJsonQuiet(w, r, http.StatusCreated, m)
+ return
+}
+
+func deleteForClientHandler(w http.ResponseWriter, r *http.Request, masterUrl string) {
+ r.ParseForm()
+ fids := r.Form["fid"]
+ ret, err := operation.DeleteFiles(masterUrl, fids)
+ if err != nil {
+ writeJsonError(w, r, http.StatusInternalServerError, err)
+ return
+ }
+ writeJsonQuiet(w, r, http.StatusAccepted, ret)
+}
+
+func parseURLPath(path string) (vid, fid, filename, ext string, isVolumeIdOnly bool) {
+ switch strings.Count(path, "/") {
+ case 3:
+ parts := strings.Split(path, "/")
+ vid, fid, filename = parts[1], parts[2], parts[3]
+ ext = filepath.Ext(filename)
+ case 2:
+ parts := strings.Split(path, "/")
+ vid, fid = parts[1], parts[2]
+ dotIndex := strings.LastIndex(fid, ".")
+ if dotIndex > 0 {
+ ext = fid[dotIndex:]
+ fid = fid[0:dotIndex]
+ }
+ default:
+ sepIndex := strings.LastIndex(path, "/")
+ commaIndex := strings.LastIndex(path[sepIndex:], ",")
+ if commaIndex <= 0 {
+ vid, isVolumeIdOnly = path[sepIndex+1:], true
+ return
+ }
+ dotIndex := strings.LastIndex(path[sepIndex:], ".")
+ vid = path[sepIndex+1 : commaIndex]
+ fid = path[commaIndex+1:]
+ ext = ""
+ if dotIndex > 0 {
+ fid = path[commaIndex+1 : dotIndex]
+ ext = path[dotIndex:]
+ }
+ }
+ return
+}
+
+func statsCounterHandler(w http.ResponseWriter, r *http.Request) {
+ m := make(map[string]interface{})
+ m["Version"] = util.VERSION
+ m["Counters"] = serverStats
+ writeJsonQuiet(w, r, http.StatusOK, m)
+}
+
+func statsMemoryHandler(w http.ResponseWriter, r *http.Request) {
+ m := make(map[string]interface{})
+ m["Version"] = util.VERSION
+ m["Memory"] = stats.MemStat()
+ writeJsonQuiet(w, r, http.StatusOK, m)
+}
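A hedged illustration of the path shapes parseURLPath handles; since the function is unexported, the sketch below is written as if it sat in the weed_server package (for example in a test file), and the ids are placeholders.

package weed_server

import "fmt"

// illustrateParseURLPath is a hypothetical helper (not part of this commit)
// showing the path forms the handlers above rely on.
func illustrateParseURLPath() {
	// /<vid>/<fid>/<filename> : the extension comes from the file name
	vid, fid, name, ext, _ := parseURLPath("/3/01637037d6/photo.jpg")
	fmt.Println(vid, fid, name, ext) // 3 01637037d6 photo.jpg .jpg

	// /<vid>/<fid>.<ext>
	vid, fid, _, ext, _ = parseURLPath("/3/01637037d6.jpg")
	fmt.Println(vid, fid, ext) // 3 01637037d6 .jpg

	// /<vid>,<fid>.<ext> in a single path segment
	vid, fid, _, ext, _ = parseURLPath("/3,01637037d6.jpg")
	fmt.Println(vid, fid, ext) // 3 01637037d6 .jpg

	// /<vid> alone sets isVolumeIdOnly
	vid, _, _, _, onlyVid := parseURLPath("/3")
	fmt.Println(vid, onlyVid) // 3 true
}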
diff --git a/weed/server/filer_server.go b/weed/server/filer_server.go
new file mode 100644
index 000000000..ee7eaf886
--- /dev/null
+++ b/weed/server/filer_server.go
@@ -0,0 +1,67 @@
+package weed_server
+
+import (
+ "net/http"
+ "strconv"
+
+ "github.com/chrislusf/seaweedfs/weed/filer"
+ "github.com/chrislusf/seaweedfs/weed/filer/cassandra_store"
+ "github.com/chrislusf/seaweedfs/weed/filer/embedded_filer"
+ "github.com/chrislusf/seaweedfs/weed/filer/flat_namespace"
+ "github.com/chrislusf/seaweedfs/weed/filer/redis_store"
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/security"
+)
+
+type FilerServer struct {
+ port string
+ master string
+ collection string
+ defaultReplication string
+ redirectOnRead bool
+ disableDirListing bool
+ secret security.Secret
+ filer filer.Filer
+}
+
+func NewFilerServer(r *http.ServeMux, ip string, port int, master string, dir string, collection string,
+ replication string, redirectOnRead bool, disableDirListing bool,
+ secret string,
+ cassandra_server string, cassandra_keyspace string,
+ redis_server string, redis_password string, redis_database int,
+) (fs *FilerServer, err error) {
+ fs = &FilerServer{
+ master: master,
+ collection: collection,
+ defaultReplication: replication,
+ redirectOnRead: redirectOnRead,
+ disableDirListing: disableDirListing,
+ port: ip + ":" + strconv.Itoa(port),
+ }
+
+ if cassandra_server != "" {
+ cassandra_store, err := cassandra_store.NewCassandraStore(cassandra_keyspace, cassandra_server)
+ if err != nil {
+ glog.Fatalf("Can not connect to cassandra server %s with keyspace %s: %v", cassandra_server, cassandra_keyspace, err)
+ }
+ fs.filer = flat_namespace.NewFlatNamespaceFiler(master, cassandra_store)
+ } else if redis_server != "" {
+ redis_store := redis_store.NewRedisStore(redis_server, redis_password, redis_database)
+ fs.filer = flat_namespace.NewFlatNamespaceFiler(master, redis_store)
+ } else {
+ if fs.filer, err = embedded_filer.NewFilerEmbedded(master, dir); err != nil {
+ glog.Fatalf("Can not start filer in dir %s : %v", dir, err)
+ return
+ }
+
+ r.HandleFunc("/admin/mv", fs.moveHandler)
+ }
+
+ r.HandleFunc("/", fs.filerHandler)
+
+ return fs, nil
+}
+
+func (fs *FilerServer) jwt(fileId string) security.EncodedJwt {
+ return security.GenJwt(fs.secret, fileId)
+}
diff --git a/weed/server/filer_server_handlers.go b/weed/server/filer_server_handlers.go
new file mode 100644
index 000000000..d6b98976b
--- /dev/null
+++ b/weed/server/filer_server_handlers.go
@@ -0,0 +1,265 @@
+package weed_server
+
+import (
+ "bytes"
+ "encoding/json"
+ "errors"
+ "io"
+ "io/ioutil"
+ "net/http"
+ "net/url"
+ "strconv"
+ "strings"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+ "github.com/chrislusf/seaweedfs/weed/util"
+ "github.com/syndtr/goleveldb/leveldb"
+)
+
+func (fs *FilerServer) filerHandler(w http.ResponseWriter, r *http.Request) {
+ switch r.Method {
+ case "GET":
+ fs.GetOrHeadHandler(w, r, true)
+ case "HEAD":
+ fs.GetOrHeadHandler(w, r, false)
+ case "DELETE":
+ fs.DeleteHandler(w, r)
+ case "PUT":
+ fs.PostHandler(w, r)
+ case "POST":
+ fs.PostHandler(w, r)
+ }
+}
+
+// listDirectoryHandler lists files and sub-directories under a directory
+// files are sorted by name and paginated via "lastFileName" and "limit".
+// sub directories are listed on the first page, when "lastFileName"
+// is empty.
+func (fs *FilerServer) listDirectoryHandler(w http.ResponseWriter, r *http.Request) {
+ if !strings.HasSuffix(r.URL.Path, "/") {
+ return
+ }
+ dirlist, err := fs.filer.ListDirectories(r.URL.Path)
+ if err == leveldb.ErrNotFound {
+ glog.V(3).Infoln("Directory Not Found in db", r.URL.Path)
+ w.WriteHeader(http.StatusNotFound)
+ return
+ }
+ m := make(map[string]interface{})
+ m["Directory"] = r.URL.Path
+ lastFileName := r.FormValue("lastFileName")
+ if lastFileName == "" {
+ m["Subdirectories"] = dirlist
+ }
+ limit, limit_err := strconv.Atoi(r.FormValue("limit"))
+ if limit_err != nil {
+ limit = 100
+ }
+ m["Files"], _ = fs.filer.ListFiles(r.URL.Path, lastFileName, limit)
+ writeJsonQuiet(w, r, http.StatusOK, m)
+}
+
+func (fs *FilerServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request, isGetMethod bool) {
+ if strings.HasSuffix(r.URL.Path, "/") {
+ if fs.disableDirListing {
+ w.WriteHeader(http.StatusMethodNotAllowed)
+ return
+ }
+ fs.listDirectoryHandler(w, r)
+ return
+ }
+
+ fileId, err := fs.filer.FindFile(r.URL.Path)
+ if err == leveldb.ErrNotFound {
+ glog.V(3).Infoln("Not found in db", r.URL.Path)
+ w.WriteHeader(http.StatusNotFound)
+ return
+ }
+
+ urlLocation, err := operation.LookupFileId(fs.master, fileId)
+ if err != nil {
+ glog.V(1).Infof("operation LookupFileId %s failed, err is %s", fileId, err.Error())
+ w.WriteHeader(http.StatusNotFound)
+ return
+ }
+ urlString := urlLocation
+ if fs.redirectOnRead {
+ http.Redirect(w, r, urlString, http.StatusFound)
+ return
+ }
+ u, _ := url.Parse(urlString)
+ request := &http.Request{
+ Method: r.Method,
+ URL: u,
+ Proto: r.Proto,
+ ProtoMajor: r.ProtoMajor,
+ ProtoMinor: r.ProtoMinor,
+ Header: r.Header,
+ Body: r.Body,
+ Host: r.Host,
+ ContentLength: r.ContentLength,
+ }
+ glog.V(3).Infoln("retrieving from", u)
+ resp, do_err := util.Do(request)
+ if do_err != nil {
+ glog.V(0).Infoln("failing to connect to volume server", do_err.Error())
+ writeJsonError(w, r, http.StatusInternalServerError, do_err)
+ return
+ }
+ defer resp.Body.Close()
+ for k, v := range resp.Header {
+ w.Header()[k] = v
+ }
+ w.WriteHeader(resp.StatusCode)
+ io.Copy(w, resp.Body)
+}
+
+type analogueReader struct {
+ *bytes.Buffer
+}
+
+// So that it implements the io.ReadCloser interface
+func (m analogueReader) Close() error { return nil }
+
+func (fs *FilerServer) PostHandler(w http.ResponseWriter, r *http.Request) {
+ query := r.URL.Query()
+ replication := query.Get("replication")
+ if replication == "" {
+ replication = fs.defaultReplication
+ }
+ collection := query.Get("collection")
+ if collection == "" {
+ collection = fs.collection
+ }
+
+ var fileId string
+ var err error
+ var urlLocation string
+ if r.Method == "PUT" {
+ buf, _ := ioutil.ReadAll(r.Body)
+ r.Body = analogueReader{bytes.NewBuffer(buf)}
+ fileName, _, _, _, _, _, _, pe := storage.ParseUpload(r)
+ if pe != nil {
+ glog.V(0).Infoln("failing to parse post body", pe.Error())
+ writeJsonError(w, r, http.StatusInternalServerError, pe)
+ return
+ }
+ //reconstruct http request body for following new request to volume server
+ r.Body = analogueReader{bytes.NewBuffer(buf)}
+
+ path := r.URL.Path
+ if strings.HasSuffix(path, "/") {
+ if fileName != "" {
+ path += fileName
+ }
+ }
+
+ if fileId, err = fs.filer.FindFile(path); err != nil && err != leveldb.ErrNotFound {
+ glog.V(0).Infoln("failing to find path in filer store", path, err.Error())
+ writeJsonError(w, r, http.StatusInternalServerError, err)
+ return
+ } else if fileId != "" && err == nil {
+ var le error
+ urlLocation, le = operation.LookupFileId(fs.master, fileId)
+ if le != nil {
+ glog.V(1).Infof("operation LookupFileId %s failed, err is %s", fileId, le.Error())
+ w.WriteHeader(http.StatusNotFound)
+ return
+ }
+ }
+ } else {
+ assignResult, ae := operation.Assign(fs.master, 1, replication, collection, query.Get("ttl"))
+ if ae != nil {
+ glog.V(0).Infoln("failing to assign a file id", ae.Error())
+ writeJsonError(w, r, http.StatusInternalServerError, ae)
+ return
+ }
+ fileId = assignResult.Fid
+ urlLocation = "http://" + assignResult.Url + "/" + assignResult.Fid
+ }
+
+ u, _ := url.Parse(urlLocation)
+ glog.V(4).Infoln("post to", u)
+ request := &http.Request{
+ Method: r.Method,
+ URL: u,
+ Proto: r.Proto,
+ ProtoMajor: r.ProtoMajor,
+ ProtoMinor: r.ProtoMinor,
+ Header: r.Header,
+ Body: r.Body,
+ Host: r.Host,
+ ContentLength: r.ContentLength,
+ }
+ resp, do_err := util.Do(request)
+ if do_err != nil {
+ glog.V(0).Infoln("failing to connect to volume server", r.RequestURI, do_err.Error())
+ writeJsonError(w, r, http.StatusInternalServerError, do_err)
+ return
+ }
+ defer resp.Body.Close()
+ resp_body, ra_err := ioutil.ReadAll(resp.Body)
+ if ra_err != nil {
+ glog.V(0).Infoln("failing to upload to volume server", r.RequestURI, ra_err.Error())
+ writeJsonError(w, r, http.StatusInternalServerError, ra_err)
+ return
+ }
+ glog.V(4).Infoln("post result", string(resp_body))
+ var ret operation.UploadResult
+ unmarshal_err := json.Unmarshal(resp_body, &ret)
+ if unmarshal_err != nil {
+ glog.V(0).Infoln("failing to read upload resonse", r.RequestURI, string(resp_body))
+ writeJsonError(w, r, http.StatusInternalServerError, unmarshal_err)
+ return
+ }
+ if ret.Error != "" {
+ glog.V(0).Infoln("failing to post to volume server", r.RequestURI, ret.Error)
+ writeJsonError(w, r, http.StatusInternalServerError, errors.New(ret.Error))
+ return
+ }
+ path := r.URL.Path
+ if strings.HasSuffix(path, "/") {
+ if ret.Name != "" {
+ path += ret.Name
+ } else {
+ operation.DeleteFile(fs.master, fileId, fs.jwt(fileId)) //clean up
+ glog.V(0).Infoln("Can not to write to folder", path, "without a file name!")
+ writeJsonError(w, r, http.StatusInternalServerError,
+ errors.New("Can not to write to folder "+path+" without a file name"))
+ return
+ }
+ }
+ glog.V(4).Infoln("saving", path, "=>", fileId)
+ if db_err := fs.filer.CreateFile(path, fileId); db_err != nil {
+ operation.DeleteFile(fs.master, fileId, fs.jwt(fileId)) //clean up
+ glog.V(0).Infof("failing to write %s to filer server : %v", path, db_err)
+ writeJsonError(w, r, http.StatusInternalServerError, db_err)
+ return
+ }
+ w.WriteHeader(http.StatusCreated)
+ w.Write(resp_body)
+}
+
+// curl -X DELETE http://localhost:8888/path/to
+// curl -X DELETE http://localhost:8888/path/to?recursive=true
+func (fs *FilerServer) DeleteHandler(w http.ResponseWriter, r *http.Request) {
+ var err error
+ var fid string
+ if strings.HasSuffix(r.URL.Path, "/") {
+ isRecursive := r.FormValue("recursive") == "true"
+ err = fs.filer.DeleteDirectory(r.URL.Path, isRecursive)
+ } else {
+ fid, err = fs.filer.DeleteFile(r.URL.Path)
+ if err == nil && fid != "" {
+ err = operation.DeleteFile(fs.master, fid, fs.jwt(fid))
+ }
+ }
+ if err == nil {
+ writeJsonQuiet(w, r, http.StatusAccepted, map[string]string{"error": ""})
+ } else {
+ glog.V(4).Infoln("deleting", r.URL.Path, ":", err.Error())
+ writeJsonError(w, r, http.StatusInternalServerError, err)
+ }
+}
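A hedged client-side sketch of paging through a directory listing via GET, assuming a filer at localhost:8888; the directory path, file name, and page size are placeholders. The first request returns the sub-directories plus the first page of files; subsequent pages repeat the request with lastFileName set to the last file returned.

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
)

func main() {
	// First page of a hypothetical directory.
	resp, err := http.Get("http://localhost:8888/some/dir/?limit=100")
	if err != nil {
		panic(err)
	}
	page, _ := ioutil.ReadAll(resp.Body)
	resp.Body.Close()
	fmt.Println(string(page))

	// Next page: continue after the last file name of the previous page.
	resp, err = http.Get("http://localhost:8888/some/dir/?lastFileName=photo_0099.jpg&limit=100")
	if err != nil {
		panic(err)
	}
	page, _ = ioutil.ReadAll(resp.Body)
	resp.Body.Close()
	fmt.Println(string(page))
}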
diff --git a/weed/server/filer_server_handlers_admin.go b/weed/server/filer_server_handlers_admin.go
new file mode 100644
index 000000000..979ad517b
--- /dev/null
+++ b/weed/server/filer_server_handlers_admin.go
@@ -0,0 +1,29 @@
+package weed_server
+
+import (
+ "net/http"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+/*
+Move a folder or a file, with 4 use cases:
+ mv fromDir toNewDir
+ mv fromDir toOldDir
+ mv fromFile toDir
+ mv fromFile toFile
+
+Wildcard is not supported.
+
+*/
+func (fs *FilerServer) moveHandler(w http.ResponseWriter, r *http.Request) {
+ from := r.FormValue("from")
+ to := r.FormValue("to")
+ err := fs.filer.Move(from, to)
+ if err != nil {
+ glog.V(4).Infoln("moving", from, "->", to, err.Error())
+ writeJsonError(w, r, http.StatusInternalServerError, err)
+ } else {
+ w.WriteHeader(http.StatusOK)
+ }
+}
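A hedged sketch of invoking the move handler from a client, assuming a filer with the embedded store (the only configuration that registers /admin/mv, per filer_server.go above) at localhost:8888; both paths are placeholders.

package main

import (
	"fmt"
	"net/http"
	"net/url"
)

func main() {
	// Move a hypothetical file into a hypothetical folder.
	resp, err := http.PostForm("http://localhost:8888/admin/mv", url.Values{
		"from": {"/photos/2016/a.jpg"},
		"to":   {"/archive/"},
	})
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("move status:", resp.Status)
}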
diff --git a/weed/server/master_server.go b/weed/server/master_server.go
new file mode 100644
index 000000000..61bda6988
--- /dev/null
+++ b/weed/server/master_server.go
@@ -0,0 +1,131 @@
+package weed_server
+
+import (
+ "fmt"
+ "net/http"
+ "net/http/httputil"
+ "net/url"
+ "sync"
+
+ "github.com/chrislusf/raft"
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/security"
+ "github.com/chrislusf/seaweedfs/weed/sequence"
+ "github.com/chrislusf/seaweedfs/weed/topology"
+ "github.com/chrislusf/seaweedfs/weed/util"
+ "github.com/gorilla/mux"
+)
+
+type MasterServer struct {
+ port int
+ metaFolder string
+ volumeSizeLimitMB uint
+ pulseSeconds int
+ defaultReplicaPlacement string
+ garbageThreshold string
+ guard *security.Guard
+
+ Topo *topology.Topology
+ vg *topology.VolumeGrowth
+ vgLock sync.Mutex
+
+ bounedLeaderChan chan int
+}
+
+func NewMasterServer(r *mux.Router, port int, metaFolder string,
+ volumeSizeLimitMB uint,
+ pulseSeconds int,
+ confFile string,
+ defaultReplicaPlacement string,
+ garbageThreshold string,
+ whiteList []string,
+ secureKey string,
+) *MasterServer {
+ ms := &MasterServer{
+ port: port,
+ volumeSizeLimitMB: volumeSizeLimitMB,
+ pulseSeconds: pulseSeconds,
+ defaultReplicaPlacement: defaultReplicaPlacement,
+ garbageThreshold: garbageThreshold,
+ }
+ ms.bounedLeaderChan = make(chan int, 16)
+ seq := sequence.NewMemorySequencer()
+ var e error
+ if ms.Topo, e = topology.NewTopology("topo", confFile, seq,
+ uint64(volumeSizeLimitMB)*1024*1024, pulseSeconds); e != nil {
+ glog.Fatalf("cannot create topology:%s", e)
+ }
+ ms.vg = topology.NewDefaultVolumeGrowth()
+ glog.V(0).Infoln("Volume Size Limit is", volumeSizeLimitMB, "MB")
+
+ ms.guard = security.NewGuard(whiteList, secureKey)
+
+ r.HandleFunc("/", ms.uiStatusHandler)
+ r.HandleFunc("/ui/index.html", ms.uiStatusHandler)
+ r.HandleFunc("/dir/assign", ms.proxyToLeader(ms.guard.WhiteList(ms.dirAssignHandler)))
+ r.HandleFunc("/dir/lookup", ms.proxyToLeader(ms.guard.WhiteList(ms.dirLookupHandler)))
+ r.HandleFunc("/dir/join", ms.proxyToLeader(ms.guard.WhiteList(ms.dirJoinHandler)))
+ r.HandleFunc("/dir/status", ms.proxyToLeader(ms.guard.WhiteList(ms.dirStatusHandler)))
+ r.HandleFunc("/col/delete", ms.proxyToLeader(ms.guard.WhiteList(ms.collectionDeleteHandler)))
+ r.HandleFunc("/vol/lookup", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeLookupHandler)))
+ r.HandleFunc("/vol/grow", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeGrowHandler)))
+ r.HandleFunc("/vol/status", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeStatusHandler)))
+ r.HandleFunc("/vol/vacuum", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeVacuumHandler)))
+ r.HandleFunc("/submit", ms.guard.WhiteList(ms.submitFromMasterServerHandler))
+ r.HandleFunc("/delete", ms.guard.WhiteList(ms.deleteFromMasterServerHandler))
+ r.HandleFunc("/{fileId}", ms.proxyToLeader(ms.redirectHandler))
+ r.HandleFunc("/stats/counter", ms.guard.WhiteList(statsCounterHandler))
+ r.HandleFunc("/stats/memory", ms.guard.WhiteList(statsMemoryHandler))
+
+ ms.Topo.StartRefreshWritableVolumes(garbageThreshold)
+
+ return ms
+}
+
+func (ms *MasterServer) SetRaftServer(raftServer *RaftServer) {
+ ms.Topo.RaftServer = raftServer.raftServer
+ ms.Topo.RaftServer.AddEventListener(raft.LeaderChangeEventType, func(e raft.Event) {
+ if ms.Topo.RaftServer.Leader() != "" {
+ glog.V(0).Infoln("[", ms.Topo.RaftServer.Name(), "]", ms.Topo.RaftServer.Leader(), "becomes leader.")
+ }
+ })
+ if ms.Topo.IsLeader() {
+ glog.V(0).Infoln("[", ms.Topo.RaftServer.Name(), "]", "I am the leader!")
+ } else {
+ if ms.Topo.RaftServer.Leader() != "" {
+ glog.V(0).Infoln("[", ms.Topo.RaftServer.Name(), "]", ms.Topo.RaftServer.Leader(), "is the leader.")
+ }
+ }
+}
+
+func (ms *MasterServer) proxyToLeader(f func(w http.ResponseWriter, r *http.Request)) func(w http.ResponseWriter, r *http.Request) {
+ return func(w http.ResponseWriter, r *http.Request) {
+ if ms.Topo.IsLeader() {
+ f(w, r)
+ } else if ms.Topo.RaftServer != nil && ms.Topo.RaftServer.Leader() != "" {
+ ms.bounedLeaderChan <- 1
+ defer func() { <-ms.bounedLeaderChan }()
+ targetUrl, err := url.Parse("http://" + ms.Topo.RaftServer.Leader())
+ if err != nil {
+ writeJsonError(w, r, http.StatusInternalServerError,
+ fmt.Errorf("Leader URL http://%s Parse Error: %v", ms.Topo.RaftServer.Leader(), err))
+ return
+ }
+ glog.V(4).Infoln("proxying to leader", ms.Topo.RaftServer.Leader())
+ proxy := httputil.NewSingleHostReverseProxy(targetUrl)
+ director := proxy.Director
+ proxy.Director = func(req *http.Request) {
+ actualHost, err := security.GetActualRemoteHost(req)
+ if err == nil {
+ req.Header.Set("HTTP_X_FORWARDED_FOR", actualHost)
+ }
+ director(req)
+ }
+ proxy.Transport = util.Transport
+ proxy.ServeHTTP(w, r)
+ } else {
+ //drop it to the floor
+ //writeJsonError(w, r, errors.New(ms.Topo.RaftServer.Name()+" does not know Leader yet:"+ms.Topo.RaftServer.Leader()))
+ }
+ }
+}
diff --git a/weed/server/master_server_handlers.go b/weed/server/master_server_handlers.go
new file mode 100644
index 000000000..e811631f8
--- /dev/null
+++ b/weed/server/master_server_handlers.go
@@ -0,0 +1,104 @@
+package weed_server
+
+import (
+ "fmt"
+ "net/http"
+ "strconv"
+ "strings"
+
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "github.com/chrislusf/seaweedfs/weed/stats"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+)
+
+func (ms *MasterServer) lookupVolumeId(vids []string, collection string) (volumeLocations map[string]operation.LookupResult) {
+ volumeLocations = make(map[string]operation.LookupResult)
+ for _, vid := range vids {
+ commaSep := strings.Index(vid, ",")
+ if commaSep > 0 {
+ vid = vid[0:commaSep]
+ }
+ if _, ok := volumeLocations[vid]; ok {
+ continue
+ }
+ volumeId, err := storage.NewVolumeId(vid)
+ if err == nil {
+ machines := ms.Topo.Lookup(collection, volumeId)
+ if machines != nil {
+ var ret []operation.Location
+ for _, dn := range machines {
+ ret = append(ret, operation.Location{Url: dn.Url(), PublicUrl: dn.PublicUrl})
+ }
+ volumeLocations[vid] = operation.LookupResult{VolumeId: vid, Locations: ret}
+ } else {
+ volumeLocations[vid] = operation.LookupResult{VolumeId: vid, Error: "volumeId not found."}
+ }
+ } else {
+ volumeLocations[vid] = operation.LookupResult{VolumeId: vid, Error: "Unknown volumeId format."}
+ }
+ }
+ return
+}
+
+// Takes one volumeId only; it cannot do batch lookups
+func (ms *MasterServer) dirLookupHandler(w http.ResponseWriter, r *http.Request) {
+ vid := r.FormValue("volumeId")
+ commaSep := strings.Index(vid, ",")
+ if commaSep > 0 {
+ vid = vid[0:commaSep]
+ }
+ vids := []string{vid}
+ collection := r.FormValue("collection") //optional, but can be faster if too many collections
+ volumeLocations := ms.lookupVolumeId(vids, collection)
+ location := volumeLocations[vid]
+ httpStatus := http.StatusOK
+ if location.Error != "" {
+ httpStatus = http.StatusNotFound
+ }
+ writeJsonQuiet(w, r, httpStatus, location)
+}
+
+// This can take batched volumeIds, &volumeId=x&volumeId=y&volumeId=z
+func (ms *MasterServer) volumeLookupHandler(w http.ResponseWriter, r *http.Request) {
+ r.ParseForm()
+ vids := r.Form["volumeId"]
+ collection := r.FormValue("collection") //optional, but can be faster if too many collections
+ volumeLocations := ms.lookupVolumeId(vids, collection)
+ writeJsonQuiet(w, r, http.StatusOK, volumeLocations)
+}
+
+func (ms *MasterServer) dirAssignHandler(w http.ResponseWriter, r *http.Request) {
+ stats.AssignRequest()
+ requestedCount, e := strconv.ParseUint(r.FormValue("count"), 10, 64)
+ if e != nil || requestedCount == 0 {
+ requestedCount = 1
+ }
+
+ option, err := ms.getVolumeGrowOption(r)
+ if err != nil {
+ writeJsonQuiet(w, r, http.StatusNotAcceptable, operation.AssignResult{Error: err.Error()})
+ return
+ }
+
+ if !ms.Topo.HasWritableVolume(option) {
+ if ms.Topo.FreeSpace() <= 0 {
+ writeJsonQuiet(w, r, http.StatusNotFound, operation.AssignResult{Error: "No free volumes left!"})
+ return
+ }
+ ms.vgLock.Lock()
+ defer ms.vgLock.Unlock()
+ if !ms.Topo.HasWritableVolume(option) {
+ if _, err = ms.vg.AutomaticGrowByType(option, ms.Topo); err != nil {
+ writeJsonError(w, r, http.StatusInternalServerError,
+ fmt.Errorf("Cannot grow volume group! %v", err))
+ return
+ }
+ }
+ }
+ fid, count, dn, err := ms.Topo.PickForWrite(requestedCount, option)
+ if err == nil {
+ writeJsonQuiet(w, r, http.StatusOK, operation.AssignResult{Fid: fid, Url: dn.Url(), PublicUrl: dn.PublicUrl, Count: count})
+ } else {
+ writeJsonQuiet(w, r, http.StatusNotAcceptable, operation.AssignResult{Error: err.Error()})
+ }
+}
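A hedged sketch of exercising these endpoints through the operation helpers used elsewhere in this change, assuming a master at localhost:9333; the address is a placeholder and the replication, collection, and TTL parameters are left at their defaults.

package main

import (
	"fmt"

	"github.com/chrislusf/seaweedfs/weed/operation"
)

func main() {
	master := "localhost:9333" // placeholder master address

	// Ask /dir/assign for one file id.
	assign, err := operation.Assign(master, 1, "", "", "")
	if err != nil {
		panic(err)
	}
	fmt.Println("assigned fid", assign.Fid, "on", assign.Url)

	// Resolve the volume location again through /dir/lookup.
	location, err := operation.LookupFileId(master, assign.Fid)
	if err != nil {
		panic(err)
	}
	fmt.Println("file readable at", location)
}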
diff --git a/weed/server/master_server_handlers_admin.go b/weed/server/master_server_handlers_admin.go
new file mode 100644
index 000000000..a762bf416
--- /dev/null
+++ b/weed/server/master_server_handlers_admin.go
@@ -0,0 +1,193 @@
+package weed_server
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io/ioutil"
+ "math/rand"
+ "net/http"
+ "strconv"
+ "strings"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+ "github.com/chrislusf/seaweedfs/weed/topology"
+ "github.com/chrislusf/seaweedfs/weed/util"
+ "github.com/golang/protobuf/proto"
+)
+
+func (ms *MasterServer) collectionDeleteHandler(w http.ResponseWriter, r *http.Request) {
+ collection, ok := ms.Topo.FindCollection(r.FormValue("collection"))
+ if !ok {
+ writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("collection %s does not exist", r.FormValue("collection")))
+ return
+ }
+ for _, server := range collection.ListVolumeServers() {
+ _, err := util.Get("http://" + server.Ip + ":" + strconv.Itoa(server.Port) + "/admin/delete_collection?collection=" + r.FormValue("collection"))
+ if err != nil {
+ writeJsonError(w, r, http.StatusInternalServerError, err)
+ return
+ }
+ }
+ ms.Topo.DeleteCollection(r.FormValue("collection"))
+}
+
+func (ms *MasterServer) dirJoinHandler(w http.ResponseWriter, r *http.Request) {
+ body, err := ioutil.ReadAll(r.Body)
+ if err != nil {
+ writeJsonError(w, r, http.StatusBadRequest, err)
+ return
+ }
+ joinMessage := &operation.JoinMessage{}
+ if err = proto.Unmarshal(body, joinMessage); err != nil {
+ writeJsonError(w, r, http.StatusBadRequest, err)
+ return
+ }
+ if *joinMessage.Ip == "" {
+ *joinMessage.Ip = r.RemoteAddr[0:strings.LastIndex(r.RemoteAddr, ":")]
+ }
+ if glog.V(4) {
+ if jsonData, jsonError := json.Marshal(joinMessage); jsonError != nil {
+ glog.V(0).Infoln("json marshaling error: ", jsonError)
+ writeJsonError(w, r, http.StatusBadRequest, jsonError)
+ return
+ } else {
+ glog.V(4).Infoln("Proto size", len(body), "json size", len(jsonData), string(jsonData))
+ }
+ }
+
+ ms.Topo.ProcessJoinMessage(joinMessage)
+ writeJsonQuiet(w, r, http.StatusOK, operation.JoinResult{
+ VolumeSizeLimit: uint64(ms.volumeSizeLimitMB) * 1024 * 1024,
+ SecretKey: string(ms.guard.SecretKey),
+ })
+}
+
+func (ms *MasterServer) dirStatusHandler(w http.ResponseWriter, r *http.Request) {
+ m := make(map[string]interface{})
+ m["Version"] = util.VERSION
+ m["Topology"] = ms.Topo.ToMap()
+ writeJsonQuiet(w, r, http.StatusOK, m)
+}
+
+func (ms *MasterServer) volumeVacuumHandler(w http.ResponseWriter, r *http.Request) {
+ gcThreshold := r.FormValue("garbageThreshold")
+ if gcThreshold == "" {
+ gcThreshold = ms.garbageThreshold
+ }
+ glog.Infoln("garbageThreshold =", gcThreshold)
+ ms.Topo.Vacuum(gcThreshold)
+ ms.dirStatusHandler(w, r)
+}
+
+func (ms *MasterServer) volumeGrowHandler(w http.ResponseWriter, r *http.Request) {
+ count := 0
+ option, err := ms.getVolumeGrowOption(r)
+ if err != nil {
+ writeJsonError(w, r, http.StatusNotAcceptable, err)
+ return
+ }
+ if err == nil {
+ if count, err = strconv.Atoi(r.FormValue("count")); err == nil {
+ if ms.Topo.FreeSpace() < count*option.ReplicaPlacement.GetCopyCount() {
+ err = errors.New("Only " + strconv.Itoa(ms.Topo.FreeSpace()) + " volumes left! Not enough for " + strconv.Itoa(count*option.ReplicaPlacement.GetCopyCount()))
+ } else {
+ count, err = ms.vg.GrowByCountAndType(count, option, ms.Topo)
+ }
+ } else {
+ err = errors.New("parameter count is not found")
+ }
+ }
+ if err != nil {
+ writeJsonError(w, r, http.StatusNotAcceptable, err)
+ } else {
+ writeJsonQuiet(w, r, http.StatusOK, map[string]interface{}{"count": count})
+ }
+}
+
+func (ms *MasterServer) volumeStatusHandler(w http.ResponseWriter, r *http.Request) {
+ m := make(map[string]interface{})
+ m["Version"] = util.VERSION
+ m["Volumes"] = ms.Topo.ToVolumeMap()
+ writeJsonQuiet(w, r, http.StatusOK, m)
+}
+
+func (ms *MasterServer) redirectHandler(w http.ResponseWriter, r *http.Request) {
+ vid, _, _, _, _ := parseURLPath(r.URL.Path)
+ volumeId, err := storage.NewVolumeId(vid)
+ if err != nil {
+ debug("parsing error:", err, r.URL.Path)
+ return
+ }
+ collection := r.FormValue("collection")
+ machines := ms.Topo.Lookup(collection, volumeId)
+ if machines != nil && len(machines) > 0 {
+ var url string
+ if r.URL.RawQuery != "" {
+ url = util.NormalizeUrl(machines[rand.Intn(len(machines))].PublicUrl) + r.URL.Path + "?" + r.URL.RawQuery
+ } else {
+ url = util.NormalizeUrl(machines[rand.Intn(len(machines))].PublicUrl) + r.URL.Path
+ }
+ http.Redirect(w, r, url, http.StatusMovedPermanently)
+ } else {
+ writeJsonError(w, r, http.StatusNotFound, fmt.Errorf("volume id %d or collection %s not found", volumeId, collection))
+ }
+}
+
+func (ms *MasterServer) selfUrl(r *http.Request) string {
+ if r.Host != "" {
+ return r.Host
+ }
+ return "localhost:" + strconv.Itoa(ms.port)
+}
+func (ms *MasterServer) submitFromMasterServerHandler(w http.ResponseWriter, r *http.Request) {
+ if ms.Topo.IsLeader() {
+ submitForClientHandler(w, r, ms.selfUrl(r))
+ } else {
+ masterUrl, err := ms.Topo.Leader()
+ if err != nil {
+ writeJsonError(w, r, http.StatusInternalServerError, err)
+ } else {
+ submitForClientHandler(w, r, masterUrl)
+ }
+ }
+}
+
+func (ms *MasterServer) deleteFromMasterServerHandler(w http.ResponseWriter, r *http.Request) {
+ if ms.Topo.IsLeader() {
+ deleteForClientHandler(w, r, ms.selfUrl(r))
+ } else {
+ deleteForClientHandler(w, r, ms.Topo.RaftServer.Leader())
+ }
+}
+
+func (ms *MasterServer) HasWritableVolume(option *topology.VolumeGrowOption) bool {
+ vl := ms.Topo.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl)
+ return vl.GetActiveVolumeCount(option) > 0
+}
+
+func (ms *MasterServer) getVolumeGrowOption(r *http.Request) (*topology.VolumeGrowOption, error) {
+ replicationString := r.FormValue("replication")
+ if replicationString == "" {
+ replicationString = ms.defaultReplicaPlacement
+ }
+ replicaPlacement, err := storage.NewReplicaPlacementFromString(replicationString)
+ if err != nil {
+ return nil, err
+ }
+ ttl, err := storage.ReadTTL(r.FormValue("ttl"))
+ if err != nil {
+ return nil, err
+ }
+ volumeGrowOption := &topology.VolumeGrowOption{
+ Collection: r.FormValue("collection"),
+ ReplicaPlacement: replicaPlacement,
+ Ttl: ttl,
+ DataCenter: r.FormValue("dataCenter"),
+ Rack: r.FormValue("rack"),
+ DataNode: r.FormValue("dataNode"),
+ }
+ return volumeGrowOption, nil
+}
diff --git a/weed/server/master_server_handlers_ui.go b/weed/server/master_server_handlers_ui.go
new file mode 100644
index 000000000..9ad234877
--- /dev/null
+++ b/weed/server/master_server_handlers_ui.go
@@ -0,0 +1,30 @@
+package weed_server
+
+import (
+ "net/http"
+
+ "github.com/chrislusf/seaweedfs/weed/stats"
+ "github.com/chrislusf/seaweedfs/weed/util"
+ ui "github.com/chrislusf/seaweedfs/weed/server/master_ui"
+)
+
+func (ms *MasterServer) uiStatusHandler(w http.ResponseWriter, r *http.Request) {
+ infos := make(map[string]interface{})
+ infos["Version"] = util.VERSION
+ args := struct {
+ Version string
+ Topology interface{}
+ Leader string
+ Peers interface{}
+ Stats map[string]interface{}
+ Counters *stats.ServerStats
+ }{
+ util.VERSION,
+ ms.Topo.ToMap(),
+ ms.Topo.RaftServer.Leader(),
+ ms.Topo.RaftServer.Peers(),
+ infos,
+ serverStats,
+ }
+ ui.StatusTpl.Execute(w, args)
+}
diff --git a/weed/server/master_ui/templates.go b/weed/server/master_ui/templates.go
new file mode 100644
index 000000000..e9ee2d8d2
--- /dev/null
+++ b/weed/server/master_ui/templates.go
@@ -0,0 +1,102 @@
+package master_ui
+
+import (
+ "html/template"
+)
+
+var StatusTpl = template.Must(template.New("status").Parse(`<!DOCTYPE html>
+<html>
+ <head>
+ <title>SeaweedFS {{ .Version }}</title>
+ <link rel="icon" href="http://7viirv.com1.z0.glb.clouddn.com/seaweed_favicon.png" sizes="32x32" />
+ <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.1/css/bootstrap.min.css">
+ </head>
+ <body>
+ <div class="container">
+ <div class="page-header">
+ <h1>
+ <img src="http://7viirv.com1.z0.glb.clouddn.com/seaweed50x50.png"></img>
+ SeaweedFS <small>{{ .Version }}</small>
+ </h1>
+ </div>
+
+ <div class="row">
+ <div class="col-sm-6">
+ <h2>Cluster status</h2>
+ <table class="table">
+ <tbody>
+ <tr>
+ <th>Free</th>
+ <td>{{ .Topology.Free }}</td>
+ </tr>
+ <tr>
+ <th>Max</th>
+ <td>{{ .Topology.Max }}</td>
+ </tr>
+ <tr>
+ <th>Leader</th>
+ <td><a href="http://{{ .Leader }}">{{ .Leader }}</a></td>
+ </tr>
+ <tr>
+ <td class="col-sm-2 field-label"><label>Peers:</label></td>
+ <td class="col-sm-10"><ul class="list-unstyled">
+ {{ range $k, $p := .Peers }}
+ <li><a href="{{ $p.ConnectionString }}">{{ $p.Name }}</a></li>
+ {{ end }}
+ </ul></td>
+ </tr>
+ </tbody>
+ </table>
+ </div>
+
+ <div class="col-sm-6">
+ <h2>System Stats</h2>
+ <table class="table table-condensed table-striped">
+ <tr>
+ <th>Concurrent Connections</th>
+ <td>{{ .Counters.Connections.WeekCounter.Sum }}</td>
+ </tr>
+ {{ range $key, $val := .Stats }}
+ <tr>
+ <th>{{ $key }}</th>
+ <td>{{ $val }}</td>
+ </tr>
+ {{ end }}
+ </table>
+ </div>
+ </div>
+
+ <div class="row">
+ <h2>Topology</h2>
+ <table class="table table-striped">
+ <thead>
+ <tr>
+ <th>Data Center</th>
+ <th>Rack</th>
+ <th>RemoteAddr</th>
+ <th>#Volumes</th>
+ <th>Max</th>
+ </tr>
+ </thead>
+ <tbody>
+ {{ range $dc_index, $dc := .Topology.DataCenters }}
+ {{ range $rack_index, $rack := $dc.Racks }}
+ {{ range $dn_index, $dn := $rack.DataNodes }}
+ <tr>
+ <td><code>{{ $dc.Id }}</code></td>
+ <td>{{ $rack.Id }}</td>
+ <td><a href="http://{{ $dn.Url }}/ui/index.html">{{ $dn.Url }}</a></td>
+ <td>{{ $dn.Volumes }}</td>
+ <td>{{ $dn.Max }}</td>
+ </tr>
+ {{ end }}
+ {{ end }}
+ {{ end }}
+ </tbody>
+ </table>
+ </div>
+
+ </div>
+ </body>
+</html>
+`))
diff --git a/weed/server/raft_server.go b/weed/server/raft_server.go
new file mode 100644
index 000000000..a35659818
--- /dev/null
+++ b/weed/server/raft_server.go
@@ -0,0 +1,217 @@
+package weed_server
+
+import (
+ "bytes"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io/ioutil"
+ "math/rand"
+ "net/http"
+ "net/url"
+ "os"
+ "path"
+ "reflect"
+ "sort"
+ "strings"
+ "time"
+
+ "github.com/chrislusf/raft"
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/topology"
+ "github.com/gorilla/mux"
+)
+
+type RaftServer struct {
+ peers []string // initial peers to join with
+ raftServer raft.Server
+ dataDir string
+ httpAddr string
+ router *mux.Router
+ topo *topology.Topology
+}
+
+func NewRaftServer(r *mux.Router, peers []string, httpAddr string, dataDir string, topo *topology.Topology, pulseSeconds int) *RaftServer {
+ s := &RaftServer{
+ peers: peers,
+ httpAddr: httpAddr,
+ dataDir: dataDir,
+ router: r,
+ topo: topo,
+ }
+
+ if glog.V(4) {
+ raft.SetLogLevel(2)
+ }
+
+ raft.RegisterCommand(&topology.MaxVolumeIdCommand{})
+
+ var err error
+ transporter := raft.NewHTTPTransporter("/cluster", 0)
+ transporter.Transport.MaxIdleConnsPerHost = 1024
+ glog.V(1).Infof("Starting RaftServer with IP:%v:", httpAddr)
+
+ // Clear old cluster configurations if peers are changed
+ if oldPeers, changed := isPeersChanged(s.dataDir, httpAddr, s.peers); changed {
+ glog.V(0).Infof("Peers Change: %v => %v", oldPeers, s.peers)
+ os.RemoveAll(path.Join(s.dataDir, "conf"))
+ os.RemoveAll(path.Join(s.dataDir, "log"))
+ os.RemoveAll(path.Join(s.dataDir, "snapshot"))
+ }
+
+ s.raftServer, err = raft.NewServer(s.httpAddr, s.dataDir, transporter, nil, topo, "")
+ if err != nil {
+ glog.V(0).Infoln(err)
+ return nil
+ }
+ transporter.Install(s.raftServer, s)
+ s.raftServer.SetHeartbeatInterval(1 * time.Second)
+ s.raftServer.SetElectionTimeout(time.Duration(pulseSeconds) * 3450 * time.Millisecond)
+ s.raftServer.Start()
+
+ s.router.HandleFunc("/cluster/join", s.joinHandler).Methods("POST")
+ s.router.HandleFunc("/cluster/status", s.statusHandler).Methods("GET")
+
+ if len(s.peers) > 0 {
+ // Join to leader if specified.
+ for {
+ glog.V(0).Infoln("Joining cluster:", strings.Join(s.peers, ","))
+ time.Sleep(time.Duration(rand.Intn(1000)) * time.Millisecond)
+ firstJoinError := s.Join(s.peers)
+ if firstJoinError != nil {
+ glog.V(0).Infoln("No existing server found. Starting as leader in the new cluster.")
+ _, err := s.raftServer.Do(&raft.DefaultJoinCommand{
+ Name: s.raftServer.Name(),
+ ConnectionString: "http://" + s.httpAddr,
+ })
+ if err != nil {
+ glog.V(0).Infoln(err)
+ } else {
+ break
+ }
+ } else {
+ break
+ }
+ }
+ } else if s.raftServer.IsLogEmpty() {
+ // Initialize the server by joining itself.
+ glog.V(0).Infoln("Initializing new cluster")
+
+ _, err := s.raftServer.Do(&raft.DefaultJoinCommand{
+ Name: s.raftServer.Name(),
+ ConnectionString: "http://" + s.httpAddr,
+ })
+
+ if err != nil {
+ glog.V(0).Infoln(err)
+ return nil
+ }
+
+ } else {
+ glog.V(0).Infoln("Old conf,log,snapshot should have been removed.")
+ }
+
+ return s
+}
+
+func (s *RaftServer) Peers() (members []string) {
+ peers := s.raftServer.Peers()
+
+ for _, p := range peers {
+ members = append(members, strings.TrimPrefix(p.ConnectionString, "http://"))
+ }
+
+ return
+}
+
+func isPeersChanged(dir string, self string, peers []string) (oldPeers []string, changed bool) {
+ confPath := path.Join(dir, "conf")
+ // open conf file
+ b, err := ioutil.ReadFile(confPath)
+ if err != nil {
+ return oldPeers, true
+ }
+ conf := &raft.Config{}
+ if err = json.Unmarshal(b, conf); err != nil {
+ return oldPeers, true
+ }
+
+ for _, p := range conf.Peers {
+ oldPeers = append(oldPeers, strings.TrimPrefix(p.ConnectionString, "http://"))
+ }
+ oldPeers = append(oldPeers, self)
+
+ sort.Strings(peers)
+ sort.Strings(oldPeers)
+
+ return oldPeers, !reflect.DeepEqual(peers, oldPeers)
+
+}
+
+// Join joins an existing cluster.
+func (s *RaftServer) Join(peers []string) error {
+ command := &raft.DefaultJoinCommand{
+ Name: s.raftServer.Name(),
+ ConnectionString: "http://" + s.httpAddr,
+ }
+
+ var err error
+ var b bytes.Buffer
+ json.NewEncoder(&b).Encode(command)
+ for _, m := range peers {
+ if m == s.httpAddr {
+ continue
+ }
+ target := fmt.Sprintf("http://%s/cluster/join", strings.TrimSpace(m))
+ glog.V(0).Infoln("Attempting to connect to:", target)
+
+ err = postFollowingOneRedirect(target, "application/json", &b)
+
+ if err != nil {
+ glog.V(0).Infoln("Post returned error: ", err.Error())
+ if _, ok := err.(*url.Error); ok {
+ // If we receive a network error try the next member
+ continue
+ }
+ } else {
+ return nil
+ }
+ }
+
+ return errors.New("Could not connect to any cluster peers")
+}
+
+// a workaround because an http POST loses its request body when following a redirect
+func postFollowingOneRedirect(target string, contentType string, b *bytes.Buffer) error {
+ backupReader := bytes.NewReader(b.Bytes())
+ resp, err := http.Post(target, contentType, b)
+ if err != nil {
+ return err
+ }
+ defer resp.Body.Close()
+ reply, _ := ioutil.ReadAll(resp.Body)
+ statusCode := resp.StatusCode
+
+ if statusCode == http.StatusMovedPermanently {
+ var urlStr string
+ if urlStr = resp.Header.Get("Location"); urlStr == "" {
+ return fmt.Errorf("%d response missing Location header", resp.StatusCode)
+ }
+
+ glog.V(0).Infoln("Post redirected to ", urlStr)
+ resp2, err2 := http.Post(urlStr, contentType, backupReader)
+ if err2 != nil {
+ return err2
+ }
+ defer resp2.Body.Close()
+ reply, _ = ioutil.ReadAll(resp2.Body)
+ statusCode = resp2.StatusCode
+ }
+
+ glog.V(0).Infoln("Post returned status: ", statusCode, string(reply))
+ if statusCode != http.StatusOK {
+ return errors.New(string(reply))
+ }
+
+ return nil
+}
diff --git a/weed/server/raft_server_handlers.go b/weed/server/raft_server_handlers.go
new file mode 100644
index 000000000..335ba668f
--- /dev/null
+++ b/weed/server/raft_server_handlers.go
@@ -0,0 +1,64 @@
+package weed_server
+
+import (
+ "encoding/json"
+ "io/ioutil"
+ "net/http"
+ "strings"
+
+ "github.com/chrislusf/raft"
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/operation"
+)
+
+// Handles incoming RAFT joins.
+func (s *RaftServer) joinHandler(w http.ResponseWriter, req *http.Request) {
+ glog.V(0).Infoln("Processing incoming join. Current Leader", s.raftServer.Leader(), "Self", s.raftServer.Name(), "Peers", s.raftServer.Peers())
+ command := &raft.DefaultJoinCommand{}
+
+ commandText, _ := ioutil.ReadAll(req.Body)
+ glog.V(0).Info("Command:", string(commandText))
+ if err := json.NewDecoder(strings.NewReader(string(commandText))).Decode(&command); err != nil {
+ glog.V(0).Infoln("Error decoding json message:", err, string(commandText))
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return
+ }
+
+ glog.V(0).Infoln("join command from Name", command.Name, "Connection", command.ConnectionString)
+
+ if _, err := s.raftServer.Do(command); err != nil {
+ switch err {
+ case raft.NotLeaderError:
+ s.redirectToLeader(w, req)
+ default:
+ glog.V(0).Infoln("Error processing join:", err)
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ }
+ }
+}
+
+func (s *RaftServer) HandleFunc(pattern string, handler func(http.ResponseWriter, *http.Request)) {
+ s.router.HandleFunc(pattern, handler)
+}
+
+func (s *RaftServer) redirectToLeader(w http.ResponseWriter, req *http.Request) {
+ if leader, e := s.topo.Leader(); e == nil {
+ //an http POST is not re-sent through an http.StatusMovedPermanently redirect; see postFollowingOneRedirect
+ glog.V(0).Infoln("Redirecting to", http.StatusMovedPermanently, "http://"+leader+req.URL.Path)
+ http.Redirect(w, req, "http://"+leader+req.URL.Path, http.StatusMovedPermanently)
+ } else {
+ glog.V(0).Infoln("Error: Leader Unknown")
+ http.Error(w, "Leader unknown", http.StatusInternalServerError)
+ }
+}
+
+func (s *RaftServer) statusHandler(w http.ResponseWriter, r *http.Request) {
+ ret := operation.ClusterStatusResult{
+ IsLeader: s.topo.IsLeader(),
+ Peers: s.Peers(),
+ }
+ if leader, e := s.topo.Leader(); e == nil {
+ ret.Leader = leader
+ }
+ writeJsonQuiet(w, r, http.StatusOK, ret)
+}
diff --git a/weed/server/volume_server.go b/weed/server/volume_server.go
new file mode 100644
index 000000000..79a4276b1
--- /dev/null
+++ b/weed/server/volume_server.go
@@ -0,0 +1,125 @@
+package weed_server
+
+import (
+ "math/rand"
+ "net/http"
+ "sync"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/security"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+)
+
+type VolumeServer struct {
+ masterNode string
+ mnLock sync.RWMutex
+ pulseSeconds int
+ dataCenter string
+ rack string
+ store *storage.Store
+ guard *security.Guard
+
+ needleMapKind storage.NeedleMapType
+ FixJpgOrientation bool
+ ReadRedirect bool
+}
+
+func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string,
+ port int, publicUrl string,
+ folders []string, maxCounts []int,
+ needleMapKind storage.NeedleMapType,
+ masterNode string, pulseSeconds int,
+ dataCenter string, rack string,
+ whiteList []string,
+ fixJpgOrientation bool,
+ readRedirect bool) *VolumeServer {
+ vs := &VolumeServer{
+ pulseSeconds: pulseSeconds,
+ dataCenter: dataCenter,
+ rack: rack,
+ needleMapKind: needleMapKind,
+ FixJpgOrientation: fixJpgOrientation,
+ ReadRedirect: readRedirect,
+ }
+ vs.SetMasterNode(masterNode)
+ vs.store = storage.NewStore(port, ip, publicUrl, folders, maxCounts, vs.needleMapKind)
+
+ vs.guard = security.NewGuard(whiteList, "")
+
+ adminMux.HandleFunc("/ui/index.html", vs.uiStatusHandler)
+ adminMux.HandleFunc("/status", vs.guard.WhiteList(vs.statusHandler))
+ adminMux.HandleFunc("/admin/assign_volume", vs.guard.WhiteList(vs.assignVolumeHandler))
+ adminMux.HandleFunc("/admin/vacuum/check", vs.guard.WhiteList(vs.vacuumVolumeCheckHandler))
+ adminMux.HandleFunc("/admin/vacuum/compact", vs.guard.WhiteList(vs.vacuumVolumeCompactHandler))
+ adminMux.HandleFunc("/admin/vacuum/commit", vs.guard.WhiteList(vs.vacuumVolumeCommitHandler))
+ adminMux.HandleFunc("/admin/delete_collection", vs.guard.WhiteList(vs.deleteCollectionHandler))
+ adminMux.HandleFunc("/admin/sync/status", vs.guard.WhiteList(vs.getVolumeSyncStatusHandler))
+ adminMux.HandleFunc("/admin/sync/index", vs.guard.WhiteList(vs.getVolumeIndexContentHandler))
+ adminMux.HandleFunc("/admin/sync/data", vs.guard.WhiteList(vs.getVolumeDataContentHandler))
+ adminMux.HandleFunc("/stats/counter", vs.guard.WhiteList(statsCounterHandler))
+ adminMux.HandleFunc("/stats/memory", vs.guard.WhiteList(statsMemoryHandler))
+ adminMux.HandleFunc("/stats/disk", vs.guard.WhiteList(vs.statsDiskHandler))
+ adminMux.HandleFunc("/delete", vs.guard.WhiteList(vs.batchDeleteHandler))
+ adminMux.HandleFunc("/", vs.privateStoreHandler)
+ if publicMux != adminMux {
+ // separated admin and public port
+ publicMux.HandleFunc("/favicon.ico", vs.faviconHandler)
+ publicMux.HandleFunc("/", vs.publicReadOnlyHandler)
+ }
+
+ go func() {
+ connected := true
+
+ glog.V(0).Infof("Volume server bootstraps with master %s", vs.GetMasterNode())
+ vs.store.SetBootstrapMaster(vs.GetMasterNode())
+ vs.store.SetDataCenter(vs.dataCenter)
+ vs.store.SetRack(vs.rack)
+ for {
+ glog.V(4).Infof("Volume server sending to master %s", vs.GetMasterNode())
+ master, secretKey, err := vs.store.SendHeartbeatToMaster()
+ if err == nil {
+ if !connected {
+ connected = true
+ vs.SetMasterNode(master)
+ vs.guard.SecretKey = secretKey
+ glog.V(0).Infoln("Volume Server Connected with master at", master)
+ }
+ } else {
+ glog.V(1).Infof("Volume Server Failed to talk with master %s: %v", vs.masterNode, err)
+ if connected {
+ connected = false
+ }
+ }
+ if connected {
+ time.Sleep(time.Duration(float32(vs.pulseSeconds*1e3)*(1+rand.Float32())) * time.Millisecond)
+ } else {
+ time.Sleep(time.Duration(float32(vs.pulseSeconds*1e3)*0.25) * time.Millisecond)
+ }
+ }
+ }()
+
+ return vs
+}
+
+func (vs *VolumeServer) GetMasterNode() string {
+ vs.mnLock.RLock()
+ defer vs.mnLock.RUnlock()
+ return vs.masterNode
+}
+
+func (vs *VolumeServer) SetMasterNode(masterNode string) {
+ vs.mnLock.Lock()
+ defer vs.mnLock.Unlock()
+ vs.masterNode = masterNode
+}
+
+func (vs *VolumeServer) Shutdown() {
+ glog.V(0).Infoln("Shutting down volume server...")
+ vs.store.Close()
+ glog.V(0).Infoln("Shut down successfully!")
+}
+
+func (vs *VolumeServer) jwt(fileId string) security.EncodedJwt {
+ return security.GenJwt(vs.guard.SecretKey, fileId)
+}
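For illustration, the heartbeat pacing in the goroutine above boils down to this standalone sketch (the pulseSeconds value is an assumed setting, not taken from this commit): while connected, the pause is the pulse interval plus up to 100% random jitter; while disconnected, the retry comes after a quarter of the interval.

package main

import (
	"fmt"
	"math/rand"
	"time"
)

func main() {
	pulseSeconds := 5 // hypothetical setting

	// connected: sleep between pulseSeconds and 2*pulseSeconds to spread heartbeats out
	connectedPause := time.Duration(float32(pulseSeconds*1e3)*(1+rand.Float32())) * time.Millisecond
	// disconnected: retry quickly, after a quarter of the pulse interval
	disconnectedRetry := time.Duration(float32(pulseSeconds*1e3)*0.25) * time.Millisecond

	fmt.Println("connected pause:", connectedPause)       // somewhere in [5s, 10s)
	fmt.Println("disconnected retry:", disconnectedRetry) // 1.25s
}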
diff --git a/weed/server/volume_server_handlers.go b/weed/server/volume_server_handlers.go
new file mode 100644
index 000000000..2d6fe7849
--- /dev/null
+++ b/weed/server/volume_server_handlers.go
@@ -0,0 +1,57 @@
+package weed_server
+
+import (
+ "net/http"
+
+ "github.com/chrislusf/seaweedfs/weed/stats"
+)
+
+/*
+
+If the volume server is started with a separate public port, the public port
+is considered more "secure".
+
+The public port currently only supports reads.
+
+Writes on the public port may later be added with one of these 3
+security settings:
+1. not secured
+2. secured by a white list
+3. secured by JWT (JSON Web Token)
+
+*/
+
+func (vs *VolumeServer) privateStoreHandler(w http.ResponseWriter, r *http.Request) {
+ switch r.Method {
+ case "GET":
+ stats.ReadRequest()
+ vs.GetOrHeadHandler(w, r)
+ case "HEAD":
+ stats.ReadRequest()
+ vs.GetOrHeadHandler(w, r)
+ case "DELETE":
+ stats.DeleteRequest()
+ vs.guard.WhiteList(vs.DeleteHandler)(w, r)
+ case "PUT":
+ stats.WriteRequest()
+ vs.guard.WhiteList(vs.PostHandler)(w, r)
+ case "POST":
+ stats.WriteRequest()
+ vs.guard.WhiteList(vs.PostHandler)(w, r)
+ }
+}
+
+func (vs *VolumeServer) publicReadOnlyHandler(w http.ResponseWriter, r *http.Request) {
+ switch r.Method {
+ case "GET":
+ stats.ReadRequest()
+ vs.GetOrHeadHandler(w, r)
+ case "HEAD":
+ stats.ReadRequest()
+ vs.GetOrHeadHandler(w, r)
+ }
+}
+
+func (vs *VolumeServer) faviconHandler(w http.ResponseWriter, r *http.Request) {
+ vs.FaviconHandler(w, r)
+}
diff --git a/weed/server/volume_server_handlers_admin.go b/weed/server/volume_server_handlers_admin.go
new file mode 100644
index 000000000..ae9817ef6
--- /dev/null
+++ b/weed/server/volume_server_handlers_admin.go
@@ -0,0 +1,50 @@
+package weed_server
+
+import (
+ "net/http"
+ "path/filepath"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/stats"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+func (vs *VolumeServer) statusHandler(w http.ResponseWriter, r *http.Request) {
+ m := make(map[string]interface{})
+ m["Version"] = util.VERSION
+ m["Volumes"] = vs.store.Status()
+ writeJsonQuiet(w, r, http.StatusOK, m)
+}
+
+func (vs *VolumeServer) assignVolumeHandler(w http.ResponseWriter, r *http.Request) {
+ err := vs.store.AddVolume(r.FormValue("volume"), r.FormValue("collection"), vs.needleMapKind, r.FormValue("replication"), r.FormValue("ttl"))
+ if err == nil {
+ writeJsonQuiet(w, r, http.StatusAccepted, map[string]string{"error": ""})
+ } else {
+ writeJsonError(w, r, http.StatusNotAcceptable, err)
+ }
+ glog.V(2).Infoln("assign volume =", r.FormValue("volume"), ", collection =", r.FormValue("collection"), ", replication =", r.FormValue("replication"), ", error =", err)
+}
+
+func (vs *VolumeServer) deleteCollectionHandler(w http.ResponseWriter, r *http.Request) {
+ err := vs.store.DeleteCollection(r.FormValue("collection"))
+ if err == nil {
+ writeJsonQuiet(w, r, http.StatusOK, map[string]string{"error": ""})
+ } else {
+ writeJsonError(w, r, http.StatusInternalServerError, err)
+ }
+ glog.V(2).Infoln("deleting collection =", r.FormValue("collection"), ", error =", err)
+}
+
+func (vs *VolumeServer) statsDiskHandler(w http.ResponseWriter, r *http.Request) {
+ m := make(map[string]interface{})
+ m["Version"] = util.VERSION
+ var ds []*stats.DiskStatus
+ for _, loc := range vs.store.Locations {
+ if dir, e := filepath.Abs(loc.Directory); e == nil {
+ ds = append(ds, stats.NewDiskStatus(dir))
+ }
+ }
+ m["DiskStatuses"] = ds
+ writeJsonQuiet(w, r, http.StatusOK, m)
+}
diff --git a/weed/server/volume_server_handlers_helper.go b/weed/server/volume_server_handlers_helper.go
new file mode 100644
index 000000000..2bab35e45
--- /dev/null
+++ b/weed/server/volume_server_handlers_helper.go
@@ -0,0 +1,115 @@
+package weed_server
+
+import (
+ "errors"
+ "fmt"
+ "mime/multipart"
+ "net/textproto"
+ "strconv"
+ "strings"
+)
+
+// copied from src/pkg/net/http/fs.go
+
+// httpRange specifies the byte range to be sent to the client.
+type httpRange struct {
+ start, length int64
+}
+
+func (r httpRange) contentRange(size int64) string {
+ return fmt.Sprintf("bytes %d-%d/%d", r.start, r.start+r.length-1, size)
+}
+
+func (r httpRange) mimeHeader(contentType string, size int64) textproto.MIMEHeader {
+ return textproto.MIMEHeader{
+ "Content-Range": {r.contentRange(size)},
+ "Content-Type": {contentType},
+ }
+}
+
+// parseRange parses a Range header string as per RFC 2616.
+func parseRange(s string, size int64) ([]httpRange, error) {
+ if s == "" {
+ return nil, nil // header not present
+ }
+ const b = "bytes="
+ if !strings.HasPrefix(s, b) {
+ return nil, errors.New("invalid range")
+ }
+ var ranges []httpRange
+ for _, ra := range strings.Split(s[len(b):], ",") {
+ ra = strings.TrimSpace(ra)
+ if ra == "" {
+ continue
+ }
+ i := strings.Index(ra, "-")
+ if i < 0 {
+ return nil, errors.New("invalid range")
+ }
+ start, end := strings.TrimSpace(ra[:i]), strings.TrimSpace(ra[i+1:])
+ var r httpRange
+ if start == "" {
+ // If no start is specified, end specifies the
+ // range start relative to the end of the file.
+ i, err := strconv.ParseInt(end, 10, 64)
+ if err != nil {
+ return nil, errors.New("invalid range")
+ }
+ if i > size {
+ i = size
+ }
+ r.start = size - i
+ r.length = size - r.start
+ } else {
+ i, err := strconv.ParseInt(start, 10, 64)
+ if err != nil || i > size || i < 0 {
+ return nil, errors.New("invalid range")
+ }
+ r.start = i
+ if end == "" {
+ // If no end is specified, range extends to end of the file.
+ r.length = size - r.start
+ } else {
+ i, err := strconv.ParseInt(end, 10, 64)
+ if err != nil || r.start > i {
+ return nil, errors.New("invalid range")
+ }
+ if i >= size {
+ i = size - 1
+ }
+ r.length = i - r.start + 1
+ }
+ }
+ ranges = append(ranges, r)
+ }
+ return ranges, nil
+}
+
+// countingWriter counts how many bytes have been written to it.
+type countingWriter int64
+
+func (w *countingWriter) Write(p []byte) (n int, err error) {
+ *w += countingWriter(len(p))
+ return len(p), nil
+}
+
+// rangesMIMESize returns the number of bytes it takes to encode the
+// provided ranges as a multipart response.
+func rangesMIMESize(ranges []httpRange, contentType string, contentSize int64) (encSize int64) {
+ var w countingWriter
+ mw := multipart.NewWriter(&w)
+ for _, ra := range ranges {
+ mw.CreatePart(ra.mimeHeader(contentType, contentSize))
+ encSize += ra.length
+ }
+ mw.Close()
+ encSize += int64(w)
+ return
+}
+
+func sumRangesSize(ranges []httpRange) (size int64) {
+ for _, ra := range ranges {
+ size += ra.length
+ }
+ return
+}
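As an illustrative addition, not part of this commit, a small test in the same package shows what parseRange above produces for a typical multi-range header and for a suffix range:

package weed_server

import "testing"

func TestParseRangeSketch(t *testing.T) {
	// "bytes=0-499,1000-" against a 10000-byte file: the first 500 bytes, then the tail
	ranges, err := parseRange("bytes=0-499,1000-", 10000)
	if err != nil || len(ranges) != 2 {
		t.Fatal("unexpected parse result:", ranges, err)
	}
	if ranges[0].start != 0 || ranges[0].length != 500 {
		t.Fatal("first range wrong:", ranges[0])
	}
	if ranges[1].start != 1000 || ranges[1].length != 9000 {
		t.Fatal("second range wrong:", ranges[1])
	}
	// "bytes=-500" is a suffix range: the last 500 bytes of the file
	suffix, err := parseRange("bytes=-500", 10000)
	if err != nil || suffix[0].start != 9500 || suffix[0].length != 500 {
		t.Fatal("suffix range wrong:", suffix, err)
	}
}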
diff --git a/weed/server/volume_server_handlers_read.go b/weed/server/volume_server_handlers_read.go
new file mode 100644
index 000000000..3889afe5c
--- /dev/null
+++ b/weed/server/volume_server_handlers_read.go
@@ -0,0 +1,301 @@
+package weed_server
+
+import (
+ "bytes"
+ "io"
+ "mime"
+ "mime/multipart"
+ "net/http"
+ "path"
+ "strconv"
+ "strings"
+ "time"
+
+ "net/url"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/images"
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+var fileNameEscaper = strings.NewReplacer("\\", "\\\\", "\"", "\\\"")
+
+func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) {
+ n := new(storage.Needle)
+ vid, fid, filename, ext, _ := parseURLPath(r.URL.Path)
+ volumeId, err := storage.NewVolumeId(vid)
+ if err != nil {
+ glog.V(2).Infoln("parsing error:", err, r.URL.Path)
+ w.WriteHeader(http.StatusBadRequest)
+ return
+ }
+ err = n.ParsePath(fid)
+ if err != nil {
+ glog.V(2).Infoln("parsing fid error:", err, r.URL.Path)
+ w.WriteHeader(http.StatusBadRequest)
+ return
+ }
+
+ glog.V(4).Infoln("volume", volumeId, "reading", n)
+ if !vs.store.HasVolume(volumeId) {
+ if !vs.ReadRedirect {
+ glog.V(2).Infoln("volume is not local:", err, r.URL.Path)
+ w.WriteHeader(http.StatusNotFound)
+ return
+ }
+ lookupResult, err := operation.Lookup(vs.GetMasterNode(), volumeId.String())
+ glog.V(2).Infoln("volume", volumeId, "found on", lookupResult, "error", err)
+ if err == nil && len(lookupResult.Locations) > 0 {
+ u, _ := url.Parse(util.NormalizeUrl(lookupResult.Locations[0].PublicUrl))
+ u.Path = r.URL.Path
+ arg := url.Values{}
+ if c := r.FormValue("collection"); c != "" {
+ arg.Set("collection", c)
+ }
+ u.RawQuery = arg.Encode()
+ http.Redirect(w, r, u.String(), http.StatusMovedPermanently)
+
+ } else {
+ glog.V(2).Infoln("lookup error:", err, r.URL.Path)
+ w.WriteHeader(http.StatusNotFound)
+ }
+ return
+ }
+ cookie := n.Cookie
+ count, e := vs.store.ReadVolumeNeedle(volumeId, n)
+ glog.V(4).Infoln("read bytes", count, "error", e)
+ if e != nil || count <= 0 {
+ glog.V(0).Infoln("read error:", e, r.URL.Path)
+ w.WriteHeader(http.StatusNotFound)
+ return
+ }
+ defer n.ReleaseMemory()
+ if n.Cookie != cookie {
+		glog.V(0).Infoln("request", r.URL.Path, "with mismatched cookie seen:", cookie, "expected:", n.Cookie, "from", r.RemoteAddr, "agent", r.UserAgent())
+ w.WriteHeader(http.StatusNotFound)
+ return
+ }
+ if n.LastModified != 0 {
+ w.Header().Set("Last-Modified", time.Unix(int64(n.LastModified), 0).UTC().Format(http.TimeFormat))
+ if r.Header.Get("If-Modified-Since") != "" {
+ if t, parseError := time.Parse(http.TimeFormat, r.Header.Get("If-Modified-Since")); parseError == nil {
+ if t.Unix() >= int64(n.LastModified) {
+ w.WriteHeader(http.StatusNotModified)
+ return
+ }
+ }
+ }
+ }
+ etag := n.Etag()
+ if inm := r.Header.Get("If-None-Match"); inm == etag {
+ w.WriteHeader(http.StatusNotModified)
+ return
+ }
+ w.Header().Set("Etag", etag)
+
+ if vs.tryHandleChunkedFile(n, filename, w, r) {
+ return
+ }
+
+ if n.NameSize > 0 && filename == "" {
+ filename = string(n.Name)
+ if ext == "" {
+ ext = path.Ext(filename)
+ }
+ }
+ mtype := ""
+ if n.MimeSize > 0 {
+ mt := string(n.Mime)
+ if !strings.HasPrefix(mt, "application/octet-stream") {
+ mtype = mt
+ }
+ }
+
+ if ext != ".gz" {
+ if n.IsGzipped() {
+ if strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") {
+ w.Header().Set("Content-Encoding", "gzip")
+ } else {
+ if n.Data, err = operation.UnGzipData(n.Data); err != nil {
+ glog.V(0).Infoln("ungzip error:", err, r.URL.Path)
+ }
+ }
+ }
+ }
+ if ext == ".png" || ext == ".jpg" || ext == ".gif" {
+ width, height := 0, 0
+ if r.FormValue("width") != "" {
+ width, _ = strconv.Atoi(r.FormValue("width"))
+ }
+ if r.FormValue("height") != "" {
+ height, _ = strconv.Atoi(r.FormValue("height"))
+ }
+ n.Data, _, _ = images.Resized(ext, n.Data, width, height)
+ }
+
+ if e := writeResponseContent(filename, mtype, bytes.NewReader(n.Data), w, r); e != nil {
+ glog.V(2).Infoln("response write error:", e)
+ }
+}
+
+func (vs *VolumeServer) FaviconHandler(w http.ResponseWriter, r *http.Request) {
+ data, err := images.Asset("favicon/favicon.ico")
+ if err != nil {
+ glog.V(2).Infoln("favicon read error:", err)
+ return
+ }
+
+ if e := writeResponseContent("favicon.ico", "image/x-icon", bytes.NewReader(data), w, r); e != nil {
+ glog.V(2).Infoln("response write error:", e)
+ }
+}
+
+func (vs *VolumeServer) tryHandleChunkedFile(n *storage.Needle, fileName string, w http.ResponseWriter, r *http.Request) (processed bool) {
+ if !n.IsChunkedManifest() {
+ return false
+ }
+
+ chunkManifest, e := operation.LoadChunkManifest(n.Data, n.IsGzipped())
+ if e != nil {
+ glog.V(0).Infof("load chunked manifest (%s) error: %v", r.URL.Path, e)
+ return false
+ }
+ if fileName == "" && chunkManifest.Name != "" {
+ fileName = chunkManifest.Name
+ }
+ mType := ""
+ if chunkManifest.Mime != "" {
+ mt := chunkManifest.Mime
+ if !strings.HasPrefix(mt, "application/octet-stream") {
+ mType = mt
+ }
+ }
+
+ w.Header().Set("X-File-Store", "chunked")
+
+ chunkedFileReader := &operation.ChunkedFileReader{
+ Manifest: chunkManifest,
+ Master: vs.GetMasterNode(),
+ }
+ defer chunkedFileReader.Close()
+ if e := writeResponseContent(fileName, mType, chunkedFileReader, w, r); e != nil {
+ glog.V(2).Infoln("response write error:", e)
+ }
+ return true
+}
+
+func writeResponseContent(filename, mimeType string, rs io.ReadSeeker, w http.ResponseWriter, r *http.Request) error {
+ totalSize, e := rs.Seek(0, 2)
+ if mimeType == "" {
+ if ext := path.Ext(filename); ext != "" {
+ mimeType = mime.TypeByExtension(ext)
+ }
+ }
+ if mimeType != "" {
+ w.Header().Set("Content-Type", mimeType)
+ }
+ if filename != "" {
+ contentDisposition := "inline"
+ if r.FormValue("dl") != "" {
+ if dl, _ := strconv.ParseBool(r.FormValue("dl")); dl {
+ contentDisposition = "attachment"
+ }
+ }
+ w.Header().Set("Content-Disposition", contentDisposition+`; filename="`+fileNameEscaper.Replace(filename)+`"`)
+ }
+ w.Header().Set("Accept-Ranges", "bytes")
+ if r.Method == "HEAD" {
+ w.Header().Set("Content-Length", strconv.FormatInt(totalSize, 10))
+ return nil
+ }
+ rangeReq := r.Header.Get("Range")
+ if rangeReq == "" {
+ w.Header().Set("Content-Length", strconv.FormatInt(totalSize, 10))
+ if _, e = rs.Seek(0, 0); e != nil {
+ return e
+ }
+ _, e = io.Copy(w, rs)
+ return e
+ }
+
+	// the rest deals with a partial content (Range) request,
+	// mostly copied from src/pkg/net/http/fs.go
+ ranges, err := parseRange(rangeReq, totalSize)
+ if err != nil {
+ http.Error(w, err.Error(), http.StatusRequestedRangeNotSatisfiable)
+ return nil
+ }
+ if sumRangesSize(ranges) > totalSize {
+ // The total number of bytes in all the ranges
+ // is larger than the size of the file by
+ // itself, so this is probably an attack, or a
+ // dumb client. Ignore the range request.
+ return nil
+ }
+ if len(ranges) == 0 {
+ return nil
+ }
+ if len(ranges) == 1 {
+ // RFC 2616, Section 14.16:
+ // "When an HTTP message includes the content of a single
+ // range (for example, a response to a request for a
+ // single range, or to a request for a set of ranges
+ // that overlap without any holes), this content is
+ // transmitted with a Content-Range header, and a
+ // Content-Length header showing the number of bytes
+ // actually transferred.
+ // ...
+ // A response to a request for a single range MUST NOT
+ // be sent using the multipart/byteranges media type."
+ ra := ranges[0]
+ w.Header().Set("Content-Length", strconv.FormatInt(ra.length, 10))
+ w.Header().Set("Content-Range", ra.contentRange(totalSize))
+ w.WriteHeader(http.StatusPartialContent)
+ if _, e = rs.Seek(ra.start, 0); e != nil {
+ return e
+ }
+
+ _, e = io.CopyN(w, rs, ra.length)
+ return e
+ }
+ // process multiple ranges
+ for _, ra := range ranges {
+ if ra.start > totalSize {
+ http.Error(w, "Out of Range", http.StatusRequestedRangeNotSatisfiable)
+ return nil
+ }
+ }
+ sendSize := rangesMIMESize(ranges, mimeType, totalSize)
+ pr, pw := io.Pipe()
+ mw := multipart.NewWriter(pw)
+ w.Header().Set("Content-Type", "multipart/byteranges; boundary="+mw.Boundary())
+ sendContent := pr
+ defer pr.Close() // cause writing goroutine to fail and exit if CopyN doesn't finish.
+ go func() {
+ for _, ra := range ranges {
+ part, e := mw.CreatePart(ra.mimeHeader(mimeType, totalSize))
+ if e != nil {
+ pw.CloseWithError(e)
+ return
+ }
+ if _, e = rs.Seek(ra.start, 0); e != nil {
+ pw.CloseWithError(e)
+ return
+ }
+ if _, e = io.CopyN(part, rs, ra.length); e != nil {
+ pw.CloseWithError(e)
+ return
+ }
+ }
+ mw.Close()
+ pw.Close()
+ }()
+ if w.Header().Get("Content-Encoding") == "" {
+ w.Header().Set("Content-Length", strconv.FormatInt(sendSize, 10))
+ }
+ w.WriteHeader(http.StatusPartialContent)
+ _, e = io.CopyN(w, sendContent, sendSize)
+ return e
+}
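For reference, a minimal client-side sketch (the address and file id are hypothetical) of reading through the handler above, asking for a resized image via the width and height query parameters:

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
)

func main() {
	// hypothetical volume server address and file id
	url := "http://127.0.0.1:8080/3,0123456789.jpg?width=200&height=200"

	resp, err := http.Get(url)
	if err != nil {
		fmt.Println("read failed:", err)
		return
	}
	defer resp.Body.Close()

	body, _ := ioutil.ReadAll(resp.Body)
	fmt.Println(resp.StatusCode, resp.Header.Get("Content-Type"), len(body), "bytes")
}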
diff --git a/weed/server/volume_server_handlers_sync.go b/weed/server/volume_server_handlers_sync.go
new file mode 100644
index 000000000..8a2e30743
--- /dev/null
+++ b/weed/server/volume_server_handlers_sync.go
@@ -0,0 +1,87 @@
+package weed_server
+
+import (
+ "fmt"
+ "net/http"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+func (vs *VolumeServer) getVolumeSyncStatusHandler(w http.ResponseWriter, r *http.Request) {
+ v, err := vs.getVolume("volume", r)
+ if v == nil {
+ writeJsonError(w, r, http.StatusBadRequest, err)
+ return
+ }
+ syncStat := v.GetVolumeSyncStatus()
+ if syncStat.Error != "" {
+ writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("Get Volume %d status error: %s", v.Id, syncStat.Error))
+ glog.V(2).Infoln("getVolumeSyncStatusHandler volume =", r.FormValue("volume"), ", error =", err)
+ } else {
+ writeJsonQuiet(w, r, http.StatusOK, syncStat)
+ }
+}
+
+func (vs *VolumeServer) getVolumeIndexContentHandler(w http.ResponseWriter, r *http.Request) {
+ v, err := vs.getVolume("volume", r)
+ if v == nil {
+ writeJsonError(w, r, http.StatusBadRequest, err)
+ return
+ }
+ content, err := v.IndexFileContent()
+ if err != nil {
+ writeJsonError(w, r, http.StatusInternalServerError, err)
+ return
+ }
+ w.Write(content)
+}
+
+func (vs *VolumeServer) getVolumeDataContentHandler(w http.ResponseWriter, r *http.Request) {
+ v, err := vs.getVolume("volume", r)
+ if v == nil {
+ writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("Not Found volume: %v", err))
+ return
+ }
+ if int(v.SuperBlock.CompactRevision) != util.ParseInt(r.FormValue("revision"), 0) {
+ writeJsonError(w, r, http.StatusExpectationFailed, fmt.Errorf("Requested Volume Revision is %s, but current revision is %d", r.FormValue("revision"), v.SuperBlock.CompactRevision))
+ return
+ }
+ offset := uint32(util.ParseUint64(r.FormValue("offset"), 0))
+ size := uint32(util.ParseUint64(r.FormValue("size"), 0))
+ content, block, err := storage.ReadNeedleBlob(v.DataFile(), int64(offset)*storage.NeedlePaddingSize, size)
+ defer storage.ReleaseBytes(block.Bytes)
+ if err != nil {
+ writeJsonError(w, r, http.StatusInternalServerError, err)
+ return
+ }
+
+ id := util.ParseUint64(r.FormValue("id"), 0)
+ n := new(storage.Needle)
+ n.ParseNeedleHeader(content)
+ if id != n.Id {
+ writeJsonError(w, r, http.StatusNotFound, fmt.Errorf("Expected file entry id %d, but found %d", id, n.Id))
+ return
+ }
+
+ w.Write(content)
+}
+
+func (vs *VolumeServer) getVolume(volumeParameterName string, r *http.Request) (*storage.Volume, error) {
+ volumeIdString := r.FormValue(volumeParameterName)
+ if volumeIdString == "" {
+ err := fmt.Errorf("Empty Volume Id: Need to pass in %s=the_volume_id.", volumeParameterName)
+ return nil, err
+ }
+ vid, err := storage.NewVolumeId(volumeIdString)
+ if err != nil {
+ err = fmt.Errorf("Volume Id %s is not a valid unsigned integer", volumeIdString)
+ return nil, err
+ }
+ v := vs.store.GetVolume(vid)
+ if v == nil {
+ return nil, fmt.Errorf("Not Found Volume Id %s: %d", volumeIdString, vid)
+ }
+ return v, nil
+}
diff --git a/weed/server/volume_server_handlers_ui.go b/weed/server/volume_server_handlers_ui.go
new file mode 100644
index 000000000..7923c95c0
--- /dev/null
+++ b/weed/server/volume_server_handlers_ui.go
@@ -0,0 +1,38 @@
+package weed_server
+
+import (
+ "net/http"
+ "path/filepath"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/stats"
+ "github.com/chrislusf/seaweedfs/weed/util"
+ ui "github.com/chrislusf/seaweedfs/weed/server/volume_server_ui"
+)
+
+func (vs *VolumeServer) uiStatusHandler(w http.ResponseWriter, r *http.Request) {
+ infos := make(map[string]interface{})
+	infos["Up Time"] = time.Since(startTime).String()
+ var ds []*stats.DiskStatus
+ for _, loc := range vs.store.Locations {
+ if dir, e := filepath.Abs(loc.Directory); e == nil {
+ ds = append(ds, stats.NewDiskStatus(dir))
+ }
+ }
+ args := struct {
+ Version string
+ Master string
+ Volumes interface{}
+ DiskStatuses interface{}
+ Stats interface{}
+ Counters *stats.ServerStats
+ }{
+ util.VERSION,
+ vs.masterNode,
+ vs.store.Status(),
+ ds,
+ infos,
+ serverStats,
+ }
+ ui.StatusTpl.Execute(w, args)
+}
diff --git a/weed/server/volume_server_handlers_vacuum.go b/weed/server/volume_server_handlers_vacuum.go
new file mode 100644
index 000000000..ef348d35c
--- /dev/null
+++ b/weed/server/volume_server_handlers_vacuum.go
@@ -0,0 +1,35 @@
+package weed_server
+
+import (
+ "net/http"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+func (vs *VolumeServer) vacuumVolumeCheckHandler(w http.ResponseWriter, r *http.Request) {
+ err, ret := vs.store.CheckCompactVolume(r.FormValue("volume"), r.FormValue("garbageThreshold"))
+ if err == nil {
+ writeJsonQuiet(w, r, http.StatusOK, map[string]interface{}{"error": "", "result": ret})
+ } else {
+ writeJsonQuiet(w, r, http.StatusInternalServerError, map[string]interface{}{"error": err.Error(), "result": false})
+ }
+ glog.V(2).Infoln("checked compacting volume =", r.FormValue("volume"), "garbageThreshold =", r.FormValue("garbageThreshold"), "vacuum =", ret)
+}
+func (vs *VolumeServer) vacuumVolumeCompactHandler(w http.ResponseWriter, r *http.Request) {
+ err := vs.store.CompactVolume(r.FormValue("volume"))
+ if err == nil {
+ writeJsonQuiet(w, r, http.StatusOK, map[string]string{"error": ""})
+ } else {
+ writeJsonError(w, r, http.StatusInternalServerError, err)
+ }
+ glog.V(2).Infoln("compacted volume =", r.FormValue("volume"), ", error =", err)
+}
+func (vs *VolumeServer) vacuumVolumeCommitHandler(w http.ResponseWriter, r *http.Request) {
+ err := vs.store.CommitCompactVolume(r.FormValue("volume"))
+ if err == nil {
+ writeJsonQuiet(w, r, http.StatusOK, map[string]string{"error": ""})
+ } else {
+ writeJsonError(w, r, http.StatusInternalServerError, err)
+ }
+ glog.V(2).Infoln("commit compact volume =", r.FormValue("volume"), ", error =", err)
+}
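These three endpoints are meant to be driven in sequence during vacuuming (normally by the master). A minimal hand-driven sketch, with the admin address, volume id and garbage threshold as assumed values:

package main

import (
	"fmt"
	"net/http"
)

func main() {
	base := "http://127.0.0.1:8080" // hypothetical admin address of a volume server
	for _, p := range []string{
		"/admin/vacuum/check?volume=3&garbageThreshold=0.3",
		"/admin/vacuum/compact?volume=3",
		"/admin/vacuum/commit?volume=3",
	} {
		resp, err := http.Get(base + p)
		if err != nil {
			fmt.Println(p, "failed:", err)
			return
		}
		resp.Body.Close()
		fmt.Println(p, resp.Status)
	}
}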
diff --git a/weed/server/volume_server_handlers_write.go b/weed/server/volume_server_handlers_write.go
new file mode 100644
index 000000000..e7ca2f8e1
--- /dev/null
+++ b/weed/server/volume_server_handlers_write.go
@@ -0,0 +1,165 @@
+package weed_server
+
+import (
+ "errors"
+ "fmt"
+ "net/http"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+ "github.com/chrislusf/seaweedfs/weed/topology"
+)
+
+func (vs *VolumeServer) PostHandler(w http.ResponseWriter, r *http.Request) {
+ if e := r.ParseForm(); e != nil {
+ glog.V(0).Infoln("form parse error:", e)
+ writeJsonError(w, r, http.StatusBadRequest, e)
+ return
+ }
+ vid, _, _, _, _ := parseURLPath(r.URL.Path)
+ volumeId, ve := storage.NewVolumeId(vid)
+ if ve != nil {
+ glog.V(0).Infoln("NewVolumeId error:", ve)
+ writeJsonError(w, r, http.StatusBadRequest, ve)
+ return
+ }
+ needle, ne := storage.NewNeedle(r, vs.FixJpgOrientation)
+ if ne != nil {
+ writeJsonError(w, r, http.StatusBadRequest, ne)
+ return
+ }
+
+ ret := operation.UploadResult{}
+ size, errorStatus := topology.ReplicatedWrite(vs.GetMasterNode(),
+ vs.store, volumeId, needle, r)
+ httpStatus := http.StatusCreated
+ if errorStatus != "" {
+ httpStatus = http.StatusInternalServerError
+ ret.Error = errorStatus
+ }
+ if needle.HasName() {
+ ret.Name = string(needle.Name)
+ }
+ ret.Size = size
+ writeJsonQuiet(w, r, httpStatus, ret)
+}
+
+func (vs *VolumeServer) DeleteHandler(w http.ResponseWriter, r *http.Request) {
+ n := new(storage.Needle)
+ vid, fid, _, _, _ := parseURLPath(r.URL.Path)
+ volumeId, _ := storage.NewVolumeId(vid)
+ n.ParsePath(fid)
+
+ glog.V(2).Infoln("deleting", n)
+
+ cookie := n.Cookie
+
+ _, ok := vs.store.ReadVolumeNeedle(volumeId, n)
+ if ok != nil {
+ m := make(map[string]uint32)
+ m["size"] = 0
+ writeJsonQuiet(w, r, http.StatusNotFound, m)
+ return
+ }
+ defer n.ReleaseMemory()
+
+ if n.Cookie != cookie {
+		glog.V(0).Infoln("delete", r.URL.Path, "with mismatched cookie from", r.RemoteAddr, "agent", r.UserAgent())
+ writeJsonError(w, r, http.StatusBadRequest, errors.New("File Random Cookie does not match."))
+ return
+ }
+
+ count := int64(n.Size)
+
+ if n.IsChunkedManifest() {
+ chunkManifest, e := operation.LoadChunkManifest(n.Data, n.IsGzipped())
+ if e != nil {
+ writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("Load chunks manifest error: %v", e))
+ return
+ }
+		// make sure all chunks are deleted before deleting the manifest
+ if e := chunkManifest.DeleteChunks(vs.GetMasterNode()); e != nil {
+ writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("Delete chunks error: %v", e))
+ return
+ }
+ count = chunkManifest.Size
+ }
+
+ _, err := topology.ReplicatedDelete(vs.GetMasterNode(), vs.store, volumeId, n, r)
+
+ if err == nil {
+ m := make(map[string]int64)
+ m["size"] = count
+ writeJsonQuiet(w, r, http.StatusAccepted, m)
+ } else {
+ writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("Deletion Failed: %v", err))
+ }
+
+}
+
+//Experts only: takes multiple fid parameters. This function does not propagate deletes to replicas.
+func (vs *VolumeServer) batchDeleteHandler(w http.ResponseWriter, r *http.Request) {
+ r.ParseForm()
+ var ret []operation.DeleteResult
+ for _, fid := range r.Form["fid"] {
+ vid, id_cookie, err := operation.ParseFileId(fid)
+ if err != nil {
+ ret = append(ret, operation.DeleteResult{
+ Fid: fid,
+ Status: http.StatusBadRequest,
+ Error: err.Error()})
+ continue
+ }
+ n := new(storage.Needle)
+ volumeId, _ := storage.NewVolumeId(vid)
+ n.ParsePath(id_cookie)
+ glog.V(4).Infoln("batch deleting", n)
+ cookie := n.Cookie
+ if _, err := vs.store.ReadVolumeNeedle(volumeId, n); err != nil {
+ ret = append(ret, operation.DeleteResult{
+ Fid: fid,
+ Status: http.StatusNotFound,
+ Error: err.Error(),
+ })
+ continue
+ }
+
+ if n.IsChunkedManifest() {
+ ret = append(ret, operation.DeleteResult{
+ Fid: fid,
+ Status: http.StatusNotAcceptable,
+ Error: "ChunkManifest: not allowed in batch delete mode.",
+ })
+ n.ReleaseMemory()
+ continue
+ }
+
+ if n.Cookie != cookie {
+ ret = append(ret, operation.DeleteResult{
+ Fid: fid,
+ Status: http.StatusBadRequest,
+ Error: "File Random Cookie does not match.",
+ })
+			glog.V(0).Infoln("deleting", fid, "with mismatched cookie from", r.RemoteAddr, "agent", r.UserAgent())
+ n.ReleaseMemory()
+ return
+ }
+ if size, err := vs.store.Delete(volumeId, n); err != nil {
+ ret = append(ret, operation.DeleteResult{
+ Fid: fid,
+ Status: http.StatusInternalServerError,
+ Error: err.Error()},
+ )
+ } else {
+ ret = append(ret, operation.DeleteResult{
+ Fid: fid,
+ Status: http.StatusAccepted,
+ Size: int(size)},
+ )
+ }
+ n.ReleaseMemory()
+ }
+
+ writeJsonQuiet(w, r, http.StatusAccepted, ret)
+}
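For reference, a minimal client-side sketch (address and file ids are hypothetical) of the batch delete endpoint above: several fid values are posted to /delete, and each one gets its own entry in the returned DeleteResult list.

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
)

func main() {
	values := url.Values{}
	values.Add("fid", "3,0123456789") // hypothetical file ids
	values.Add("fid", "4,04567890ab")

	resp, err := http.PostForm("http://127.0.0.1:8080/delete", values)
	if err != nil {
		fmt.Println("batch delete failed:", err)
		return
	}
	defer resp.Body.Close()

	body, _ := ioutil.ReadAll(resp.Body)
	fmt.Println(resp.StatusCode, string(body))
}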
diff --git a/weed/server/volume_server_ui/templates.go b/weed/server/volume_server_ui/templates.go
new file mode 100644
index 000000000..c3db6e92a
--- /dev/null
+++ b/weed/server/volume_server_ui/templates.go
@@ -0,0 +1,135 @@
+package master_ui
+
+import (
+ "html/template"
+ "strconv"
+ "strings"
+)
+
+func join(data []int64) string {
+ var ret []string
+ for _, d := range data {
+ ret = append(ret, strconv.Itoa(int(d)))
+ }
+ return strings.Join(ret, ",")
+}
+
+var funcMap = template.FuncMap{
+ "join": join,
+}
+
+var StatusTpl = template.Must(template.New("status").Funcs(funcMap).Parse(`<!DOCTYPE html>
+<html>
+ <head>
+ <title>SeaweedFS {{ .Version }}</title>
+ <link rel="icon" href="http://7viirv.com1.z0.glb.clouddn.com/seaweed_favicon.png" sizes="32x32" />
+ <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.1/css/bootstrap.min.css">
+ <script type="text/javascript" src="https://code.jquery.com/jquery-2.1.3.min.js"></script>
+ <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jquery-sparklines/2.1.2/jquery.sparkline.min.js"></script>
+ <script type="text/javascript">
+ $(function() {
+ var periods = ['second', 'minute', 'hour', 'day'];
+ for (i = 0; i < periods.length; i++) {
+ var period = periods[i];
+ $('.inlinesparkline-'+period).sparkline('html', {
+ type: 'line',
+ barColor: 'red',
+ tooltipSuffix:' request per '+period,
+ });
+ }
+ });
+ </script>
+ <style>
+ #jqstooltip{
+ height: 28px !important;
+ width: 150px !important;
+ }
+ </style>
+ </head>
+ <body>
+ <div class="container">
+ <div class="page-header">
+ <h1>
+ <img src="http://7viirv.com1.z0.glb.clouddn.com/seaweed50x50.png"></img>
+ SeaweedFS <small>{{ .Version }}</small>
+ </h1>
+ </div>
+
+ <div class="row">
+ <div class="col-sm-6">
+ <h2>Disk Stats</h2>
+ <table class="table table-condensed table-striped">
+ {{ range .DiskStatuses }}
+ <tr>
+ <th>{{ .Dir }}</th>
+ <td>{{ .Free }} Bytes Free</td>
+ </tr>
+ {{ end }}
+ </table>
+ </div>
+
+ <div class="col-sm-6">
+ <h2>System Stats</h2>
+ <table class="table table-condensed table-striped">
+ <tr>
+ <th>Master</th>
+ <td><a href="http://{{.Master}}/ui/index.html">{{.Master}}</a></td>
+ </tr>
+ <tr>
+ <th>Weekly # ReadRequests</th>
+ <td><span class="inlinesparkline-day">{{ .Counters.ReadRequests.WeekCounter.ToList | join }}</span></td>
+ </tr>
+ <tr>
+ <th>Daily # ReadRequests</th>
+ <td><span class="inlinesparkline-hour">{{ .Counters.ReadRequests.DayCounter.ToList | join }}</span></td>
+ </tr>
+ <tr>
+ <th>Hourly # ReadRequests</th>
+ <td><span class="inlinesparkline-minute">{{ .Counters.ReadRequests.HourCounter.ToList | join }}</span></td>
+ </tr>
+ <tr>
+ <th>Last Minute # ReadRequests</th>
+ <td><span class="inlinesparkline-second">{{ .Counters.ReadRequests.MinuteCounter.ToList | join }}</span></td>
+ </tr>
+ {{ range $key, $val := .Stats }}
+ <tr>
+ <th>{{ $key }}</th>
+ <td>{{ $val }}</td>
+ </tr>
+ {{ end }}
+ </table>
+ </div>
+ </div>
+
+ <div class="row">
+ <h2>Volumes</h2>
+ <table class="table table-striped">
+ <thead>
+ <tr>
+ <th>Id</th>
+ <th>Collection</th>
+ <th>Size</th>
+ <th>Files</th>
+ <th>Trash</th>
+ <th>TTL</th>
+ </tr>
+ </thead>
+ <tbody>
+ {{ range .Volumes }}
+ <tr>
+ <td><code>{{ .Id }}</code></td>
+ <td>{{ .Collection }}</td>
+ <td>{{ .Size }} Bytes</td>
+ <td>{{ .FileCount }}</td>
+ <td>{{ .DeleteCount }} / {{.DeletedByteCount}} Bytes</td>
+ <td>{{ .Ttl }}</td>
+ </tr>
+ {{ end }}
+ </tbody>
+ </table>
+ </div>
+
+ </div>
+ </body>
+</html>
+`))
diff --git a/weed/stats/disk.go b/weed/stats/disk.go
new file mode 100644
index 000000000..46d8c465e
--- /dev/null
+++ b/weed/stats/disk.go
@@ -0,0 +1,14 @@
+package stats
+
+type DiskStatus struct {
+ Dir string
+ All uint64
+ Used uint64
+ Free uint64
+}
+
+func NewDiskStatus(path string) (disk *DiskStatus) {
+ disk = &DiskStatus{Dir: path}
+ disk.fillInStatus()
+ return
+}
diff --git a/weed/stats/disk_notsupported.go b/weed/stats/disk_notsupported.go
new file mode 100644
index 000000000..e380d27ea
--- /dev/null
+++ b/weed/stats/disk_notsupported.go
@@ -0,0 +1,7 @@
+// +build windows openbsd netbsd plan9 solaris
+
+package stats
+
+func (disk *DiskStatus) fillInStatus() {
+ return
+}
diff --git a/weed/stats/disk_supported.go b/weed/stats/disk_supported.go
new file mode 100644
index 000000000..d68f0a32e
--- /dev/null
+++ b/weed/stats/disk_supported.go
@@ -0,0 +1,19 @@
+// +build !windows,!openbsd,!netbsd,!plan9,!solaris
+
+package stats
+
+import (
+ "syscall"
+)
+
+func (disk *DiskStatus) fillInStatus() {
+ fs := syscall.Statfs_t{}
+ err := syscall.Statfs(disk.Dir, &fs)
+ if err != nil {
+ return
+ }
+ disk.All = fs.Blocks * uint64(fs.Bsize)
+ disk.Free = fs.Bfree * uint64(fs.Bsize)
+ disk.Used = disk.All - disk.Free
+ return
+}
diff --git a/weed/stats/duration_counter.go b/weed/stats/duration_counter.go
new file mode 100644
index 000000000..69c8be61d
--- /dev/null
+++ b/weed/stats/duration_counter.go
@@ -0,0 +1,94 @@
+package stats
+
+import (
+ "time"
+)
+
+type TimedValue struct {
+ t time.Time
+ val int64
+}
+
+func NewTimedValue(t time.Time, val int64) *TimedValue {
+ return &TimedValue{t: t, val: val}
+}
+
+type RoundRobinCounter struct {
+ LastIndex int
+ Values []int64
+ Counts []int64
+}
+
+func NewRoundRobinCounter(slots int) *RoundRobinCounter {
+ return &RoundRobinCounter{LastIndex: -1, Values: make([]int64, slots), Counts: make([]int64, slots)}
+}
+func (rrc *RoundRobinCounter) Add(index int, val int64) {
+ if index >= len(rrc.Values) {
+ return
+ }
+ for rrc.LastIndex != index {
+ rrc.LastIndex = (rrc.LastIndex + 1) % len(rrc.Values)
+ rrc.Values[rrc.LastIndex] = 0
+ rrc.Counts[rrc.LastIndex] = 0
+ }
+ rrc.Values[index] += val
+ rrc.Counts[index]++
+}
+func (rrc *RoundRobinCounter) Max() (max int64) {
+ for _, val := range rrc.Values {
+ if max < val {
+ max = val
+ }
+ }
+ return
+}
+func (rrc *RoundRobinCounter) Count() (cnt int64) {
+ for _, c := range rrc.Counts {
+ cnt += c
+ }
+ return
+}
+func (rrc *RoundRobinCounter) Sum() (sum int64) {
+ for _, val := range rrc.Values {
+ sum += val
+ }
+ return
+}
+
+func (rrc *RoundRobinCounter) ToList() (ret []int64) {
+ index := rrc.LastIndex
+ step := len(rrc.Values)
+ for step > 0 {
+ step--
+ index++
+ if index >= len(rrc.Values) {
+ index = 0
+ }
+ ret = append(ret, rrc.Values[index])
+ }
+ return
+}
+
+type DurationCounter struct {
+ MinuteCounter *RoundRobinCounter
+ HourCounter *RoundRobinCounter
+ DayCounter *RoundRobinCounter
+ WeekCounter *RoundRobinCounter
+}
+
+func NewDurationCounter() *DurationCounter {
+ return &DurationCounter{
+ MinuteCounter: NewRoundRobinCounter(60),
+ HourCounter: NewRoundRobinCounter(60),
+ DayCounter: NewRoundRobinCounter(24),
+ WeekCounter: NewRoundRobinCounter(7),
+ }
+}
+
+// Add is for cumulative counts
+func (sc *DurationCounter) Add(tv *TimedValue) {
+ sc.MinuteCounter.Add(tv.t.Second(), tv.val)
+ sc.HourCounter.Add(tv.t.Minute(), tv.val)
+ sc.DayCounter.Add(tv.t.Hour(), tv.val)
+ sc.WeekCounter.Add(int(tv.t.Weekday()), tv.val)
+}
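A small illustrative test, not part of this commit, of how the counters above bucket samples: two values carrying the same timestamp land in the same minute-counter slot.

package stats

import (
	"testing"
	"time"
)

func TestDurationCounterSketch(t *testing.T) {
	dc := NewDurationCounter()
	now := time.Now() // one timestamp, so both samples hit the same per-second slot
	dc.Add(NewTimedValue(now, 2))
	dc.Add(NewTimedValue(now, 3))
	if dc.MinuteCounter.Sum() != 5 || dc.MinuteCounter.Count() != 2 {
		t.Fatal("expected both samples to accumulate in the same minute-counter slot")
	}
}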
diff --git a/weed/stats/duration_counter_test.go b/weed/stats/duration_counter_test.go
new file mode 100644
index 000000000..aa9d61c87
--- /dev/null
+++ b/weed/stats/duration_counter_test.go
@@ -0,0 +1,19 @@
+package stats
+
+import "testing"
+
+func TestRobinCounter(t *testing.T) {
+ rrc := NewRoundRobinCounter(60)
+ rrc.Add(0, 1)
+ rrc.Add(50, 2)
+ if rrc.Count() != 2 {
+ t.Fatal()
+ }
+ if rrc.Sum() != 3 {
+ t.Fatal()
+ }
+	/*
+		adding past the last slot (index out of range) must be ignored, not panic
+	*/
+ rrc.Add(61, 1)
+}
diff --git a/weed/stats/memory.go b/weed/stats/memory.go
new file mode 100644
index 000000000..0700d92de
--- /dev/null
+++ b/weed/stats/memory.go
@@ -0,0 +1,28 @@
+package stats
+
+import (
+ "runtime"
+)
+
+type MemStatus struct {
+ Goroutines int
+ All uint64
+ Used uint64
+ Free uint64
+ Self uint64
+ Heap uint64
+ Stack uint64
+}
+
+func MemStat() MemStatus {
+ mem := MemStatus{}
+ mem.Goroutines = runtime.NumGoroutine()
+ memStat := new(runtime.MemStats)
+ runtime.ReadMemStats(memStat)
+ mem.Self = memStat.Alloc
+ mem.Heap = memStat.HeapAlloc
+ mem.Stack = memStat.StackInuse
+
+ mem.fillInStatus()
+ return mem
+}
diff --git a/weed/stats/memory_notsupported.go b/weed/stats/memory_notsupported.go
new file mode 100644
index 000000000..ba8229364
--- /dev/null
+++ b/weed/stats/memory_notsupported.go
@@ -0,0 +1,7 @@
+// +build !linux
+
+package stats
+
+func (mem *MemStatus) fillInStatus() {
+ return
+}
diff --git a/weed/stats/memory_supported.go b/weed/stats/memory_supported.go
new file mode 100644
index 000000000..fd0c36d72
--- /dev/null
+++ b/weed/stats/memory_supported.go
@@ -0,0 +1,18 @@
+// +build linux
+
+package stats
+
+import (
+ "syscall"
+)
+
+func (mem *MemStatus) fillInStatus() {
+ //system memory usage
+ sysInfo := new(syscall.Sysinfo_t)
+ err := syscall.Sysinfo(sysInfo)
+ if err == nil {
+ mem.All = uint64(sysInfo.Totalram) //* uint64(syscall.Getpagesize())
+ mem.Free = uint64(sysInfo.Freeram) //* uint64(syscall.Getpagesize())
+ mem.Used = mem.All - mem.Free
+ }
+}
diff --git a/weed/stats/stats.go b/weed/stats/stats.go
new file mode 100644
index 000000000..09826152f
--- /dev/null
+++ b/weed/stats/stats.go
@@ -0,0 +1,113 @@
+package stats
+
+import (
+ "time"
+)
+
+type ServerStats struct {
+ Requests *DurationCounter
+ Connections *DurationCounter
+ AssignRequests *DurationCounter
+ ReadRequests *DurationCounter
+ WriteRequests *DurationCounter
+ DeleteRequests *DurationCounter
+ BytesIn *DurationCounter
+ BytesOut *DurationCounter
+}
+
+type Channels struct {
+ Connections chan *TimedValue
+ Requests chan *TimedValue
+ AssignRequests chan *TimedValue
+ ReadRequests chan *TimedValue
+ WriteRequests chan *TimedValue
+ DeleteRequests chan *TimedValue
+ BytesIn chan *TimedValue
+ BytesOut chan *TimedValue
+}
+
+var (
+ Chan *Channels
+)
+
+func init() {
+ Chan = &Channels{
+ Connections: make(chan *TimedValue, 100),
+ Requests: make(chan *TimedValue, 100),
+ AssignRequests: make(chan *TimedValue, 100),
+ ReadRequests: make(chan *TimedValue, 100),
+ WriteRequests: make(chan *TimedValue, 100),
+ DeleteRequests: make(chan *TimedValue, 100),
+ BytesIn: make(chan *TimedValue, 100),
+ BytesOut: make(chan *TimedValue, 100),
+ }
+}
+
+func NewServerStats() *ServerStats {
+ return &ServerStats{
+ Requests: NewDurationCounter(),
+ Connections: NewDurationCounter(),
+ AssignRequests: NewDurationCounter(),
+ ReadRequests: NewDurationCounter(),
+ WriteRequests: NewDurationCounter(),
+ DeleteRequests: NewDurationCounter(),
+ BytesIn: NewDurationCounter(),
+ BytesOut: NewDurationCounter(),
+ }
+}
+
+func ConnectionOpen() {
+ Chan.Connections <- NewTimedValue(time.Now(), 1)
+}
+func ConnectionClose() {
+ Chan.Connections <- NewTimedValue(time.Now(), -1)
+}
+func RequestOpen() {
+ Chan.Requests <- NewTimedValue(time.Now(), 1)
+}
+func RequestClose() {
+ Chan.Requests <- NewTimedValue(time.Now(), -1)
+}
+func AssignRequest() {
+ Chan.AssignRequests <- NewTimedValue(time.Now(), 1)
+}
+func ReadRequest() {
+ Chan.ReadRequests <- NewTimedValue(time.Now(), 1)
+}
+func WriteRequest() {
+ Chan.WriteRequests <- NewTimedValue(time.Now(), 1)
+}
+func DeleteRequest() {
+ Chan.DeleteRequests <- NewTimedValue(time.Now(), 1)
+}
+func BytesIn(val int64) {
+ Chan.BytesIn <- NewTimedValue(time.Now(), val)
+}
+func BytesOut(val int64) {
+ Chan.BytesOut <- NewTimedValue(time.Now(), val)
+}
+
+func (ss *ServerStats) Start() {
+ for {
+ select {
+ case tv := <-Chan.Connections:
+ ss.Connections.Add(tv)
+ case tv := <-Chan.Requests:
+ ss.Requests.Add(tv)
+ case tv := <-Chan.AssignRequests:
+ ss.AssignRequests.Add(tv)
+ case tv := <-Chan.ReadRequests:
+ ss.ReadRequests.Add(tv)
+ case tv := <-Chan.WriteRequests:
+ ss.WriteRequests.Add(tv)
+ case tv := <-Chan.DeleteRequests:
+ ss.DeleteRequests.Add(tv)
+ case tv := <-Chan.BytesIn:
+ ss.BytesIn.Add(tv)
+ case tv := <-Chan.BytesOut:
+ ss.BytesOut.Add(tv)
+ }
+ }
+}
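A minimal usage sketch, mirroring how the servers in this commit wire the package together: one goroutine drains the global channels into a ServerStats instance while handlers report through helpers such as stats.ReadRequest().

package main

import (
	"fmt"
	"time"

	"github.com/chrislusf/seaweedfs/weed/stats"
)

func main() {
	serverStats := stats.NewServerStats()
	go serverStats.Start() // drain stats.Chan into the duration counters

	stats.ReadRequest() // what a GET handler would report
	stats.BytesOut(1024)

	time.Sleep(10 * time.Millisecond) // give Start() a moment to consume the channels
	// the UI handler reads the counters the same way while Start() keeps running
	fmt.Println("reads bucketed over the last minute:", serverStats.ReadRequests.MinuteCounter.Sum())
}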
diff --git a/weed/storage/compact_map.go b/weed/storage/compact_map.go
new file mode 100644
index 000000000..d4438d044
--- /dev/null
+++ b/weed/storage/compact_map.go
@@ -0,0 +1,207 @@
+package storage
+
+import (
+ "strconv"
+ "sync"
+)
+
+type NeedleValue struct {
+ Key Key
+ Offset uint32 `comment:"Volume offset"` //since aligned to 8 bytes, range is 4G*8=32G
+ Size uint32 `comment:"Size of the data portion"`
+}
+
+const (
+ batch = 100000
+)
+
+type Key uint64
+
+func (k Key) String() string {
+ return strconv.FormatUint(uint64(k), 10)
+}
+
+type CompactSection struct {
+ sync.RWMutex
+ values []NeedleValue
+ overflow map[Key]NeedleValue
+ start Key
+ end Key
+ counter int
+}
+
+func NewCompactSection(start Key) *CompactSection {
+ return &CompactSection{
+ values: make([]NeedleValue, batch),
+ overflow: make(map[Key]NeedleValue),
+ start: start,
+ }
+}
+
+//return old entry size
+func (cs *CompactSection) Set(key Key, offset uint32, size uint32) uint32 {
+ ret := uint32(0)
+ if key > cs.end {
+ cs.end = key
+ }
+ cs.Lock()
+ if i := cs.binarySearchValues(key); i >= 0 {
+ ret = cs.values[i].Size
+ //println("key", key, "old size", ret)
+ cs.values[i].Offset, cs.values[i].Size = offset, size
+ } else {
+ needOverflow := cs.counter >= batch
+ needOverflow = needOverflow || cs.counter > 0 && cs.values[cs.counter-1].Key > key
+ if needOverflow {
+ //println("start", cs.start, "counter", cs.counter, "key", key)
+ if oldValue, found := cs.overflow[key]; found {
+ ret = oldValue.Size
+ }
+ cs.overflow[key] = NeedleValue{Key: key, Offset: offset, Size: size}
+ } else {
+ p := &cs.values[cs.counter]
+ p.Key, p.Offset, p.Size = key, offset, size
+ //println("added index", cs.counter, "key", key, cs.values[cs.counter].Key)
+ cs.counter++
+ }
+ }
+ cs.Unlock()
+ return ret
+}
+
+//return old entry size
+func (cs *CompactSection) Delete(key Key) uint32 {
+ cs.Lock()
+ ret := uint32(0)
+ if i := cs.binarySearchValues(key); i >= 0 {
+ if cs.values[i].Size > 0 {
+ ret = cs.values[i].Size
+ cs.values[i].Size = 0
+ }
+ }
+ if v, found := cs.overflow[key]; found {
+ delete(cs.overflow, key)
+ ret = v.Size
+ }
+ cs.Unlock()
+ return ret
+}
+func (cs *CompactSection) Get(key Key) (*NeedleValue, bool) {
+ cs.RLock()
+ if v, ok := cs.overflow[key]; ok {
+ cs.RUnlock()
+ return &v, true
+ }
+ if i := cs.binarySearchValues(key); i >= 0 {
+ cs.RUnlock()
+ return &cs.values[i], true
+ }
+ cs.RUnlock()
+ return nil, false
+}
+func (cs *CompactSection) binarySearchValues(key Key) int {
+ l, h := 0, cs.counter-1
+ if h >= 0 && cs.values[h].Key < key {
+ return -2
+ }
+ //println("looking for key", key)
+ for l <= h {
+ m := (l + h) / 2
+ //println("mid", m, "key", cs.values[m].Key, cs.values[m].Offset, cs.values[m].Size)
+ if cs.values[m].Key < key {
+ l = m + 1
+ } else if key < cs.values[m].Key {
+ h = m - 1
+ } else {
+ //println("found", m)
+ return m
+ }
+ }
+ return -1
+}
+
+//This map assumes mostly inserting increasing keys
+type CompactMap struct {
+ list []*CompactSection
+}
+
+func NewCompactMap() CompactMap {
+ return CompactMap{}
+}
+
+func (cm *CompactMap) Set(key Key, offset uint32, size uint32) uint32 {
+ x := cm.binarySearchCompactSection(key)
+ if x < 0 {
+ //println(x, "creating", len(cm.list), "section, starting", key)
+ cm.list = append(cm.list, NewCompactSection(key))
+ x = len(cm.list) - 1
+ //keep compact section sorted by start
+ for x > 0 {
+ if cm.list[x-1].start > cm.list[x].start {
+ cm.list[x-1], cm.list[x] = cm.list[x], cm.list[x-1]
+ x = x - 1
+ } else {
+ break
+ }
+ }
+ }
+ return cm.list[x].Set(key, offset, size)
+}
+func (cm *CompactMap) Delete(key Key) uint32 {
+ x := cm.binarySearchCompactSection(key)
+ if x < 0 {
+ return uint32(0)
+ }
+ return cm.list[x].Delete(key)
+}
+func (cm *CompactMap) Get(key Key) (*NeedleValue, bool) {
+ x := cm.binarySearchCompactSection(key)
+ if x < 0 {
+ return nil, false
+ }
+ return cm.list[x].Get(key)
+}
+func (cm *CompactMap) binarySearchCompactSection(key Key) int {
+ l, h := 0, len(cm.list)-1
+ if h < 0 {
+ return -5
+ }
+ if cm.list[h].start <= key {
+ if cm.list[h].counter < batch || key <= cm.list[h].end {
+ return h
+ }
+ return -4
+ }
+ for l <= h {
+ m := (l + h) / 2
+ if key < cm.list[m].start {
+ h = m - 1
+ } else { // cm.list[m].start <= key
+ if cm.list[m+1].start <= key {
+ l = m + 1
+ } else {
+ return m
+ }
+ }
+ }
+ return -3
+}
+
+// Visit visits all entries or stop if any error when visiting
+func (cm *CompactMap) Visit(visit func(NeedleValue) error) error {
+ for _, cs := range cm.list {
+ for _, v := range cs.overflow {
+ if err := visit(v); err != nil {
+ return err
+ }
+ }
+ for _, v := range cs.values {
+ if _, found := cs.overflow[v.Key]; !found {
+ if err := visit(v); err != nil {
+ return err
+ }
+ }
+ }
+ }
+ return nil
+}
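An illustrative test, not part of this commit: keys inserted out of order inside a section land in that section's overflow map, and Visit still sees every entry exactly once.

package storage

import "testing"

func TestCompactMapVisitSketch(t *testing.T) {
	m := NewCompactMap()
	m.Set(Key(10), 10, 10)
	m.Set(Key(20), 20, 20)
	m.Set(Key(15), 15, 15) // out of order: stored in the section's overflow map

	total := uint32(0)
	if err := m.Visit(func(v NeedleValue) error {
		total += v.Size
		return nil
	}); err != nil {
		t.Fatal(err)
	}
	if total != 45 {
		t.Fatalf("expected Visit to see sizes 10+20+15=45, got %d", total)
	}
}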
diff --git a/weed/storage/compact_map_perf_test.go b/weed/storage/compact_map_perf_test.go
new file mode 100644
index 000000000..cc7669139
--- /dev/null
+++ b/weed/storage/compact_map_perf_test.go
@@ -0,0 +1,45 @@
+package storage
+
+import (
+ "log"
+ "os"
+ "testing"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+func TestMemoryUsage(t *testing.T) {
+
+ indexFile, ie := os.OpenFile("../../test/sample.idx", os.O_RDWR|os.O_RDONLY, 0644)
+ if ie != nil {
+ log.Fatalln(ie)
+ }
+ LoadNewNeedleMap(indexFile)
+
+}
+
+func LoadNewNeedleMap(file *os.File) CompactMap {
+ m := NewCompactMap()
+ bytes := make([]byte, 16*1024)
+ count, e := file.Read(bytes)
+ if count > 0 {
+ fstat, _ := file.Stat()
+ glog.V(0).Infoln("Loading index file", fstat.Name(), "size", fstat.Size())
+ }
+ for count > 0 && e == nil {
+ for i := 0; i < count; i += 16 {
+ key := util.BytesToUint64(bytes[i : i+8])
+ offset := util.BytesToUint32(bytes[i+8 : i+12])
+ size := util.BytesToUint32(bytes[i+12 : i+16])
+ if offset > 0 {
+ m.Set(Key(key), offset, size)
+ } else {
+ //delete(m, key)
+ }
+ }
+
+ count, e = file.Read(bytes)
+ }
+ return m
+}
diff --git a/weed/storage/compact_map_test.go b/weed/storage/compact_map_test.go
new file mode 100644
index 000000000..1ccb48edb
--- /dev/null
+++ b/weed/storage/compact_map_test.go
@@ -0,0 +1,77 @@
+package storage
+
+import (
+ "testing"
+)
+
+func TestIssue52(t *testing.T) {
+ m := NewCompactMap()
+ m.Set(Key(10002), 10002, 10002)
+ if element, ok := m.Get(Key(10002)); ok {
+ println("key", 10002, "ok", ok, element.Key, element.Offset, element.Size)
+ }
+ m.Set(Key(10001), 10001, 10001)
+ if element, ok := m.Get(Key(10002)); ok {
+ println("key", 10002, "ok", ok, element.Key, element.Offset, element.Size)
+ } else {
+ t.Fatal("key 10002 missing after setting 10001")
+ }
+}
+
+func TestXYZ(t *testing.T) {
+ m := NewCompactMap()
+ for i := uint32(0); i < 100*batch; i += 2 {
+ m.Set(Key(i), i, i)
+ }
+
+ for i := uint32(0); i < 100*batch; i += 37 {
+ m.Delete(Key(i))
+ }
+
+ for i := uint32(0); i < 10*batch; i += 3 {
+ m.Set(Key(i), i+11, i+5)
+ }
+
+ // for i := uint32(0); i < 100; i++ {
+ // if v := m.Get(Key(i)); v != nil {
+ // glog.V(4).Infoln(i, "=", v.Key, v.Offset, v.Size)
+ // }
+ // }
+
+ for i := uint32(0); i < 10*batch; i++ {
+ v, ok := m.Get(Key(i))
+ if i%3 == 0 {
+ if !ok {
+ t.Fatal("key", i, "missing!")
+ }
+ if v.Size != i+5 {
+ t.Fatal("key", i, "size", v.Size)
+ }
+ } else if i%37 == 0 {
+ if ok && v.Size > 0 {
+ t.Fatal("key", i, "should have been deleted needle value", v)
+ }
+ } else if i%2 == 0 {
+ if v.Size != i {
+ t.Fatal("key", i, "size", v.Size)
+ }
+ }
+ }
+
+ for i := uint32(10 * batch); i < 100*batch; i++ {
+ v, ok := m.Get(Key(i))
+ if i%37 == 0 {
+ if ok && v.Size > 0 {
+ t.Fatal("key", i, "should have been deleted needle value", v)
+ }
+ } else if i%2 == 0 {
+ if v == nil {
+ t.Fatal("key", i, "missing")
+ }
+ if v.Size != i {
+ t.Fatal("key", i, "size", v.Size)
+ }
+ }
+ }
+
+}
diff --git a/weed/storage/crc.go b/weed/storage/crc.go
new file mode 100644
index 000000000..494937784
--- /dev/null
+++ b/weed/storage/crc.go
@@ -0,0 +1,30 @@
+package storage
+
+import (
+ "fmt"
+ "github.com/klauspost/crc32"
+
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+var table = crc32.MakeTable(crc32.Castagnoli)
+
+type CRC uint32
+
+func NewCRC(b []byte) CRC {
+ return CRC(0).Update(b)
+}
+
+func (c CRC) Update(b []byte) CRC {
+ return CRC(crc32.Update(uint32(c), table, b))
+}
+
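+// Value returns the checksum with a final rotation and added constant (a "masked" form), so the stored value differs from the raw CRC32C.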
+func (c CRC) Value() uint32 {
+ return uint32(c>>15|c<<17) + 0xa282ead8
+}
+
+func (n *Needle) Etag() string {
+ bits := make([]byte, 4)
+ util.Uint32toBytes(bits, uint32(n.Checksum))
+ return fmt.Sprintf("\"%x\"", bits)
+}
diff --git a/weed/storage/disk_location.go b/weed/storage/disk_location.go
new file mode 100644
index 000000000..cc3c83b63
--- /dev/null
+++ b/weed/storage/disk_location.go
@@ -0,0 +1,73 @@
+package storage
+
+import (
+ "io/ioutil"
+ "strings"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+type DiskLocation struct {
+ Directory string
+ MaxVolumeCount int
+ volumes map[VolumeId]*Volume
+}
+
+func NewDiskLocation(dir string, maxVolumeCount int) *DiskLocation {
+ location := &DiskLocation{Directory: dir, MaxVolumeCount: maxVolumeCount}
+ location.volumes = make(map[VolumeId]*Volume)
+ return location
+}
+
+func (l *DiskLocation) loadExistingVolumes(needleMapKind NeedleMapType) {
+
+ if dirs, err := ioutil.ReadDir(l.Directory); err == nil {
+ for _, dir := range dirs {
+ name := dir.Name()
+ if !dir.IsDir() && strings.HasSuffix(name, ".dat") {
+ collection := ""
+ base := name[:len(name)-len(".dat")]
+ i := strings.LastIndex(base, "_")
+ if i > 0 {
+ collection, base = base[0:i], base[i+1:]
+ }
+ if vid, err := NewVolumeId(base); err == nil {
+ if l.volumes[vid] == nil {
+ if v, e := NewVolume(l.Directory, collection, vid, needleMapKind, nil, nil); e == nil {
+ l.volumes[vid] = v
+ glog.V(0).Infof("data file %s, replicaPlacement=%s v=%d size=%d ttl=%s", l.Directory+"/"+name, v.ReplicaPlacement, v.Version(), v.Size(), v.Ttl.String())
+ } else {
+ glog.V(0).Infof("new volume %s error %s", name, e)
+ }
+ }
+ }
+ }
+ }
+ }
+ glog.V(0).Infoln("Store started on dir:", l.Directory, "with", len(l.volumes), "volumes", "max", l.MaxVolumeCount)
+}
+
+func (l *DiskLocation) DeleteCollectionFromDiskLocation(collection string) (e error) {
+ for k, v := range l.volumes {
+ if v.Collection == collection {
+ e = l.deleteVolumeById(k)
+ if e != nil {
+ return
+ }
+ }
+ }
+ return
+}
+
+func (l *DiskLocation) deleteVolumeById(vid VolumeId) (e error) {
+ v, ok := l.volumes[vid]
+ if !ok {
+ return
+ }
+ e = v.Destroy()
+ if e != nil {
+ return
+ }
+ delete(l.volumes, vid)
+ return
+}
diff --git a/weed/storage/file_id.go b/weed/storage/file_id.go
new file mode 100644
index 000000000..4cfdb16fa
--- /dev/null
+++ b/weed/storage/file_id.go
@@ -0,0 +1,43 @@
+package storage
+
+import (
+ "encoding/hex"
+ "errors"
+ "strings"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+type FileId struct {
+ VolumeId VolumeId
+ Key uint64
+ Hashcode uint32
+}
+
+func NewFileIdFromNeedle(VolumeId VolumeId, n *Needle) *FileId {
+ return &FileId{VolumeId: VolumeId, Key: n.Id, Hashcode: n.Cookie}
+}
+func NewFileId(VolumeId VolumeId, Key uint64, Hashcode uint32) *FileId {
+ return &FileId{VolumeId: VolumeId, Key: Key, Hashcode: Hashcode}
+}
+func ParseFileId(fid string) (*FileId, error) {
+ a := strings.Split(fid, ",")
+ if len(a) != 2 {
+ glog.V(1).Infoln("Invalid fid ", fid, ", split length ", len(a))
+ return nil, errors.New("Invalid fid " + fid)
+ }
+ vid_string, key_hash_string := a[0], a[1]
+ volumeId, _ := NewVolumeId(vid_string)
+ key, hash, e := ParseKeyHash(key_hash_string)
+ return &FileId{VolumeId: volumeId, Key: key, Hashcode: hash}, e
+}
+func (n *FileId) String() string {
+ bytes := make([]byte, 12)
+ util.Uint64toBytes(bytes[0:8], n.Key)
+ util.Uint32toBytes(bytes[8:12], n.Hashcode)
+ nonzero_index := 0
+ for ; bytes[nonzero_index] == 0; nonzero_index++ {
+ }
+ return n.VolumeId.String() + "," + hex.EncodeToString(bytes[nonzero_index:])
+}
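An illustrative round-trip test, not part of this commit: String() renders a file id as "volume id, then key and cookie in hex" with the key's leading zero bytes trimmed (for example "3,0123456789"), and ParseFileId recovers the parts.

package storage

import "testing"

func TestFileIdRoundTripSketch(t *testing.T) {
	fid := NewFileId(VolumeId(3), 1, 0x23456789)
	parsed, err := ParseFileId(fid.String())
	if err != nil {
		t.Fatal(err)
	}
	if parsed.VolumeId != VolumeId(3) || parsed.Key != 1 || parsed.Hashcode != 0x23456789 {
		t.Fatal("round trip mismatch:", fid.String(), parsed)
	}
}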
diff --git a/weed/storage/needle.go b/weed/storage/needle.go
new file mode 100644
index 000000000..29549b323
--- /dev/null
+++ b/weed/storage/needle.go
@@ -0,0 +1,231 @@
+package storage
+
+import (
+ "fmt"
+ "io/ioutil"
+ "mime"
+ "net/http"
+ "path"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/images"
+ "github.com/chrislusf/seaweedfs/weed/operation"
+)
+
+const (
+ NeedleHeaderSize = 16 //should never change this
+ NeedlePaddingSize = 8
+ NeedleChecksumSize = 4
+ MaxPossibleVolumeSize = 4 * 1024 * 1024 * 1024 * 8
+)
+
+/*
+* A Needle represents an uploaded and stored file.
+* Needle file size is limited to 4GB for now.
+ */
+type Needle struct {
+ Cookie uint32 `comment:"random number to mitigate brute force lookups"`
+ Id uint64 `comment:"needle id"`
+ Size uint32 `comment:"sum of DataSize,Data,NameSize,Name,MimeSize,Mime"`
+
+ DataSize uint32 `comment:"Data size"` //version2
+ Data []byte `comment:"The actual file data"`
+ Flags byte `comment:"boolean flags"` //version2
+ NameSize uint8 //version2
+ Name []byte `comment:"maximum 256 characters"` //version2
+ MimeSize uint8 //version2
+ Mime []byte `comment:"maximum 256 characters"` //version2
+ LastModified uint64 //only store LastModifiedBytesLength bytes, which is 5 bytes to disk
+ Ttl *TTL
+
+ Checksum CRC `comment:"CRC32 to check integrity"`
+ Padding []byte `comment:"Aligned to 8 bytes"`
+
+	rawBlock *Block // underlying supporting []byte, fetched from and released back into a pool
+}
+
+func (n *Needle) String() (str string) {
+ str = fmt.Sprintf("Cookie:%d, Id:%d, Size:%d, DataSize:%d, Name: %s, Mime: %s", n.Cookie, n.Id, n.Size, n.DataSize, n.Name, n.Mime)
+ return
+}
+
+func ParseUpload(r *http.Request) (
+ fileName string, data []byte, mimeType string, isGzipped bool,
+ modifiedTime uint64, ttl *TTL, isChunkedFile bool, e error) {
+ form, fe := r.MultipartReader()
+ if fe != nil {
+ glog.V(0).Infoln("MultipartReader [ERROR]", fe)
+ e = fe
+ return
+ }
+
+ //first multi-part item
+ part, fe := form.NextPart()
+ if fe != nil {
+ glog.V(0).Infoln("Reading Multi part [ERROR]", fe)
+ e = fe
+ return
+ }
+
+ fileName = part.FileName()
+ if fileName != "" {
+ fileName = path.Base(fileName)
+ }
+
+ data, e = ioutil.ReadAll(part)
+ if e != nil {
+ glog.V(0).Infoln("Reading Content [ERROR]", e)
+ return
+ }
+
+	// if the filename is an empty string, search the other multi-part items for one
+ for fileName == "" {
+ part2, fe := form.NextPart()
+ if fe != nil {
+ break // no more or on error, just safely break
+ }
+
+ fName := part2.FileName()
+
+		// use the first multi-part item that has a filename
+ if fName != "" {
+ data2, fe2 := ioutil.ReadAll(part2)
+ if fe2 != nil {
+ glog.V(0).Infoln("Reading Content [ERROR]", fe2)
+ e = fe2
+ return
+ }
+
+ //update
+ data = data2
+ fileName = path.Base(fName)
+ break
+ }
+ }
+
+ dotIndex := strings.LastIndex(fileName, ".")
+ ext, mtype := "", ""
+ if dotIndex > 0 {
+ ext = strings.ToLower(fileName[dotIndex:])
+ mtype = mime.TypeByExtension(ext)
+ }
+ contentType := part.Header.Get("Content-Type")
+ if contentType != "" && mtype != contentType {
+			mimeType = contentType // only return the mime type if it cannot be deduced from the extension
+ mtype = contentType
+ }
+ if part.Header.Get("Content-Encoding") == "gzip" {
+ isGzipped = true
+ } else if operation.IsGzippable(ext, mtype) {
+ if data, e = operation.GzipData(data); e != nil {
+ return
+ }
+ isGzipped = true
+ }
+ if ext == ".gz" {
+ isGzipped = true
+ }
+ if strings.HasSuffix(fileName, ".gz") &&
+ !strings.HasSuffix(fileName, ".tar.gz") {
+ fileName = fileName[:len(fileName)-3]
+ }
+ modifiedTime, _ = strconv.ParseUint(r.FormValue("ts"), 10, 64)
+ ttl, _ = ReadTTL(r.FormValue("ttl"))
+ isChunkedFile, _ = strconv.ParseBool(r.FormValue("cm"))
+ return
+}
+func NewNeedle(r *http.Request, fixJpgOrientation bool) (n *Needle, e error) {
+ fname, mimeType, isGzipped, isChunkedFile := "", "", false, false
+ n = new(Needle)
+ fname, n.Data, mimeType, isGzipped, n.LastModified, n.Ttl, isChunkedFile, e = ParseUpload(r)
+ if e != nil {
+ return
+ }
+ if len(fname) < 256 {
+ n.Name = []byte(fname)
+ n.SetHasName()
+ }
+ if len(mimeType) < 256 {
+ n.Mime = []byte(mimeType)
+ n.SetHasMime()
+ }
+ if isGzipped {
+ n.SetGzipped()
+ }
+ if n.LastModified == 0 {
+ n.LastModified = uint64(time.Now().Unix())
+ }
+ n.SetHasLastModifiedDate()
+ if n.Ttl != EMPTY_TTL {
+ n.SetHasTtl()
+ }
+
+ if isChunkedFile {
+ n.SetIsChunkManifest()
+ }
+
+ if fixJpgOrientation {
+ loweredName := strings.ToLower(fname)
+ if mimeType == "image/jpeg" || strings.HasSuffix(loweredName, ".jpg") || strings.HasSuffix(loweredName, ".jpeg") {
+ n.Data = images.FixJpgOrientation(n.Data)
+ }
+ }
+
+ n.Checksum = NewCRC(n.Data)
+
+ commaSep := strings.LastIndex(r.URL.Path, ",")
+ dotSep := strings.LastIndex(r.URL.Path, ".")
+ fid := r.URL.Path[commaSep+1:]
+ if dotSep > 0 {
+ fid = r.URL.Path[commaSep+1 : dotSep]
+ }
+
+ e = n.ParsePath(fid)
+
+ return
+}
+func (n *Needle) ParsePath(fid string) (err error) {
+ length := len(fid)
+ if length <= 8 {
+ return fmt.Errorf("Invalid fid: %s", fid)
+ }
+ delta := ""
+ deltaIndex := strings.LastIndex(fid, "_")
+ if deltaIndex > 0 {
+ fid, delta = fid[0:deltaIndex], fid[deltaIndex+1:]
+ }
+ n.Id, n.Cookie, err = ParseKeyHash(fid)
+ if err != nil {
+ return err
+ }
+ if delta != "" {
+ if d, e := strconv.ParseUint(delta, 10, 64); e == nil {
+ n.Id += d
+ } else {
+ return e
+ }
+ }
+ return err
+}
+
+func ParseKeyHash(key_hash_string string) (uint64, uint32, error) {
+ if len(key_hash_string) <= 8 {
+ return 0, 0, fmt.Errorf("KeyHash is too short.")
+ }
+ if len(key_hash_string) > 24 {
+ return 0, 0, fmt.Errorf("KeyHash is too long.")
+ }
+ split := len(key_hash_string) - 8
+ key, err := strconv.ParseUint(key_hash_string[:split], 16, 64)
+ if err != nil {
+ return 0, 0, fmt.Errorf("Parse key error: %v", err)
+ }
+ hash, err := strconv.ParseUint(key_hash_string[split:], 16, 32)
+ if err != nil {
+ return 0, 0, fmt.Errorf("Parse hash error: %v", err)
+ }
+ return key, uint32(hash), nil
+}
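An illustrative test, not part of this commit, of the "_delta" suffix handled by ParsePath above: the delta is parsed as a decimal number and added to the needle id, while the cookie comes from the last 8 hex characters.

package storage

import "testing"

func TestParsePathDeltaSketch(t *testing.T) {
	n := new(Needle)
	if err := n.ParsePath("0123456789_2"); err != nil {
		t.Fatal(err)
	}
	if n.Id != 3 || n.Cookie != 0x23456789 {
		t.Fatal("unexpected id/cookie:", n.Id, n.Cookie)
	}
}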
diff --git a/weed/storage/needle_byte_cache.go b/weed/storage/needle_byte_cache.go
new file mode 100644
index 000000000..ae35a48ba
--- /dev/null
+++ b/weed/storage/needle_byte_cache.go
@@ -0,0 +1,75 @@
+package storage
+
+import (
+ "fmt"
+ "os"
+ "sync/atomic"
+
+ "github.com/hashicorp/golang-lru"
+
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+var (
+ bytesCache *lru.Cache
+ bytesPool *util.BytesPool
+)
+
+/*
+There is one level of caching and one level of pooling.
+
+In pooling, all []byte buffers are fetched from and returned to the pool bytesPool.
+
+In caching, the cache key to *Block mapping is kept in the LRU cache bytesCache.
+*/
+func init() {
+ bytesPool = util.NewBytesPool()
+ bytesCache, _ = lru.NewWithEvict(512, func(key interface{}, value interface{}) {
+ value.(*Block).decreaseReference()
+ })
+}
+
+type Block struct {
+ Bytes []byte
+ refCount int32
+}
+
+func (block *Block) decreaseReference() {
+ if atomic.AddInt32(&block.refCount, -1) == 0 {
+ bytesPool.Put(block.Bytes)
+ }
+}
+func (block *Block) increaseReference() {
+ atomic.AddInt32(&block.refCount, 1)
+}
+
+// getBytesForFileBlock returns bytes from the LRU cache of []byte first, falling back to the bytes pool.
+// When a []byte block is evicted from the LRU cache, it is returned to the bytes pool.
+func getBytesForFileBlock(r *os.File, offset int64, readSize int) (dataSlice []byte, block *Block, err error) {
+ // check cache, return if found
+ cacheKey := fmt.Sprintf("%d:%d:%d", r.Fd(), offset>>3, readSize)
+ if obj, found := bytesCache.Get(cacheKey); found {
+ block = obj.(*Block)
+ block.increaseReference()
+ dataSlice = block.Bytes[0:readSize]
+ return dataSlice, block, nil
+ }
+
+ // get the []byte from pool
+ b := bytesPool.Get(readSize)
+ // refCount = 2, one by the bytesCache, one by the actual needle object
+ block = &Block{Bytes: b, refCount: 2}
+ dataSlice = block.Bytes[0:readSize]
+ _, err = r.ReadAt(dataSlice, offset)
+ bytesCache.Add(cacheKey, block)
+ return dataSlice, block, err
+}
+
+func (n *Needle) ReleaseMemory() {
+ if n.rawBlock != nil {
+ n.rawBlock.decreaseReference()
+ }
+}
+func ReleaseBytes(b []byte) {
+ bytesPool.Put(b)
+}
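A standalone sketch (not part of this commit) of the caching-plus-pooling pattern described above, using the same golang-lru eviction hook: each buffer carries a reference count, and whichever side releases last would return the buffer to the pool. The key string and sizes are made up.

package main

import (
	"fmt"
	"sync/atomic"

	"github.com/hashicorp/golang-lru"
)

// block mirrors the Block type above: a pooled buffer plus a reference count.
type block struct {
	bytes    []byte
	refCount int32
}

func (b *block) release() {
	if atomic.AddInt32(&b.refCount, -1) == 0 {
		// last reference gone; a real implementation returns b.bytes to the pool here
		fmt.Println("buffer returned to pool, len", len(b.bytes))
	}
}

func main() {
	cache, _ := lru.NewWithEvict(1, func(key, value interface{}) {
		value.(*block).release() // the cache gives up its reference on eviction
	})

	b := &block{bytes: make([]byte, 1024), refCount: 2} // one ref for the cache, one for the reader
	cache.Add("fd:0:1024", b)
	b.release()                                    // the reader is done with the needle data
	cache.Add("fd:1024:1024", &block{refCount: 1}) // evicts the first entry, dropping its last reference
}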
diff --git a/weed/storage/needle_map.go b/weed/storage/needle_map.go
new file mode 100644
index 000000000..05bc6e86c
--- /dev/null
+++ b/weed/storage/needle_map.go
@@ -0,0 +1,123 @@
+package storage
+
+import (
+ "fmt"
+ "io/ioutil"
+ "os"
+ "sync"
+
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+type NeedleMapType int
+
+const (
+ NeedleMapInMemory NeedleMapType = iota
+ NeedleMapLevelDb
+ NeedleMapBoltDb
+)
+
+type NeedleMapper interface {
+ Put(key uint64, offset uint32, size uint32) error
+ Get(key uint64) (element *NeedleValue, ok bool)
+ Delete(key uint64) error
+ Close()
+ Destroy() error
+ ContentSize() uint64
+ DeletedSize() uint64
+ FileCount() int
+ DeletedCount() int
+ MaxFileKey() uint64
+ IndexFileSize() uint64
+ IndexFileContent() ([]byte, error)
+ IndexFileName() string
+}
+
+type baseNeedleMapper struct {
+ indexFile *os.File
+ indexFileAccessLock sync.Mutex
+
+ mapMetric
+}
+
+func (nm *baseNeedleMapper) IndexFileSize() uint64 {
+ stat, err := nm.indexFile.Stat()
+ if err == nil {
+ return uint64(stat.Size())
+ }
+ return 0
+}
+
+func (nm *baseNeedleMapper) IndexFileName() string {
+ return nm.indexFile.Name()
+}
+
+func idxFileEntry(bytes []byte) (key uint64, offset uint32, size uint32) {
+ key = util.BytesToUint64(bytes[:8])
+ offset = util.BytesToUint32(bytes[8:12])
+ size = util.BytesToUint32(bytes[12:16])
+ return
+}
+func (nm *baseNeedleMapper) appendToIndexFile(key uint64, offset uint32, size uint32) error {
+ bytes := make([]byte, 16)
+ util.Uint64toBytes(bytes[0:8], key)
+ util.Uint32toBytes(bytes[8:12], offset)
+ util.Uint32toBytes(bytes[12:16], size)
+
+ nm.indexFileAccessLock.Lock()
+ defer nm.indexFileAccessLock.Unlock()
+ if _, err := nm.indexFile.Seek(0, 2); err != nil {
+ return fmt.Errorf("cannot seek end of indexfile %s: %v",
+ nm.indexFile.Name(), err)
+ }
+ _, err := nm.indexFile.Write(bytes)
+ return err
+}
+func (nm *baseNeedleMapper) IndexFileContent() ([]byte, error) {
+ nm.indexFileAccessLock.Lock()
+ defer nm.indexFileAccessLock.Unlock()
+ return ioutil.ReadFile(nm.indexFile.Name())
+}
+
+type mapMetric struct {
+ indexFile *os.File
+
+ DeletionCounter int `json:"DeletionCounter"`
+ FileCounter int `json:"FileCounter"`
+ DeletionByteCounter uint64 `json:"DeletionByteCounter"`
+ FileByteCounter uint64 `json:"FileByteCounter"`
+ MaximumFileKey uint64 `json:"MaxFileKey"`
+}
+
+func (mm *mapMetric) logDelete(deletedByteCount uint32) {
+ mm.DeletionByteCounter = mm.DeletionByteCounter + uint64(deletedByteCount)
+ mm.DeletionCounter++
+}
+
+func (mm *mapMetric) logPut(key uint64, oldSize uint32, newSize uint32) {
+ if key > mm.MaximumFileKey {
+ mm.MaximumFileKey = key
+ }
+ mm.FileCounter++
+ mm.FileByteCounter = mm.FileByteCounter + uint64(newSize)
+ if oldSize > 0 {
+ mm.DeletionCounter++
+ mm.DeletionByteCounter = mm.DeletionByteCounter + uint64(oldSize)
+ }
+}
+
+func (mm mapMetric) ContentSize() uint64 {
+ return mm.FileByteCounter
+}
+func (mm mapMetric) DeletedSize() uint64 {
+ return mm.DeletionByteCounter
+}
+func (mm mapMetric) FileCount() int {
+ return mm.FileCounter
+}
+func (mm mapMetric) DeletedCount() int {
+ return mm.DeletionCounter
+}
+func (mm mapMetric) MaxFileKey() uint64 {
+ return mm.MaximumFileKey
+}
diff --git a/weed/storage/needle_map_boltdb.go b/weed/storage/needle_map_boltdb.go
new file mode 100644
index 000000000..bd3edf28d
--- /dev/null
+++ b/weed/storage/needle_map_boltdb.go
@@ -0,0 +1,165 @@
+package storage
+
+import (
+ "fmt"
+ "os"
+
+ "github.com/boltdb/bolt"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+type BoltDbNeedleMap struct {
+ dbFileName string
+ db *bolt.DB
+ baseNeedleMapper
+}
+
+var boltdbBucket = []byte("weed")
+
+func NewBoltDbNeedleMap(dbFileName string, indexFile *os.File) (m *BoltDbNeedleMap, err error) {
+ m = &BoltDbNeedleMap{dbFileName: dbFileName}
+ m.indexFile = indexFile
+ if !isBoltDbFresh(dbFileName, indexFile) {
+ glog.V(1).Infof("Start to Generate %s from %s", dbFileName, indexFile.Name())
+ generateBoltDbFile(dbFileName, indexFile)
+ glog.V(1).Infof("Finished Generating %s from %s", dbFileName, indexFile.Name())
+ }
+ glog.V(1).Infof("Opening %s...", dbFileName)
+ if m.db, err = bolt.Open(dbFileName, 0644, nil); err != nil {
+ return
+ }
+ glog.V(1).Infof("Loading %s...", indexFile.Name())
+ nm, indexLoadError := LoadNeedleMap(indexFile)
+ if indexLoadError != nil {
+ return nil, indexLoadError
+ }
+ m.mapMetric = nm.mapMetric
+ return
+}
+
+func isBoltDbFresh(dbFileName string, indexFile *os.File) bool {
+ // normally we always write to index file first
+ dbLogFile, err := os.Open(dbFileName)
+ if err != nil {
+ return false
+ }
+ defer dbLogFile.Close()
+ dbStat, dbStatErr := dbLogFile.Stat()
+ indexStat, indexStatErr := indexFile.Stat()
+ if dbStatErr != nil || indexStatErr != nil {
+ glog.V(0).Infof("Can not stat file: %v and %v", dbStatErr, indexStatErr)
+ return false
+ }
+
+ return dbStat.ModTime().After(indexStat.ModTime())
+}
+
+func generateBoltDbFile(dbFileName string, indexFile *os.File) error {
+ db, err := bolt.Open(dbFileName, 0644, nil)
+ if err != nil {
+ return err
+ }
+ defer db.Close()
+ return WalkIndexFile(indexFile, func(key uint64, offset, size uint32) error {
+ if offset > 0 {
+ boltDbWrite(db, key, offset, size)
+ } else {
+ boltDbDelete(db, key)
+ }
+ return nil
+ })
+}
+
+func (m *BoltDbNeedleMap) Get(key uint64) (element *NeedleValue, ok bool) {
+ bytes := make([]byte, 8)
+ var data []byte
+ util.Uint64toBytes(bytes, key)
+ err := m.db.View(func(tx *bolt.Tx) error {
+ bucket := tx.Bucket(boltdbBucket)
+ if bucket == nil {
+ return fmt.Errorf("Bucket %q not found!", boltdbBucket)
+ }
+
+ data = bucket.Get(bytes)
+ return nil
+ })
+
+ if err != nil || len(data) != 8 {
+ return nil, false
+ }
+ offset := util.BytesToUint32(data[0:4])
+ size := util.BytesToUint32(data[4:8])
+ return &NeedleValue{Key: Key(key), Offset: offset, Size: size}, true
+}
+
+func (m *BoltDbNeedleMap) Put(key uint64, offset uint32, size uint32) error {
+ var oldSize uint32
+ if oldNeedle, ok := m.Get(key); ok {
+ oldSize = oldNeedle.Size
+ }
+ m.logPut(key, oldSize, size)
+ // write to index file first
+ if err := m.appendToIndexFile(key, offset, size); err != nil {
+ return fmt.Errorf("cannot write to indexfile %s: %v", m.indexFile.Name(), err)
+ }
+ return boltDbWrite(m.db, key, offset, size)
+}
+
+func boltDbWrite(db *bolt.DB,
+ key uint64, offset uint32, size uint32) error {
+ bytes := make([]byte, 16)
+ util.Uint64toBytes(bytes[0:8], key)
+ util.Uint32toBytes(bytes[8:12], offset)
+ util.Uint32toBytes(bytes[12:16], size)
+ return db.Update(func(tx *bolt.Tx) error {
+ bucket, err := tx.CreateBucketIfNotExists(boltdbBucket)
+ if err != nil {
+ return err
+ }
+
+ err = bucket.Put(bytes[0:8], bytes[8:16])
+ if err != nil {
+ return err
+ }
+ return nil
+ })
+}
+func boltDbDelete(db *bolt.DB, key uint64) error {
+ bytes := make([]byte, 8)
+ util.Uint64toBytes(bytes, key)
+ return db.Update(func(tx *bolt.Tx) error {
+ bucket, err := tx.CreateBucketIfNotExists(boltdbBucket)
+ if err != nil {
+ return err
+ }
+
+ err = bucket.Delete(bytes)
+ if err != nil {
+ return err
+ }
+ return nil
+ })
+}
+
+func (m *BoltDbNeedleMap) Delete(key uint64) error {
+ if oldNeedle, ok := m.Get(key); ok {
+ m.logDelete(oldNeedle.Size)
+ }
+ // write to index file first
+ if err := m.appendToIndexFile(key, 0, 0); err != nil {
+ return err
+ }
+ return boltDbDelete(m.db, key)
+}
+
+func (m *BoltDbNeedleMap) Close() {
+ m.db.Close()
+}
+
+func (m *BoltDbNeedleMap) Destroy() error {
+ m.Close()
+ os.Remove(m.indexFile.Name())
+ return os.Remove(m.dbFileName)
+}
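A hypothetical sketch (not part of this commit) of creating and using a BoltDbNeedleMap; the file names are made up. Note the design choice visible above: every Put/Delete appends to the plain .idx file first, so the bolt database is only a regenerable cache whose freshness is judged by modification time.

package main

import (
	"log"
	"os"

	"github.com/chrislusf/seaweedfs/weed/storage"
)

func main() {
	idx, err := os.OpenFile("1.idx", os.O_RDWR|os.O_CREATE, 0644) // hypothetical index file
	if err != nil {
		log.Fatal(err)
	}
	nm, err := storage.NewBoltDbNeedleMap("1.bdb", idx) // regenerated from 1.idx if stale
	if err != nil {
		log.Fatal(err)
	}
	defer nm.Close()

	if err := nm.Put(42, 1, 128); err != nil { // key, offset (in padding units), size
		log.Fatal(err)
	}
	if v, ok := nm.Get(42); ok {
		log.Printf("offset=%d size=%d", v.Offset, v.Size)
	}
}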
diff --git a/weed/storage/needle_map_leveldb.go b/weed/storage/needle_map_leveldb.go
new file mode 100644
index 000000000..1789dbb12
--- /dev/null
+++ b/weed/storage/needle_map_leveldb.go
@@ -0,0 +1,134 @@
+package storage
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/util"
+ "github.com/syndtr/goleveldb/leveldb"
+)
+
+type LevelDbNeedleMap struct {
+ dbFileName string
+ db *leveldb.DB
+ baseNeedleMapper
+}
+
+func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File) (m *LevelDbNeedleMap, err error) {
+ m = &LevelDbNeedleMap{dbFileName: dbFileName}
+ m.indexFile = indexFile
+ if !isLevelDbFresh(dbFileName, indexFile) {
+ glog.V(1).Infof("Start to Generate %s from %s", dbFileName, indexFile.Name())
+ generateLevelDbFile(dbFileName, indexFile)
+ glog.V(1).Infof("Finished Generating %s from %s", dbFileName, indexFile.Name())
+ }
+ glog.V(1).Infof("Opening %s...", dbFileName)
+ if m.db, err = leveldb.OpenFile(dbFileName, nil); err != nil {
+ return
+ }
+ glog.V(1).Infof("Loading %s...", indexFile.Name())
+ nm, indexLoadError := LoadNeedleMap(indexFile)
+ if indexLoadError != nil {
+ return nil, indexLoadError
+ }
+ m.mapMetric = nm.mapMetric
+ return
+}
+
+func isLevelDbFresh(dbFileName string, indexFile *os.File) bool {
+ // normally we always write to index file first
+ dbLogFile, err := os.Open(filepath.Join(dbFileName, "LOG"))
+ if err != nil {
+ return false
+ }
+ defer dbLogFile.Close()
+ dbStat, dbStatErr := dbLogFile.Stat()
+ indexStat, indexStatErr := indexFile.Stat()
+ if dbStatErr != nil || indexStatErr != nil {
+ glog.V(0).Infof("Can not stat file: %v and %v", dbStatErr, indexStatErr)
+ return false
+ }
+
+ return dbStat.ModTime().After(indexStat.ModTime())
+}
+
+func generateLevelDbFile(dbFileName string, indexFile *os.File) error {
+ db, err := leveldb.OpenFile(dbFileName, nil)
+ if err != nil {
+ return err
+ }
+ defer db.Close()
+ return WalkIndexFile(indexFile, func(key uint64, offset, size uint32) error {
+ if offset > 0 {
+ levelDbWrite(db, key, offset, size)
+ } else {
+ levelDbDelete(db, key)
+ }
+ return nil
+ })
+}
+
+func (m *LevelDbNeedleMap) Get(key uint64) (element *NeedleValue, ok bool) {
+ bytes := make([]byte, 8)
+ util.Uint64toBytes(bytes, key)
+ data, err := m.db.Get(bytes, nil)
+ if err != nil || len(data) != 8 {
+ return nil, false
+ }
+ offset := util.BytesToUint32(data[0:4])
+ size := util.BytesToUint32(data[4:8])
+ return &NeedleValue{Key: Key(key), Offset: offset, Size: size}, true
+}
+
+func (m *LevelDbNeedleMap) Put(key uint64, offset uint32, size uint32) error {
+ var oldSize uint32
+ if oldNeedle, ok := m.Get(key); ok {
+ oldSize = oldNeedle.Size
+ }
+ m.logPut(key, oldSize, size)
+ // write to index file first
+ if err := m.appendToIndexFile(key, offset, size); err != nil {
+ return fmt.Errorf("cannot write to indexfile %s: %v", m.indexFile.Name(), err)
+ }
+ return levelDbWrite(m.db, key, offset, size)
+}
+
+func levelDbWrite(db *leveldb.DB,
+ key uint64, offset uint32, size uint32) error {
+ bytes := make([]byte, 16)
+ util.Uint64toBytes(bytes[0:8], key)
+ util.Uint32toBytes(bytes[8:12], offset)
+ util.Uint32toBytes(bytes[12:16], size)
+ if err := db.Put(bytes[0:8], bytes[8:16], nil); err != nil {
+ return fmt.Errorf("failed to write leveldb: %v", err)
+ }
+ return nil
+}
+func levelDbDelete(db *leveldb.DB, key uint64) error {
+ bytes := make([]byte, 8)
+ util.Uint64toBytes(bytes, key)
+ return db.Delete(bytes, nil)
+}
+
+func (m *LevelDbNeedleMap) Delete(key uint64) error {
+ if oldNeedle, ok := m.Get(key); ok {
+ m.logDelete(oldNeedle.Size)
+ }
+ // write to index file first
+ if err := m.appendToIndexFile(key, 0, 0); err != nil {
+ return err
+ }
+ return levelDbDelete(m.db, key)
+}
+
+func (m *LevelDbNeedleMap) Close() {
+ m.db.Close()
+}
+
+func (m *LevelDbNeedleMap) Destroy() error {
+ m.Close()
+ os.Remove(m.indexFile.Name())
+ return os.Remove(m.dbFileName)
+}
diff --git a/weed/storage/needle_map_memory.go b/weed/storage/needle_map_memory.go
new file mode 100644
index 000000000..f2f4835df
--- /dev/null
+++ b/weed/storage/needle_map_memory.go
@@ -0,0 +1,106 @@
+package storage
+
+import (
+ "io"
+ "os"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+type NeedleMap struct {
+ m CompactMap
+
+ baseNeedleMapper
+}
+
+func NewNeedleMap(file *os.File) *NeedleMap {
+ nm := &NeedleMap{
+ m: NewCompactMap(),
+ }
+ nm.indexFile = file
+ return nm
+}
+
+const (
+ RowsToRead = 1024
+)
+
+func LoadNeedleMap(file *os.File) (*NeedleMap, error) {
+ nm := NewNeedleMap(file)
+ e := WalkIndexFile(file, func(key uint64, offset, size uint32) error {
+ if key > nm.MaximumFileKey {
+ nm.MaximumFileKey = key
+ }
+ nm.FileCounter++
+ nm.FileByteCounter = nm.FileByteCounter + uint64(size)
+ if offset > 0 {
+ oldSize := nm.m.Set(Key(key), offset, size)
+ glog.V(3).Infoln("reading key", key, "offset", offset*NeedlePaddingSize, "size", size, "oldSize", oldSize)
+ if oldSize > 0 {
+ nm.DeletionCounter++
+ nm.DeletionByteCounter = nm.DeletionByteCounter + uint64(oldSize)
+ }
+ } else {
+ oldSize := nm.m.Delete(Key(key))
+ glog.V(3).Infoln("removing key", key, "offset", offset*NeedlePaddingSize, "size", size, "oldSize", oldSize)
+ nm.DeletionCounter++
+ nm.DeletionByteCounter = nm.DeletionByteCounter + uint64(oldSize)
+ }
+ return nil
+ })
+ glog.V(1).Infoln("max file key:", nm.MaximumFileKey)
+ return nm, e
+}
+
+// WalkIndexFile walks through the index file and calls fn with each (key, offset, size) entry.
+// It stops early and returns the first error returned by fn.
+func WalkIndexFile(r *os.File, fn func(key uint64, offset, size uint32) error) error {
+ var readerOffset int64
+ bytes := make([]byte, 16*RowsToRead)
+ count, e := r.ReadAt(bytes, readerOffset)
+ glog.V(3).Infoln("file", r.Name(), "readerOffset", readerOffset, "count", count, "e", e)
+ readerOffset += int64(count)
+ var (
+ key uint64
+ offset, size uint32
+ i int
+ )
+
+ for count > 0 && e == nil || e == io.EOF {
+ for i = 0; i+16 <= count; i += 16 {
+ key, offset, size = idxFileEntry(bytes[i : i+16])
+ if e = fn(key, offset, size); e != nil {
+ return e
+ }
+ }
+ if e == io.EOF {
+ return nil
+ }
+ count, e = r.ReadAt(bytes, readerOffset)
+ glog.V(3).Infoln("file", r.Name(), "readerOffset", readerOffset, "count", count, "e", e)
+ readerOffset += int64(count)
+ }
+ return e
+}
+
+func (nm *NeedleMap) Put(key uint64, offset uint32, size uint32) error {
+ oldSize := nm.m.Set(Key(key), offset, size)
+ nm.logPut(key, oldSize, size)
+ return nm.appendToIndexFile(key, offset, size)
+}
+func (nm *NeedleMap) Get(key uint64) (element *NeedleValue, ok bool) {
+ element, ok = nm.m.Get(Key(key))
+ return
+}
+func (nm *NeedleMap) Delete(key uint64) error {
+ deletedBytes := nm.m.Delete(Key(key))
+ nm.logDelete(deletedBytes)
+ return nm.appendToIndexFile(key, 0, 0)
+}
+func (nm *NeedleMap) Close() {
+ _ = nm.indexFile.Close()
+}
+func (nm *NeedleMap) Destroy() error {
+ nm.Close()
+ return os.Remove(nm.indexFile.Name())
+}
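For illustration (not part of this commit), WalkIndexFile can be used on its own to dump an .idx file; each 16-byte entry is (key, offset, size), and an offset of 0 marks a deletion, as LoadNeedleMap above shows. The file name is hypothetical.

package main

import (
	"fmt"
	"log"
	"os"

	"github.com/chrislusf/seaweedfs/weed/storage"
)

func main() {
	f, err := os.Open("1.idx") // hypothetical volume index file
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	err = storage.WalkIndexFile(f, func(key uint64, offset, size uint32) error {
		if offset == 0 {
			fmt.Printf("key %d deleted\n", key)
		} else {
			fmt.Printf("key %d offset %d size %d\n", key, offset, size)
		}
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}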
diff --git a/weed/storage/needle_read_write.go b/weed/storage/needle_read_write.go
new file mode 100644
index 000000000..2f26147d6
--- /dev/null
+++ b/weed/storage/needle_read_write.go
@@ -0,0 +1,291 @@
+package storage
+
+import (
+ "errors"
+ "fmt"
+ "io"
+ "os"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+const (
+ FlagGzip = 0x01
+ FlagHasName = 0x02
+ FlagHasMime = 0x04
+ FlagHasLastModifiedDate = 0x08
+ FlagHasTtl = 0x10
+ FlagIsChunkManifest = 0x80
+ LastModifiedBytesLength = 5
+ TtlBytesLength = 2
+)
+
+func (n *Needle) DiskSize() int64 {
+ padding := NeedlePaddingSize - ((NeedleHeaderSize + int64(n.Size) + NeedleChecksumSize) % NeedlePaddingSize)
+ return NeedleHeaderSize + int64(n.Size) + padding + NeedleChecksumSize
+}
+func (n *Needle) Append(w io.Writer, version Version) (size uint32, err error) {
+ if s, ok := w.(io.Seeker); ok {
+ if end, e := s.Seek(0, 1); e == nil {
+ defer func(s io.Seeker, off int64) {
+ if err != nil {
+ if _, e = s.Seek(off, 0); e != nil {
+ glog.V(0).Infof("Failed to seek %s back to %d with error: %v", w, off, e)
+ }
+ }
+ }(s, end)
+ } else {
+ err = fmt.Errorf("Cannot Read Current Volume Position: %v", e)
+ return
+ }
+ }
+ switch version {
+ case Version1:
+ header := make([]byte, NeedleHeaderSize)
+ util.Uint32toBytes(header[0:4], n.Cookie)
+ util.Uint64toBytes(header[4:12], n.Id)
+ n.Size = uint32(len(n.Data))
+ size = n.Size
+ util.Uint32toBytes(header[12:16], n.Size)
+ if _, err = w.Write(header); err != nil {
+ return
+ }
+ if _, err = w.Write(n.Data); err != nil {
+ return
+ }
+ padding := NeedlePaddingSize - ((NeedleHeaderSize + n.Size + NeedleChecksumSize) % NeedlePaddingSize)
+ util.Uint32toBytes(header[0:NeedleChecksumSize], n.Checksum.Value())
+ _, err = w.Write(header[0 : NeedleChecksumSize+padding])
+ return
+ case Version2:
+ header := make([]byte, NeedleHeaderSize)
+ util.Uint32toBytes(header[0:4], n.Cookie)
+ util.Uint64toBytes(header[4:12], n.Id)
+ n.DataSize, n.NameSize, n.MimeSize = uint32(len(n.Data)), uint8(len(n.Name)), uint8(len(n.Mime))
+ if n.DataSize > 0 {
+ n.Size = 4 + n.DataSize + 1
+ if n.HasName() {
+ n.Size = n.Size + 1 + uint32(n.NameSize)
+ }
+ if n.HasMime() {
+ n.Size = n.Size + 1 + uint32(n.MimeSize)
+ }
+ if n.HasLastModifiedDate() {
+ n.Size = n.Size + LastModifiedBytesLength
+ }
+ if n.HasTtl() {
+ n.Size = n.Size + TtlBytesLength
+ }
+ } else {
+ n.Size = 0
+ }
+ size = n.DataSize
+ util.Uint32toBytes(header[12:16], n.Size)
+ if _, err = w.Write(header); err != nil {
+ return
+ }
+ if n.DataSize > 0 {
+ util.Uint32toBytes(header[0:4], n.DataSize)
+ if _, err = w.Write(header[0:4]); err != nil {
+ return
+ }
+ if _, err = w.Write(n.Data); err != nil {
+ return
+ }
+ util.Uint8toBytes(header[0:1], n.Flags)
+ if _, err = w.Write(header[0:1]); err != nil {
+ return
+ }
+ if n.HasName() {
+ util.Uint8toBytes(header[0:1], n.NameSize)
+ if _, err = w.Write(header[0:1]); err != nil {
+ return
+ }
+ if _, err = w.Write(n.Name); err != nil {
+ return
+ }
+ }
+ if n.HasMime() {
+ util.Uint8toBytes(header[0:1], n.MimeSize)
+ if _, err = w.Write(header[0:1]); err != nil {
+ return
+ }
+ if _, err = w.Write(n.Mime); err != nil {
+ return
+ }
+ }
+ if n.HasLastModifiedDate() {
+ util.Uint64toBytes(header[0:8], n.LastModified)
+ if _, err = w.Write(header[8-LastModifiedBytesLength : 8]); err != nil {
+ return
+ }
+ }
+ if n.HasTtl() && n.Ttl != nil {
+ n.Ttl.ToBytes(header[0:TtlBytesLength])
+ if _, err = w.Write(header[0:TtlBytesLength]); err != nil {
+ return
+ }
+ }
+ }
+ padding := NeedlePaddingSize - ((NeedleHeaderSize + n.Size + NeedleChecksumSize) % NeedlePaddingSize)
+ util.Uint32toBytes(header[0:NeedleChecksumSize], n.Checksum.Value())
+ _, err = w.Write(header[0 : NeedleChecksumSize+padding])
+ return n.DataSize, err
+ }
+ return 0, fmt.Errorf("Unsupported Version! (%d)", version)
+}
+
+func ReadNeedleBlob(r *os.File, offset int64, size uint32) (dataSlice []byte, block *Block, err error) {
+ padding := NeedlePaddingSize - ((NeedleHeaderSize + size + NeedleChecksumSize) % NeedlePaddingSize)
+ readSize := NeedleHeaderSize + size + NeedleChecksumSize + padding
+ return getBytesForFileBlock(r, offset, int(readSize))
+}
+
+func (n *Needle) ReadData(r *os.File, offset int64, size uint32, version Version) (err error) {
+ bytes, block, err := ReadNeedleBlob(r, offset, size)
+ if err != nil {
+ return err
+ }
+ n.rawBlock = block
+ n.ParseNeedleHeader(bytes)
+ if n.Size != size {
+ return fmt.Errorf("File Entry Not Found. Needle %d Memory %d", n.Size, size)
+ }
+ switch version {
+ case Version1:
+ n.Data = bytes[NeedleHeaderSize : NeedleHeaderSize+size]
+ case Version2:
+ n.readNeedleDataVersion2(bytes[NeedleHeaderSize : NeedleHeaderSize+int(n.Size)])
+ }
+ checksum := util.BytesToUint32(bytes[NeedleHeaderSize+size : NeedleHeaderSize+size+NeedleChecksumSize])
+ newChecksum := NewCRC(n.Data)
+ if checksum != newChecksum.Value() {
+ return errors.New("CRC error! Data On Disk Corrupted")
+ }
+ n.Checksum = newChecksum
+ return nil
+}
+func (n *Needle) ParseNeedleHeader(bytes []byte) {
+ n.Cookie = util.BytesToUint32(bytes[0:4])
+ n.Id = util.BytesToUint64(bytes[4:12])
+ n.Size = util.BytesToUint32(bytes[12:NeedleHeaderSize])
+}
+func (n *Needle) readNeedleDataVersion2(bytes []byte) {
+ index, lenBytes := 0, len(bytes)
+ if index < lenBytes {
+ n.DataSize = util.BytesToUint32(bytes[index : index+4])
+ index = index + 4
+ if int(n.DataSize)+index > lenBytes {
+ // this if clause is due to bug #87 and #93, fixed in v0.69
+ // remove this clause later
+ return
+ }
+ n.Data = bytes[index : index+int(n.DataSize)]
+ index = index + int(n.DataSize)
+ n.Flags = bytes[index]
+ index = index + 1
+ }
+ if index < lenBytes && n.HasName() {
+ n.NameSize = uint8(bytes[index])
+ index = index + 1
+ n.Name = bytes[index : index+int(n.NameSize)]
+ index = index + int(n.NameSize)
+ }
+ if index < lenBytes && n.HasMime() {
+ n.MimeSize = uint8(bytes[index])
+ index = index + 1
+ n.Mime = bytes[index : index+int(n.MimeSize)]
+ index = index + int(n.MimeSize)
+ }
+ if index < lenBytes && n.HasLastModifiedDate() {
+ n.LastModified = util.BytesToUint64(bytes[index : index+LastModifiedBytesLength])
+ index = index + LastModifiedBytesLength
+ }
+ if index < lenBytes && n.HasTtl() {
+ n.Ttl = LoadTTLFromBytes(bytes[index : index+TtlBytesLength])
+ index = index + TtlBytesLength
+ }
+}
+
+func ReadNeedleHeader(r *os.File, version Version, offset int64) (n *Needle, bodyLength uint32, err error) {
+ n = new(Needle)
+ if version == Version1 || version == Version2 {
+ bytes := make([]byte, NeedleHeaderSize)
+ var count int
+ count, err = r.ReadAt(bytes, offset)
+ if count <= 0 || err != nil {
+ return nil, 0, err
+ }
+ n.ParseNeedleHeader(bytes)
+ padding := NeedlePaddingSize - ((n.Size + NeedleHeaderSize + NeedleChecksumSize) % NeedlePaddingSize)
+ bodyLength = n.Size + NeedleChecksumSize + padding
+ }
+ return
+}
+
+//n should be a needle whose header has already been read;
+//the body is read from the input file up to the next file entry
+func (n *Needle) ReadNeedleBody(r *os.File, version Version, offset int64, bodyLength uint32) (err error) {
+ if bodyLength <= 0 {
+ return nil
+ }
+ switch version {
+ case Version1:
+ bytes := make([]byte, bodyLength)
+ if _, err = r.ReadAt(bytes, offset); err != nil {
+ return
+ }
+ n.Data = bytes[:n.Size]
+ n.Checksum = NewCRC(n.Data)
+ case Version2:
+ bytes := make([]byte, bodyLength)
+ if _, err = r.ReadAt(bytes, offset); err != nil {
+ return
+ }
+ n.readNeedleDataVersion2(bytes[0:n.Size])
+ n.Checksum = NewCRC(n.Data)
+ default:
+ err = fmt.Errorf("Unsupported Version! (%d)", version)
+ }
+ return
+}
+
+func (n *Needle) IsGzipped() bool {
+ return n.Flags&FlagGzip > 0
+}
+func (n *Needle) SetGzipped() {
+ n.Flags = n.Flags | FlagGzip
+}
+func (n *Needle) HasName() bool {
+ return n.Flags&FlagHasName > 0
+}
+func (n *Needle) SetHasName() {
+ n.Flags = n.Flags | FlagHasName
+}
+func (n *Needle) HasMime() bool {
+ return n.Flags&FlagHasMime > 0
+}
+func (n *Needle) SetHasMime() {
+ n.Flags = n.Flags | FlagHasMime
+}
+func (n *Needle) HasLastModifiedDate() bool {
+ return n.Flags&FlagHasLastModifiedDate > 0
+}
+func (n *Needle) SetHasLastModifiedDate() {
+ n.Flags = n.Flags | FlagHasLastModifiedDate
+}
+func (n *Needle) HasTtl() bool {
+ return n.Flags&FlagHasTtl > 0
+}
+func (n *Needle) SetHasTtl() {
+ n.Flags = n.Flags | FlagHasTtl
+}
+
+func (n *Needle) IsChunkedManifest() bool {
+ return n.Flags&FlagIsChunkManifest > 0
+}
+
+func (n *Needle) SetIsChunkManifest() {
+ n.Flags = n.Flags | FlagIsChunkManifest
+}
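For illustration (not part of this commit): the flag constants above live in a single bitmask byte, so the setters and checks are independent of each other and can be stacked.

package main

import (
	"fmt"

	"github.com/chrislusf/seaweedfs/weed/storage"
)

func main() {
	n := new(storage.Needle)
	n.SetHasName()
	n.SetGzipped()
	// Flags is one bitmask byte, so each check reads its own bit.
	fmt.Println(n.HasName(), n.IsGzipped(), n.HasMime()) // true true false
}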
diff --git a/weed/storage/needle_test.go b/weed/storage/needle_test.go
new file mode 100644
index 000000000..c05afda2f
--- /dev/null
+++ b/weed/storage/needle_test.go
@@ -0,0 +1,45 @@
+package storage
+
+import "testing"
+
+func TestParseKeyHash(t *testing.T) {
+ testcases := []struct {
+ KeyHash string
+ ID uint64
+ Cookie uint32
+ Err bool
+ }{
+ // normal
+ {"4ed4c8116e41", 0x4ed4, 0xc8116e41, false},
+ // cookie with leading zeros
+ {"4ed401116e41", 0x4ed4, 0x01116e41, false},
+ // odd length
+ {"ed400116e41", 0xed4, 0x00116e41, false},
+ // uint
+ {"fed4c8114ed4c811f0116e41", 0xfed4c8114ed4c811, 0xf0116e41, false},
+ // err: too short
+ {"4ed4c811", 0, 0, true},
+ // err: too long
+ {"4ed4c8114ed4c8114ed4c8111", 0, 0, true},
+ // err: invalid character
+ {"helloworld", 0, 0, true},
+ }
+
+ for _, tc := range testcases {
+ if id, cookie, err := ParseKeyHash(tc.KeyHash); err != nil && !tc.Err {
+ t.Fatalf("Parse %s error: %v", tc.KeyHash, err)
+ } else if err == nil && tc.Err {
+ t.Fatalf("Parse %s expected error got nil", tc.KeyHash)
+ } else if id != tc.ID || cookie != tc.Cookie {
+ t.Fatalf("Parse %s wrong result. Expected: (%d, %d) got: (%d, %d)", tc.KeyHash, tc.ID, tc.Cookie, id, cookie)
+ }
+ }
+}
+
+func BenchmarkParseKeyHash(b *testing.B) {
+ b.ReportAllocs()
+
+ for i := 0; i < b.N; i++ {
+ ParseKeyHash("4ed44ed44ed44ed4c8116e41")
+ }
+}
diff --git a/weed/storage/replica_placement.go b/weed/storage/replica_placement.go
new file mode 100644
index 000000000..c1aca52eb
--- /dev/null
+++ b/weed/storage/replica_placement.go
@@ -0,0 +1,53 @@
+package storage
+
+import (
+ "errors"
+ "fmt"
+)
+
+type ReplicaPlacement struct {
+ SameRackCount int
+ DiffRackCount int
+ DiffDataCenterCount int
+}
+
+func NewReplicaPlacementFromString(t string) (*ReplicaPlacement, error) {
+ rp := &ReplicaPlacement{}
+ for i, c := range t {
+ count := int(c - '0')
+ if 0 <= count && count <= 2 {
+ switch i {
+ case 0:
+ rp.DiffDataCenterCount = count
+ case 1:
+ rp.DiffRackCount = count
+ case 2:
+ rp.SameRackCount = count
+ }
+ } else {
+ return rp, errors.New("Unknown Replication Type:" + t)
+ }
+ }
+ return rp, nil
+}
+
+func NewReplicaPlacementFromByte(b byte) (*ReplicaPlacement, error) {
+ return NewReplicaPlacementFromString(fmt.Sprintf("%03d", b))
+}
+
+func (rp *ReplicaPlacement) Byte() byte {
+ ret := rp.DiffDataCenterCount*100 + rp.DiffRackCount*10 + rp.SameRackCount
+ return byte(ret)
+}
+
+func (rp *ReplicaPlacement) String() string {
+ b := make([]byte, 3)
+ b[0] = byte(rp.DiffDataCenterCount + '0')
+ b[1] = byte(rp.DiffRackCount + '0')
+ b[2] = byte(rp.SameRackCount + '0')
+ return string(b)
+}
+
+func (rp *ReplicaPlacement) GetCopyCount() int {
+ return rp.DiffDataCenterCount + rp.DiffRackCount + rp.SameRackCount + 1
+}
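A quick sketch (not part of this commit) of how the three-digit replica placement string maps to copy counts; "001" keeps one extra copy on the same rack, for two copies in total.

package main

import (
	"fmt"
	"log"

	"github.com/chrislusf/seaweedfs/weed/storage"
)

func main() {
	// Digits are: different data centers, different racks, same rack.
	rp, err := storage.NewReplicaPlacementFromString("001")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(rp.GetCopyCount()) // 2: the original plus one same-rack copy
	fmt.Println(rp.Byte())         // 1: compact byte form stored in the super block
}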
diff --git a/weed/storage/replica_placement_test.go b/weed/storage/replica_placement_test.go
new file mode 100644
index 000000000..9c2161e94
--- /dev/null
+++ b/weed/storage/replica_placement_test.go
@@ -0,0 +1,14 @@
+package storage
+
+import (
+ "testing"
+)
+
+func TestReplicaPlacementSerialDeserial(t *testing.T) {
+ rp, _ := NewReplicaPlacementFromString("001")
+ new_rp, _ := NewReplicaPlacementFromByte(rp.Byte())
+ if rp.String() != new_rp.String() {
+ println("expected:", rp.String(), "actual:", new_rp.String())
+ t.Fail()
+ }
+}
diff --git a/weed/storage/store.go b/weed/storage/store.go
new file mode 100644
index 000000000..d44d6a863
--- /dev/null
+++ b/weed/storage/store.go
@@ -0,0 +1,340 @@
+package storage
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "math/rand"
+ "strconv"
+ "strings"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "github.com/chrislusf/seaweedfs/weed/security"
+ "github.com/chrislusf/seaweedfs/weed/util"
+ "github.com/golang/protobuf/proto"
+)
+
+const (
+ MAX_TTL_VOLUME_REMOVAL_DELAY = 10 // 10 minutes
+)
+
+type MasterNodes struct {
+ nodes []string
+ lastNode int
+}
+
+func (mn *MasterNodes) String() string {
+ return fmt.Sprintf("nodes:%v, lastNode:%d", mn.nodes, mn.lastNode)
+}
+
+func NewMasterNodes(bootstrapNode string) (mn *MasterNodes) {
+ mn = &MasterNodes{nodes: []string{bootstrapNode}, lastNode: -1}
+ return
+}
+func (mn *MasterNodes) reset() {
+ glog.V(4).Infof("Resetting master nodes: %v", mn)
+ if len(mn.nodes) > 1 && mn.lastNode >= 0 {
+ glog.V(0).Infof("Reset master %s from: %v", mn.nodes[mn.lastNode], mn.nodes)
+ mn.lastNode = -mn.lastNode - 1
+ }
+}
+func (mn *MasterNodes) findMaster() (string, error) {
+ if len(mn.nodes) == 0 {
+ return "", errors.New("No master node found!")
+ }
+ if mn.lastNode < 0 {
+ for _, m := range mn.nodes {
+ glog.V(4).Infof("Listing masters on %s", m)
+ if masters, e := operation.ListMasters(m); e == nil {
+ if len(masters) == 0 {
+ continue
+ }
+ mn.nodes = append(masters, m)
+ mn.lastNode = rand.Intn(len(mn.nodes))
+ glog.V(2).Infof("current master nodes is %v", mn)
+ break
+ } else {
+ glog.V(4).Infof("Failed listing masters on %s: %v", m, e)
+ }
+ }
+ }
+ if mn.lastNode < 0 {
+ return "", errors.New("No master node available!")
+ }
+ return mn.nodes[mn.lastNode], nil
+}
+
+/*
+ * A VolumeServer contains one Store
+ */
+type Store struct {
+ Ip string
+ Port int
+ PublicUrl string
+ Locations []*DiskLocation
+ dataCenter string //optional information, overwriting master setting if exists
+ rack string //optional information, overwriting master setting if exists
+ connected bool
+ volumeSizeLimit uint64 //read from the master
+ masterNodes *MasterNodes
+}
+
+func (s *Store) String() (str string) {
+ str = fmt.Sprintf("Ip:%s, Port:%d, PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d, masterNodes:%s", s.Ip, s.Port, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.volumeSizeLimit, s.masterNodes)
+ return
+}
+
+func NewStore(port int, ip, publicUrl string, dirnames []string, maxVolumeCounts []int, needleMapKind NeedleMapType) (s *Store) {
+ s = &Store{Port: port, Ip: ip, PublicUrl: publicUrl}
+ s.Locations = make([]*DiskLocation, 0)
+ for i := 0; i < len(dirnames); i++ {
+ location := NewDiskLocation(dirnames[i], maxVolumeCounts[i])
+ location.loadExistingVolumes(needleMapKind)
+ s.Locations = append(s.Locations, location)
+ }
+ return
+}
+func (s *Store) AddVolume(volumeListString string, collection string, needleMapKind NeedleMapType, replicaPlacement string, ttlString string) error {
+ rt, e := NewReplicaPlacementFromString(replicaPlacement)
+ if e != nil {
+ return e
+ }
+ ttl, e := ReadTTL(ttlString)
+ if e != nil {
+ return e
+ }
+ for _, range_string := range strings.Split(volumeListString, ",") {
+ if strings.Index(range_string, "-") < 0 {
+ id_string := range_string
+ id, err := NewVolumeId(id_string)
+ if err != nil {
+ return fmt.Errorf("Volume Id %s is not a valid unsigned integer!", id_string)
+ }
+ e = s.addVolume(VolumeId(id), collection, needleMapKind, rt, ttl)
+ } else {
+ pair := strings.Split(range_string, "-")
+ start, start_err := strconv.ParseUint(pair[0], 10, 64)
+ if start_err != nil {
+ return fmt.Errorf("Volume Start Id %s is not a valid unsigned integer!", pair[0])
+ }
+ end, end_err := strconv.ParseUint(pair[1], 10, 64)
+ if end_err != nil {
+ return fmt.Errorf("Volume End Id %s is not a valid unsigned integer!", pair[1])
+ }
+ for id := start; id <= end; id++ {
+ if err := s.addVolume(VolumeId(id), collection, needleMapKind, rt, ttl); err != nil {
+ e = err
+ }
+ }
+ }
+ }
+ return e
+}
+func (s *Store) DeleteCollection(collection string) (e error) {
+ for _, location := range s.Locations {
+ e = location.DeleteCollectionFromDiskLocation(collection)
+ if e != nil {
+ return
+ }
+ }
+ return
+}
+
+func (s *Store) findVolume(vid VolumeId) *Volume {
+ for _, location := range s.Locations {
+ if v, found := location.volumes[vid]; found {
+ return v
+ }
+ }
+ return nil
+}
+func (s *Store) findFreeLocation() (ret *DiskLocation) {
+ max := 0
+ for _, location := range s.Locations {
+ currentFreeCount := location.MaxVolumeCount - len(location.volumes)
+ if currentFreeCount > max {
+ max = currentFreeCount
+ ret = location
+ }
+ }
+ return ret
+}
+func (s *Store) addVolume(vid VolumeId, collection string, needleMapKind NeedleMapType, replicaPlacement *ReplicaPlacement, ttl *TTL) error {
+ if s.findVolume(vid) != nil {
+ return fmt.Errorf("Volume Id %d already exists!", vid)
+ }
+ if location := s.findFreeLocation(); location != nil {
+ glog.V(0).Infof("In dir %s adds volume:%v collection:%s replicaPlacement:%v ttl:%v",
+ location.Directory, vid, collection, replicaPlacement, ttl)
+ if volume, err := NewVolume(location.Directory, collection, vid, needleMapKind, replicaPlacement, ttl); err == nil {
+ location.volumes[vid] = volume
+ return nil
+ } else {
+ return err
+ }
+ }
+ return fmt.Errorf("No more free space left")
+}
+
+func (s *Store) Status() []*VolumeInfo {
+ var stats []*VolumeInfo
+ for _, location := range s.Locations {
+ for k, v := range location.volumes {
+ s := &VolumeInfo{
+ Id: VolumeId(k),
+ Size: v.ContentSize(),
+ Collection: v.Collection,
+ ReplicaPlacement: v.ReplicaPlacement,
+ Version: v.Version(),
+ FileCount: v.nm.FileCount(),
+ DeleteCount: v.nm.DeletedCount(),
+ DeletedByteCount: v.nm.DeletedSize(),
+ ReadOnly: v.readOnly,
+ Ttl: v.Ttl}
+ stats = append(stats, s)
+ }
+ }
+ sortVolumeInfos(stats)
+ return stats
+}
+
+func (s *Store) SetDataCenter(dataCenter string) {
+ s.dataCenter = dataCenter
+}
+func (s *Store) SetRack(rack string) {
+ s.rack = rack
+}
+
+func (s *Store) SetBootstrapMaster(bootstrapMaster string) {
+ s.masterNodes = NewMasterNodes(bootstrapMaster)
+}
+func (s *Store) SendHeartbeatToMaster() (masterNode string, secretKey security.Secret, e error) {
+ masterNode, e = s.masterNodes.findMaster()
+ if e != nil {
+ return
+ }
+ var volumeMessages []*operation.VolumeInformationMessage
+ maxVolumeCount := 0
+ var maxFileKey uint64
+ for _, location := range s.Locations {
+ maxVolumeCount = maxVolumeCount + location.MaxVolumeCount
+ for k, v := range location.volumes {
+ if maxFileKey < v.nm.MaxFileKey() {
+ maxFileKey = v.nm.MaxFileKey()
+ }
+ if !v.expired(s.volumeSizeLimit) {
+ volumeMessage := &operation.VolumeInformationMessage{
+ Id: proto.Uint32(uint32(k)),
+ Size: proto.Uint64(uint64(v.Size())),
+ Collection: proto.String(v.Collection),
+ FileCount: proto.Uint64(uint64(v.nm.FileCount())),
+ DeleteCount: proto.Uint64(uint64(v.nm.DeletedCount())),
+ DeletedByteCount: proto.Uint64(v.nm.DeletedSize()),
+ ReadOnly: proto.Bool(v.readOnly),
+ ReplicaPlacement: proto.Uint32(uint32(v.ReplicaPlacement.Byte())),
+ Version: proto.Uint32(uint32(v.Version())),
+ Ttl: proto.Uint32(v.Ttl.ToUint32()),
+ }
+ volumeMessages = append(volumeMessages, volumeMessage)
+ } else {
+ if v.exiredLongEnough(MAX_TTL_VOLUME_REMOVAL_DELAY) {
+ location.deleteVolumeById(v.Id)
+ glog.V(0).Infoln("volume", v.Id, "is deleted.")
+ } else {
+ glog.V(0).Infoln("volume", v.Id, "is expired.")
+ }
+ }
+ }
+ }
+
+ joinMessage := &operation.JoinMessage{
+ IsInit: proto.Bool(!s.connected),
+ Ip: proto.String(s.Ip),
+ Port: proto.Uint32(uint32(s.Port)),
+ PublicUrl: proto.String(s.PublicUrl),
+ MaxVolumeCount: proto.Uint32(uint32(maxVolumeCount)),
+ MaxFileKey: proto.Uint64(maxFileKey),
+ DataCenter: proto.String(s.dataCenter),
+ Rack: proto.String(s.rack),
+ Volumes: volumeMessages,
+ }
+
+ data, err := proto.Marshal(joinMessage)
+ if err != nil {
+ return "", "", err
+ }
+
+ joinUrl := "http://" + masterNode + "/dir/join"
+ glog.V(4).Infof("Connecting to %s ...", joinUrl)
+
+ jsonBlob, err := util.PostBytes(joinUrl, data)
+ if err != nil {
+ s.masterNodes.reset()
+ return "", "", err
+ }
+ var ret operation.JoinResult
+ if err := json.Unmarshal(jsonBlob, &ret); err != nil {
+ glog.V(0).Infof("Failed to join %s with response: %s", joinUrl, string(jsonBlob))
+ s.masterNodes.reset()
+ return masterNode, "", err
+ }
+ if ret.Error != "" {
+ s.masterNodes.reset()
+ return masterNode, "", errors.New(ret.Error)
+ }
+ s.volumeSizeLimit = ret.VolumeSizeLimit
+ secretKey = security.Secret(ret.SecretKey)
+ s.connected = true
+ return
+}
+func (s *Store) Close() {
+ for _, location := range s.Locations {
+ for _, v := range location.volumes {
+ v.Close()
+ }
+ }
+}
+func (s *Store) Write(i VolumeId, n *Needle) (size uint32, err error) {
+ if v := s.findVolume(i); v != nil {
+ if v.readOnly {
+ err = fmt.Errorf("Volume %d is read only", i)
+ return
+ }
+ if MaxPossibleVolumeSize >= v.ContentSize()+uint64(size) {
+ size, err = v.write(n)
+ } else {
+ err = fmt.Errorf("Volume Size Limit %d Exceeded! Current size is %d", s.volumeSizeLimit, v.ContentSize())
+ }
+ if s.volumeSizeLimit < v.ContentSize()+3*uint64(size) {
+ glog.V(0).Infoln("volume", i, "size", v.ContentSize(), "will exceed limit", s.volumeSizeLimit)
+ if _, _, e := s.SendHeartbeatToMaster(); e != nil {
+ glog.V(0).Infoln("error when reporting size:", e)
+ }
+ }
+ return
+ }
+ glog.V(0).Infoln("volume", i, "not found!")
+ err = fmt.Errorf("Volume %d not found!", i)
+ return
+}
+func (s *Store) Delete(i VolumeId, n *Needle) (uint32, error) {
+ if v := s.findVolume(i); v != nil && !v.readOnly {
+ return v.delete(n)
+ }
+ return 0, nil
+}
+func (s *Store) ReadVolumeNeedle(i VolumeId, n *Needle) (int, error) {
+ if v := s.findVolume(i); v != nil {
+ return v.readNeedle(n)
+ }
+ return 0, fmt.Errorf("Volume %v not found!", i)
+}
+func (s *Store) GetVolume(i VolumeId) *Volume {
+ return s.findVolume(i)
+}
+
+func (s *Store) HasVolume(i VolumeId) bool {
+ v := s.findVolume(i)
+ return v != nil
+}
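A hypothetical sketch (not part of this commit) of wiring up a Store; the directory, addresses, volume counts and ids are made up, and it assumes ./data exists and that empty collection/ttl strings mean the defaults. Note that AddVolume accepts both single ids and ranges.

package main

import (
	"log"

	"github.com/chrislusf/seaweedfs/weed/storage"
)

func main() {
	// One disk location holding at most 7 volumes.
	s := storage.NewStore(8080, "127.0.0.1", "127.0.0.1:8080",
		[]string{"./data"}, []int{7}, storage.NeedleMapInMemory)
	defer s.Close()

	// Volume lists accept single ids and ranges, e.g. "1,2,5-7".
	if err := s.AddVolume("1,2,5-7", "", storage.NeedleMapInMemory, "001", ""); err != nil {
		log.Fatal(err)
	}

	s.SetBootstrapMaster("localhost:9333") // hypothetical master address
	if _, _, err := s.SendHeartbeatToMaster(); err != nil {
		log.Println("heartbeat failed:", err)
	}
}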
diff --git a/weed/storage/store_vacuum.go b/weed/storage/store_vacuum.go
new file mode 100644
index 000000000..03825c159
--- /dev/null
+++ b/weed/storage/store_vacuum.go
@@ -0,0 +1,44 @@
+package storage
+
+import (
+ "fmt"
+ "strconv"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+func (s *Store) CheckCompactVolume(volumeIdString string, garbageThresholdString string) (error, bool) {
+ vid, err := NewVolumeId(volumeIdString)
+ if err != nil {
+ return fmt.Errorf("Volume Id %s is not a valid unsigned integer", volumeIdString), false
+ }
+ garbageThreshold, e := strconv.ParseFloat(garbageThresholdString, 32)
+ if e != nil {
+ return fmt.Errorf("garbageThreshold %s is not a valid float number", garbageThresholdString), false
+ }
+ if v := s.findVolume(vid); v != nil {
+ glog.V(3).Infoln(vid, "garbage level is", v.garbageLevel())
+ return nil, garbageThreshold < v.garbageLevel()
+ }
+ return fmt.Errorf("volume id %d is not found during check compact", vid), false
+}
+func (s *Store) CompactVolume(volumeIdString string) error {
+ vid, err := NewVolumeId(volumeIdString)
+ if err != nil {
+ return fmt.Errorf("Volume Id %s is not a valid unsigned integer", volumeIdString)
+ }
+ if v := s.findVolume(vid); v != nil {
+ return v.Compact()
+ }
+ return fmt.Errorf("volume id %d is not found during compact", vid)
+}
+func (s *Store) CommitCompactVolume(volumeIdString string) error {
+ vid, err := NewVolumeId(volumeIdString)
+ if err != nil {
+ return fmt.Errorf("Volume Id %s is not a valid unsigned integer", volumeIdString)
+ }
+ if v := s.findVolume(vid); v != nil {
+ return v.commitCompact()
+ }
+ return fmt.Errorf("volume id %d is not found during commit compact", vid)
+}
diff --git a/weed/storage/volume.go b/weed/storage/volume.go
new file mode 100644
index 000000000..d40bdc565
--- /dev/null
+++ b/weed/storage/volume.go
@@ -0,0 +1,430 @@
+package storage
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "path"
+ "sync"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+type Volume struct {
+ Id VolumeId
+ dir string
+ Collection string
+ dataFile *os.File
+ nm NeedleMapper
+ needleMapKind NeedleMapType
+ readOnly bool
+
+ SuperBlock
+
+ dataFileAccessLock sync.Mutex
+ lastModifiedTime uint64 //unix time in seconds
+}
+
+func NewVolume(dirname string, collection string, id VolumeId, needleMapKind NeedleMapType, replicaPlacement *ReplicaPlacement, ttl *TTL) (v *Volume, e error) {
+ v = &Volume{dir: dirname, Collection: collection, Id: id}
+ v.SuperBlock = SuperBlock{ReplicaPlacement: replicaPlacement, Ttl: ttl}
+ v.needleMapKind = needleMapKind
+ e = v.load(true, true, needleMapKind)
+ return
+}
+func (v *Volume) String() string {
+ return fmt.Sprintf("Id:%v, dir:%s, Collection:%s, dataFile:%v, nm:%v, readOnly:%v", v.Id, v.dir, v.Collection, v.dataFile, v.nm, v.readOnly)
+}
+
+func loadVolumeWithoutIndex(dirname string, collection string, id VolumeId, needleMapKind NeedleMapType) (v *Volume, e error) {
+ v = &Volume{dir: dirname, Collection: collection, Id: id}
+ v.SuperBlock = SuperBlock{}
+ v.needleMapKind = needleMapKind
+ e = v.load(false, false, needleMapKind)
+ return
+}
+func (v *Volume) FileName() (fileName string) {
+ if v.Collection == "" {
+ fileName = path.Join(v.dir, v.Id.String())
+ } else {
+ fileName = path.Join(v.dir, v.Collection+"_"+v.Id.String())
+ }
+ return
+}
+func (v *Volume) DataFile() *os.File {
+ return v.dataFile
+}
+func (v *Volume) load(alsoLoadIndex bool, createDatIfMissing bool, needleMapKind NeedleMapType) error {
+ var e error
+ fileName := v.FileName()
+
+ if exists, canRead, canWrite, modifiedTime := checkFile(fileName + ".dat"); exists {
+ if !canRead {
+ return fmt.Errorf("cannot read Volume Data file %s.dat", fileName)
+ }
+ if canWrite {
+ v.dataFile, e = os.OpenFile(fileName+".dat", os.O_RDWR|os.O_CREATE, 0644)
+ v.lastModifiedTime = uint64(modifiedTime.Unix())
+ } else {
+ glog.V(0).Infoln("opening " + fileName + ".dat in READONLY mode")
+ v.dataFile, e = os.Open(fileName + ".dat")
+ v.readOnly = true
+ }
+ } else {
+ if createDatIfMissing {
+ v.dataFile, e = os.OpenFile(fileName+".dat", os.O_RDWR|os.O_CREATE, 0644)
+ } else {
+ return fmt.Errorf("Volume Data file %s.dat does not exist.", fileName)
+ }
+ }
+
+ if e != nil {
+ if !os.IsPermission(e) {
+ return fmt.Errorf("cannot load Volume Data %s.dat: %v", fileName, e)
+ }
+ }
+
+ if v.ReplicaPlacement == nil {
+ e = v.readSuperBlock()
+ } else {
+ e = v.maybeWriteSuperBlock()
+ }
+ if e == nil && alsoLoadIndex {
+ var indexFile *os.File
+ if v.readOnly {
+ glog.V(1).Infoln("open to read file", fileName+".idx")
+ if indexFile, e = os.OpenFile(fileName+".idx", os.O_RDONLY, 0644); e != nil {
+ return fmt.Errorf("cannot read Volume Index %s.idx: %v", fileName, e)
+ }
+ } else {
+ glog.V(1).Infoln("open to write file", fileName+".idx")
+ if indexFile, e = os.OpenFile(fileName+".idx", os.O_RDWR|os.O_CREATE, 0644); e != nil {
+ return fmt.Errorf("cannot write Volume Index %s.idx: %v", fileName, e)
+ }
+ }
+ switch needleMapKind {
+ case NeedleMapInMemory:
+ glog.V(0).Infoln("loading index file", fileName+".idx", "readonly", v.readOnly)
+ if v.nm, e = LoadNeedleMap(indexFile); e != nil {
+ glog.V(0).Infof("loading index %s error: %v", fileName+".idx", e)
+ }
+ case NeedleMapLevelDb:
+ glog.V(0).Infoln("loading leveldb file", fileName+".ldb")
+ if v.nm, e = NewLevelDbNeedleMap(fileName+".ldb", indexFile); e != nil {
+ glog.V(0).Infof("loading leveldb %s error: %v", fileName+".ldb", e)
+ }
+ case NeedleMapBoltDb:
+ glog.V(0).Infoln("loading boltdb file", fileName+".bdb")
+ if v.nm, e = NewBoltDbNeedleMap(fileName+".bdb", indexFile); e != nil {
+ glog.V(0).Infof("loading boltdb %s error: %v", fileName+".bdb", e)
+ }
+ }
+ }
+ return e
+}
+func (v *Volume) Version() Version {
+ return v.SuperBlock.Version()
+}
+func (v *Volume) Size() int64 {
+ stat, e := v.dataFile.Stat()
+ if e == nil {
+ return stat.Size()
+ }
+ glog.V(0).Infof("Failed to read file size %s %v", v.dataFile.Name(), e)
+ return -1
+}
+
+// Close cleanly shuts down this volume
+func (v *Volume) Close() {
+ v.dataFileAccessLock.Lock()
+ defer v.dataFileAccessLock.Unlock()
+ v.nm.Close()
+ _ = v.dataFile.Close()
+}
+
+func (v *Volume) NeedToReplicate() bool {
+ return v.ReplicaPlacement.GetCopyCount() > 1
+}
+
+// isFileUnchanged checks whether this needle to write is same as last one.
+// It requires serialized access in the same volume.
+func (v *Volume) isFileUnchanged(n *Needle) bool {
+ if v.Ttl.String() != "" {
+ return false
+ }
+ nv, ok := v.nm.Get(n.Id)
+ if ok && nv.Offset > 0 {
+ oldNeedle := new(Needle)
+ err := oldNeedle.ReadData(v.dataFile, int64(nv.Offset)*NeedlePaddingSize, nv.Size, v.Version())
+ if err != nil {
+ glog.V(0).Infof("Failed to check updated file %v", err)
+ return false
+ }
+ defer oldNeedle.ReleaseMemory()
+ if oldNeedle.Checksum == n.Checksum && bytes.Equal(oldNeedle.Data, n.Data) {
+ n.DataSize = oldNeedle.DataSize
+ return true
+ }
+ }
+ return false
+}
+
+// Destroy removes everything related to this volume
+func (v *Volume) Destroy() (err error) {
+ if v.readOnly {
+ err = fmt.Errorf("%s is read-only", v.dataFile.Name())
+ return
+ }
+ v.Close()
+ err = os.Remove(v.dataFile.Name())
+ if err != nil {
+ return
+ }
+ err = v.nm.Destroy()
+ return
+}
+
+// AppendBlob append a blob to end of the data file, used in replication
+func (v *Volume) AppendBlob(b []byte) (offset int64, err error) {
+ if v.readOnly {
+ err = fmt.Errorf("%s is read-only", v.dataFile.Name())
+ return
+ }
+ v.dataFileAccessLock.Lock()
+ defer v.dataFileAccessLock.Unlock()
+ if offset, err = v.dataFile.Seek(0, 2); err != nil {
+ glog.V(0).Infof("failed to seek the end of file: %v", err)
+ return
+ }
+ //ensure file writes start from aligned positions
+ if offset%NeedlePaddingSize != 0 {
+ offset = offset + (NeedlePaddingSize - offset%NeedlePaddingSize)
+ if offset, err = v.dataFile.Seek(offset, 0); err != nil {
+ glog.V(0).Infof("failed to align in datafile %s: %v", v.dataFile.Name(), err)
+ return
+ }
+ }
+ v.dataFile.Write(b)
+ return
+}
+
+func (v *Volume) write(n *Needle) (size uint32, err error) {
+ glog.V(4).Infof("writing needle %s", NewFileIdFromNeedle(v.Id, n).String())
+ if v.readOnly {
+ err = fmt.Errorf("%s is read-only", v.dataFile.Name())
+ return
+ }
+ v.dataFileAccessLock.Lock()
+ defer v.dataFileAccessLock.Unlock()
+ if v.isFileUnchanged(n) {
+ size = n.DataSize
+ glog.V(4).Infof("needle is unchanged!")
+ return
+ }
+ var offset int64
+ if offset, err = v.dataFile.Seek(0, 2); err != nil {
+ glog.V(0).Infof("failed to seek the end of file: %v", err)
+ return
+ }
+
+ //ensure file writes start from aligned positions
+ if offset%NeedlePaddingSize != 0 {
+ offset = offset + (NeedlePaddingSize - offset%NeedlePaddingSize)
+ if offset, err = v.dataFile.Seek(offset, 0); err != nil {
+ glog.V(0).Infof("failed to align in datafile %s: %v", v.dataFile.Name(), err)
+ return
+ }
+ }
+
+ if size, err = n.Append(v.dataFile, v.Version()); err != nil {
+ if e := v.dataFile.Truncate(offset); e != nil {
+ err = fmt.Errorf("%s\ncannot truncate %s: %v", err, v.dataFile.Name(), e)
+ }
+ return
+ }
+ nv, ok := v.nm.Get(n.Id)
+ if !ok || int64(nv.Offset)*NeedlePaddingSize < offset {
+ if err = v.nm.Put(n.Id, uint32(offset/NeedlePaddingSize), n.Size); err != nil {
+ glog.V(4).Infof("failed to save in needle map %d: %v", n.Id, err)
+ }
+ }
+ if v.lastModifiedTime < n.LastModified {
+ v.lastModifiedTime = n.LastModified
+ }
+ return
+}
+
+func (v *Volume) delete(n *Needle) (uint32, error) {
+ glog.V(4).Infof("delete needle %s", NewFileIdFromNeedle(v.Id, n).String())
+ if v.readOnly {
+ return 0, fmt.Errorf("%s is read-only", v.dataFile.Name())
+ }
+ v.dataFileAccessLock.Lock()
+ defer v.dataFileAccessLock.Unlock()
+ nv, ok := v.nm.Get(n.Id)
+ //fmt.Println("key", n.Id, "volume offset", nv.Offset, "data_size", n.Size, "cached size", nv.Size)
+ if ok {
+ size := nv.Size
+ if err := v.nm.Delete(n.Id); err != nil {
+ return size, err
+ }
+ if _, err := v.dataFile.Seek(0, 2); err != nil {
+ return size, err
+ }
+ n.Data = nil
+ _, err := n.Append(v.dataFile, v.Version())
+ return size, err
+ }
+ return 0, nil
+}
+
+// read fills in Needle content by looking up n.Id from NeedleMapper
+func (v *Volume) readNeedle(n *Needle) (int, error) {
+ nv, ok := v.nm.Get(n.Id)
+ if !ok || nv.Offset == 0 {
+ return -1, errors.New("Not Found")
+ }
+ err := n.ReadData(v.dataFile, int64(nv.Offset)*NeedlePaddingSize, nv.Size, v.Version())
+ if err != nil {
+ return 0, err
+ }
+ bytesRead := len(n.Data)
+ if !n.HasTtl() {
+ return bytesRead, nil
+ }
+ ttlMinutes := n.Ttl.Minutes()
+ if ttlMinutes == 0 {
+ return bytesRead, nil
+ }
+ if !n.HasLastModifiedDate() {
+ return bytesRead, nil
+ }
+ if uint64(time.Now().Unix()) < n.LastModified+uint64(ttlMinutes*60) {
+ return bytesRead, nil
+ }
+ n.ReleaseMemory()
+ return -1, errors.New("Not Found")
+}
+
+func ScanVolumeFile(dirname string, collection string, id VolumeId,
+ needleMapKind NeedleMapType,
+ visitSuperBlock func(SuperBlock) error,
+ readNeedleBody bool,
+ visitNeedle func(n *Needle, offset int64) error) (err error) {
+ var v *Volume
+ if v, err = loadVolumeWithoutIndex(dirname, collection, id, needleMapKind); err != nil {
+ return fmt.Errorf("Failed to load volume %d: %v", id, err)
+ }
+ if err = visitSuperBlock(v.SuperBlock); err != nil {
+ return fmt.Errorf("Failed to process volume %d super block: %v", id, err)
+ }
+
+ version := v.Version()
+
+ offset := int64(SuperBlockSize)
+ n, rest, e := ReadNeedleHeader(v.dataFile, version, offset)
+ if e != nil {
+ err = fmt.Errorf("cannot read needle header: %v", e)
+ return
+ }
+ for n != nil {
+ if readNeedleBody {
+ if err = n.ReadNeedleBody(v.dataFile, version, offset+int64(NeedleHeaderSize), rest); err != nil {
+ glog.V(0).Infof("cannot read needle body: %v", err)
+ //err = fmt.Errorf("cannot read needle body: %v", err)
+ //return
+ }
+ if n.DataSize >= n.Size {
+ // this should come from a bug reported on #87 and #93
+ // fixed in v0.69
+ // remove this whole "if" clause later, long after 0.69
+ oldRest, oldSize := rest, n.Size
+ padding := NeedlePaddingSize - ((n.Size + NeedleHeaderSize + NeedleChecksumSize) % NeedlePaddingSize)
+ n.Size = 0
+ rest = n.Size + NeedleChecksumSize + padding
+ if rest%NeedlePaddingSize != 0 {
+ rest += (NeedlePaddingSize - rest%NeedlePaddingSize)
+ }
+ glog.V(4).Infof("Adjusting n.Size %d=>0 rest:%d=>%d %+v", oldSize, oldRest, rest, n)
+ }
+ }
+ if err = visitNeedle(n, offset); err != nil {
+ glog.V(0).Infof("visit needle error: %v", err)
+ }
+ offset += int64(NeedleHeaderSize) + int64(rest)
+ glog.V(4).Infof("==> new entry offset %d", offset)
+ if n, rest, err = ReadNeedleHeader(v.dataFile, version, offset); err != nil {
+ if err == io.EOF {
+ return nil
+ }
+ return fmt.Errorf("cannot read needle header: %v", err)
+ }
+ glog.V(4).Infof("new entry needle size:%d rest:%d", n.Size, rest)
+ }
+
+ return
+}
+
+func (v *Volume) ContentSize() uint64 {
+ return v.nm.ContentSize()
+}
+
+func checkFile(filename string) (exists, canRead, canWrite bool, modTime time.Time) {
+ exists = true
+ fi, err := os.Stat(filename)
+ if os.IsNotExist(err) {
+ exists = false
+ return
+ }
+ if fi.Mode()&0400 != 0 {
+ canRead = true
+ }
+ if fi.Mode()&0200 != 0 {
+ canWrite = true
+ }
+ modTime = fi.ModTime()
+ return
+}
+
+// volume is expired if modified time + volume ttl < now
+// except when the volume is empty,
+// when the volume does not have a ttl,
+// or when volumeSizeLimit is 0 (the server just started)
+func (v *Volume) expired(volumeSizeLimit uint64) bool {
+ if volumeSizeLimit == 0 {
+ //skip if we don't know size limit
+ return false
+ }
+ if v.ContentSize() == 0 {
+ return false
+ }
+ if v.Ttl == nil || v.Ttl.Minutes() == 0 {
+ return false
+ }
+ glog.V(0).Infof("now:%v lastModified:%v", time.Now().Unix(), v.lastModifiedTime)
+ livedMinutes := (time.Now().Unix() - int64(v.lastModifiedTime)) / 60
+ glog.V(0).Infof("ttl:%v lived:%v", v.Ttl, livedMinutes)
+ if int64(v.Ttl.Minutes()) < livedMinutes {
+ return true
+ }
+ return false
+}
+
+// after expiry, wait the smaller of maxDelayMinutes or 10% of the ttl minutes before removal
+func (v *Volume) exiredLongEnough(maxDelayMinutes uint32) bool {
+ if v.Ttl == nil || v.Ttl.Minutes() == 0 {
+ return false
+ }
+ removalDelay := v.Ttl.Minutes() / 10
+ if removalDelay > maxDelayMinutes {
+ removalDelay = maxDelayMinutes
+ }
+
+ if uint64(v.Ttl.Minutes()+removalDelay)*60+v.lastModifiedTime < uint64(time.Now().Unix()) {
+ return true
+ }
+ return false
+}
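For illustration (not part of this commit), ScanVolumeFile above can drive offline tools that walk a .dat file needle by needle; the directory and volume id here are hypothetical.

package main

import (
	"fmt"
	"log"

	"github.com/chrislusf/seaweedfs/weed/storage"
)

func main() {
	err := storage.ScanVolumeFile("./data", "", storage.VolumeId(1), storage.NeedleMapInMemory,
		func(sb storage.SuperBlock) error {
			fmt.Println("version:", sb.Version(), "replication:", sb.ReplicaPlacement)
			return nil
		},
		false, // headers only; pass true to also read needle bodies
		func(n *storage.Needle, offset int64) error {
			fmt.Printf("needle %d size %d at offset %d\n", n.Id, n.Size, offset)
			return nil
		})
	if err != nil {
		log.Fatal(err)
	}
}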
diff --git a/weed/storage/volume_id.go b/weed/storage/volume_id.go
new file mode 100644
index 000000000..0333c6cf0
--- /dev/null
+++ b/weed/storage/volume_id.go
@@ -0,0 +1,18 @@
+package storage
+
+import (
+ "strconv"
+)
+
+type VolumeId uint32
+
+func NewVolumeId(vid string) (VolumeId, error) {
+ volumeId, err := strconv.ParseUint(vid, 10, 64)
+ return VolumeId(volumeId), err
+}
+func (vid *VolumeId) String() string {
+ return strconv.FormatUint(uint64(*vid), 10)
+}
+func (vid *VolumeId) Next() VolumeId {
+ return VolumeId(uint32(*vid) + 1)
+}
diff --git a/weed/storage/volume_info.go b/weed/storage/volume_info.go
new file mode 100644
index 000000000..b3068eec3
--- /dev/null
+++ b/weed/storage/volume_info.go
@@ -0,0 +1,65 @@
+package storage
+
+import (
+ "fmt"
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "sort"
+)
+
+type VolumeInfo struct {
+ Id VolumeId
+ Size uint64
+ ReplicaPlacement *ReplicaPlacement
+ Ttl *TTL
+ Collection string
+ Version Version
+ FileCount int
+ DeleteCount int
+ DeletedByteCount uint64
+ ReadOnly bool
+}
+
+func NewVolumeInfo(m *operation.VolumeInformationMessage) (vi VolumeInfo, err error) {
+ vi = VolumeInfo{
+ Id: VolumeId(*m.Id),
+ Size: *m.Size,
+ Collection: *m.Collection,
+ FileCount: int(*m.FileCount),
+ DeleteCount: int(*m.DeleteCount),
+ DeletedByteCount: *m.DeletedByteCount,
+ ReadOnly: *m.ReadOnly,
+ Version: Version(*m.Version),
+ }
+ rp, e := NewReplicaPlacementFromByte(byte(*m.ReplicaPlacement))
+ if e != nil {
+ return vi, e
+ }
+ vi.ReplicaPlacement = rp
+ vi.Ttl = LoadTTLFromUint32(*m.Ttl)
+ return vi, nil
+}
+
+func (vi VolumeInfo) String() string {
+ return fmt.Sprintf("Id:%d, Size:%d, ReplicaPlacement:%s, Collection:%s, Version:%v, FileCount:%d, DeleteCount:%d, DeletedByteCount:%d, ReadOnly:%v",
+ vi.Id, vi.Size, vi.ReplicaPlacement, vi.Collection, vi.Version, vi.FileCount, vi.DeleteCount, vi.DeletedByteCount, vi.ReadOnly)
+}
+
+/*VolumesInfo sorting*/
+
+type volumeInfos []*VolumeInfo
+
+func (vis volumeInfos) Len() int {
+ return len(vis)
+}
+
+func (vis volumeInfos) Less(i, j int) bool {
+ return vis[i].Id < vis[j].Id
+}
+
+func (vis volumeInfos) Swap(i, j int) {
+ vis[i], vis[j] = vis[j], vis[i]
+}
+
+func sortVolumeInfos(vis volumeInfos) {
+ sort.Sort(vis)
+}
diff --git a/weed/storage/volume_info_test.go b/weed/storage/volume_info_test.go
new file mode 100644
index 000000000..9a9c43ad2
--- /dev/null
+++ b/weed/storage/volume_info_test.go
@@ -0,0 +1,23 @@
+package storage
+
+import "testing"
+
+func TestSortVolumeInfos(t *testing.T) {
+ vis := []*VolumeInfo{
+ &VolumeInfo{
+ Id: 2,
+ },
+ &VolumeInfo{
+ Id: 1,
+ },
+ &VolumeInfo{
+ Id: 3,
+ },
+ }
+ sortVolumeInfos(vis)
+ for i := 0; i < len(vis); i++ {
+ if vis[i].Id != VolumeId(i+1) {
+ t.Fatal()
+ }
+ }
+}
diff --git a/weed/storage/volume_super_block.go b/weed/storage/volume_super_block.go
new file mode 100644
index 000000000..fc773273d
--- /dev/null
+++ b/weed/storage/volume_super_block.go
@@ -0,0 +1,81 @@
+package storage
+
+import (
+ "fmt"
+ "os"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+const (
+ SuperBlockSize = 8
+)
+
+/*
+* Super block currently has 8 bytes allocated for each volume.
+* Byte 0: version, 1 or 2
+* Byte 1: Replica Placement strategy, e.g. 000, 001, 002, 010
+* Byte 2 and byte 3: Time to live. See TTL for definition
+* Byte 4 and byte 5: The number of times the volume has been compacted.
+* Remaining bytes: reserved
+ */
+type SuperBlock struct {
+ version Version
+ ReplicaPlacement *ReplicaPlacement
+ Ttl *TTL
+ CompactRevision uint16
+}
+
+func (s *SuperBlock) Version() Version {
+ return s.version
+}
+func (s *SuperBlock) Bytes() []byte {
+ header := make([]byte, SuperBlockSize)
+ header[0] = byte(s.version)
+ header[1] = s.ReplicaPlacement.Byte()
+ s.Ttl.ToBytes(header[2:4])
+ util.Uint16toBytes(header[4:6], s.CompactRevision)
+ return header
+}
+
+func (v *Volume) maybeWriteSuperBlock() error {
+ stat, e := v.dataFile.Stat()
+ if e != nil {
+ glog.V(0).Infof("failed to stat datafile %s: %v", v.dataFile, e)
+ return e
+ }
+ if stat.Size() == 0 {
+ v.SuperBlock.version = CurrentVersion
+ _, e = v.dataFile.Write(v.SuperBlock.Bytes())
+ if e != nil && os.IsPermission(e) {
+ //read-only, but zero length - recreate it!
+ if v.dataFile, e = os.Create(v.dataFile.Name()); e == nil {
+ if _, e = v.dataFile.Write(v.SuperBlock.Bytes()); e == nil {
+ v.readOnly = false
+ }
+ }
+ }
+ }
+ return e
+}
+func (v *Volume) readSuperBlock() (err error) {
+ if _, err = v.dataFile.Seek(0, 0); err != nil {
+ return fmt.Errorf("cannot seek to the beginning of %s: %v", v.dataFile.Name(), err)
+ }
+ header := make([]byte, SuperBlockSize)
+ if _, e := v.dataFile.Read(header); e != nil {
+ return fmt.Errorf("cannot read volume %d super block: %v", v.Id, e)
+ }
+ v.SuperBlock, err = ParseSuperBlock(header)
+ return err
+}
+func ParseSuperBlock(header []byte) (superBlock SuperBlock, err error) {
+ superBlock.version = Version(header[0])
+ if superBlock.ReplicaPlacement, err = NewReplicaPlacementFromByte(header[1]); err != nil {
+ err = fmt.Errorf("cannot read replica type: %s", err.Error())
+ }
+ superBlock.Ttl = LoadTTLFromBytes(header[2:4])
+ superBlock.CompactRevision = util.BytesToUint16(header[4:6])
+ return
+}
diff --git a/weed/storage/volume_super_block_test.go b/weed/storage/volume_super_block_test.go
new file mode 100644
index 000000000..13db4b194
--- /dev/null
+++ b/weed/storage/volume_super_block_test.go
@@ -0,0 +1,23 @@
+package storage
+
+import (
+ "testing"
+)
+
+func TestSuperBlockReadWrite(t *testing.T) {
+ rp, _ := NewReplicaPlacementFromByte(byte(001))
+ ttl, _ := ReadTTL("15d")
+ s := &SuperBlock{
+ version: CurrentVersion,
+ ReplicaPlacement: rp,
+ Ttl: ttl,
+ }
+
+ bytes := s.Bytes()
+
+ if !(bytes[2] == 15 && bytes[3] == Day) {
+ println("byte[2]:", bytes[2], "byte[3]:", bytes[3])
+ t.Fail()
+ }
+
+}
diff --git a/weed/storage/volume_sync.go b/weed/storage/volume_sync.go
new file mode 100644
index 000000000..231ff31c2
--- /dev/null
+++ b/weed/storage/volume_sync.go
@@ -0,0 +1,213 @@
+package storage
+
+import (
+ "fmt"
+ "io"
+ "io/ioutil"
+ "net/url"
+ "os"
+ "sort"
+ "strconv"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+// The volume syncs with a master volume in 2 steps:
+// 1. The slave checks the master side to find the subscription checkpoint
+//    to set up the replication.
+// 2. The slave receives the updates from the master.
+
+/*
+Assume the slave volume needs to follow the master volume.
+
+The master volume could be compacted, and could be many files ahead of
+the slave volume.
+
+Step 1:
+The slave volume will ask the master volume for a snapshot
+of (existing file entries, last offset, number of compacted times).
+
+For each entry x in master existing file entries:
+ if x does not exist locally:
+ add x locally
+
+For each entry y in local slave existing file entries:
+ if y does not exist on master:
+ delete y locally
+
+Step 2:
+After this, use the last offset and number of compacted times to request
+the master volume to send a new file, and keep looping. If the number of
+compacted times has changed, go back to step 1 (very likely this can be
+optimized further later).
+
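+Synchronize below makes up to 3 attempts, and compacts the local volume first
+whenever the master's compact revision has changed since the previous attempt.
+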
+*/
+
+func (v *Volume) Synchronize(volumeServer string) (err error) {
+ var lastCompactRevision uint16 = 0
+ var compactRevision uint16 = 0
+ var masterMap CompactMap
+ for i := 0; i < 3; i++ {
+ if masterMap, _, compactRevision, err = fetchVolumeFileEntries(volumeServer, v.Id); err != nil {
+ return fmt.Errorf("Failed to sync volume %d entries with %s: %v", v.Id, volumeServer, err)
+ }
+ if lastCompactRevision != compactRevision && lastCompactRevision != 0 {
+ if err = v.Compact(); err != nil {
+ return fmt.Errorf("Compact Volume before synchronizing %v", err)
+ }
+ if err = v.commitCompact(); err != nil {
+ return fmt.Errorf("Commit Compact before synchronizing %v", err)
+ }
+ }
+ lastCompactRevision = compactRevision
+ if err = v.trySynchronizing(volumeServer, masterMap, compactRevision); err == nil {
+ return
+ }
+ }
+ return
+}
+
+type ByOffset []NeedleValue
+
+func (a ByOffset) Len() int { return len(a) }
+func (a ByOffset) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+func (a ByOffset) Less(i, j int) bool { return a[i].Offset < a[j].Offset }
+
+// trySynchronizing syncs with the remote volume server incrementally by
+// making up the local and remote delta.
+func (v *Volume) trySynchronizing(volumeServer string, masterMap CompactMap, compactRevision uint16) error {
+ slaveIdxFile, err := os.Open(v.nm.IndexFileName())
+ if err != nil {
+ return fmt.Errorf("Open volume %d index file: %v", v.Id, err)
+ }
+ defer slaveIdxFile.Close()
+ slaveMap, err := LoadNeedleMap(slaveIdxFile)
+ if err != nil {
+ return fmt.Errorf("Load volume %d index file: %v", v.Id, err)
+ }
+ var delta []NeedleValue
+ if err := masterMap.Visit(func(needleValue NeedleValue) error {
+ if needleValue.Key == 0 {
+ return nil
+ }
+ if _, ok := slaveMap.Get(uint64(needleValue.Key)); ok {
+ return nil // skip intersection
+ }
+ delta = append(delta, needleValue)
+ return nil
+ }); err != nil {
+ return fmt.Errorf("Add master entry: %v", err)
+ }
+ if err := slaveMap.m.Visit(func(needleValue NeedleValue) error {
+ if needleValue.Key == 0 {
+ return nil
+ }
+ if _, ok := masterMap.Get(needleValue.Key); ok {
+ return nil // skip intersection
+ }
+ needleValue.Size = 0
+ delta = append(delta, needleValue)
+ return nil
+ }); err != nil {
+ return fmt.Errorf("Remove local entry: %v", err)
+ }
+
+ // sort to match the ordering of needle entries in the remote .dat file
+ sort.Sort(ByOffset(delta))
+
+ // make up the delta
+ fetchCount := 0
+ volumeDataContentHandlerUrl := "http://" + volumeServer + "/admin/sync/data"
+ for _, needleValue := range delta {
+ if needleValue.Size == 0 {
+ // remove file entry from local
+ v.removeNeedle(needleValue.Key)
+ continue
+ }
+ // add master file entry to local data file
+ if err := v.fetchNeedle(volumeDataContentHandlerUrl, needleValue, compactRevision); err != nil {
+ glog.V(0).Infof("Fetch needle %v from %s: %v", needleValue, volumeServer, err)
+ return err
+ }
+ fetchCount++
+ }
+ glog.V(1).Infof("Fetched %d needles from %s", fetchCount, volumeServer)
+ return nil
+}
+
+func fetchVolumeFileEntries(volumeServer string, vid VolumeId) (m CompactMap, lastOffset uint64, compactRevision uint16, err error) {
+ m = NewCompactMap()
+
+ syncStatus, err := operation.GetVolumeSyncStatus(volumeServer, vid.String())
+ if err != nil {
+ return m, 0, 0, err
+ }
+
+ total := 0
+ err = operation.GetVolumeIdxEntries(volumeServer, vid.String(), func(key uint64, offset, size uint32) {
+ // println("remote key", key, "offset", offset*NeedlePaddingSize, "size", size)
+ if offset != 0 && size != 0 {
+ m.Set(Key(key), offset, size)
+ } else {
+ m.Delete(Key(key))
+ }
+ total++
+ })
+
+ glog.V(2).Infof("server %s volume %d, entries %d, last offset %d, revision %d", volumeServer, vid, total, syncStatus.TailOffset, syncStatus.CompactRevision)
+ return m, syncStatus.TailOffset, syncStatus.CompactRevision, err
+
+}
+
+func (v *Volume) GetVolumeSyncStatus() operation.SyncVolumeResponse {
+ var syncStatus = operation.SyncVolumeResponse{}
+ if stat, err := v.dataFile.Stat(); err == nil {
+ syncStatus.TailOffset = uint64(stat.Size())
+ }
+ syncStatus.IdxFileSize = v.nm.IndexFileSize()
+ syncStatus.CompactRevision = v.SuperBlock.CompactRevision
+ syncStatus.Ttl = v.SuperBlock.Ttl.String()
+ syncStatus.Replication = v.SuperBlock.ReplicaPlacement.String()
+ return syncStatus
+}
+
+func (v *Volume) IndexFileContent() ([]byte, error) {
+ return v.nm.IndexFileContent()
+}
+
+// removeNeedle removes one needle by needle key
+func (v *Volume) removeNeedle(key Key) {
+ n := new(Needle)
+ n.Id = uint64(key)
+ v.delete(n)
+}
+
+// fetchNeedle fetches a remote volume needle by vid, id, and offset.
+// The compact revision is checked first in case the remote volume
+// has been compacted and the offset is no longer valid.
+func (v *Volume) fetchNeedle(volumeDataContentHandlerUrl string,
+ needleValue NeedleValue, compactRevision uint16) error {
+ // add master file entry to local data file
+ values := make(url.Values)
+ values.Add("revision", strconv.Itoa(int(compactRevision)))
+ values.Add("volume", v.Id.String())
+ values.Add("id", needleValue.Key.String())
+ values.Add("offset", strconv.FormatUint(uint64(needleValue.Offset), 10))
+ values.Add("size", strconv.FormatUint(uint64(needleValue.Size), 10))
+ glog.V(4).Infof("Fetch %+v", needleValue)
+ return util.GetUrlStream(volumeDataContentHandlerUrl, values, func(r io.Reader) error {
+ b, err := ioutil.ReadAll(r)
+ if err != nil {
+ return fmt.Errorf("Reading from %s error: %v", volumeDataContentHandlerUrl, err)
+ }
+ offset, err := v.AppendBlob(b)
+ if err != nil {
+ return fmt.Errorf("Appending volume %d error: %v", v.Id, err)
+ }
+ // println("add key", needleValue.Key, "offset", offset, "size", needleValue.Size)
+ v.nm.Put(uint64(needleValue.Key), uint32(offset/NeedlePaddingSize), needleValue.Size)
+ return nil
+ })
+}
diff --git a/weed/storage/volume_ttl.go b/weed/storage/volume_ttl.go
new file mode 100644
index 000000000..4318bb048
--- /dev/null
+++ b/weed/storage/volume_ttl.go
@@ -0,0 +1,135 @@
+package storage
+
+import (
+ "strconv"
+)
+
+const (
+ //stored unit types
+ Empty byte = iota
+ Minute
+ Hour
+ Day
+ Week
+ Month
+ Year
+)
+
+type TTL struct {
+ count byte
+ unit byte
+}
+
+var EMPTY_TTL = &TTL{}
+
+// ReadTTL translates a human-readable ttl string to the internal ttl.
+// Supported formats, for example:
+// 3m: 3 minutes
+// 4h: 4 hours
+// 5d: 5 days
+// 6w: 6 weeks
+// 7M: 7 months
+// 8y: 8 years
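+//
+// For example, ReadTTL("5d") gives a ttl of 5 days (Minutes() == 5*24*60),
+// and a bare number such as "9" is treated as minutes.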
+func ReadTTL(ttlString string) (*TTL, error) {
+ if ttlString == "" {
+ return EMPTY_TTL, nil
+ }
+ ttlBytes := []byte(ttlString)
+ unitByte := ttlBytes[len(ttlBytes)-1]
+ countBytes := ttlBytes[0 : len(ttlBytes)-1]
+ if '0' <= unitByte && unitByte <= '9' {
+ countBytes = ttlBytes
+ unitByte = 'm'
+ }
+ count, err := strconv.Atoi(string(countBytes))
+ unit := toStoredByte(unitByte)
+ return &TTL{count: byte(count), unit: unit}, err
+}
+
+// read stored bytes to a ttl
+func LoadTTLFromBytes(input []byte) (t *TTL) {
+ return &TTL{count: input[0], unit: input[1]}
+}
+
+// read a stored uint32 to a ttl
+func LoadTTLFromUint32(ttl uint32) (t *TTL) {
+ input := make([]byte, 2)
+ input[1] = byte(ttl)
+ input[0] = byte(ttl >> 8)
+ return LoadTTLFromBytes(input)
+}
+
+// write the ttl into a 2-byte output slice
+func (t *TTL) ToBytes(output []byte) {
+ output[0] = t.count
+ output[1] = t.unit
+}
+
+func (t *TTL) ToUint32() (output uint32) {
+ output = uint32(t.count) << 8
+ output += uint32(t.unit)
+ return output
+}
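+
+// ToUint32 and LoadTTLFromUint32 are inverses of each other, so a ttl can be
+// carried in a volume information message without losing its count or unit:
+//
+//	ttl, _ := ReadTTL("15d")
+//	same := LoadTTLFromUint32(ttl.ToUint32()) // same.Minutes() == ttl.Minutes()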
+
+func (t *TTL) String() string {
+ if t == nil || t.count == 0 {
+ return ""
+ }
+ if t.unit == Empty {
+ return ""
+ }
+ countString := strconv.Itoa(int(t.count))
+ switch t.unit {
+ case Minute:
+ return countString + "m"
+ case Hour:
+ return countString + "h"
+ case Day:
+ return countString + "d"
+ case Week:
+ return countString + "w"
+ case Month:
+ return countString + "M"
+ case Year:
+ return countString + "y"
+ }
+ return ""
+}
+
+func toStoredByte(readableUnitByte byte) byte {
+ switch readableUnitByte {
+ case 'm':
+ return Minute
+ case 'h':
+ return Hour
+ case 'd':
+ return Day
+ case 'w':
+ return Week
+ case 'M':
+ return Month
+ case 'y':
+ return Year
+ }
+ return 0
+}
+
+func (t TTL) Minutes() uint32 {
+ switch t.unit {
+ case Empty:
+ return 0
+ case Minute:
+ return uint32(t.count)
+ case Hour:
+ return uint32(t.count) * 60
+ case Day:
+ return uint32(t.count) * 60 * 24
+ case Week:
+ return uint32(t.count) * 60 * 24 * 7
+ case Month:
+ return uint32(t.count) * 60 * 24 * 31
+ case Year:
+ return uint32(t.count) * 60 * 24 * 365
+ }
+ return 0
+}
diff --git a/weed/storage/volume_ttl_test.go b/weed/storage/volume_ttl_test.go
new file mode 100644
index 000000000..216469a4c
--- /dev/null
+++ b/weed/storage/volume_ttl_test.go
@@ -0,0 +1,60 @@
+package storage
+
+import (
+ "testing"
+)
+
+func TestTTLReadWrite(t *testing.T) {
+ ttl, _ := ReadTTL("")
+ if ttl.Minutes() != 0 {
+ t.Errorf("empty ttl:%v", ttl)
+ }
+
+ ttl, _ = ReadTTL("9")
+ if ttl.Minutes() != 9 {
+ t.Errorf("9 ttl:%v", ttl)
+ }
+
+ ttl, _ = ReadTTL("8m")
+ if ttl.Minutes() != 8 {
+ t.Errorf("8m ttl:%v", ttl)
+ }
+
+ ttl, _ = ReadTTL("5h")
+ if ttl.Minutes() != 300 {
+ t.Errorf("5h ttl:%v", ttl)
+ }
+
+ ttl, _ = ReadTTL("5d")
+ if ttl.Minutes() != 5*24*60 {
+ t.Errorf("5d ttl:%v", ttl)
+ }
+
+ ttl, _ = ReadTTL("5w")
+ if ttl.Minutes() != 5*7*24*60 {
+ t.Errorf("5w ttl:%v", ttl)
+ }
+
+ ttl, _ = ReadTTL("5M")
+ if ttl.Minutes() != 5*31*24*60 {
+ t.Errorf("5M ttl:%v", ttl)
+ }
+
+ ttl, _ = ReadTTL("5y")
+ if ttl.Minutes() != 5*365*24*60 {
+ t.Errorf("5y ttl:%v", ttl)
+ }
+
+ output := make([]byte, 2)
+ ttl.ToBytes(output)
+ ttl2 := LoadTTLFromBytes(output)
+ if ttl.Minutes() != ttl2.Minutes() {
+ t.Errorf("ttl:%v ttl2:%v", ttl, ttl2)
+ }
+
+ ttl3 := LoadTTLFromUint32(ttl.ToUint32())
+ if ttl.Minutes() != ttl3.Minutes() {
+ t.Errorf("ttl:%v ttl3:%v", ttl, ttl3)
+ }
+
+}
diff --git a/weed/storage/volume_vacuum.go b/weed/storage/volume_vacuum.go
new file mode 100644
index 000000000..9b9a27816
--- /dev/null
+++ b/weed/storage/volume_vacuum.go
@@ -0,0 +1,93 @@
+package storage
+
+import (
+ "fmt"
+ "os"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+func (v *Volume) garbageLevel() float64 {
+ return float64(v.nm.DeletedSize()) / float64(v.ContentSize())
+}
+
+func (v *Volume) Compact() error {
+ glog.V(3).Infof("Compacting ...")
+ //no need to lock for copy on write
+ //v.accessLock.Lock()
+ //defer v.accessLock.Unlock()
+ //glog.V(3).Infof("Got Compaction lock...")
+
+ filePath := v.FileName()
+ glog.V(3).Infof("creating copies for volume %d ...", v.Id)
+ return v.copyDataAndGenerateIndexFile(filePath+".cpd", filePath+".cpx")
+}
+func (v *Volume) commitCompact() error {
+ glog.V(3).Infof("Committing vacuuming...")
+ v.dataFileAccessLock.Lock()
+ defer v.dataFileAccessLock.Unlock()
+ glog.V(3).Infof("Got Committing lock...")
+ v.nm.Close()
+ _ = v.dataFile.Close()
+ var e error
+ if e = os.Rename(v.FileName()+".cpd", v.FileName()+".dat"); e != nil {
+ return e
+ }
+ if e = os.Rename(v.FileName()+".cpx", v.FileName()+".idx"); e != nil {
+ return e
+ }
+ //glog.V(3).Infof("Pretending to be vacuuming...")
+ //time.Sleep(20 * time.Second)
+ glog.V(3).Infof("Loading Commit file...")
+ if e = v.load(true, false, v.needleMapKind); e != nil {
+ return e
+ }
+ return nil
+}
+
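+// copyDataAndGenerateIndexFile scans the current .dat file and copies only the
+// needles that are still live (present in the needle map and not expired by
+// ttl) into the .cpd file, writing a matching .cpx index and bumping the super
+// block's compact revision.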
+func (v *Volume) copyDataAndGenerateIndexFile(dstName, idxName string) (err error) {
+ var (
+ dst, idx *os.File
+ )
+ if dst, err = os.OpenFile(dstName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644); err != nil {
+ return
+ }
+ defer dst.Close()
+
+ if idx, err = os.OpenFile(idxName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644); err != nil {
+ return
+ }
+ defer idx.Close()
+
+ nm := NewNeedleMap(idx)
+ new_offset := int64(SuperBlockSize)
+
+ now := uint64(time.Now().Unix())
+
+ err = ScanVolumeFile(v.dir, v.Collection, v.Id, v.needleMapKind,
+ func(superBlock SuperBlock) error {
+ superBlock.CompactRevision++
+ _, err = dst.Write(superBlock.Bytes())
+ return err
+ }, true, func(n *Needle, offset int64) error {
+ if n.HasTtl() && now >= n.LastModified+uint64(v.Ttl.Minutes()*60) {
+ return nil
+ }
+ nv, ok := v.nm.Get(n.Id)
+ glog.V(4).Infoln("needle expected offset ", offset, "ok", ok, "nv", nv)
+ if ok && int64(nv.Offset)*NeedlePaddingSize == offset && nv.Size > 0 {
+ if err = nm.Put(n.Id, uint32(new_offset/NeedlePaddingSize), n.Size); err != nil {
+ return fmt.Errorf("cannot put needle: %s", err)
+ }
+ if _, err = n.Append(dst, v.Version()); err != nil {
+ return fmt.Errorf("cannot append needle: %s", err)
+ }
+ new_offset += n.DiskSize()
+ glog.V(3).Infoln("saving key", n.Id, "volume offset", offset, "=>", new_offset, "data_size", n.Size)
+ }
+ return nil
+ })
+
+ return
+}
diff --git a/weed/storage/volume_version.go b/weed/storage/volume_version.go
new file mode 100644
index 000000000..2e9f58aa2
--- /dev/null
+++ b/weed/storage/volume_version.go
@@ -0,0 +1,9 @@
+package storage
+
+type Version uint8
+
+const (
+ Version1 = Version(1)
+ Version2 = Version(2)
+ CurrentVersion = Version2
+)
diff --git a/weed/tools/read_index.go b/weed/tools/read_index.go
new file mode 100644
index 000000000..642ff786b
--- /dev/null
+++ b/weed/tools/read_index.go
@@ -0,0 +1,28 @@
+package main
+
+import (
+ "flag"
+ "fmt"
+ "log"
+ "os"
+
+ "github.com/chrislusf/seaweedfs/weed/storage"
+)
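+
+// read_index prints every entry of a volume .idx file, for example
+// (the path below is just an illustration):
+//
+//	go run read_index.go -file=/data/weed/1.idx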
+
+var (
+ indexFileName = flag.String("file", "", ".idx file to analyze")
+)
+
+func main() {
+ flag.Parse()
+ indexFile, err := os.OpenFile(*indexFileName, os.O_RDONLY, 0644)
+ if err != nil {
+ log.Fatalf("Open Volume Index [ERROR] %s\n", err)
+ }
+ defer indexFile.Close()
+
+ storage.WalkIndexFile(indexFile, func(key uint64, offset, size uint32) error {
+ fmt.Printf("key %d, offset %d, size %d, nextOffset %d\n", key, offset*8, size, offset*8+size)
+ return nil
+ })
+}
diff --git a/weed/topology/allocate_volume.go b/weed/topology/allocate_volume.go
new file mode 100644
index 000000000..7b267a805
--- /dev/null
+++ b/weed/topology/allocate_volume.go
@@ -0,0 +1,35 @@
+package topology
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "net/url"
+
+ "github.com/chrislusf/seaweedfs/weed/storage"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+type AllocateVolumeResult struct {
+ Error string
+}
+
+func AllocateVolume(dn *DataNode, vid storage.VolumeId, option *VolumeGrowOption) error {
+ values := make(url.Values)
+ values.Add("volume", vid.String())
+ values.Add("collection", option.Collection)
+ values.Add("replication", option.ReplicaPlacement.String())
+ values.Add("ttl", option.Ttl.String())
+ jsonBlob, err := util.Post("http://"+dn.Url()+"/admin/assign_volume", values)
+ if err != nil {
+ return err
+ }
+ var ret AllocateVolumeResult
+ if err := json.Unmarshal(jsonBlob, &ret); err != nil {
+ return fmt.Errorf("Invalid JSON result for %s: %s", "/admin/assign_volum", string(jsonBlob))
+ }
+ if ret.Error != "" {
+ return errors.New(ret.Error)
+ }
+ return nil
+}
diff --git a/weed/topology/cluster_commands.go b/weed/topology/cluster_commands.go
new file mode 100644
index 000000000..53f45ec4d
--- /dev/null
+++ b/weed/topology/cluster_commands.go
@@ -0,0 +1,31 @@
+package topology
+
+import (
+ "github.com/chrislusf/raft"
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+)
+
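+// MaxVolumeIdCommand is the raft command used to propagate the highest known
+// volume id to all masters, so that newly assigned volume ids never collide.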
+type MaxVolumeIdCommand struct {
+ MaxVolumeId storage.VolumeId `json:"maxVolumeId"`
+}
+
+func NewMaxVolumeIdCommand(value storage.VolumeId) *MaxVolumeIdCommand {
+ return &MaxVolumeIdCommand{
+ MaxVolumeId: value,
+ }
+}
+
+func (c *MaxVolumeIdCommand) CommandName() string {
+ return "MaxVolumeId"
+}
+
+func (c *MaxVolumeIdCommand) Apply(server raft.Server) (interface{}, error) {
+ topo := server.Context().(*Topology)
+ before := topo.GetMaxVolumeId()
+ topo.UpAdjustMaxVolumeId(c.MaxVolumeId)
+
+ glog.V(4).Infoln("max volume id", before, "==>", topo.GetMaxVolumeId())
+
+ return nil, nil
+}
diff --git a/weed/topology/collection.go b/weed/topology/collection.go
new file mode 100644
index 000000000..a17f0c961
--- /dev/null
+++ b/weed/topology/collection.go
@@ -0,0 +1,57 @@
+package topology
+
+import (
+ "fmt"
+
+ "github.com/chrislusf/seaweedfs/weed/storage"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+type Collection struct {
+ Name string
+ volumeSizeLimit uint64
+ storageType2VolumeLayout *util.ConcurrentReadMap
+}
+
+func NewCollection(name string, volumeSizeLimit uint64) *Collection {
+ c := &Collection{Name: name, volumeSizeLimit: volumeSizeLimit}
+ c.storageType2VolumeLayout = util.NewConcurrentReadMap()
+ return c
+}
+
+func (c *Collection) String() string {
+ return fmt.Sprintf("Name:%s, volumeSizeLimit:%d, storageType2VolumeLayout:%v", c.Name, c.volumeSizeLimit, c.storageType2VolumeLayout)
+}
+
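+// GetOrCreateVolumeLayout returns the volume layout keyed by replica placement
+// plus ttl (for example "001" + "15d"), creating it on first use.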
+func (c *Collection) GetOrCreateVolumeLayout(rp *storage.ReplicaPlacement, ttl *storage.TTL) *VolumeLayout {
+ keyString := rp.String()
+ if ttl != nil {
+ keyString += ttl.String()
+ }
+ vl := c.storageType2VolumeLayout.Get(keyString, func() interface{} {
+ return NewVolumeLayout(rp, ttl, c.volumeSizeLimit)
+ })
+ return vl.(*VolumeLayout)
+}
+
+func (c *Collection) Lookup(vid storage.VolumeId) []*DataNode {
+ for _, vl := range c.storageType2VolumeLayout.Items() {
+ if vl != nil {
+ if list := vl.(*VolumeLayout).Lookup(vid); list != nil {
+ return list
+ }
+ }
+ }
+ return nil
+}
+
+func (c *Collection) ListVolumeServers() (nodes []*DataNode) {
+ for _, vl := range c.storageType2VolumeLayout.Items() {
+ if vl != nil {
+ if list := vl.(*VolumeLayout).ListVolumeServers(); list != nil {
+ nodes = append(nodes, list...)
+ }
+ }
+ }
+ return
+}
diff --git a/weed/topology/configuration.go b/weed/topology/configuration.go
new file mode 100644
index 000000000..ffcebb59c
--- /dev/null
+++ b/weed/topology/configuration.go
@@ -0,0 +1,65 @@
+package topology
+
+import (
+ "encoding/xml"
+)
+
+type loc struct {
+ dcName string
+ rackName string
+}
+type rack struct {
+ Name string `xml:"name,attr"`
+ Ips []string `xml:"Ip"`
+}
+type dataCenter struct {
+ Name string `xml:"name,attr"`
+ Racks []rack `xml:"Rack"`
+}
+type topology struct {
+ DataCenters []dataCenter `xml:"DataCenter"`
+}
+type Configuration struct {
+ XMLName xml.Name `xml:"Configuration"`
+ Topo topology `xml:"Topology"`
+ ip2location map[string]loc
+}
+
+func NewConfiguration(b []byte) (*Configuration, error) {
+ c := &Configuration{}
+ err := xml.Unmarshal(b, c)
+ c.ip2location = make(map[string]loc)
+ for _, dc := range c.Topo.DataCenters {
+ for _, rack := range dc.Racks {
+ for _, ip := range rack.Ips {
+ c.ip2location[ip] = loc{dcName: dc.Name, rackName: rack.Name}
+ }
+ }
+ }
+ return c, err
+}
+
+func (c *Configuration) String() string {
+ if b, e := xml.MarshalIndent(c, " ", " "); e == nil {
+ return string(b)
+ }
+ return ""
+}
+
+func (c *Configuration) Locate(ip string, dcName string, rackName string) (dc string, rack string) {
+ if c != nil && c.ip2location != nil {
+ if loc, ok := c.ip2location[ip]; ok {
+ return loc.dcName, loc.rackName
+ }
+ }
+
+ if dcName == "" {
+ dcName = "DefaultDataCenter"
+ }
+
+ if rackName == "" {
+ rackName = "DefaultRack"
+ }
+
+ return dcName, rackName
+}
diff --git a/weed/topology/configuration_test.go b/weed/topology/configuration_test.go
new file mode 100644
index 000000000..0a353d16e
--- /dev/null
+++ b/weed/topology/configuration_test.go
@@ -0,0 +1,42 @@
+package topology
+
+import (
+ "fmt"
+ "testing"
+)
+
+func TestLoadConfiguration(t *testing.T) {
+
+ confContent := `
+
+<?xml version="1.0" encoding="UTF-8" ?>
+<Configuration>
+ <Topology>
+ <DataCenter name="dc1">
+ <Rack name="rack1">
+ <Ip>192.168.1.1</Ip>
+ </Rack>
+ </DataCenter>
+ <DataCenter name="dc2">
+ <Rack name="rack1">
+ <Ip>192.168.1.2</Ip>
+ </Rack>
+ <Rack name="rack2">
+ <Ip>192.168.1.3</Ip>
+ <Ip>192.168.1.4</Ip>
+ </Rack>
+ </DataCenter>
+ </Topology>
+</Configuration>
+`
+ c, err := NewConfiguration([]byte(confContent))
+
+ fmt.Printf("%s\n", c)
+ if err != nil {
+ t.Fatalf("unmarshal error:%v", err)
+ }
+
+ if len(c.Topo.DataCenters) <= 0 || c.Topo.DataCenters[0].Name != "dc1" {
+ t.Fatalf("unmarshal error:%s", c)
+ }
+}
diff --git a/weed/topology/data_center.go b/weed/topology/data_center.go
new file mode 100644
index 000000000..bcf2dfd31
--- /dev/null
+++ b/weed/topology/data_center.go
@@ -0,0 +1,40 @@
+package topology
+
+type DataCenter struct {
+ NodeImpl
+}
+
+func NewDataCenter(id string) *DataCenter {
+ dc := &DataCenter{}
+ dc.id = NodeId(id)
+ dc.nodeType = "DataCenter"
+ dc.children = make(map[NodeId]Node)
+ dc.NodeImpl.value = dc
+ return dc
+}
+
+func (dc *DataCenter) GetOrCreateRack(rackName string) *Rack {
+ for _, c := range dc.Children() {
+ rack := c.(*Rack)
+ if string(rack.Id()) == rackName {
+ return rack
+ }
+ }
+ rack := NewRack(rackName)
+ dc.LinkChildNode(rack)
+ return rack
+}
+
+func (dc *DataCenter) ToMap() interface{} {
+ m := make(map[string]interface{})
+ m["Id"] = dc.Id()
+ m["Max"] = dc.GetMaxVolumeCount()
+ m["Free"] = dc.FreeSpace()
+ var racks []interface{}
+ for _, c := range dc.Children() {
+ rack := c.(*Rack)
+ racks = append(racks, rack.ToMap())
+ }
+ m["Racks"] = racks
+ return m
+}
diff --git a/weed/topology/data_node.go b/weed/topology/data_node.go
new file mode 100644
index 000000000..1404d4aa8
--- /dev/null
+++ b/weed/topology/data_node.go
@@ -0,0 +1,115 @@
+package topology
+
+import (
+ "fmt"
+ "strconv"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+)
+
+type DataNode struct {
+ NodeImpl
+ volumes map[storage.VolumeId]storage.VolumeInfo
+ Ip string
+ Port int
+ PublicUrl string
+ LastSeen int64 // unix time in seconds
+ Dead bool
+}
+
+func NewDataNode(id string) *DataNode {
+ s := &DataNode{}
+ s.id = NodeId(id)
+ s.nodeType = "DataNode"
+ s.volumes = make(map[storage.VolumeId]storage.VolumeInfo)
+ s.NodeImpl.value = s
+ return s
+}
+
+func (dn *DataNode) String() string {
+ dn.RLock()
+ defer dn.RUnlock()
+ return fmt.Sprintf("Node:%s, volumes:%v, Ip:%s, Port:%d, PublicUrl:%s, Dead:%v", dn.NodeImpl.String(), dn.volumes, dn.Ip, dn.Port, dn.PublicUrl, dn.Dead)
+}
+
+func (dn *DataNode) AddOrUpdateVolume(v storage.VolumeInfo) {
+ dn.Lock()
+ defer dn.Unlock()
+ if _, ok := dn.volumes[v.Id]; !ok {
+ dn.volumes[v.Id] = v
+ dn.UpAdjustVolumeCountDelta(1)
+ if !v.ReadOnly {
+ dn.UpAdjustActiveVolumeCountDelta(1)
+ }
+ dn.UpAdjustMaxVolumeId(v.Id)
+ } else {
+ dn.volumes[v.Id] = v
+ }
+}
+
+func (dn *DataNode) UpdateVolumes(actualVolumes []storage.VolumeInfo) (deletedVolumes []storage.VolumeInfo) {
+ actualVolumeMap := make(map[storage.VolumeId]storage.VolumeInfo)
+ for _, v := range actualVolumes {
+ actualVolumeMap[v.Id] = v
+ }
+ dn.RLock()
+ for vid, v := range dn.volumes {
+ if _, ok := actualVolumeMap[vid]; !ok {
+ glog.V(0).Infoln("Deleting volume id:", vid)
+ delete(dn.volumes, vid)
+ deletedVolumes = append(deletedVolumes, v)
+ dn.UpAdjustVolumeCountDelta(-1)
+ dn.UpAdjustActiveVolumeCountDelta(-1)
+ }
+ } //TODO: adjust max volume id, if need to reclaim volume ids
+ dn.RUnlock()
+ for _, v := range actualVolumes {
+ dn.AddOrUpdateVolume(v)
+ }
+ return
+}
+
+func (dn *DataNode) GetVolumes() (ret []storage.VolumeInfo) {
+ dn.RLock()
+ for _, v := range dn.volumes {
+ ret = append(ret, v)
+ }
+ dn.RUnlock()
+ return ret
+}
+
+func (dn *DataNode) GetDataCenter() *DataCenter {
+ return dn.Parent().Parent().(*NodeImpl).value.(*DataCenter)
+}
+
+func (dn *DataNode) GetRack() *Rack {
+ return dn.Parent().(*NodeImpl).value.(*Rack)
+}
+
+func (dn *DataNode) GetTopology() *Topology {
+ p := dn.Parent()
+ for p.Parent() != nil {
+ p = p.Parent()
+ }
+ t := p.(*Topology)
+ return t
+}
+
+func (dn *DataNode) MatchLocation(ip string, port int) bool {
+ return dn.Ip == ip && dn.Port == port
+}
+
+func (dn *DataNode) Url() string {
+ return dn.Ip + ":" + strconv.Itoa(dn.Port)
+}
+
+func (dn *DataNode) ToMap() interface{} {
+ ret := make(map[string]interface{})
+ ret["Url"] = dn.Url()
+ ret["Volumes"] = dn.GetVolumeCount()
+ ret["Max"] = dn.GetMaxVolumeCount()
+ ret["Free"] = dn.FreeSpace()
+ ret["PublicUrl"] = dn.PublicUrl
+ return ret
+}
diff --git a/weed/topology/node.go b/weed/topology/node.go
new file mode 100644
index 000000000..4ce35f4b0
--- /dev/null
+++ b/weed/topology/node.go
@@ -0,0 +1,272 @@
+package topology
+
+import (
+ "errors"
+ "math/rand"
+ "strings"
+ "sync"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+)
+
+type NodeId string
+type Node interface {
+ Id() NodeId
+ String() string
+ FreeSpace() int
+ ReserveOneVolume(r int) (*DataNode, error)
+ UpAdjustMaxVolumeCountDelta(maxVolumeCountDelta int)
+ UpAdjustVolumeCountDelta(volumeCountDelta int)
+ UpAdjustActiveVolumeCountDelta(activeVolumeCountDelta int)
+ UpAdjustMaxVolumeId(vid storage.VolumeId)
+
+ GetVolumeCount() int
+ GetActiveVolumeCount() int
+ GetMaxVolumeCount() int
+ GetMaxVolumeId() storage.VolumeId
+ SetParent(Node)
+ LinkChildNode(node Node)
+ UnlinkChildNode(nodeId NodeId)
+ CollectDeadNodeAndFullVolumes(freshThreshHold int64, volumeSizeLimit uint64)
+
+ IsDataNode() bool
+ IsRack() bool
+ IsDataCenter() bool
+ Children() []Node
+ Parent() Node
+
+ GetValue() interface{} //get reference to the topology,dc,rack,datanode
+}
+type NodeImpl struct {
+ id NodeId
+ volumeCount int
+ activeVolumeCount int
+ maxVolumeCount int
+ parent Node
+ sync.RWMutex // lock children
+ children map[NodeId]Node
+ maxVolumeId storage.VolumeId
+
+ //for rack, data center, topology
+ nodeType string
+ value interface{}
+}
+
+// the first node must satisfy filterFirstNodeFn(), the remaining nodes must each have at least one free slot
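+// (the remaining nodes are picked from the free-slot candidates uniformly at
+// random via reservoir sampling)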
+func (n *NodeImpl) RandomlyPickNodes(numberOfNodes int, filterFirstNodeFn func(dn Node) error) (firstNode Node, restNodes []Node, err error) {
+ candidates := make([]Node, 0, len(n.children))
+ var errs []string
+ n.RLock()
+ for _, node := range n.children {
+ if err := filterFirstNodeFn(node); err == nil {
+ candidates = append(candidates, node)
+ } else {
+ errs = append(errs, string(node.Id())+":"+err.Error())
+ }
+ }
+ n.RUnlock()
+ if len(candidates) == 0 {
+ return nil, nil, errors.New("No matching data node found! \n" + strings.Join(errs, "\n"))
+ }
+ firstNode = candidates[rand.Intn(len(candidates))]
+ glog.V(2).Infoln(n.Id(), "picked main node:", firstNode.Id())
+
+ restNodes = make([]Node, numberOfNodes-1)
+ candidates = candidates[:0]
+ n.RLock()
+ for _, node := range n.children {
+ if node.Id() == firstNode.Id() {
+ continue
+ }
+ if node.FreeSpace() <= 0 {
+ continue
+ }
+ glog.V(2).Infoln("select rest node candidate:", node.Id())
+ candidates = append(candidates, node)
+ }
+ n.RUnlock()
+ glog.V(2).Infoln(n.Id(), "picking", numberOfNodes-1, "from rest", len(candidates), "node candidates")
+ ret := len(restNodes) == 0
+ for k, node := range candidates {
+ if k < len(restNodes) {
+ restNodes[k] = node
+ if k == len(restNodes)-1 {
+ ret = true
+ }
+ } else {
+ r := rand.Intn(k + 1)
+ if r < len(restNodes) {
+ restNodes[r] = node
+ }
+ }
+ }
+ if !ret {
+ glog.V(2).Infoln(n.Id(), "failed to pick", numberOfNodes-1, "from rest", len(candidates), "node candidates")
+ err = errors.New("Not enough data nodes found!")
+ }
+ return
+}
+
+func (n *NodeImpl) IsDataNode() bool {
+ return n.nodeType == "DataNode"
+}
+func (n *NodeImpl) IsRack() bool {
+ return n.nodeType == "Rack"
+}
+func (n *NodeImpl) IsDataCenter() bool {
+ return n.nodeType == "DataCenter"
+}
+func (n *NodeImpl) String() string {
+ if n.parent != nil {
+ return n.parent.String() + ":" + string(n.id)
+ }
+ return string(n.id)
+}
+func (n *NodeImpl) Id() NodeId {
+ return n.id
+}
+func (n *NodeImpl) FreeSpace() int {
+ return n.maxVolumeCount - n.volumeCount
+}
+func (n *NodeImpl) SetParent(node Node) {
+ n.parent = node
+}
+func (n *NodeImpl) Children() (ret []Node) {
+ n.RLock()
+ defer n.RUnlock()
+ for _, c := range n.children {
+ ret = append(ret, c)
+ }
+ return ret
+}
+func (n *NodeImpl) Parent() Node {
+ return n.parent
+}
+func (n *NodeImpl) GetValue() interface{} {
+ return n.value
+}
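+
+// ReserveOneVolume walks down the tree using r, a random number weighted by
+// free space, to land on one descendant data node that still has a free slot.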
+func (n *NodeImpl) ReserveOneVolume(r int) (assignedNode *DataNode, err error) {
+ n.RLock()
+ defer n.RUnlock()
+ for _, node := range n.children {
+ freeSpace := node.FreeSpace()
+ // fmt.Println("r =", r, ", node =", node, ", freeSpace =", freeSpace)
+ if freeSpace <= 0 {
+ continue
+ }
+ if r >= freeSpace {
+ r -= freeSpace
+ } else {
+ if node.IsDataNode() && node.FreeSpace() > 0 {
+ // fmt.Println("vid =", vid, " assigned to node =", node, ", freeSpace =", node.FreeSpace())
+ return node.(*DataNode), nil
+ }
+ assignedNode, err = node.ReserveOneVolume(r)
+ if err != nil {
+ return
+ }
+ }
+ }
+ return
+}
+
+func (n *NodeImpl) UpAdjustMaxVolumeCountDelta(maxVolumeCountDelta int) { //can be negative
+ n.maxVolumeCount += maxVolumeCountDelta
+ if n.parent != nil {
+ n.parent.UpAdjustMaxVolumeCountDelta(maxVolumeCountDelta)
+ }
+}
+func (n *NodeImpl) UpAdjustVolumeCountDelta(volumeCountDelta int) { //can be negative
+ n.volumeCount += volumeCountDelta
+ if n.parent != nil {
+ n.parent.UpAdjustVolumeCountDelta(volumeCountDelta)
+ }
+}
+func (n *NodeImpl) UpAdjustActiveVolumeCountDelta(activeVolumeCountDelta int) { //can be negative
+ n.activeVolumeCount += activeVolumeCountDelta
+ if n.parent != nil {
+ n.parent.UpAdjustActiveVolumeCountDelta(activeVolumeCountDelta)
+ }
+}
+func (n *NodeImpl) UpAdjustMaxVolumeId(vid storage.VolumeId) { //can be negative
+ if n.maxVolumeId < vid {
+ n.maxVolumeId = vid
+ if n.parent != nil {
+ n.parent.UpAdjustMaxVolumeId(vid)
+ }
+ }
+}
+func (n *NodeImpl) GetMaxVolumeId() storage.VolumeId {
+ return n.maxVolumeId
+}
+func (n *NodeImpl) GetVolumeCount() int {
+ return n.volumeCount
+}
+func (n *NodeImpl) GetActiveVolumeCount() int {
+ return n.activeVolumeCount
+}
+func (n *NodeImpl) GetMaxVolumeCount() int {
+ return n.maxVolumeCount
+}
+
+func (n *NodeImpl) LinkChildNode(node Node) {
+ n.Lock()
+ defer n.Unlock()
+ if n.children[node.Id()] == nil {
+ n.children[node.Id()] = node
+ n.UpAdjustMaxVolumeCountDelta(node.GetMaxVolumeCount())
+ n.UpAdjustMaxVolumeId(node.GetMaxVolumeId())
+ n.UpAdjustVolumeCountDelta(node.GetVolumeCount())
+ n.UpAdjustActiveVolumeCountDelta(node.GetActiveVolumeCount())
+ node.SetParent(n)
+ glog.V(0).Infoln(n, "adds child", node.Id())
+ }
+}
+
+func (n *NodeImpl) UnlinkChildNode(nodeId NodeId) {
+ n.Lock()
+ defer n.Unlock()
+ node := n.children[nodeId]
+ if node != nil {
+ node.SetParent(nil)
+ delete(n.children, node.Id())
+ n.UpAdjustVolumeCountDelta(-node.GetVolumeCount())
+ n.UpAdjustActiveVolumeCountDelta(-node.GetActiveVolumeCount())
+ n.UpAdjustMaxVolumeCountDelta(-node.GetMaxVolumeCount())
+ glog.V(0).Infoln(n, "removes", node, "volumeCount =", n.activeVolumeCount)
+ }
+}
+
+func (n *NodeImpl) CollectDeadNodeAndFullVolumes(freshThreshHold int64, volumeSizeLimit uint64) {
+ if n.IsRack() {
+ for _, c := range n.Children() {
+ dn := c.(*DataNode) //can not cast n to DataNode
+ if dn.LastSeen < freshThreshHold {
+ if !dn.Dead {
+ dn.Dead = true
+ n.GetTopology().chanDeadDataNodes <- dn
+ }
+ }
+ for _, v := range dn.GetVolumes() {
+ if uint64(v.Size) >= volumeSizeLimit {
+ //fmt.Println("volume",v.Id,"size",v.Size,">",volumeSizeLimit)
+ n.GetTopology().chanFullVolumes <- v
+ }
+ }
+ }
+ } else {
+ for _, c := range n.Children() {
+ c.CollectDeadNodeAndFullVolumes(freshThreshHold, volumeSizeLimit)
+ }
+ }
+}
+
+func (n *NodeImpl) GetTopology() *Topology {
+ var p Node
+ p = n
+ for p.Parent() != nil {
+ p = p.Parent()
+ }
+ return p.GetValue().(*Topology)
+}
diff --git a/weed/topology/rack.go b/weed/topology/rack.go
new file mode 100644
index 000000000..1ca2f8de8
--- /dev/null
+++ b/weed/topology/rack.go
@@ -0,0 +1,65 @@
+package topology
+
+import (
+ "strconv"
+ "time"
+)
+
+type Rack struct {
+ NodeImpl
+}
+
+func NewRack(id string) *Rack {
+ r := &Rack{}
+ r.id = NodeId(id)
+ r.nodeType = "Rack"
+ r.children = make(map[NodeId]Node)
+ r.NodeImpl.value = r
+ return r
+}
+
+func (r *Rack) FindDataNode(ip string, port int) *DataNode {
+ for _, c := range r.Children() {
+ dn := c.(*DataNode)
+ if dn.MatchLocation(ip, port) {
+ return dn
+ }
+ }
+ return nil
+}
+func (r *Rack) GetOrCreateDataNode(ip string, port int, publicUrl string, maxVolumeCount int) *DataNode {
+ for _, c := range r.Children() {
+ dn := c.(*DataNode)
+ if dn.MatchLocation(ip, port) {
+ dn.LastSeen = time.Now().Unix()
+ if dn.Dead {
+ dn.Dead = false
+ r.GetTopology().chanRecoveredDataNodes <- dn
+ dn.UpAdjustMaxVolumeCountDelta(maxVolumeCount - dn.maxVolumeCount)
+ }
+ return dn
+ }
+ }
+ dn := NewDataNode(ip + ":" + strconv.Itoa(port))
+ dn.Ip = ip
+ dn.Port = port
+ dn.PublicUrl = publicUrl
+ dn.maxVolumeCount = maxVolumeCount
+ dn.LastSeen = time.Now().Unix()
+ r.LinkChildNode(dn)
+ return dn
+}
+
+func (r *Rack) ToMap() interface{} {
+ m := make(map[string]interface{})
+ m["Id"] = r.Id()
+ m["Max"] = r.GetMaxVolumeCount()
+ m["Free"] = r.FreeSpace()
+ var dns []interface{}
+ for _, c := range r.Children() {
+ dn := c.(*DataNode)
+ dns = append(dns, dn.ToMap())
+ }
+ m["DataNodes"] = dns
+ return m
+}
diff --git a/weed/topology/store_replicate.go b/weed/topology/store_replicate.go
new file mode 100644
index 000000000..be5777167
--- /dev/null
+++ b/weed/topology/store_replicate.go
@@ -0,0 +1,150 @@
+package topology
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "net/http"
+ "strconv"
+ "strings"
+
+ "net/url"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "github.com/chrislusf/seaweedfs/weed/security"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
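+// ReplicatedWrite writes the needle to the local store first and then, unless
+// this request is itself a replication request (type=replicate), fans the
+// write out to the volume's other locations with the same jwt.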
+func ReplicatedWrite(masterNode string, s *storage.Store,
+ volumeId storage.VolumeId, needle *storage.Needle,
+ r *http.Request) (size uint32, errorStatus string) {
+
+ //check JWT
+ jwt := security.GetJwt(r)
+
+ ret, err := s.Write(volumeId, needle)
+ needToReplicate := !s.HasVolume(volumeId)
+ if err != nil {
+ errorStatus = "Failed to write to local disk (" + err.Error() + ")"
+ } else if ret > 0 {
+ needToReplicate = needToReplicate || s.GetVolume(volumeId).NeedToReplicate()
+ } else {
+ errorStatus = "Failed to write to local disk"
+ }
+ if !needToReplicate && ret > 0 {
+ needToReplicate = s.GetVolume(volumeId).NeedToReplicate()
+ }
+ if needToReplicate { //send to other replica locations
+ if r.FormValue("type") != "replicate" {
+
+ if err = distributedOperation(masterNode, s, volumeId, func(location operation.Location) error {
+ u := url.URL{
+ Scheme: "http",
+ Host: location.Url,
+ Path: r.URL.Path,
+ }
+ q := url.Values{
+ "type": {"replicate"},
+ }
+ if needle.LastModified > 0 {
+ q.Set("ts", strconv.FormatUint(needle.LastModified, 10))
+ }
+ if needle.IsChunkedManifest() {
+ q.Set("cm", "true")
+ }
+ u.RawQuery = q.Encode()
+ _, err := operation.Upload(u.String(),
+ string(needle.Name), bytes.NewReader(needle.Data), needle.IsGzipped(), string(needle.Mime),
+ jwt)
+ return err
+ }); err != nil {
+ ret = 0
+ errorStatus = fmt.Sprintf("Failed to write to replicas for volume %d: %v", volumeId, err)
+ }
+ }
+ }
+ size = ret
+ return
+}
+
+func ReplicatedDelete(masterNode string, store *storage.Store,
+ volumeId storage.VolumeId, n *storage.Needle,
+ r *http.Request) (uint32, error) {
+
+ //check JWT
+ jwt := security.GetJwt(r)
+
+ ret, err := store.Delete(volumeId, n)
+ if err != nil {
+ glog.V(0).Infoln("delete error:", err)
+ return ret, err
+ }
+
+ needToReplicate := !store.HasVolume(volumeId)
+ if !needToReplicate && ret > 0 {
+ needToReplicate = store.GetVolume(volumeId).NeedToReplicate()
+ }
+ if needToReplicate { //send to other replica locations
+ if r.FormValue("type") != "replicate" {
+ if err = distributedOperation(masterNode, store, volumeId, func(location operation.Location) error {
+ return util.Delete("http://"+location.Url+r.URL.Path+"?type=replicate", jwt)
+ }); err != nil {
+ ret = 0
+ }
+ }
+ }
+ return ret, err
+}
+
+type DistributedOperationResult map[string]error
+
+func (dr DistributedOperationResult) Error() error {
+ var errs []string
+ for k, v := range dr {
+ if v != nil {
+ errs = append(errs, fmt.Sprintf("[%s]: %v", k, v))
+ }
+ }
+ if len(errs) == 0 {
+ return nil
+ }
+ return errors.New(strings.Join(errs, "\n"))
+}
+
+type RemoteResult struct {
+ Host string
+ Error error
+}
+
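+// distributedOperation looks up the volume's locations from the master and
+// runs op against every location other than this server, collecting one
+// result per host; it also checks that enough replicas were reached for the
+// volume's replica placement.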
+func distributedOperation(masterNode string, store *storage.Store, volumeId storage.VolumeId, op func(location operation.Location) error) error {
+ if lookupResult, lookupErr := operation.Lookup(masterNode, volumeId.String()); lookupErr == nil {
+ length := 0
+ selfUrl := (store.Ip + ":" + strconv.Itoa(store.Port))
+ results := make(chan RemoteResult)
+ for _, location := range lookupResult.Locations {
+ if location.Url != selfUrl {
+ length++
+ go func(location operation.Location, results chan RemoteResult) {
+ results <- RemoteResult{location.Url, op(location)}
+ }(location, results)
+ }
+ }
+ ret := DistributedOperationResult(make(map[string]error))
+ for i := 0; i < length; i++ {
+ result := <-results
+ ret[result.Host] = result.Error
+ }
+ if volume := store.GetVolume(volumeId); volume != nil {
+ if length+1 < volume.ReplicaPlacement.GetCopyCount() {
+ return fmt.Errorf("replicating opetations [%d] is less than volume's replication copy count [%d]", length+1, volume.ReplicaPlacement.GetCopyCount())
+ }
+ }
+ return ret.Error()
+ } else {
+ glog.V(0).Infoln()
+ return fmt.Errorf("Failed to lookup for %d: %v", volumeId, lookupErr)
+ }
+ return nil
+}
diff --git a/weed/topology/topo_test.go b/weed/topology/topo_test.go
new file mode 100644
index 000000000..9a0dbc6b8
--- /dev/null
+++ b/weed/topology/topo_test.go
@@ -0,0 +1,17 @@
+package topology
+
+import (
+ "testing"
+)
+
+func TestRemoveDataCenter(t *testing.T) {
+ topo := setup(topologyLayout)
+ topo.UnlinkChildNode(NodeId("dc2"))
+ if topo.GetActiveVolumeCount() != 15 {
+ t.Fail()
+ }
+ topo.UnlinkChildNode(NodeId("dc3"))
+ if topo.GetActiveVolumeCount() != 12 {
+ t.Fail()
+ }
+}
diff --git a/weed/topology/topology.go b/weed/topology/topology.go
new file mode 100644
index 000000000..04b500053
--- /dev/null
+++ b/weed/topology/topology.go
@@ -0,0 +1,189 @@
+package topology
+
+import (
+ "errors"
+ "io/ioutil"
+ "math/rand"
+
+ "github.com/chrislusf/raft"
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "github.com/chrislusf/seaweedfs/weed/sequence"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+type Topology struct {
+ NodeImpl
+
+ collectionMap *util.ConcurrentReadMap
+
+ pulse int64
+
+ volumeSizeLimit uint64
+
+ Sequence sequence.Sequencer
+
+ chanDeadDataNodes chan *DataNode
+ chanRecoveredDataNodes chan *DataNode
+ chanFullVolumes chan storage.VolumeInfo
+
+ configuration *Configuration
+
+ RaftServer raft.Server
+}
+
+func NewTopology(id string, confFile string, seq sequence.Sequencer, volumeSizeLimit uint64, pulse int) (*Topology, error) {
+ t := &Topology{}
+ t.id = NodeId(id)
+ t.nodeType = "Topology"
+ t.NodeImpl.value = t
+ t.children = make(map[NodeId]Node)
+ t.collectionMap = util.NewConcurrentReadMap()
+ t.pulse = int64(pulse)
+ t.volumeSizeLimit = volumeSizeLimit
+
+ t.Sequence = seq
+
+ t.chanDeadDataNodes = make(chan *DataNode)
+ t.chanRecoveredDataNodes = make(chan *DataNode)
+ t.chanFullVolumes = make(chan storage.VolumeInfo)
+
+ err := t.loadConfiguration(confFile)
+
+ return t, err
+}
+
+func (t *Topology) IsLeader() bool {
+ if leader, e := t.Leader(); e == nil {
+ return leader == t.RaftServer.Name()
+ }
+ return false
+}
+
+func (t *Topology) Leader() (string, error) {
+ l := ""
+ if t.RaftServer != nil {
+ l = t.RaftServer.Leader()
+ } else {
+ return "", errors.New("Raft Server not ready yet!")
+ }
+
+ if l == "" {
+ // We are a single-node cluster, so we are the leader
+ return t.RaftServer.Name(), errors.New("Raft Server not initialized!")
+ }
+
+ return l, nil
+}
+
+func (t *Topology) loadConfiguration(configurationFile string) error {
+ b, e := ioutil.ReadFile(configurationFile)
+ if e == nil {
+ t.configuration, e = NewConfiguration(b)
+ return e
+ }
+ glog.V(0).Infoln("Using default configurations.")
+ return nil
+}
+
+func (t *Topology) Lookup(collection string, vid storage.VolumeId) []*DataNode {
+ //maybe an issue if lots of collections?
+ if collection == "" {
+ for _, c := range t.collectionMap.Items() {
+ if list := c.(*Collection).Lookup(vid); list != nil {
+ return list
+ }
+ }
+ } else {
+ if c, ok := t.collectionMap.Find(collection); ok {
+ return c.(*Collection).Lookup(vid)
+ }
+ }
+ return nil
+}
+
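+// NextVolumeId returns the next volume id and replicates the new maximum
+// through raft via MaxVolumeIdCommand, without waiting for the commit.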
+func (t *Topology) NextVolumeId() storage.VolumeId {
+ vid := t.GetMaxVolumeId()
+ next := vid.Next()
+ go t.RaftServer.Do(NewMaxVolumeIdCommand(next))
+ return next
+}
+
+func (t *Topology) HasWritableVolume(option *VolumeGrowOption) bool {
+ vl := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl)
+ return vl.GetActiveVolumeCount(option) > 0
+}
+
+func (t *Topology) PickForWrite(count uint64, option *VolumeGrowOption) (string, uint64, *DataNode, error) {
+ vid, count, datanodes, err := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl).PickForWrite(count, option)
+ if err != nil || datanodes.Length() == 0 {
+ return "", 0, nil, errors.New("No writable volumes available!")
+ }
+ fileId, count := t.Sequence.NextFileId(count)
+ return storage.NewFileId(*vid, fileId, rand.Uint32()).String(), count, datanodes.Head(), nil
+}
+
+func (t *Topology) GetVolumeLayout(collectionName string, rp *storage.ReplicaPlacement, ttl *storage.TTL) *VolumeLayout {
+ return t.collectionMap.Get(collectionName, func() interface{} {
+ return NewCollection(collectionName, t.volumeSizeLimit)
+ }).(*Collection).GetOrCreateVolumeLayout(rp, ttl)
+}
+
+func (t *Topology) FindCollection(collectionName string) (*Collection, bool) {
+ c, hasCollection := t.collectionMap.Find(collectionName)
+ return c.(*Collection), hasCollection
+}
+
+func (t *Topology) DeleteCollection(collectionName string) {
+ t.collectionMap.Delete(collectionName)
+}
+
+func (t *Topology) RegisterVolumeLayout(v storage.VolumeInfo, dn *DataNode) {
+ t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl).RegisterVolume(&v, dn)
+}
+func (t *Topology) UnRegisterVolumeLayout(v storage.VolumeInfo, dn *DataNode) {
+ glog.Infof("removing volume info:%+v", v)
+ t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl).UnRegisterVolume(&v, dn)
+}
+
+func (t *Topology) ProcessJoinMessage(joinMessage *operation.JoinMessage) {
+ t.Sequence.SetMax(*joinMessage.MaxFileKey)
+ dcName, rackName := t.configuration.Locate(*joinMessage.Ip, *joinMessage.DataCenter, *joinMessage.Rack)
+ dc := t.GetOrCreateDataCenter(dcName)
+ rack := dc.GetOrCreateRack(rackName)
+ dn := rack.FindDataNode(*joinMessage.Ip, int(*joinMessage.Port))
+ if *joinMessage.IsInit && dn != nil {
+ t.UnRegisterDataNode(dn)
+ }
+ dn = rack.GetOrCreateDataNode(*joinMessage.Ip,
+ int(*joinMessage.Port), *joinMessage.PublicUrl,
+ int(*joinMessage.MaxVolumeCount))
+ var volumeInfos []storage.VolumeInfo
+ for _, v := range joinMessage.Volumes {
+ if vi, err := storage.NewVolumeInfo(v); err == nil {
+ volumeInfos = append(volumeInfos, vi)
+ } else {
+ glog.V(0).Infoln("Fail to convert joined volume information:", err.Error())
+ }
+ }
+ deletedVolumes := dn.UpdateVolumes(volumeInfos)
+ for _, v := range volumeInfos {
+ t.RegisterVolumeLayout(v, dn)
+ }
+ for _, v := range deletedVolumes {
+ t.UnRegisterVolumeLayout(v, dn)
+ }
+}
+
+func (t *Topology) GetOrCreateDataCenter(dcName string) *DataCenter {
+ for _, c := range t.Children() {
+ dc := c.(*DataCenter)
+ if string(dc.Id()) == dcName {
+ return dc
+ }
+ }
+ dc := NewDataCenter(dcName)
+ t.LinkChildNode(dc)
+ return dc
+}
diff --git a/weed/topology/topology_event_handling.go b/weed/topology/topology_event_handling.go
new file mode 100644
index 000000000..737b94482
--- /dev/null
+++ b/weed/topology/topology_event_handling.go
@@ -0,0 +1,74 @@
+package topology
+
+import (
+ "math/rand"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+)
+
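+// StartRefreshWritableVolumes starts the background work: a leader-only loop
+// that collects dead nodes and full volumes, a leader-only vacuum pass every
+// 15 minutes, and an event loop that reacts to the full/recovered/dead
+// notifications sent over the topology channels.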
+func (t *Topology) StartRefreshWritableVolumes(garbageThreshold string) {
+ go func() {
+ for {
+ if t.IsLeader() {
+ freshThreshHold := time.Now().Unix() - 3*t.pulse //3 times of sleep interval
+ t.CollectDeadNodeAndFullVolumes(freshThreshHold, t.volumeSizeLimit)
+ }
+ time.Sleep(time.Duration(float32(t.pulse*1e3)*(1+rand.Float32())) * time.Millisecond)
+ }
+ }()
+ go func(garbageThreshold string) {
+ c := time.Tick(15 * time.Minute)
+ for _ = range c {
+ if t.IsLeader() {
+ t.Vacuum(garbageThreshold)
+ }
+ }
+ }(garbageThreshold)
+ go func() {
+ for {
+ select {
+ case v := <-t.chanFullVolumes:
+ t.SetVolumeCapacityFull(v)
+ case dn := <-t.chanRecoveredDataNodes:
+ t.RegisterRecoveredDataNode(dn)
+ glog.V(0).Infoln("DataNode", dn, "is back alive!")
+ case dn := <-t.chanDeadDataNodes:
+ t.UnRegisterDataNode(dn)
+ glog.V(0).Infoln("DataNode", dn, "is dead!")
+ }
+ }
+ }()
+}
+func (t *Topology) SetVolumeCapacityFull(volumeInfo storage.VolumeInfo) bool {
+ vl := t.GetVolumeLayout(volumeInfo.Collection, volumeInfo.ReplicaPlacement, volumeInfo.Ttl)
+ if !vl.SetVolumeCapacityFull(volumeInfo.Id) {
+ return false
+ }
+ for _, dn := range vl.vid2location[volumeInfo.Id].list {
+ if !volumeInfo.ReadOnly {
+ dn.UpAdjustActiveVolumeCountDelta(-1)
+ }
+ }
+ return true
+}
+func (t *Topology) UnRegisterDataNode(dn *DataNode) {
+ for _, v := range dn.GetVolumes() {
+ glog.V(0).Infoln("Removing Volume", v.Id, "from the dead volume server", dn)
+ vl := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl)
+ vl.SetVolumeUnavailable(dn, v.Id)
+ }
+ dn.UpAdjustVolumeCountDelta(-dn.GetVolumeCount())
+ dn.UpAdjustActiveVolumeCountDelta(-dn.GetActiveVolumeCount())
+ dn.UpAdjustMaxVolumeCountDelta(-dn.GetMaxVolumeCount())
+ dn.Parent().UnlinkChildNode(dn.Id())
+}
+func (t *Topology) RegisterRecoveredDataNode(dn *DataNode) {
+ for _, v := range dn.GetVolumes() {
+ vl := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl)
+ if vl.isWritable(&v) {
+ vl.SetVolumeAvailable(dn, v.Id)
+ }
+ }
+}
diff --git a/weed/topology/topology_map.go b/weed/topology/topology_map.go
new file mode 100644
index 000000000..ce8e9e663
--- /dev/null
+++ b/weed/topology/topology_map.go
@@ -0,0 +1,53 @@
+package topology
+
+func (t *Topology) ToMap() interface{} {
+ m := make(map[string]interface{})
+ m["Max"] = t.GetMaxVolumeCount()
+ m["Free"] = t.FreeSpace()
+ var dcs []interface{}
+ for _, c := range t.Children() {
+ dc := c.(*DataCenter)
+ dcs = append(dcs, dc.ToMap())
+ }
+ m["DataCenters"] = dcs
+ var layouts []interface{}
+ for _, col := range t.collectionMap.Items() {
+ c := col.(*Collection)
+ for _, layout := range c.storageType2VolumeLayout.Items() {
+ if layout != nil {
+ tmp := layout.(*VolumeLayout).ToMap()
+ tmp["collection"] = c.Name
+ layouts = append(layouts, tmp)
+ }
+ }
+ }
+ m["layouts"] = layouts
+ return m
+}
+
+func (t *Topology) ToVolumeMap() interface{} {
+ m := make(map[string]interface{})
+ m["Max"] = t.GetMaxVolumeCount()
+ m["Free"] = t.FreeSpace()
+ dcs := make(map[NodeId]interface{})
+ for _, c := range t.Children() {
+ dc := c.(*DataCenter)
+ racks := make(map[NodeId]interface{})
+ for _, r := range dc.Children() {
+ rack := r.(*Rack)
+ dataNodes := make(map[NodeId]interface{})
+ for _, d := range rack.Children() {
+ dn := d.(*DataNode)
+ var volumes []interface{}
+ for _, v := range dn.GetVolumes() {
+ volumes = append(volumes, v)
+ }
+ dataNodes[d.Id()] = volumes
+ }
+ racks[r.Id()] = dataNodes
+ }
+ dcs[dc.Id()] = racks
+ }
+ m["DataCenters"] = dcs
+ return m
+}
diff --git a/weed/topology/topology_vacuum.go b/weed/topology/topology_vacuum.go
new file mode 100644
index 000000000..8cf8dfbeb
--- /dev/null
+++ b/weed/topology/topology_vacuum.go
@@ -0,0 +1,158 @@
+package topology
+
+import (
+ "encoding/json"
+ "errors"
+ "net/url"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+func batchVacuumVolumeCheck(vl *VolumeLayout, vid storage.VolumeId, locationlist *VolumeLocationList, garbageThreshold string) bool {
+ ch := make(chan bool, locationlist.Length())
+ for index, dn := range locationlist.list {
+ go func(index int, url string, vid storage.VolumeId) {
+ //glog.V(0).Infoln(index, "Check vacuuming", vid, "on", dn.Url())
+ if e, ret := vacuumVolume_Check(url, vid, garbageThreshold); e != nil {
+ //glog.V(0).Infoln(index, "Error when checking vacuuming", vid, "on", url, e)
+ ch <- false
+ } else {
+ //glog.V(0).Infoln(index, "Checked vacuuming", vid, "on", url, "needVacuum", ret)
+ ch <- ret
+ }
+ }(index, dn.Url(), vid)
+ }
+ isCheckSuccess := true
+ for _ = range locationlist.list {
+ select {
+ case canVacuum := <-ch:
+ isCheckSuccess = isCheckSuccess && canVacuum
+ case <-time.After(30 * time.Minute):
+ isCheckSuccess = false
+ break
+ }
+ }
+ return isCheckSuccess
+}
+func batchVacuumVolumeCompact(vl *VolumeLayout, vid storage.VolumeId, locationlist *VolumeLocationList) bool {
+ vl.removeFromWritable(vid)
+ ch := make(chan bool, locationlist.Length())
+ for index, dn := range locationlist.list {
+ go func(index int, url string, vid storage.VolumeId) {
+ glog.V(0).Infoln(index, "Start vacuuming", vid, "on", url)
+ if e := vacuumVolume_Compact(url, vid); e != nil {
+ glog.V(0).Infoln(index, "Error when vacuuming", vid, "on", url, e)
+ ch <- false
+ } else {
+ glog.V(0).Infoln(index, "Complete vacuuming", vid, "on", url)
+ ch <- true
+ }
+ }(index, dn.Url(), vid)
+ }
+ isVacuumSuccess := true
+ for _ = range locationlist.list {
+ select {
+ case _ = <-ch:
+ case <-time.After(30 * time.Minute):
+ isVacuumSuccess = false
+ break
+ }
+ }
+ return isVacuumSuccess
+}
+func batchVacuumVolumeCommit(vl *VolumeLayout, vid storage.VolumeId, locationlist *VolumeLocationList) bool {
+ isCommitSuccess := true
+ for _, dn := range locationlist.list {
+ glog.V(0).Infoln("Start Commiting vacuum", vid, "on", dn.Url())
+ if e := vacuumVolume_Commit(dn.Url(), vid); e != nil {
+ glog.V(0).Infoln("Error when committing vacuum", vid, "on", dn.Url(), e)
+ isCommitSuccess = false
+ } else {
+ glog.V(0).Infoln("Complete Commiting vacuum", vid, "on", dn.Url())
+ }
+ if isCommitSuccess {
+ vl.SetVolumeAvailable(dn, vid)
+ }
+ }
+ return isCommitSuccess
+}
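+
+// Vacuum walks every volume layout and, for each volume's location list, runs
+// the three phases above: check the garbage ratio on all replicas, compact
+// them, then commit the compaction and mark the volume writable again.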
+func (t *Topology) Vacuum(garbageThreshold string) int {
+ glog.V(0).Infoln("Start vacuum on demand")
+ for _, col := range t.collectionMap.Items() {
+ c := col.(*Collection)
+ glog.V(0).Infoln("vacuum on collection:", c.Name)
+ for _, vl := range c.storageType2VolumeLayout.Items() {
+ if vl != nil {
+ volumeLayout := vl.(*VolumeLayout)
+ for vid, locationlist := range volumeLayout.vid2location {
+ glog.V(0).Infoln("vacuum on collection:", c.Name, "volume", vid)
+ if batchVacuumVolumeCheck(volumeLayout, vid, locationlist, garbageThreshold) {
+ if batchVacuumVolumeCompact(volumeLayout, vid, locationlist) {
+ batchVacuumVolumeCommit(volumeLayout, vid, locationlist)
+ }
+ }
+ }
+ }
+ }
+ }
+ return 0
+}
+
+type VacuumVolumeResult struct {
+ Result bool
+ Error string
+}
+
+func vacuumVolume_Check(urlLocation string, vid storage.VolumeId, garbageThreshold string) (error, bool) {
+ values := make(url.Values)
+ values.Add("volume", vid.String())
+ values.Add("garbageThreshold", garbageThreshold)
+ jsonBlob, err := util.Post("http://"+urlLocation+"/admin/vacuum/check", values)
+ if err != nil {
+ glog.V(0).Infoln("parameters:", values)
+ return err, false
+ }
+ var ret VacuumVolumeResult
+ if err := json.Unmarshal(jsonBlob, &ret); err != nil {
+ return err, false
+ }
+ if ret.Error != "" {
+ return errors.New(ret.Error), false
+ }
+ return nil, ret.Result
+}
+func vacuumVolume_Compact(urlLocation string, vid storage.VolumeId) error {
+ values := make(url.Values)
+ values.Add("volume", vid.String())
+ jsonBlob, err := util.Post("http://"+urlLocation+"/admin/vacuum/compact", values)
+ if err != nil {
+ return err
+ }
+ var ret VacuumVolumeResult
+ if err := json.Unmarshal(jsonBlob, &ret); err != nil {
+ return err
+ }
+ if ret.Error != "" {
+ return errors.New(ret.Error)
+ }
+ return nil
+}
+func vacuumVolume_Commit(urlLocation string, vid storage.VolumeId) error {
+ values := make(url.Values)
+ values.Add("volume", vid.String())
+ jsonBlob, err := util.Post("http://"+urlLocation+"/admin/vacuum/commit", values)
+ if err != nil {
+ return err
+ }
+ var ret VacuumVolumeResult
+ if err := json.Unmarshal(jsonBlob, &ret); err != nil {
+ return err
+ }
+ if ret.Error != "" {
+ return errors.New(ret.Error)
+ }
+ return nil
+}
diff --git a/weed/topology/volume_growth.go b/weed/topology/volume_growth.go
new file mode 100644
index 000000000..3a1c9c567
--- /dev/null
+++ b/weed/topology/volume_growth.go
@@ -0,0 +1,211 @@
+package topology
+
+import (
+ "fmt"
+ "math/rand"
+ "sync"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+)
+
+/*
+This package is created to resolve these replica placement issues:
+1. growth factor for each replica level, e.g., add 10 volumes for 1 copy, 20 volumes for 2 copies, 30 volumes for 3 copies
+2. in time of tight storage, how to reduce replica level
+3. optimizing for hot data on faster disk, cold data on cheaper storage,
+4. volume allocation for each bucket
+*/
+
+type VolumeGrowOption struct {
+ Collection string
+ ReplicaPlacement *storage.ReplicaPlacement
+ Ttl *storage.TTL
+ DataCenter string
+ Rack string
+ DataNode string
+}
+
+type VolumeGrowth struct {
+ accessLock sync.Mutex
+}
+
+func (o *VolumeGrowOption) String() string {
+ return fmt.Sprintf("Collection:%s, ReplicaPlacement:%v, Ttl:%v, DataCenter:%s, Rack:%s, DataNode:%s", o.Collection, o.ReplicaPlacement, o.Ttl, o.DataCenter, o.Rack, o.DataNode)
+}
+
+func NewDefaultVolumeGrowth() *VolumeGrowth {
+ return &VolumeGrowth{}
+}
+
+// one replication type may need rp.GetCopyCount() actual volumes
+// findVolumeCount decides, given copyCount, how many logical volumes to create at a time
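+// e.g. with a 2-copy placement such as "001" this grows 6 logical volumes at
+// a time, which amounts to 12 physical volumes across the cluster.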
+func (vg *VolumeGrowth) findVolumeCount(copyCount int) (count int) {
+ switch copyCount {
+ case 1:
+ count = 7
+ case 2:
+ count = 6
+ case 3:
+ count = 3
+ default:
+ count = 1
+ }
+ return
+}
+
+func (vg *VolumeGrowth) AutomaticGrowByType(option *VolumeGrowOption, topo *Topology) (count int, err error) {
+ count, err = vg.GrowByCountAndType(vg.findVolumeCount(option.ReplicaPlacement.GetCopyCount()), option, topo)
+ if count > 0 && count%option.ReplicaPlacement.GetCopyCount() == 0 {
+ return count, nil
+ }
+ return count, err
+}
+func (vg *VolumeGrowth) GrowByCountAndType(targetCount int, option *VolumeGrowOption, topo *Topology) (counter int, err error) {
+ vg.accessLock.Lock()
+ defer vg.accessLock.Unlock()
+
+ for i := 0; i < targetCount; i++ {
+ if c, e := vg.findAndGrow(topo, option); e == nil {
+ counter += c
+ } else {
+ return counter, e
+ }
+ }
+ return
+}
+
+func (vg *VolumeGrowth) findAndGrow(topo *Topology, option *VolumeGrowOption) (int, error) {
+ servers, e := vg.findEmptySlotsForOneVolume(topo, option)
+ if e != nil {
+ return 0, e
+ }
+ vid := topo.NextVolumeId()
+ err := vg.grow(topo, vid, option, servers...)
+ return len(servers), err
+}
+
+// 1. find the main data node
+//    1.1 collect all data nodes that have at least 1 free slot
+//    1.2 collect all racks that have rp.SameRackCount+1 free slots
+//    1.3 collect all data centers that have rp.DiffRackCount+rp.SameRackCount+1 free slots
+// 2. find the rest of the data nodes
+func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *VolumeGrowOption) (servers []*DataNode, err error) {
+ //find main datacenter and other data centers
+ rp := option.ReplicaPlacement
+ mainDataCenter, otherDataCenters, dc_err := topo.RandomlyPickNodes(rp.DiffDataCenterCount+1, func(node Node) error {
+ if option.DataCenter != "" && node.IsDataCenter() && node.Id() != NodeId(option.DataCenter) {
+ return fmt.Errorf("Not matching preferred data center:%s", option.DataCenter)
+ }
+ if len(node.Children()) < rp.DiffRackCount+1 {
+ return fmt.Errorf("Only has %d racks, not enough for %d.", len(node.Children()), rp.DiffRackCount+1)
+ }
+ if node.FreeSpace() < rp.DiffRackCount+rp.SameRackCount+1 {
+ return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), rp.DiffRackCount+rp.SameRackCount+1)
+ }
+ possibleRacksCount := 0
+ for _, rack := range node.Children() {
+ possibleDataNodesCount := 0
+ for _, n := range rack.Children() {
+ if n.FreeSpace() >= 1 {
+ possibleDataNodesCount++
+ }
+ }
+ if possibleDataNodesCount >= rp.SameRackCount+1 {
+ possibleRacksCount++
+ }
+ }
+ if possibleRacksCount < rp.DiffRackCount+1 {
+ return fmt.Errorf("Only has %d racks with more than %d free data nodes, not enough for %d.", possibleRacksCount, rp.SameRackCount+1, rp.DiffRackCount+1)
+ }
+ return nil
+ })
+ if dc_err != nil {
+ return nil, dc_err
+ }
+
+ //find main rack and other racks
+ mainRack, otherRacks, rack_err := mainDataCenter.(*DataCenter).RandomlyPickNodes(rp.DiffRackCount+1, func(node Node) error {
+ if option.Rack != "" && node.IsRack() && node.Id() != NodeId(option.Rack) {
+ return fmt.Errorf("Not matching preferred rack:%s", option.Rack)
+ }
+ if node.FreeSpace() < rp.SameRackCount+1 {
+ return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), rp.SameRackCount+1)
+ }
+ if len(node.Children()) < rp.SameRackCount+1 {
+ // a quick pre-check on the number of data nodes before counting the free ones
+ return fmt.Errorf("Only has %d data nodes, not enough for %d.", len(node.Children()), rp.SameRackCount+1)
+ }
+ possibleDataNodesCount := 0
+ for _, n := range node.Children() {
+ if n.FreeSpace() >= 1 {
+ possibleDataNodesCount++
+ }
+ }
+ if possibleDataNodesCount < rp.SameRackCount+1 {
+ return fmt.Errorf("Only has %d data nodes with a slot, not enough for %d.", possibleDataNodesCount, rp.SameRackCount+1)
+ }
+ return nil
+ })
+ if rack_err != nil {
+ return nil, rack_err
+ }
+
+ //find main server and other servers
+ mainServer, otherServers, server_err := mainRack.(*Rack).RandomlyPickNodes(rp.SameRackCount+1, func(node Node) error {
+ if option.DataNode != "" && node.IsDataNode() && node.Id() != NodeId(option.DataNode) {
+ return fmt.Errorf("Not matching preferred data node:%s", option.DataNode)
+ }
+ if node.FreeSpace() < 1 {
+ return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), 1)
+ }
+ return nil
+ })
+ if server_err != nil {
+ return nil, server_err
+ }
+
+ servers = append(servers, mainServer.(*DataNode))
+ for _, server := range otherServers {
+ servers = append(servers, server.(*DataNode))
+ }
+ for _, rack := range otherRacks {
+ r := rand.Intn(rack.FreeSpace())
+ if server, e := rack.ReserveOneVolume(r); e == nil {
+ servers = append(servers, server)
+ } else {
+ return servers, e
+ }
+ }
+ for _, datacenter := range otherDataCenters {
+ r := rand.Intn(datacenter.FreeSpace())
+ if server, e := datacenter.ReserveOneVolume(r); e == nil {
+ servers = append(servers, server)
+ } else {
+ return servers, e
+ }
+ }
+ return
+}
+
+func (vg *VolumeGrowth) grow(topo *Topology, vid storage.VolumeId, option *VolumeGrowOption, servers ...*DataNode) error {
+ for _, server := range servers {
+ if err := AllocateVolume(server, vid, option); err == nil {
+ vi := storage.VolumeInfo{
+ Id: vid,
+ Size: 0,
+ Collection: option.Collection,
+ ReplicaPlacement: option.ReplicaPlacement,
+ Ttl: option.Ttl,
+ Version: storage.CurrentVersion,
+ }
+ server.AddOrUpdateVolume(vi)
+ topo.RegisterVolumeLayout(vi, server)
+ glog.V(0).Infoln("Created Volume", vid, "on", server.NodeImpl.String())
+ } else {
+ glog.V(0).Infoln("Failed to assign volume", vid, "to", servers, "error", err)
+ return fmt.Errorf("Failed to assign %d: %v", vid, err)
+ }
+ }
+ return nil
+}
diff --git a/weed/topology/volume_growth_test.go b/weed/topology/volume_growth_test.go
new file mode 100644
index 000000000..e5716674a
--- /dev/null
+++ b/weed/topology/volume_growth_test.go
@@ -0,0 +1,135 @@
+package topology
+
+import (
+ "encoding/json"
+ "fmt"
+ "testing"
+
+ "github.com/chrislusf/seaweedfs/weed/sequence"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+)
+
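+// topologyLayout describes a small test topology as JSON: data centers contain racks,
+// racks contain servers, and each server lists its existing volumes plus a "limit"
+// (the maximum number of volumes it may hold).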
+var topologyLayout = `
+{
+ "dc1":{
+ "rack1":{
+ "server111":{
+ "volumes":[
+ {"id":1, "size":12312},
+ {"id":2, "size":12312},
+ {"id":3, "size":12312}
+ ],
+ "limit":3
+ },
+ "server112":{
+ "volumes":[
+ {"id":4, "size":12312},
+ {"id":5, "size":12312},
+ {"id":6, "size":12312}
+ ],
+ "limit":10
+ }
+ },
+ "rack2":{
+ "server121":{
+ "volumes":[
+ {"id":4, "size":12312},
+ {"id":5, "size":12312},
+ {"id":6, "size":12312}
+ ],
+ "limit":4
+ },
+ "server122":{
+ "volumes":[],
+ "limit":4
+ },
+ "server123":{
+ "volumes":[
+ {"id":2, "size":12312},
+ {"id":3, "size":12312},
+ {"id":4, "size":12312}
+ ],
+ "limit":5
+ }
+ }
+ },
+ "dc2":{
+ },
+ "dc3":{
+ "rack2":{
+ "server321":{
+ "volumes":[
+ {"id":1, "size":12312},
+ {"id":3, "size":12312},
+ {"id":5, "size":12312}
+ ],
+ "limit":4
+ }
+ }
+ }
+}
+`
+
+func setup(topologyLayout string) *Topology {
+ var data interface{}
+ err := json.Unmarshal([]byte(topologyLayout), &data)
+ if err != nil {
+ fmt.Println("error:", err)
+ }
+ fmt.Println("data:", data)
+
+ //all nodes must be linked into the topology before servers add volumes
+ topo, err := NewTopology("weedfs", "/etc/weedfs/weedfs.conf",
+ sequence.NewMemorySequencer(), 32*1024, 5)
+ if err != nil {
+ panic("error: " + err.Error())
+ }
+ mTopology := data.(map[string]interface{})
+ for dcKey, dcValue := range mTopology {
+ dc := NewDataCenter(dcKey)
+ dcMap := dcValue.(map[string]interface{})
+ topo.LinkChildNode(dc)
+ for rackKey, rackValue := range dcMap {
+ rack := NewRack(rackKey)
+ rackMap := rackValue.(map[string]interface{})
+ dc.LinkChildNode(rack)
+ for serverKey, serverValue := range rackMap {
+ server := NewDataNode(serverKey)
+ serverMap := serverValue.(map[string]interface{})
+ rack.LinkChildNode(server)
+ for _, v := range serverMap["volumes"].([]interface{}) {
+ m := v.(map[string]interface{})
+ vi := storage.VolumeInfo{
+ Id: storage.VolumeId(int64(m["id"].(float64))),
+ Size: uint64(m["size"].(float64)),
+ Version: storage.CurrentVersion}
+ server.AddOrUpdateVolume(vi)
+ }
+ server.UpAdjustMaxVolumeCountDelta(int(serverMap["limit"].(float64)))
+ }
+ }
+ }
+
+ return topo
+}
+
+func TestFindEmptySlotsForOneVolume(t *testing.T) {
+ topo := setup(topologyLayout)
+ vg := NewDefaultVolumeGrowth()
+ rp, _ := storage.NewReplicaPlacementFromString("002")
+ volumeGrowOption := &VolumeGrowOption{
+ Collection: "",
+ ReplicaPlacement: rp,
+ DataCenter: "dc1",
+ Rack: "",
+ DataNode: "",
+ }
+ servers, err := vg.findEmptySlotsForOneVolume(topo, volumeGrowOption)
+ if err != nil {
+ fmt.Println("finding empty slots error :", err)
+ t.Fail()
+ }
+ for _, server := range servers {
+ fmt.Println("assigned node :", server.Id())
+ }
+}
diff --git a/weed/topology/volume_layout.go b/weed/topology/volume_layout.go
new file mode 100644
index 000000000..e500de583
--- /dev/null
+++ b/weed/topology/volume_layout.go
@@ -0,0 +1,226 @@
+package topology
+
+import (
+ "errors"
+ "fmt"
+ "math/rand"
+ "sync"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+)
+
+// mapping from volume to its locations, inverted from server to volume
+type VolumeLayout struct {
+ rp *storage.ReplicaPlacement
+ ttl *storage.TTL
+ vid2location map[storage.VolumeId]*VolumeLocationList
+ writables []storage.VolumeId // transient array of writable volume id
+ volumeSizeLimit uint64
+ accessLock sync.RWMutex
+}
+
+func NewVolumeLayout(rp *storage.ReplicaPlacement, ttl *storage.TTL, volumeSizeLimit uint64) *VolumeLayout {
+ return &VolumeLayout{
+ rp: rp,
+ ttl: ttl,
+ vid2location: make(map[storage.VolumeId]*VolumeLocationList),
+ writables: *new([]storage.VolumeId),
+ volumeSizeLimit: volumeSizeLimit,
+ }
+}
+
+func (vl *VolumeLayout) String() string {
+ return fmt.Sprintf("rp:%v, ttl:%v, vid2location:%v, writables:%v, volumeSizeLimit:%v", vl.rp, vl.ttl, vl.vid2location, vl.writables, vl.volumeSizeLimit)
+}
+
+func (vl *VolumeLayout) RegisterVolume(v *storage.VolumeInfo, dn *DataNode) {
+ vl.accessLock.Lock()
+ defer vl.accessLock.Unlock()
+
+ if _, ok := vl.vid2location[v.Id]; !ok {
+ vl.vid2location[v.Id] = NewVolumeLocationList()
+ }
+ vl.vid2location[v.Id].Set(dn)
+ glog.V(4).Infoln("volume", v.Id, "added to dn", dn.Id(), "len", vl.vid2location[v.Id].Length(), "copy", v.ReplicaPlacement.GetCopyCount())
+ if vl.vid2location[v.Id].Length() == vl.rp.GetCopyCount() && vl.isWritable(v) {
+ vl.addToWritable(v.Id)
+ } else {
+ vl.removeFromWritable(v.Id)
+ }
+}
+
+func (vl *VolumeLayout) UnRegisterVolume(v *storage.VolumeInfo, dn *DataNode) {
+ vl.accessLock.Lock()
+ defer vl.accessLock.Unlock()
+
+ vl.removeFromWritable(v.Id)
+ delete(vl.vid2location, v.Id)
+}
+
+func (vl *VolumeLayout) addToWritable(vid storage.VolumeId) {
+ for _, id := range vl.writables {
+ if vid == id {
+ return
+ }
+ }
+ vl.writables = append(vl.writables, vid)
+}
+
+func (vl *VolumeLayout) isWritable(v *storage.VolumeInfo) bool {
+ return uint64(v.Size) < vl.volumeSizeLimit &&
+ v.Version == storage.CurrentVersion &&
+ !v.ReadOnly
+}
+
+func (vl *VolumeLayout) Lookup(vid storage.VolumeId) []*DataNode {
+ vl.accessLock.RLock()
+ defer vl.accessLock.RUnlock()
+
+ if location := vl.vid2location[vid]; location != nil {
+ return location.list
+ }
+ return nil
+}
+
+func (vl *VolumeLayout) ListVolumeServers() (nodes []*DataNode) {
+ vl.accessLock.RLock()
+ defer vl.accessLock.RUnlock()
+
+ for _, location := range vl.vid2location {
+ nodes = append(nodes, location.list...)
+ }
+ return
+}
+
+func (vl *VolumeLayout) PickForWrite(count uint64, option *VolumeGrowOption) (*storage.VolumeId, uint64, *VolumeLocationList, error) {
+ vl.accessLock.RLock()
+ defer vl.accessLock.RUnlock()
+
+ len_writers := len(vl.writables)
+ if len_writers <= 0 {
+ glog.V(0).Infoln("No more writable volumes!")
+ return nil, 0, nil, errors.New("No more writable volumes!")
+ }
+ if option.DataCenter == "" {
+ vid := vl.writables[rand.Intn(len_writers)]
+ locationList := vl.vid2location[vid]
+ if locationList != nil {
+ return &vid, count, locationList, nil
+ }
+ return nil, 0, nil, errors.New("Strangely vid " + vid.String() + " is on no machine!")
+ }
+ var vid storage.VolumeId
+ var locationList *VolumeLocationList
+ counter := 0
+ for _, v := range vl.writables {
+ volumeLocationList := vl.vid2location[v]
+ for _, dn := range volumeLocationList.list {
+ if dn.GetDataCenter().Id() == NodeId(option.DataCenter) {
+ if option.Rack != "" && dn.GetRack().Id() != NodeId(option.Rack) {
+ continue
+ }
+ if option.DataNode != "" && dn.Id() != NodeId(option.DataNode) {
+ continue
+ }
+ counter++
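+ // reservoir sampling: each matching replica location replaces the current pick
+ // with probability 1/counter, so a volume's chance is proportional to its
+ // number of matching locations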
+ if rand.Intn(counter) < 1 {
+ vid, locationList = v, volumeLocationList
+ }
+ }
+ }
+ }
+ return &vid, count, locationList, nil
+}
+
+func (vl *VolumeLayout) GetActiveVolumeCount(option *VolumeGrowOption) int {
+ vl.accessLock.RLock()
+ defer vl.accessLock.RUnlock()
+
+ if option.DataCenter == "" {
+ return len(vl.writables)
+ }
+ counter := 0
+ for _, v := range vl.writables {
+ for _, dn := range vl.vid2location[v].list {
+ if dn.GetDataCenter().Id() == NodeId(option.DataCenter) {
+ if option.Rack != "" && dn.GetRack().Id() != NodeId(option.Rack) {
+ continue
+ }
+ if option.DataNode != "" && dn.Id() != NodeId(option.DataNode) {
+ continue
+ }
+ counter++
+ }
+ }
+ }
+ return counter
+}
+
+func (vl *VolumeLayout) removeFromWritable(vid storage.VolumeId) bool {
+ toDeleteIndex := -1
+ for k, id := range vl.writables {
+ if id == vid {
+ toDeleteIndex = k
+ break
+ }
+ }
+ if toDeleteIndex >= 0 {
+ glog.V(0).Infoln("Volume", vid, "becomes unwritable")
+ vl.writables = append(vl.writables[0:toDeleteIndex], vl.writables[toDeleteIndex+1:]...)
+ return true
+ }
+ return false
+}
+func (vl *VolumeLayout) setVolumeWritable(vid storage.VolumeId) bool {
+ for _, v := range vl.writables {
+ if v == vid {
+ return false
+ }
+ }
+ glog.V(0).Infoln("Volume", vid, "becomes writable")
+ vl.writables = append(vl.writables, vid)
+ return true
+}
+
+func (vl *VolumeLayout) SetVolumeUnavailable(dn *DataNode, vid storage.VolumeId) bool {
+ vl.accessLock.Lock()
+ defer vl.accessLock.Unlock()
+
+ if location, ok := vl.vid2location[vid]; ok {
+ if location.Remove(dn) {
+ if location.Length() < vl.rp.GetCopyCount() {
+ glog.V(0).Infoln("Volume", vid, "has", location.Length(), "replica, less than required", vl.rp.GetCopyCount())
+ return vl.removeFromWritable(vid)
+ }
+ }
+ }
+ return false
+}
+func (vl *VolumeLayout) SetVolumeAvailable(dn *DataNode, vid storage.VolumeId) bool {
+ vl.accessLock.Lock()
+ defer vl.accessLock.Unlock()
+
+ vl.vid2location[vid].Set(dn)
+ if vl.vid2location[vid].Length() >= vl.rp.GetCopyCount() {
+ return vl.setVolumeWritable(vid)
+ }
+ return false
+}
+
+func (vl *VolumeLayout) SetVolumeCapacityFull(vid storage.VolumeId) bool {
+ vl.accessLock.Lock()
+ defer vl.accessLock.Unlock()
+
+ // glog.V(0).Infoln("Volume", vid, "reaches full capacity.")
+ return vl.removeFromWritable(vid)
+}
+
+func (vl *VolumeLayout) ToMap() map[string]interface{} {
+ m := make(map[string]interface{})
+ m["replication"] = vl.rp.String()
+ m["ttl"] = vl.ttl.String()
+ m["writables"] = vl.writables
+ //m["locations"] = vl.vid2location
+ return m
+}
diff --git a/weed/topology/volume_location_list.go b/weed/topology/volume_location_list.go
new file mode 100644
index 000000000..d5eaf5e92
--- /dev/null
+++ b/weed/topology/volume_location_list.go
@@ -0,0 +1,65 @@
+package topology
+
+import (
+ "fmt"
+)
+
+type VolumeLocationList struct {
+ list []*DataNode
+}
+
+func NewVolumeLocationList() *VolumeLocationList {
+ return &VolumeLocationList{}
+}
+
+func (dnll *VolumeLocationList) String() string {
+ return fmt.Sprintf("%v", dnll.list)
+}
+
+func (dnll *VolumeLocationList) Head() *DataNode {
+ //the first node in the list is treated as the master location of the volume
+ return dnll.list[0]
+}
+
+func (dnll *VolumeLocationList) Length() int {
+ return len(dnll.list)
+}
+
+func (dnll *VolumeLocationList) Set(loc *DataNode) {
+ for i := 0; i < len(dnll.list); i++ {
+ if loc.Ip == dnll.list[i].Ip && loc.Port == dnll.list[i].Port {
+ dnll.list[i] = loc
+ return
+ }
+ }
+ dnll.list = append(dnll.list, loc)
+}
+
+func (dnll *VolumeLocationList) Remove(loc *DataNode) bool {
+ for i, dnl := range dnll.list {
+ if loc.Ip == dnl.Ip && loc.Port == dnl.Port {
+ dnll.list = append(dnll.list[:i], dnll.list[i+1:]...)
+ return true
+ }
+ }
+ return false
+}
+
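+// Refresh drops data nodes whose LastSeen timestamp is older than freshThreshHold.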
+func (dnll *VolumeLocationList) Refresh(freshThreshHold int64) {
+ var changed bool
+ for _, dnl := range dnll.list {
+ if dnl.LastSeen < freshThreshHold {
+ changed = true
+ break
+ }
+ }
+ if changed {
+ var l []*DataNode
+ for _, dnl := range dnll.list {
+ if dnl.LastSeen >= freshThreshHold {
+ l = append(l, dnl)
+ }
+ }
+ dnll.list = l
+ }
+}
diff --git a/weed/util/bytes.go b/weed/util/bytes.go
new file mode 100644
index 000000000..dfa4ae665
--- /dev/null
+++ b/weed/util/bytes.go
@@ -0,0 +1,45 @@
+package util
+
+// big endian
+
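+// These helpers read and write fixed-width big-endian integers; callers must pass
+// slices of the appropriate length (e.g. at least 8 bytes for Uint64toBytes).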
+func BytesToUint64(b []byte) (v uint64) {
+ length := uint(len(b))
+ for i := uint(0); i < length-1; i++ {
+ v += uint64(b[i])
+ v <<= 8
+ }
+ v += uint64(b[length-1])
+ return
+}
+func BytesToUint32(b []byte) (v uint32) {
+ length := uint(len(b))
+ for i := uint(0); i < length-1; i++ {
+ v += uint32(b[i])
+ v <<= 8
+ }
+ v += uint32(b[length-1])
+ return
+}
+func BytesToUint16(b []byte) (v uint16) {
+ v += uint16(b[0])
+ v <<= 8
+ v += uint16(b[1])
+ return
+}
+func Uint64toBytes(b []byte, v uint64) {
+ for i := uint(0); i < 8; i++ {
+ b[7-i] = byte(v >> (i * 8))
+ }
+}
+func Uint32toBytes(b []byte, v uint32) {
+ for i := uint(0); i < 4; i++ {
+ b[3-i] = byte(v >> (i * 8))
+ }
+}
+func Uint16toBytes(b []byte, v uint16) {
+ b[0] = byte(v >> 8)
+ b[1] = byte(v)
+}
+func Uint8toBytes(b []byte, v uint8) {
+ b[0] = byte(v)
+}
diff --git a/weed/util/bytes_pool.go b/weed/util/bytes_pool.go
new file mode 100644
index 000000000..58ed6feca
--- /dev/null
+++ b/weed/util/bytes_pool.go
@@ -0,0 +1,127 @@
+package util
+
+import (
+ "bytes"
+ "fmt"
+ "sync"
+ "sync/atomic"
+ "time"
+)
+
+var (
+ ChunkSizes = []int{
+ 1 << 4, // index 0, 16 bytes, inclusive
+ 1 << 6, // index 1, 64 bytes
+ 1 << 8, // index 2, 256 bytes
+ 1 << 10, // index 3, 1K bytes
+ 1 << 12, // index 4, 4K bytes
+ 1 << 14, // index 5, 16K bytes
+ 1 << 16, // index 6, 64K bytes
+ 1 << 18, // index 7, 256K bytes
+ 1 << 20, // index 8, 1M bytes
+ 1 << 22, // index 9, 4M bytes
+ 1 << 24, // index 10, 16M bytes
+ 1 << 26, // index 11, 64M bytes
+ 1 << 28, // index 12, 128M bytes
+ }
+
+ _DEBUG = false
+)
+
+type BytesPool struct {
+ chunkPools []*byteChunkPool
+}
+
+func NewBytesPool() *BytesPool {
+ var bp BytesPool
+ for _, size := range ChunkSizes {
+ bp.chunkPools = append(bp.chunkPools, newByteChunkPool(size))
+ }
+ ret := &bp
+ if _DEBUG {
+ t := time.NewTicker(10 * time.Second)
+ go func() {
+ for {
+ println("buffer:", ret.String())
+ <-t.C
+ }
+ }()
+ }
+ return ret
+}
+
+func (m *BytesPool) String() string {
+ var buf bytes.Buffer
+ for index, size := range ChunkSizes {
+ if m.chunkPools[index].count > 0 {
+ buf.WriteString(fmt.Sprintf("size:%d count:%d\n", size, m.chunkPools[index].count))
+ }
+ }
+ return buf.String()
+}
+
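+// findChunkPoolIndex maps a requested size to an index into ChunkSizes: sizes up to
+// 16 bytes map to index 0, and each following index covers 4x the previous size.
+// Non-positive sizes and sizes larger than the biggest chunk return -1.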
+func findChunkPoolIndex(size int) int {
+ if size <= 0 {
+ return -1
+ }
+ size = (size - 1) >> 4
+ ret := 0
+ for size > 0 {
+ size = size >> 2
+ ret = ret + 1
+ }
+ if ret >= len(ChunkSizes) {
+ return -1
+ }
+ return ret
+}
+
+func (m *BytesPool) Get(size int) []byte {
+ index := findChunkPoolIndex(size)
+ // println("get index:", index)
+ if index < 0 {
+ return make([]byte, size)
+ }
+ return m.chunkPools[index].Get()
+}
+
+func (m *BytesPool) Put(b []byte) {
+ index := findChunkPoolIndex(len(b))
+ // println("put index:", index)
+ if index < 0 {
+ return
+ }
+ m.chunkPools[index].Put(b)
+}
+
+// a pool of fixed-size []byte chunks. The pool size is managed by the Go GC.
+type byteChunkPool struct {
+ sync.Pool
+ chunkSizeLimit int
+ count int64
+}
+
+var count int
+
+func newByteChunkPool(chunkSizeLimit int) *byteChunkPool {
+ var m byteChunkPool
+ m.chunkSizeLimit = chunkSizeLimit
+ m.Pool.New = func() interface{} {
+ count++
+ // println("creating []byte size", m.chunkSizeLimit, "new", count, "count", m.count)
+ return make([]byte, m.chunkSizeLimit)
+ }
+ return &m
+}
+
+func (m *byteChunkPool) Get() []byte {
+ // println("before get size:", m.chunkSizeLimit, "count:", m.count)
+ atomic.AddInt64(&m.count, 1)
+ return m.Pool.Get().([]byte)
+}
+
+func (m *byteChunkPool) Put(b []byte) {
+ atomic.AddInt64(&m.count, -1)
+ // println("after put get size:", m.chunkSizeLimit, "count:", m.count)
+ m.Pool.Put(b)
+}
diff --git a/weed/util/bytes_pool_test.go b/weed/util/bytes_pool_test.go
new file mode 100644
index 000000000..3f37c16cf
--- /dev/null
+++ b/weed/util/bytes_pool_test.go
@@ -0,0 +1,41 @@
+package util
+
+import (
+ "testing"
+)
+
+func TestFindChunkPoolIndex(t *testing.T) {
+ var tests = []struct {
+ n int // input
+ expected int // expected result
+ }{
+ {0, -1},
+ {1, 0},
+ {1 << 4, 0},
+ {1 << 6, 1},
+ {1 << 8, 2},
+ {1 << 10, 3},
+ {1 << 12, 4},
+ {1 << 14, 5},
+ {1 << 16, 6},
+ {1 << 18, 7},
+ {1<<4 + 1, 1},
+ {1<<6 + 1, 2},
+ {1<<8 + 1, 3},
+ {1<<10 + 1, 4},
+ {1<<12 + 1, 5},
+ {1<<14 + 1, 6},
+ {1<<16 + 1, 7},
+ {1<<18 + 1, 8},
+ {1<<28 - 1, 12},
+ {1 << 28, 12},
+ {1<<28 + 2134, -1},
+ {1080, 4},
+ }
+ for _, tt := range tests {
+ actual := findChunkPoolIndex(tt.n)
+ if actual != tt.expected {
+ t.Errorf("findChunkPoolIndex(%d): expected %d, actual %d", tt.n, tt.expected, actual)
+ }
+ }
+}
diff --git a/weed/util/concurrent_read_map.go b/weed/util/concurrent_read_map.go
new file mode 100644
index 000000000..28b6ae0f1
--- /dev/null
+++ b/weed/util/concurrent_read_map.go
@@ -0,0 +1,60 @@
+package util
+
+import (
+ "sync"
+)
+
+// A mostly-read map that can thread-safely initialize its entries.
+type ConcurrentReadMap struct {
+ sync.RWMutex
+
+ items map[string]interface{}
+}
+
+func NewConcurrentReadMap() *ConcurrentReadMap {
+ return &ConcurrentReadMap{items: make(map[string]interface{})}
+}
+
+func (m *ConcurrentReadMap) initMapEntry(key string, newEntry func() interface{}) (value interface{}) {
+ m.Lock()
+ defer m.Unlock()
+ if value, ok := m.items[key]; ok {
+ return value
+ }
+ value = newEntry()
+ m.items[key] = value
+ return value
+}
+
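+// Get returns the value for key, creating it with newEntry if missing. It tries a
+// read-locked lookup first and falls back to initMapEntry, which re-checks under the
+// write lock so the entry is created only once even under concurrent access.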
+func (m *ConcurrentReadMap) Get(key string, newEntry func() interface{}) interface{} {
+ m.RLock()
+ if value, ok := m.items[key]; ok {
+ m.RUnlock()
+ return value
+ }
+ m.RUnlock()
+ return m.initMapEntry(key, newEntry)
+}
+
+func (m *ConcurrentReadMap) Find(key string) (interface{}, bool) {
+ m.RLock()
+ value, ok := m.items[key]
+ m.RUnlock()
+ return value, ok
+}
+
+func (m *ConcurrentReadMap) Items() (itemsCopy []interface{}) {
+ m.RLock()
+ for _, i := range m.items {
+ itemsCopy = append(itemsCopy, i)
+ }
+ m.RUnlock()
+ return itemsCopy
+}
+
+func (m *ConcurrentReadMap) Delete(key string) {
+ m.Lock()
+ delete(m.items, key)
+ m.Unlock()
+}
diff --git a/weed/util/config.go b/weed/util/config.go
new file mode 100644
index 000000000..e4549c322
--- /dev/null
+++ b/weed/util/config.go
@@ -0,0 +1,130 @@
+package util
+
+// Copyright 2011 Numerotron Inc.
+// Use of this source code is governed by an MIT-style license
+// that can be found in the LICENSE file.
+//
+// Developed at www.stathat.com by Patrick Crosby
+// Contact us on twitter with any questions: twitter.com/stat_hat
+
+// Adapted from the jconfig package: a simple JSON-based configuration file parser.
+
+import (
+ "bytes"
+ "encoding/json"
+ "os"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+type Config struct {
+ data map[string]interface{}
+ filename string
+}
+
+func newConfig() *Config {
+ result := new(Config)
+ result.data = make(map[string]interface{})
+ return result
+}
+
+// Loads config information from a JSON file
+func LoadConfig(filename string) *Config {
+ result := newConfig()
+ result.filename = filename
+ err := result.parse()
+ if err != nil {
+ glog.Fatalf("error loading config file %s: %s", filename, err)
+ }
+ return result
+}
+
+// Loads config information from a JSON string
+func LoadConfigString(s string) *Config {
+ result := newConfig()
+ err := json.Unmarshal([]byte(s), &result.data)
+ if err != nil {
+ glog.Fatalf("error parsing config string %s: %s", s, err)
+ }
+ return result
+}
+
+func (c *Config) StringMerge(s string) {
+ next := LoadConfigString(s)
+ c.merge(next.data)
+}
+
+func (c *Config) LoadMerge(filename string) {
+ next := LoadConfig(filename)
+ c.merge(next.data)
+}
+
+func (c *Config) merge(ndata map[string]interface{}) {
+ for k, v := range ndata {
+ c.data[k] = v
+ }
+}
+
+func (c *Config) parse() error {
+ f, err := os.Open(c.filename)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+ b := new(bytes.Buffer)
+ _, err = b.ReadFrom(f)
+ if err != nil {
+ return err
+ }
+ err = json.Unmarshal(b.Bytes(), &c.data)
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// Returns a string for the config variable key
+func (c *Config) GetString(key string) string {
+ result, present := c.data[key]
+ if !present {
+ return ""
+ }
+ return result.(string)
+}
+
+// Returns an int for the config variable key
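+// (JSON numbers are decoded into float64, hence the conversion below).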
+func (c *Config) GetInt(key string) int {
+ x, ok := c.data[key]
+ if !ok {
+ return -1
+ }
+ return int(x.(float64))
+}
+
+// Returns a float for the config variable key
+func (c *Config) GetFloat(key string) float64 {
+ x, ok := c.data[key]
+ if !ok {
+ return -1
+ }
+ return x.(float64)
+}
+
+// Returns a bool for the config variable key
+func (c *Config) GetBool(key string) bool {
+ x, ok := c.data[key]
+ if !ok {
+ return false
+ }
+ return x.(bool)
+}
+
+// Returns an array for the config variable key
+func (c *Config) GetArray(key string) []interface{} {
+ result, present := c.data[key]
+ if !present {
+ return []interface{}(nil)
+ }
+ return result.([]interface{})
+}
diff --git a/weed/util/constants.go b/weed/util/constants.go
new file mode 100644
index 000000000..6b6b0b911
--- /dev/null
+++ b/weed/util/constants.go
@@ -0,0 +1,5 @@
+package util
+
+const (
+ VERSION = "0.71 beta"
+)
diff --git a/weed/util/file_util.go b/weed/util/file_util.go
new file mode 100644
index 000000000..a39fb0860
--- /dev/null
+++ b/weed/util/file_util.go
@@ -0,0 +1,38 @@
+package util
+
+import (
+ "bufio"
+ "errors"
+ "os"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+func TestFolderWritable(folder string) (err error) {
+ fileInfo, err := os.Stat(folder)
+ if err != nil {
+ return err
+ }
+ if !fileInfo.IsDir() {
+ return errors.New("Not a valid folder!")
+ }
+ perm := fileInfo.Mode().Perm()
+ glog.V(0).Infoln("Folder", folder, "Permission:", perm)
+ if 0200&perm != 0 {
+ return nil
+ }
+ return errors.New("Not writable!")
+}
+
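+// Readln reads one complete line from r, concatenating the partial reads returned by
+// ReadLine when a line is longer than the reader's buffer.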
+func Readln(r *bufio.Reader) ([]byte, error) {
+ var (
+ isPrefix = true
+ err error
+ line, ln []byte
+ )
+ for isPrefix && err == nil {
+ line, isPrefix, err = r.ReadLine()
+ ln = append(ln, line...)
+ }
+ return ln, err
+}
diff --git a/weed/util/http_util.go b/weed/util/http_util.go
new file mode 100644
index 000000000..a54fc8779
--- /dev/null
+++ b/weed/util/http_util.go
@@ -0,0 +1,163 @@
+package util
+
+import (
+ "bytes"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "net/http"
+ "net/url"
+ "strings"
+
+ "github.com/chrislusf/seaweedfs/weed/security"
+)
+
+var (
+ client *http.Client
+ Transport *http.Transport
+)
+
+func init() {
+ Transport = &http.Transport{
+ MaxIdleConnsPerHost: 1024,
+ }
+ client = &http.Client{Transport: Transport}
+}
+
+func PostBytes(url string, body []byte) ([]byte, error) {
+ r, err := client.Post(url, "application/octet-stream", bytes.NewReader(body))
+ if err != nil {
+ return nil, fmt.Errorf("Post to %s: %v", url, err)
+ }
+ defer r.Body.Close()
+ b, err := ioutil.ReadAll(r.Body)
+ if err != nil {
+ return nil, fmt.Errorf("Read response body: %v", err)
+ }
+ return b, nil
+}
+
+func Post(url string, values url.Values) ([]byte, error) {
+ r, err := client.PostForm(url, values)
+ if err != nil {
+ return nil, err
+ }
+ defer r.Body.Close()
+ b, err := ioutil.ReadAll(r.Body)
+ if err != nil {
+ return nil, err
+ }
+ return b, nil
+}
+
+func Get(url string) ([]byte, error) {
+ r, err := client.Get(url)
+ if err != nil {
+ return nil, err
+ }
+ defer r.Body.Close()
+ b, err := ioutil.ReadAll(r.Body)
+ if r.StatusCode != 200 {
+ return nil, fmt.Errorf("%s: %s", url, r.Status)
+ }
+ if err != nil {
+ return nil, err
+ }
+ return b, nil
+}
+
+func Delete(url string, jwt security.EncodedJwt) error {
+ req, err := http.NewRequest("DELETE", url, nil)
+ if jwt != "" {
+ req.Header.Set("Authorization", "BEARER "+string(jwt))
+ }
+ if err != nil {
+ return err
+ }
+ resp, e := client.Do(req)
+ if e != nil {
+ return e
+ }
+ defer resp.Body.Close()
+ body, err := ioutil.ReadAll(resp.Body)
+ if err != nil {
+ return err
+ }
+ switch resp.StatusCode {
+ case http.StatusNotFound, http.StatusAccepted, http.StatusOK:
+ return nil
+ }
+ m := make(map[string]interface{})
+ if e := json.Unmarshal(body, &m); e == nil {
+ if s, ok := m["error"].(string); ok {
+ return errors.New(s)
+ }
+ }
+ return errors.New(string(body))
+}
+
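+// GetBufferStream posts the form values and streams the response body through
+// allocatedBytes, invoking eachBuffer only for reads that completely fill the buffer;
+// a trailing partial read is not delivered.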
+func GetBufferStream(url string, values url.Values, allocatedBytes []byte, eachBuffer func([]byte)) error {
+ r, err := client.PostForm(url, values)
+ if err != nil {
+ return err
+ }
+ defer r.Body.Close()
+ if r.StatusCode != 200 {
+ return fmt.Errorf("%s: %s", url, r.Status)
+ }
+ bufferSize := len(allocatedBytes)
+ for {
+ n, err := r.Body.Read(allocatedBytes)
+ if n == bufferSize {
+ eachBuffer(allocatedBytes)
+ }
+ if err != nil {
+ if err == io.EOF {
+ return nil
+ }
+ return err
+ }
+ }
+}
+
+func GetUrlStream(url string, values url.Values, readFn func(io.Reader) error) error {
+ r, err := client.PostForm(url, values)
+ if err != nil {
+ return err
+ }
+ defer r.Body.Close()
+ if r.StatusCode != 200 {
+ return fmt.Errorf("%s: %s", url, r.Status)
+ }
+ return readFn(r.Body)
+}
+
+func DownloadUrl(fileUrl string) (filename string, rc io.ReadCloser, e error) {
+ response, err := client.Get(fileUrl)
+ if err != nil {
+ return "", nil, err
+ }
+ contentDisposition := response.Header["Content-Disposition"]
+ if len(contentDisposition) > 0 {
+ if strings.HasPrefix(contentDisposition[0], "filename=") {
+ filename = contentDisposition[0][len("filename="):]
+ filename = strings.Trim(filename, "\"")
+ }
+ }
+ rc = response.Body
+ return
+}
+
+func Do(req *http.Request) (resp *http.Response, err error) {
+ return client.Do(req)
+}
+
+func NormalizeUrl(url string) string {
+ if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") {
+ return url
+ }
+ return "http://" + url
+}
diff --git a/weed/util/net_timeout.go b/weed/util/net_timeout.go
new file mode 100644
index 000000000..f46776992
--- /dev/null
+++ b/weed/util/net_timeout.go
@@ -0,0 +1,81 @@
+package util
+
+import (
+ "net"
+ "time"
+
+ "github.com/chrislusf/seaweedfs/weed/stats"
+)
+
+// Listener wraps a net.Listener, and gives a place to store the timeout
+// parameters. On Accept, it will wrap the net.Conn with our own Conn for us.
+type Listener struct {
+ net.Listener
+ ReadTimeout time.Duration
+ WriteTimeout time.Duration
+}
+
+func (l *Listener) Accept() (net.Conn, error) {
+ c, err := l.Listener.Accept()
+ if err != nil {
+ return nil, err
+ }
+ stats.ConnectionOpen()
+ tc := &Conn{
+ Conn: c,
+ ReadTimeout: l.ReadTimeout,
+ WriteTimeout: l.WriteTimeout,
+ }
+ return tc, nil
+}
+
+// Conn wraps a net.Conn, and sets a deadline for every read
+// and write operation.
+type Conn struct {
+ net.Conn
+ ReadTimeout time.Duration
+ WriteTimeout time.Duration
+}
+
+func (c *Conn) Read(b []byte) (count int, e error) {
+ err := c.Conn.SetReadDeadline(time.Now().Add(c.ReadTimeout))
+ if err != nil {
+ return 0, err
+ }
+ count, e = c.Conn.Read(b)
+ if e == nil {
+ stats.BytesIn(int64(count))
+ }
+ return
+}
+
+func (c *Conn) Write(b []byte) (count int, e error) {
+ err := c.Conn.SetWriteDeadline(time.Now().Add(c.WriteTimeout))
+ if err != nil {
+ return 0, err
+ }
+ count, e = c.Conn.Write(b)
+ if e == nil {
+ stats.BytesOut(int64(count))
+ }
+ return
+}
+
+func (c *Conn) Close() error {
+ stats.ConnectionClose()
+ return c.Conn.Close()
+}
+
+func NewListener(addr string, timeout time.Duration) (net.Listener, error) {
+ l, err := net.Listen("tcp", addr)
+ if err != nil {
+ return nil, err
+ }
+
+ tl := &Listener{
+ Listener: l,
+ ReadTimeout: timeout,
+ WriteTimeout: timeout,
+ }
+ return tl, nil
+}
diff --git a/weed/util/parse.go b/weed/util/parse.go
new file mode 100644
index 000000000..0a8317c19
--- /dev/null
+++ b/weed/util/parse.go
@@ -0,0 +1,26 @@
+package util
+
+import (
+ "strconv"
+)
+
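+// ParseInt returns defaultValue when text is empty, 0 when text is non-empty but not a
+// valid integer, and the parsed value otherwise; ParseUint64 behaves the same way.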
+func ParseInt(text string, defaultValue int) int {
+ count, parseError := strconv.ParseInt(text, 10, 64)
+ if parseError != nil {
+ if len(text) > 0 {
+ return 0
+ }
+ return defaultValue
+ }
+ return int(count)
+}
+func ParseUint64(text string, defaultValue uint64) uint64 {
+ count, parseError := strconv.ParseUint(text, 10, 64)
+ if parseError != nil {
+ if len(text) > 0 {
+ return 0
+ }
+ return defaultValue
+ }
+ return count
+}
diff --git a/weed/weed.go b/weed/weed.go
new file mode 100644
index 000000000..c18eac013
--- /dev/null
+++ b/weed/weed.go
@@ -0,0 +1,170 @@
+package main
+
+import (
+ "flag"
+ "fmt"
+ "io"
+ "math/rand"
+ "os"
+ "strings"
+ "sync"
+ "text/template"
+ "time"
+ "unicode"
+ "unicode/utf8"
+
+ "github.com/chrislusf/seaweedfs/weed/command"
+ "github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+var IsDebug *bool
+var server *string
+
+var commands = command.Commands
+
+var exitStatus = 0
+var exitMu sync.Mutex
+
+func setExitStatus(n int) {
+ exitMu.Lock()
+ if exitStatus < n {
+ exitStatus = n
+ }
+ exitMu.Unlock()
+}
+
+func main() {
+ glog.MaxSize = 1024 * 1024 * 32
+ rand.Seed(time.Now().UnixNano())
+ flag.Usage = usage
+ flag.Parse()
+
+ args := flag.Args()
+ if len(args) < 1 {
+ usage()
+ }
+
+ if args[0] == "help" {
+ help(args[1:])
+ for _, cmd := range commands {
+ if len(args) >= 2 && cmd.Name() == args[1] && cmd.Run != nil {
+ fmt.Fprintf(os.Stderr, "Default Parameters:\n")
+ cmd.Flag.PrintDefaults()
+ }
+ }
+ return
+ }
+
+ for _, cmd := range commands {
+ if cmd.Name() == args[0] && cmd.Run != nil {
+ cmd.Flag.Usage = func() { cmd.Usage() }
+ cmd.Flag.Parse(args[1:])
+ args = cmd.Flag.Args()
+ IsDebug = cmd.IsDebug
+ if !cmd.Run(cmd, args) {
+ fmt.Fprintf(os.Stderr, "\n")
+ cmd.Flag.Usage()
+ fmt.Fprintf(os.Stderr, "Default Parameters:\n")
+ cmd.Flag.PrintDefaults()
+ }
+ exit()
+ return
+ }
+ }
+
+ fmt.Fprintf(os.Stderr, "weed: unknown subcommand %q\nRun 'weed help' for usage.\n", args[0])
+ setExitStatus(2)
+ exit()
+}
+
+var usageTemplate = `
+SeaweedFS: store billions of files and serve them fast!
+
+Usage:
+
+ weed command [arguments]
+
+The commands are:
+{{range .}}{{if .Runnable}}
+ {{.Name | printf "%-11s"}} {{.Short}}{{end}}{{end}}
+
+Use "weed help [command]" for more information about a command.
+
+`
+
+var helpTemplate = `{{if .Runnable}}Usage: weed {{.UsageLine}}
+{{end}}
+ {{.Long}}
+`
+
+// tmpl executes the given template text on data, writing the result to w.
+func tmpl(w io.Writer, text string, data interface{}) {
+ t := template.New("top")
+ t.Funcs(template.FuncMap{"trim": strings.TrimSpace, "capitalize": capitalize})
+ template.Must(t.Parse(text))
+ if err := t.Execute(w, data); err != nil {
+ panic(err)
+ }
+}
+
+func capitalize(s string) string {
+ if s == "" {
+ return s
+ }
+ r, n := utf8.DecodeRuneInString(s)
+ return string(unicode.ToTitle(r)) + s[n:]
+}
+
+func printUsage(w io.Writer) {
+ tmpl(w, usageTemplate, commands)
+}
+
+func usage() {
+ printUsage(os.Stderr)
+ fmt.Fprintf(os.Stderr, "For Logging, use \"weed [logging_options] [command]\". The logging options are:\n")
+ flag.PrintDefaults()
+ os.Exit(2)
+}
+
+// help implements the 'help' command.
+func help(args []string) {
+ if len(args) == 0 {
+ printUsage(os.Stdout)
+ // not exit 2: succeeded at 'weed help'.
+ return
+ }
+ if len(args) != 1 {
+ fmt.Fprintf(os.Stderr, "usage: weed help command\n\nToo many arguments given.\n")
+ os.Exit(2) // failed at 'weed help'
+ }
+
+ arg := args[0]
+
+ for _, cmd := range commands {
+ if cmd.Name() == arg {
+ tmpl(os.Stdout, helpTemplate, cmd)
+ // not exit 2: succeeded at 'weed help cmd'.
+ return
+ }
+ }
+
+ fmt.Fprintf(os.Stderr, "Unknown help topic %#q. Run 'weed help'.\n", arg)
+ os.Exit(2) // failed at 'weed help cmd'
+}
+
+var atexitFuncs []func()
+
+func atexit(f func()) {
+ atexitFuncs = append(atexitFuncs, f)
+}
+
+func exit() {
+ for _, f := range atexitFuncs {
+ f()
+ }
+ os.Exit(exitStatus)
+}
+
+func debug(params ...interface{}) {
+ glog.V(4).Infoln(params)
+}